"""
Google Translate Provider - Production-ready implementation.

Extends the TranslationProvider base class with robust error handling,
retry logic, and health monitoring.

Features:
- Specific error codes for all Google API errors
- Retry logic with exponential backoff for transient errors
- Timeout configuration
- Health check with caching
- Structlog-compatible logging (no document content in logs)
"""

import os
import socket
import threading
import time
from concurrent.futures import (
    ThreadPoolExecutor,
    TimeoutError as FuturesTimeoutError,
)
from datetime import datetime, timezone
from typing import Any, Dict, List, Optional

from core.logging import get_logger

from .base import TranslationProvider
from .schemas import (
    BatchTranslationRequest,
    BatchTranslationResponse,
    ProviderHealthStatus,
    TranslationRequest,
    TranslationResponse,
)

logger = get_logger(__name__)

# Error codes surfaced to callers through TranslationResponse.error_code.
GOOGLE_QUOTA_EXCEEDED = "GOOGLE_QUOTA_EXCEEDED"
GOOGLE_INVALID_KEY = "GOOGLE_INVALID_KEY"
GOOGLE_NETWORK_ERROR = "GOOGLE_NETWORK_ERROR"
GOOGLE_UNSUPPORTED_LANGUAGE = "GOOGLE_UNSUPPORTED_LANGUAGE"
GOOGLE_TEXT_TOO_LONG = "GOOGLE_TEXT_TOO_LONG"

# Maximum number of characters accepted per request by _make_api_request.
_MAX_TEXT_LENGTH = 5000

# Maps caller-facing language codes to the codes passed to deep_translator.
# Keys are lowercase; lookups are done on the lowercased input.
# Intended to align with services.translation_service.GoogleTranslationProvider.
# NOTE(review): the previous table contained both "iw" -> "he" and
# "he" -> "iw", which swapped Hebrew whichever code the caller supplied.
# deep_translator lists Hebrew as "iw", so "iw" is now passed through
# unchanged - confirm against the installed deep_translator version.
_GOOGLE_LANG_MAP: dict[str, str] = {
    "zh": "zh-CN",
    "zh-cn": "zh-CN",
    "zh-tw": "zh-TW",
    "he": "iw",
    "jv": "jw",
    "nb": "no",
}


def _normalize_lang_for_google(code: str) -> str:
    """Return the language code to hand to deep_translator for *code*.

    Empty/None input and any casing of "auto" (ignoring surrounding
    whitespace) yield "auto". Codes present in ``_GOOGLE_LANG_MAP``
    (matched case-insensitively) are remapped; anything else is returned
    stripped but otherwise unchanged.
    """
    if not code:
        return "auto"
    stripped = code.strip()
    lowered = stripped.lower()
    if not stripped or lowered == "auto":
        return "auto"
    return _GOOGLE_LANG_MAP.get(lowered, stripped)


# Error codes for which translate_text retries with backoff.
_RETRYABLE_ERRORS = {GOOGLE_NETWORK_ERROR, GOOGLE_QUOTA_EXCEEDED}


class GoogleProviderError(Exception):
    """Exception raised for Google Translate API errors.

    Attributes are set in ``__init__``:
    ``code`` (one of the GOOGLE_* constants), ``message`` (user-facing
    text) and ``details`` (a dict, empty when none were given).
    """

    def __init__(
        self, code: str, message: str, details: Optional[Dict[str, Any]] = None
    ):
        self.code = code
        self.message = message
        self.details = details or {}
        super().__init__(message)

    def to_dict(self) -> Dict[str, Any]:
        """Convert the error to a dict; ``details`` is included only if non-empty."""
        result: Dict[str, Any] = {
            "error": self.code,
            "message": self.message,
        }
        if self.details:
            result["details"] = self.details
        return result


class GoogleTranslationProvider(TranslationProvider):
    """
    Google Translate implementation using the deep_translator library.

    Features:
    - Translator instances are stored per thread (``threading.local``)
    - Optional caching through the cache object imported from
      ``services.translation_service``
    - Batch translation (delegated to the legacy provider when possible)
    - Error mapping to specific error codes
    - Retry logic with exponential backoff
    - Configurable timeout
    - Health check with result caching
    """

    def __init__(
        self,
        use_cache: bool = True,
        timeout: int = 30,
        max_retries: int = 3,
        retry_delay: float = 1.0,
    ):
        """
        Initialize the Google Translate provider.

        Args:
            use_cache: Whether to use translation caching (default: True)
            timeout: Request timeout in seconds (default: 30)
            max_retries: Maximum retry attempts for transient errors (default: 3)
            retry_delay: Initial retry delay in seconds (default: 1.0)
        """
        self._local = threading.local()
        self._use_cache = use_cache
        self._provider_name = "google"
        self._cache = None
        self.timeout = timeout
        self.max_retries = max_retries
        self.retry_delay = retry_delay
        # Keyed by "is_available" / "health_check"; each entry is
        # {"value": ..., "timestamp": <time.time()>}.
        self._health_cache: Dict[str, Any] = {}
        self._health_cache_ttl = 60  # seconds
        self._health_cache_lock = threading.Lock()

        if use_cache:
            self._init_cache()

    def _init_cache(self):
        """Bind ``self._cache`` to the shared cache from translation_service."""
        # Imported lazily, as in the rest of this module, rather than at
        # module import time.
        from services.translation_service import _translation_cache

        self._cache = _translation_cache

    def _get_translator(self, source_language: str, target_language: str):
        """Get or create a translator for the current thread and language pair."""
        from deep_translator import GoogleTranslator

        src = _normalize_lang_for_google(source_language)
        tgt = _normalize_lang_for_google(target_language)
        key = f"{src}_{tgt}"

        if not hasattr(self._local, "translators"):
            self._local.translators = {}

        if key not in self._local.translators:
            self._local.translators[key] = GoogleTranslator(source=src, target=tgt)

        return self._local.translators[key]

    @staticmethod
    def _classify_error(exc: Exception, target_language: str) -> GoogleProviderError:
        """Map an arbitrary exception to a GoogleProviderError with a specific code.

        Timeout exception types are checked first because their type is more
        reliable than message substrings (their message is often empty).
        The remaining checks are substring heuristics on the lowercased
        exception message, evaluated in order.
        """
        if isinstance(exc, (socket.timeout, TimeoutError, FuturesTimeoutError)):
            return GoogleProviderError(
                code=GOOGLE_NETWORK_ERROR,
                message="Service Google Translate indisponible. Réessayez.",
                details={"provider": "google", "error_type": "timeout"},
            )

        error_str = str(exc).lower()

        if "quota" in error_str or "limit" in error_str or "429" in error_str:
            return GoogleProviderError(
                code=GOOGLE_QUOTA_EXCEEDED,
                message="Quota Google Translate dépassé. Réessayez demain.",
                details={"provider": "google"},
            )
        if "api" in error_str and (
            "key" in error_str
            or "invalid" in error_str
            or "401" in error_str
            or "403" in error_str
        ):
            return GoogleProviderError(
                code=GOOGLE_INVALID_KEY,
                message="Clé API Google invalide. Contactez l'administrateur.",
                details={"provider": "google"},
            )
        if "language" in error_str or "not supported" in error_str:
            # NOTE(review): the message always names the target language,
            # even when the source language may be the unsupported one.
            return GoogleProviderError(
                code=GOOGLE_UNSUPPORTED_LANGUAGE,
                message=f"Langue '{target_language}' non supportée par Google.",
                details={"unsupported_language": target_language},
            )
        if "timeout" in error_str:
            return GoogleProviderError(
                code=GOOGLE_NETWORK_ERROR,
                message="Service Google Translate indisponible. Réessayez.",
                details={"provider": "google", "error_type": "timeout"},
            )
        return GoogleProviderError(
            code=GOOGLE_NETWORK_ERROR,
            message="Service Google Translate indisponible. Réessayez.",
            details={"provider": "google", "original_error": str(exc)[:100]},
        )

    def _make_api_request(
        self, text: str, source_language: str, target_language: str
    ) -> str:
        """
        Make the translation request with timeout and error mapping.

        Args:
            text: Text to translate (at most ``_MAX_TEXT_LENGTH`` characters).
            source_language: Source language code or "auto".
            target_language: Target language code.

        Returns:
            The value returned by the translator's ``translate`` call.

        Raises:
            GoogleProviderError: For any failure, with a specific code.
        """
        if len(text) > _MAX_TEXT_LENGTH:
            raise GoogleProviderError(
                code=GOOGLE_TEXT_TOO_LONG,
                message="Texte trop long (max 5000 caractères par requête).",
                details={"text_length": len(text), "max_length": _MAX_TEXT_LENGTH},
            )

        try:
            translator = self._get_translator(source_language, target_language)

            # The timeout is applied by running the call in a worker thread.
            # The executor is deliberately NOT used as a context manager:
            # its __exit__ calls shutdown(wait=True), which would block until
            # the worker finished and defeat the timeout. With wait=False the
            # caller returns immediately; a timed-out worker keeps running in
            # the background until its call completes.
            executor = ThreadPoolExecutor(max_workers=1)
            try:
                future = executor.submit(translator.translate, text)
                return future.result(timeout=self.timeout)
            finally:
                executor.shutdown(wait=False)
        except GoogleProviderError:
            raise
        except Exception as e:
            raise self._classify_error(e, target_language) from e

    def get_name(self) -> str:
        """Return provider name."""
        return self._provider_name

    def is_available(self) -> bool:
        """
        Check if Google Translate is available.

        Uses the cached result if present and younger than the cache TTL.
        The check only verifies that a translator object can be obtained
        for "auto" -> "en"; no translate call is issued here.
        """
        current_time = time.time()

        with self._health_cache_lock:
            if "is_available" in self._health_cache:
                cached = self._health_cache["is_available"]
                if current_time - cached["timestamp"] < self._health_cache_ttl:
                    return cached["value"]

        try:
            translator = self._get_translator("auto", "en")
            available = translator is not None
        except Exception as e:
            logger.warning(
                "google_availability_check_failed",
                error=str(e)[:100],
            )
            available = False

        with self._health_cache_lock:
            self._health_cache["is_available"] = {
                "value": available,
                "timestamp": current_time,
            }

        return available

    def _store_in_cache(
        self, text: str, target_language: str, source_language: str, result: str
    ) -> None:
        """Write a translation to the cache; failures are logged, never raised."""
        if not self._use_cache or self._cache is None:
            return
        try:
            self._cache.set(
                text,
                target_language,
                source_language,
                self._provider_name,
                result,
            )
        except Exception as e:
            # A cache write failure must not discard a successful translation
            # or trigger another API call.
            logger.warning(
                "google_translation_cache_set_failed",
                error_type=type(e).__name__,
            )

    def translate_text(self, request: TranslationRequest) -> TranslationResponse:
        """
        Translate a single text string using Google Translate.

        Behavior:
        - Empty or whitespace-only text is returned unchanged.
        - If the source language is explicit and equals the target language,
          the text is returned unchanged without an API call.
        - A cache hit is returned with ``from_cache=True``.
        - Otherwise the request is attempted up to ``max_retries + 1`` times;
          retryable errors (``_RETRYABLE_ERRORS``) back off exponentially
          starting at ``retry_delay`` seconds.
        - Requests longer than 5,000 characters fail with GOOGLE_TEXT_TOO_LONG.

        Errors are not raised: on failure the original text is returned with
        ``error``, ``error_code`` and ``error_details`` set.

        Args:
            request: TranslationRequest with text and language info

        Returns:
            TranslationResponse with translated text
        """
        text = request.text
        target_language = request.target_language
        source_language = request.source_language or "auto"

        if not text or not text.strip():
            return TranslationResponse(
                translated_text=text,
                provider_name=self._provider_name,
                from_cache=False,
            )

        # Optimization: skip if source and target are the same.
        if source_language != "auto" and source_language == target_language:
            logger.info(
                "google_translation_skip",
                source_target_lang=target_language,
                text_length=len(text),
            )
            return TranslationResponse(
                translated_text=text,
                provider_name=self._provider_name,
                from_cache=False,
                source_language=source_language,
            )

        # "is not None" rather than truthiness: a cache object that defines
        # __len__ is falsy while empty and would otherwise never be used.
        if self._use_cache and self._cache is not None:
            cached = self._cache.get(
                text, target_language, source_language, self._provider_name
            )
            if cached is not None:
                return TranslationResponse(
                    translated_text=cached,
                    provider_name=self._provider_name,
                    from_cache=True,
                )

        last_error: Optional[GoogleProviderError] = None
        retries = 0

        while retries <= self.max_retries:
            try:
                result = self._make_api_request(text, source_language, target_language)
            except GoogleProviderError as e:
                last_error = e
                if e.code not in _RETRYABLE_ERRORS:
                    break
            except Exception as e:
                # Defensive: anything unexpected is treated as a (retryable)
                # network error.
                last_error = GoogleProviderError(
                    code=GOOGLE_NETWORK_ERROR,
                    message="Service Google Translate indisponible. Réessayez.",
                    details={"original_error": str(e)[:100]},
                )
            else:
                self._store_in_cache(text, target_language, source_language, result)

                # Log usage metrics (character count, retries); no content.
                logger.info(
                    "google_translation_success",
                    chars=len(text),
                    source_lang=source_language,
                    target_lang=target_language,
                    retries=retries,
                )

                return TranslationResponse(
                    translated_text=result,
                    provider_name=self._provider_name,
                    from_cache=False,
                )

            retries += 1
            if retries <= self.max_retries:
                # Exponential backoff: retry_delay, 2x, 4x, ...
                delay = self.retry_delay * (2 ** (retries - 1))
                logger.info(
                    "google_translation_retry",
                    attempt=retries,
                    delay_s=round(delay, 2),
                    error_code=last_error.code,
                    text_length=len(text),
                    source_lang=source_language,
                    target_lang=target_language,
                )
                time.sleep(delay)

        if last_error is not None:
            logger.error(
                "google_translation_failed",
                error_code=last_error.code,
                text_length=len(text),
                source_lang=source_language,
                target_lang=target_language,
                retries=retries,
            )
            return TranslationResponse(
                translated_text=text,
                provider_name=self._provider_name,
                from_cache=False,
                error=last_error.message,
                error_code=last_error.code,
                error_details=last_error.details,
            )

        # Only reachable when the loop never ran (max_retries < 0).
        return TranslationResponse(
            translated_text=text,
            provider_name=self._provider_name,
            from_cache=False,
            error="Unknown error",
            error_code=GOOGLE_NETWORK_ERROR,
        )

    def translate_batch(
        self, requests: List[TranslationRequest]
    ) -> List[TranslationResponse]:
        """
        Translate multiple texts.

        When all requests share the same source/target languages, delegates to
        services.translation_service.GoogleTranslationProvider.translate_batch.
        If that call raises, or returns a different number of results than
        requests, falls back to per-item ``translate_text``.

        NOTE(review): the delegated path does not go through this class's
        cache, retry loop, or error-code mapping - confirm that is intended.

        Args:
            requests: Requests to translate; may be empty.

        Returns:
            One TranslationResponse per request, in request order.
        """
        if not requests:
            return []

        tgt0 = requests[0].target_language
        src0 = requests[0].source_language or "auto"
        uniform = all(
            r.target_language == tgt0 and (r.source_language or "auto") == src0
            for r in requests
        )

        if uniform:
            try:
                from services.translation_service import (
                    GoogleTranslationProvider as LegacyGoogle,
                )

                texts = [r.text for r in requests]
                outs = LegacyGoogle().translate_batch(texts, tgt0, src0)
            except Exception as e:
                logger.warning(
                    "google_batch_legacy_fallback",
                    error_type=type(e).__name__,
                )
            else:
                # Guard against misaligned results: responses must map 1:1
                # onto requests.
                if len(outs) == len(requests):
                    return [
                        TranslationResponse(
                            translated_text=out,
                            provider_name=self._provider_name,
                            from_cache=False,
                        )
                        for out in outs
                    ]
                logger.warning(
                    "google_batch_legacy_fallback",
                    error_type="LengthMismatch",
                )

        return [self.translate_text(req) for req in requests]

    def health_check(self) -> ProviderHealthStatus:
        """
        Return health status details for the provider.

        Returns the cached status if it is younger than the cache TTL.
        Otherwise calls ``is_available()``, measures how long that took, and
        caches the resulting status.

        Returns:
            ProviderHealthStatus with availability and latency information
        """
        current_time = time.time()

        with self._health_cache_lock:
            if "health_check" in self._health_cache:
                cached = self._health_cache["health_check"]
                if current_time - cached["timestamp"] < self._health_cache_ttl:
                    return cached["value"]

        start_time = time.time()
        last_check_iso = datetime.now(timezone.utc).isoformat()

        try:
            available = self.is_available()
            latency_ms = (time.time() - start_time) * 1000

            status = ProviderHealthStatus(
                name=self._provider_name,
                available=available,
                latency_ms=round(latency_ms, 2),
                error=None if available else "Provider not available",
                last_check=last_check_iso,
            )
        except Exception as e:
            latency_ms = (time.time() - start_time) * 1000
            status = ProviderHealthStatus(
                name=self._provider_name,
                available=False,
                latency_ms=round(latency_ms, 2),
                error=str(e)[:100],
                last_check=last_check_iso,
            )

        with self._health_cache_lock:
            self._health_cache["health_check"] = {
                "value": status,
                "timestamp": current_time,
            }

        return status


def register_google_provider():
    """
    Register the Google provider in the global registry under "google".

    Returns:
        The registered provider instance (the shared one from
        ``get_google_provider``).
    """
    from .registry import registry

    provider = get_google_provider()
    registry.register("google", provider)
    return provider


# Lazily created shared provider instance; see get_google_provider().
_provider_instance: Optional[GoogleTranslationProvider] = None


def get_google_provider() -> GoogleTranslationProvider:
    """Get or create the shared Google provider instance.

    Timeout, retry count and retry delay are read from ``ProvidersConfig``
    the first time the instance is created.
    """
    global _provider_instance
    if _provider_instance is None:
        from .config import ProvidersConfig

        _provider_instance = GoogleTranslationProvider(
            use_cache=True,
            timeout=ProvidersConfig.GOOGLE_TRANSLATE_TIMEOUT,
            max_retries=ProvidersConfig.GOOGLE_TRANSLATE_MAX_RETRIES,
            retry_delay=ProvidersConfig.GOOGLE_TRANSLATE_RETRY_DELAY,
        )
    return _provider_instance


class LegacyGoogleAdapter:
    """
    Exposes GoogleTranslationProvider via the legacy interface:
    ``.translate(text, target_lang, source_lang) -> str`` and
    ``.translate_batch(texts, target_lang, source_lang) -> List[str]``.

    Responses carrying an error are converted into a raised
    ``utils.exceptions.TranslationProviderError``.
    """

    def __init__(self):
        self._provider = get_google_provider()
        self.provider_name = "google"

    @staticmethod
    def _raise_for_error(resp: TranslationResponse) -> None:
        """Raise TranslationProviderError if *resp* carries an error."""
        if resp.error:
            # Imported lazily, matching the other deferred imports in this module.
            from utils.exceptions import TranslationProviderError

            raise TranslationProviderError(
                resp.error_code or "UNKNOWN",
                resp.error or "Translation failed",
                resp.error_details,
            )

    def translate(
        self, text: str, target_language: str, source_language: str = "auto"
    ) -> str:
        """Translate one text; raise TranslationProviderError on failure."""
        req = TranslationRequest(
            text=text,
            target_language=target_language,
            source_language=source_language,
        )
        resp = self._provider.translate_text(req)
        self._raise_for_error(resp)
        return resp.translated_text

    def translate_batch(
        self,
        texts: List[str],
        target_language: str,
        source_language: str = "auto",
        batch_size: int = 50,
    ) -> List[str]:
        """Translate several texts; raise on the first response with an error.

        ``batch_size`` is accepted for signature compatibility with the
        legacy interface and is not used here.
        """
        requests = [
            TranslationRequest(
                text=t,
                target_language=target_language,
                source_language=source_language,
            )
            for t in texts
        ]
        responses = self._provider.translate_batch(requests)
        result = []
        for r in responses:
            self._raise_for_error(r)
            result.append(r.translated_text)
        return result


def get_legacy_google_adapter() -> LegacyGoogleAdapter:
    """Return an adapter so the legacy translation_service can use the new provider."""
    return LegacyGoogleAdapter()