# Files
# office_translator/services/providers/google_provider.py
# 2026-03-07 11:42:58 +01:00
#
# 535 lines
# 18 KiB
# Python

"""
Google Translate Provider - Production-ready implementation.
Extends TranslationProvider base class with robust error handling,
retry logic, and health monitoring.
Features:
- Specific error codes for all Google API errors
- Retry logic with exponential backoff for transient errors
- Timeout configuration
- Health check with caching
- Structlog-compatible logging (no document content in logs)
"""
import os
import socket
import threading
import time
from concurrent.futures import ThreadPoolExecutor, TimeoutError as FuturesTimeoutError
from datetime import datetime, timezone
from typing import Any, Dict, List, Optional
try:
    import structlog

    logger = structlog.get_logger(__name__)
except ImportError:
    import logging

    class _StdlibEventLogger:
        """
        Structlog-style facade over a standard-library logger.

        Call sites in this module log an event name plus keyword fields
        (``logger.info("event", key=value)``). A plain ``logging.Logger``
        rejects unknown keyword arguments with ``TypeError``, so the fallback
        renders the fields into the message instead.
        """

        def __init__(self, name):
            self._logger = logging.getLogger(name)

        def _emit(self, level, event, fields, exc_info=False):
            """Render ``event key=value ...`` and hand it to the stdlib logger."""
            if not self._logger.isEnabledFor(level):
                return
            if fields:
                rendered = " ".join(
                    f"{key}={value!r}" for key, value in fields.items()
                )
                event = f"{event} {rendered}"
            # Pass the text as an argument so '%' characters are never interpolated.
            self._logger.log(level, "%s", event, exc_info=exc_info)

        def debug(self, event, **fields):
            self._emit(logging.DEBUG, event, fields)

        def info(self, event, **fields):
            self._emit(logging.INFO, event, fields)

        def warning(self, event, **fields):
            self._emit(logging.WARNING, event, fields)

        def error(self, event, **fields):
            self._emit(logging.ERROR, event, fields)

        def exception(self, event, **fields):
            self._emit(logging.ERROR, event, fields, exc_info=True)

        def critical(self, event, **fields):
            self._emit(logging.CRITICAL, event, fields)

    logger = _StdlibEventLogger(__name__)
from .base import TranslationProvider
from .schemas import (
BatchTranslationRequest,
BatchTranslationResponse,
ProviderHealthStatus,
TranslationRequest,
TranslationResponse,
)
# Error codes carried by GoogleProviderError and copied into the
# error_code field of failed TranslationResponse objects.
# Raised when the underlying error text mentions "quota", "limit" or "429".
GOOGLE_QUOTA_EXCEEDED = "GOOGLE_QUOTA_EXCEEDED"
# Raised when the error text mentions "api" together with key/invalid/401/403.
GOOGLE_INVALID_KEY = "GOOGLE_INVALID_KEY"
# Raised for timeouts and for any failure that matches no other category.
GOOGLE_NETWORK_ERROR = "GOOGLE_NETWORK_ERROR"
# Raised when the error text mentions "language" or "not supported".
GOOGLE_UNSUPPORTED_LANGUAGE = "GOOGLE_UNSUPPORTED_LANGUAGE"
# Raised before any request is sent when the text exceeds 5000 characters.
GOOGLE_TEXT_TOO_LONG = "GOOGLE_TEXT_TOO_LONG"
# Codes that translate_text retries with exponential backoff; any other code
# ends the retry loop immediately.
_RETRYABLE_ERRORS = {GOOGLE_NETWORK_ERROR, GOOGLE_QUOTA_EXCEEDED}
class GoogleProviderError(Exception):
    """
    Error raised when a Google Translate call fails.

    Attributes:
        code: Machine-readable error code.
        message: Human-readable description; also used as the exception text.
        details: Extra context about the failure (empty dict when not given).
    """

    def __init__(
        self, code: str, message: str, details: Optional[Dict[str, Any]] = None
    ):
        super().__init__(message)
        self.code = code
        self.message = message
        # A missing or empty mapping is normalised to a fresh empty dict.
        self.details = details if details else {}

    def to_dict(self) -> Dict[str, Any]:
        """
        Serialise the error to a plain dictionary.

        Returns:
            A dict with "error" and "message" keys, plus "details" when the
            details mapping is non-empty.
        """
        payload: Dict[str, Any] = {"error": self.code, "message": self.message}
        if not self.details:
            return payload
        payload["details"] = self.details
        return payload
class GoogleTranslationProvider(TranslationProvider):
    """
    Google Translate implementation using the deep_translator library.

    Features:
    - Translator instances cached per thread and per language pair
    - Optional translation cache (the shared cache from translation_service)
    - Batch translation (items are translated one after another)
    - Error mapping to specific error codes
    - Retry logic with exponential backoff for retryable codes
    - Configurable per-request timeout
    - Health check with result caching
    """

    # Maximum number of characters accepted in a single request.
    _MAX_TEXT_LENGTH = 5000

    def __init__(
        self,
        use_cache: bool = True,
        timeout: int = 30,
        max_retries: int = 3,
        retry_delay: float = 1.0,
    ):
        """
        Initialize Google Translate provider.

        Args:
            use_cache: Whether to use translation caching (default: True)
            timeout: Request timeout in seconds (default: 30)
            max_retries: Maximum retry attempts for transient errors (default: 3)
            retry_delay: Initial retry delay in seconds (default: 1.0)
        """
        self._local = threading.local()
        self._use_cache = use_cache
        self._provider_name = "google"
        self._cache = None
        self.timeout = timeout
        self.max_retries = max_retries
        self.retry_delay = retry_delay
        # Maps a check name to {"value": ..., "timestamp": ...}.
        self._health_cache: Dict[str, Any] = {}
        self._health_cache_ttl = 60  # seconds
        self._health_cache_lock = threading.Lock()
        if use_cache:
            self._init_cache()

    def _init_cache(self):
        """Bind the shared translation cache exposed by translation_service."""
        # Imported lazily, at call time rather than module import time.
        from services.translation_service import _translation_cache

        self._cache = _translation_cache

    def _get_translator(self, source_language: str, target_language: str):
        """Get or create the current thread's translator for a language pair."""
        from deep_translator import GoogleTranslator

        key = f"{source_language}_{target_language}"
        if not hasattr(self._local, "translators"):
            self._local.translators = {}
        if key not in self._local.translators:
            self._local.translators[key] = GoogleTranslator(
                source=source_language, target=target_language
            )
        return self._local.translators[key]

    def _discard_translator(self, source_language: str, target_language: str) -> None:
        """Drop the current thread's cached translator for a language pair."""
        translators = getattr(self._local, "translators", None)
        if translators:
            translators.pop(f"{source_language}_{target_language}", None)

    @staticmethod
    def _map_api_error(error: Exception, target_language: str) -> GoogleProviderError:
        """
        Translate an arbitrary exception into a GoogleProviderError.

        Classification is based on substrings of the lower-cased error text,
        checked in a fixed order (quota, API key, language, timeout), with a
        generic network error as the fallback.
        """
        error_str = str(error).lower()
        if "quota" in error_str or "limit" in error_str or "429" in error_str:
            return GoogleProviderError(
                code=GOOGLE_QUOTA_EXCEEDED,
                message="Quota Google Translate dépassé. Réessayez demain.",
                details={"provider": "google"},
            )
        if "api" in error_str and (
            "key" in error_str
            or "invalid" in error_str
            or "401" in error_str
            or "403" in error_str
        ):
            return GoogleProviderError(
                code=GOOGLE_INVALID_KEY,
                message="Clé API Google invalide. Contactez l'administrateur.",
                details={"provider": "google"},
            )
        if "language" in error_str or "not supported" in error_str:
            return GoogleProviderError(
                code=GOOGLE_UNSUPPORTED_LANGUAGE,
                message=f"Langue '{target_language}' non supportée par Google.",
                details={"unsupported_language": target_language},
            )
        if (
            isinstance(error, (socket.timeout, TimeoutError, FuturesTimeoutError))
            or "timeout" in error_str
        ):
            return GoogleProviderError(
                code=GOOGLE_NETWORK_ERROR,
                message="Service Google Translate indisponible. Réessayez.",
                details={"provider": "google", "error_type": "timeout"},
            )
        return GoogleProviderError(
            code=GOOGLE_NETWORK_ERROR,
            message="Service Google Translate indisponible. Réessayez.",
            details={"provider": "google", "original_error": str(error)[:100]},
        )

    def _make_api_request(
        self, text: str, source_language: str, target_language: str
    ) -> str:
        """
        Make API request with error mapping.

        The call runs in a single-use worker thread so that self.timeout can
        be enforced (deep_translator has no timeout parameter).

        Raises:
            GoogleProviderError: For any API errors with specific codes
        """
        if len(text) > self._MAX_TEXT_LENGTH:
            raise GoogleProviderError(
                code=GOOGLE_TEXT_TOO_LONG,
                message=(
                    f"Texte trop long (max {self._MAX_TEXT_LENGTH} "
                    "caractères par requête)."
                ),
                details={
                    "text_length": len(text),
                    "max_length": self._MAX_TEXT_LENGTH,
                },
            )

        executor: Optional[ThreadPoolExecutor] = None
        try:
            translator = self._get_translator(source_language, target_language)
            executor = ThreadPoolExecutor(max_workers=1)
            future = executor.submit(translator.translate, text)
            return future.result(timeout=self.timeout)
        except Exception as e:
            if isinstance(e, FuturesTimeoutError):
                # The abandoned worker may still be inside this translator;
                # make the next attempt build a fresh instance.
                self._discard_translator(source_language, target_language)
            raise self._map_api_error(e, target_language) from e
        finally:
            if executor is not None:
                # wait=False is essential: using the executor as a context
                # manager waits for the worker on exit, which would block past
                # the timeout until the hung call returns.
                executor.shutdown(wait=False)

    def get_name(self) -> str:
        """Return provider name."""
        return self._provider_name

    def is_available(self) -> bool:
        """
        Check if Google Translate is available.

        The check only verifies that a translator object can be created; it
        does not send a translation request. The result is cached for
        self._health_cache_ttl seconds.
        """
        current_time = time.time()
        with self._health_cache_lock:
            if "is_available" in self._health_cache:
                cached = self._health_cache["is_available"]
                if current_time - cached["timestamp"] < self._health_cache_ttl:
                    return cached["value"]

        try:
            translator = self._get_translator("auto", "en")
            available = translator is not None
        except Exception as e:
            logger.warning(
                "google_availability_check_failed",
                error=str(e)[:100],
            )
            available = False

        with self._health_cache_lock:
            self._health_cache["is_available"] = {
                "value": available,
                "timestamp": current_time,
            }
        return available

    def _store_in_cache(
        self, text: str, target_language: str, source_language: str, translated: str
    ) -> None:
        """Write a translation to the cache; a failing cache never fails the call."""
        if not self._use_cache or self._cache is None:
            return
        try:
            self._cache.set(
                text,
                target_language,
                source_language,
                self._provider_name,
                translated,
            )
        except Exception as e:
            # Only the exception type is logged, so no document content can leak.
            logger.warning(
                "google_cache_write_failed",
                error_type=type(e).__name__,
            )

    def translate_text(self, request: TranslationRequest) -> TranslationResponse:
        """
        Translate a single text string using Google Translate.

        API Usage Notes:
        - Google Translate free tier: 500,000 characters/month
        - 5,000 characters max per request
        - Cost: ~$20 per million characters (paid tier)

        Optimization: Skips API call if source == target language.

        Failures are reported through the error fields of the response (with
        the original text as translated_text) rather than raised.

        Args:
            request: TranslationRequest with text and language info

        Returns:
            TranslationResponse with translated text
        """
        text = request.text
        target_language = request.target_language
        source_language = request.source_language or "auto"

        # Nothing to translate: echo empty / whitespace-only input.
        if not text or not text.strip():
            return TranslationResponse(
                translated_text=text,
                provider_name=self._provider_name,
                from_cache=False,
            )

        # Optimization: Skip if source and target are the same
        if source_language != "auto" and source_language == target_language:
            logger.info(
                "google_translation_skip",
                source_target_lang=target_language,
                text_length=len(text),
            )
            return TranslationResponse(
                translated_text=text,
                provider_name=self._provider_name,
                from_cache=False,
                source_language=source_language,
            )

        # Compare with None, not truthiness: a cache object that defines
        # __len__ is falsy while empty and would never be populated.
        if self._use_cache and self._cache is not None:
            cached = self._cache.get(
                text, target_language, source_language, self._provider_name
            )
            if cached is not None:
                return TranslationResponse(
                    translated_text=cached,
                    provider_name=self._provider_name,
                    from_cache=True,
                )

        last_error: Optional[GoogleProviderError] = None
        retries = 0
        while retries <= self.max_retries:
            try:
                result = self._make_api_request(text, source_language, target_language)
                self._store_in_cache(text, target_language, source_language, result)
                # Log usage metrics (character count, API call)
                logger.info(
                    "google_translation_success",
                    chars=len(text),
                    source_lang=source_language,
                    target_lang=target_language,
                    retries=retries,
                )
                return TranslationResponse(
                    translated_text=result,
                    provider_name=self._provider_name,
                    from_cache=False,
                )
            except GoogleProviderError as e:
                last_error = e
                if e.code not in _RETRYABLE_ERRORS:
                    break
            except Exception as e:
                last_error = GoogleProviderError(
                    code=GOOGLE_NETWORK_ERROR,
                    message="Service Google Translate indisponible. Réessayez.",
                    details={"original_error": str(e)[:100]},
                )

            retries += 1
            if retries <= self.max_retries:
                # Exponential backoff: retry_delay, 2x, 4x, ...
                delay = self.retry_delay * (2 ** (retries - 1))
                logger.info(
                    "google_translation_retry",
                    attempt=retries,
                    delay_s=round(delay, 2),
                    error_code=last_error.code,
                    text_length=len(text),
                    source_lang=source_language,
                    target_lang=target_language,
                )
                time.sleep(delay)

        if last_error:
            logger.error(
                "google_translation_failed",
                error_code=last_error.code,
                text_length=len(text),
                source_lang=source_language,
                target_lang=target_language,
                retries=retries,
            )
            return TranslationResponse(
                translated_text=text,
                provider_name=self._provider_name,
                from_cache=False,
                error=last_error.message,
                error_code=last_error.code,
                error_details=last_error.details,
            )

        # Only reachable when the loop never ran (negative max_retries).
        return TranslationResponse(
            translated_text=text,
            provider_name=self._provider_name,
            from_cache=False,
            error="Unknown error",
            error_code=GOOGLE_NETWORK_ERROR,
        )

    def translate_batch(
        self, requests: List[TranslationRequest]
    ) -> List[TranslationResponse]:
        """
        Translate multiple texts, one request at a time.

        Each item goes through translate_text, so caching, retries and error
        reporting behave exactly as for single translations.

        Args:
            requests: List of TranslationRequest objects

        Returns:
            List of TranslationResponse objects, in the same order
        """
        if not requests:
            return []
        return [self.translate_text(req) for req in requests]

    def health_check(self) -> ProviderHealthStatus:
        """
        Return health status details for the provider.

        Delegates to is_available() and records how long that check took.
        The resulting status is cached for self._health_cache_ttl seconds.

        Returns:
            ProviderHealthStatus with availability and latency information
        """
        current_time = time.time()
        with self._health_cache_lock:
            if "health_check" in self._health_cache:
                cached = self._health_cache["health_check"]
                if current_time - cached["timestamp"] < self._health_cache_ttl:
                    return cached["value"]

        # Monotonic clock: wall-clock adjustments must not distort the latency.
        started = time.perf_counter()
        last_check_iso = datetime.now(timezone.utc).isoformat()
        try:
            available = self.is_available()
            latency_ms = (time.perf_counter() - started) * 1000
            status = ProviderHealthStatus(
                name=self._provider_name,
                available=available,
                latency_ms=round(latency_ms, 2),
                error=None if available else "Provider not available",
                last_check=last_check_iso,
            )
        except Exception as e:
            latency_ms = (time.perf_counter() - started) * 1000
            status = ProviderHealthStatus(
                name=self._provider_name,
                available=False,
                latency_ms=round(latency_ms, 2),
                error=str(e)[:100],
                last_check=last_check_iso,
            )

        with self._health_cache_lock:
            self._health_cache["health_check"] = {
                "value": status,
                "timestamp": current_time,
            }
        return status
def register_google_provider():
    """
    Add the shared Google provider to the global provider registry.

    The provider is obtained from get_google_provider() and registered
    under the name "google".

    Returns:
        The provider instance that was registered.
    """
    from .registry import registry

    google = get_google_provider()
    registry.register("google", google)
    return google
_provider_instance = None
def get_google_provider() -> GoogleTranslationProvider:
"""Get or create the Google provider instance (reads config from env)."""
global _provider_instance
if _provider_instance is None:
from .config import ProvidersConfig
_provider_instance = GoogleTranslationProvider(
use_cache=True,
timeout=ProvidersConfig.GOOGLE_TRANSLATE_TIMEOUT,
max_retries=ProvidersConfig.GOOGLE_TRANSLATE_MAX_RETRIES,
retry_delay=ProvidersConfig.GOOGLE_TRANSLATE_RETRY_DELAY,
)
return _provider_instance
class LegacyGoogleAdapter:
    """
    Bridge between GoogleTranslationProvider and the legacy string-based
    interface used by translation_service:
    .translate(text, target_lang, source_lang) -> str and
    .translate_batch(texts, target_lang, source_lang) -> List[str].

    Any response carrying an error is turned into a TranslationProviderError
    so the API layer can map it to an HTTP error.
    """

    def __init__(self):
        self._provider = get_google_provider()
        self.provider_name = "google"

    @staticmethod
    def _text_or_raise(response) -> str:
        """Return the translated text, or raise if the response has an error."""
        if not response.error:
            return response.translated_text

        # Imported only on the failure path.
        from utils.exceptions import TranslationProviderError

        raise TranslationProviderError(
            response.error_code or "UNKNOWN",
            response.error or "Translation failed",
            response.error_details,
        )

    def translate(
        self, text: str, target_language: str, source_language: str = "auto"
    ) -> str:
        """Translate one string and return the translated text."""
        single = TranslationRequest(
            text=text,
            target_language=target_language,
            source_language=source_language,
        )
        return self._text_or_raise(self._provider.translate_text(single))

    def translate_batch(
        self,
        texts: List[str],
        target_language: str,
        source_language: str = "auto",
        batch_size: int = 50,
    ) -> List[str]:
        """
        Translate several strings and return the translated texts in order.

        Raises on the first response that carries an error. batch_size is
        accepted as part of the signature but is not used by this adapter.
        """
        batch = [
            TranslationRequest(
                text=item,
                target_language=target_language,
                source_language=source_language,
            )
            for item in texts
        ]
        return [
            self._text_or_raise(response)
            for response in self._provider.translate_batch(batch)
        ]
def get_legacy_google_adapter() -> LegacyGoogleAdapter:
    """Build a new LegacyGoogleAdapter for use by the legacy translation_service."""
    adapter = LegacyGoogleAdapter()
    return adapter