Major changes across backend, frontend, infrastructure: - Provider system with model selection (Google, DeepL, OpenAI, Ollama, Google Cloud) - Admin panel: user management, pricing, settings - Glossary system with CSV import/export - Subscription and tier quota management - Security hardening (rate limiting, API key auth, path traversal fixes) - Docker compose for dev, prod, and IONOS deployment - Alembic migrations for new tables - Frontend: dashboard, pricing page, landing page, i18n (en/fr) - Test suite and verification scripts Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
576 lines
20 KiB
Python
576 lines
20 KiB
Python
"""
|
|
Google Translate Provider - Production-ready implementation.
|
|
|
|
Extends TranslationProvider base class with robust error handling,
|
|
retry logic, and health monitoring.
|
|
|
|
Features:
|
|
- Specific error codes for all Google API errors
|
|
- Retry logic with exponential backoff for transient errors
|
|
- Timeout configuration
|
|
- Health check with caching
|
|
- Structlog-compatible logging (no document content in logs)
|
|
"""
|
|
|
|
import os
|
|
import socket
|
|
import threading
|
|
import time
|
|
from concurrent.futures import ThreadPoolExecutor, TimeoutError as FuturesTimeoutError
|
|
from datetime import datetime, timezone
|
|
from typing import Any, Dict, List, Optional
|
|
|
|
from core.logging import get_logger
|
|
|
|
logger = get_logger(__name__)
|
|
|
|
from .base import TranslationProvider
|
|
from .schemas import (
|
|
BatchTranslationRequest,
|
|
BatchTranslationResponse,
|
|
ProviderHealthStatus,
|
|
TranslationRequest,
|
|
TranslationResponse,
|
|
)
|
|
|
|
# Error codes carried by GoogleProviderError.code and echoed back to callers
# in TranslationResponse.error_code.
GOOGLE_QUOTA_EXCEEDED = "GOOGLE_QUOTA_EXCEEDED"
GOOGLE_INVALID_KEY = "GOOGLE_INVALID_KEY"
GOOGLE_NETWORK_ERROR = "GOOGLE_NETWORK_ERROR"
GOOGLE_UNSUPPORTED_LANGUAGE = "GOOGLE_UNSUPPORTED_LANGUAGE"
GOOGLE_TEXT_TOO_LONG = "GOOGLE_TEXT_TOO_LONG"

# Align with services.translation_service.GoogleTranslationProvider (deep_translator codes)
# Keys are lowercase aliases; values are the codes handed to GoogleTranslator.
# The map must not contain cycles: the previous '"iw": "he"' entry undid the
# '"he": "iw"' entry, so a caller already using the legacy Hebrew code "iw"
# was rewritten to "he". Codes that are already canonical need no entry.
_GOOGLE_LANG_MAP: dict[str, str] = {
    "zh": "zh-CN",
    "zh-cn": "zh-CN",
    "zh-tw": "zh-TW",
    "he": "iw",
    "jv": "jw",
    "nb": "no",
}


def _normalize_lang_for_google(code: str) -> str:
    """Return the language code to pass to GoogleTranslator for *code*.

    Args:
        code: Language code supplied by the caller. Empty/None and any
            casing of "auto" select automatic detection.

    Returns:
        "auto" for empty or "auto" input, the mapped code when the
        lowercased input is a key of ``_GOOGLE_LANG_MAP``, otherwise the
        input with surrounding whitespace removed.
    """
    if not code:
        return "auto"

    cleaned = code.strip()
    lowered = cleaned.lower()
    if not cleaned or lowered == "auto":
        return "auto"

    # Every key in the map is lowercase, so a single lowercase lookup covers
    # both the exact and the case-insensitive match.
    return _GOOGLE_LANG_MAP.get(lowered, cleaned)


# Error codes for which translate_text retries with exponential backoff.
_RETRYABLE_ERRORS = {GOOGLE_NETWORK_ERROR, GOOGLE_QUOTA_EXCEEDED}
|
|
|
|
|
|
class GoogleProviderError(Exception):
    """Error raised when a Google Translate call fails.

    Attributes are plain instance fields:
    ``code`` is a machine-readable error code, ``message`` is the
    human-readable text (also used as the exception message), and
    ``details`` is a dict of extra context (empty when none was given).
    """

    def __init__(
        self, code: str, message: str, details: Optional[Dict[str, Any]] = None
    ):
        super().__init__(message)
        self.code = code
        self.message = message
        # A missing or empty mapping is normalised to a fresh empty dict.
        self.details = details if details else {}

    def to_dict(self) -> Dict[str, Any]:
        """Serialise the error; the "details" key appears only when non-empty."""
        payload: Dict[str, Any] = {"error": self.code, "message": self.message}
        if not self.details:
            return payload
        payload["details"] = self.details
        return payload
|
|
|
|
|
|
class GoogleTranslationProvider(TranslationProvider):
    """
    Google Translate implementation using deep_translator library.

    Features:
    - Translator instances cached per calling thread
    - Caching support (uses global cache from translation_service)
    - Batch translation with optimized processing
    - Robust error handling with specific error codes
    - Retry logic with exponential backoff
    - Configurable timeout
    - Health check with result caching
    """

    # Per-request character limit checked before the translator is called.
    MAX_TEXT_LENGTH = 5000

    def __init__(
        self,
        use_cache: bool = True,
        timeout: int = 30,
        max_retries: int = 3,
        retry_delay: float = 1.0,
    ):
        """
        Initialize Google Translate provider.

        Args:
            use_cache: Whether to use translation caching (default: True)
            timeout: Request timeout in seconds (default: 30)
            max_retries: Maximum retry attempts for transient errors (default: 3)
            retry_delay: Initial retry delay in seconds (default: 1.0)
        """
        self._local = threading.local()
        self._use_cache = use_cache
        self._provider_name = "google"
        self._cache = None
        self.timeout = timeout
        self.max_retries = max_retries
        self.retry_delay = retry_delay
        # Maps a check name to {"value": ..., "timestamp": ...}.
        self._health_cache: Dict[str, Any] = {}
        self._health_cache_ttl = 60  # seconds
        self._health_cache_lock = threading.Lock()

        if use_cache:
            self._init_cache()

    def _init_cache(self):
        """Bind the shared translation cache exported by translation_service."""
        # Imported lazily, inside the method, as in the original code.
        from services.translation_service import _translation_cache

        self._cache = _translation_cache

    def _cache_enabled(self) -> bool:
        """Return True when caching is requested and a cache object is bound.

        Uses an identity check rather than truthiness: a cache object that
        defines ``__len__`` is falsy while empty, which would otherwise
        prevent it from ever being read or populated.
        """
        return self._use_cache and self._cache is not None

    def _store_in_cache(
        self, text: str, target_language: str, source_language: str, result: str
    ) -> None:
        """Write a translation to the cache; failures are logged, not raised.

        A cache problem must not discard a translation that already succeeded.
        """
        if not self._cache_enabled():
            return
        try:
            self._cache.set(
                text,
                target_language,
                source_language,
                self._provider_name,
                result,
            )
        except Exception as e:
            logger.warning(
                "google_cache_write_failed",
                error_type=type(e).__name__,
            )

    @staticmethod
    def _translator_key(source_language: str, target_language: str) -> str:
        """Build the per-thread cache key from the normalized language pair."""
        src = _normalize_lang_for_google(source_language)
        tgt = _normalize_lang_for_google(target_language)
        return f"{src}_{tgt}"

    def _get_translator(self, source_language: str, target_language: str):
        """Get or create a translator instance for the current thread."""
        from deep_translator import GoogleTranslator

        src = _normalize_lang_for_google(source_language)
        tgt = _normalize_lang_for_google(target_language)
        key = f"{src}_{tgt}"
        if not hasattr(self._local, "translators"):
            self._local.translators = {}
        if key not in self._local.translators:
            self._local.translators[key] = GoogleTranslator(source=src, target=tgt)
        return self._local.translators[key]

    def _discard_translator(self, source_language: str, target_language: str) -> None:
        """Forget the current thread's cached translator for a language pair."""
        translators = getattr(self._local, "translators", None)
        if translators:
            translators.pop(
                self._translator_key(source_language, target_language), None
            )

    @staticmethod
    def _classify_error(exc: Exception, target_language: str) -> GoogleProviderError:
        """Map an arbitrary exception to a GoogleProviderError with a code.

        Typed timeouts are recognised first because they are unambiguous;
        everything else is classified by substrings of the lowercased message.
        """
        if isinstance(exc, (socket.timeout, TimeoutError, FuturesTimeoutError)):
            return GoogleProviderError(
                code=GOOGLE_NETWORK_ERROR,
                message="Service Google Translate indisponible. Réessayez.",
                details={"provider": "google", "error_type": "timeout"},
            )

        error_str = str(exc).lower()

        if (
            "quota" in error_str
            or "limit" in error_str
            or "429" in error_str
            # Wording used by deep_translator's rate-limit error - TODO confirm
            # against the installed version.
            or "too many requests" in error_str
        ):
            return GoogleProviderError(
                code=GOOGLE_QUOTA_EXCEEDED,
                message="Quota Google Translate dépassé. Réessayez demain.",
                details={"provider": "google"},
            )
        if "api" in error_str and (
            "key" in error_str
            or "invalid" in error_str
            or "401" in error_str
            or "403" in error_str
        ):
            return GoogleProviderError(
                code=GOOGLE_INVALID_KEY,
                message="Clé API Google invalide. Contactez l'administrateur.",
                details={"provider": "google"},
            )
        if "language" in error_str or "not supported" in error_str:
            return GoogleProviderError(
                code=GOOGLE_UNSUPPORTED_LANGUAGE,
                message=f"Langue '{target_language}' non supportée par Google.",
                details={"unsupported_language": target_language},
            )
        if "timeout" in error_str:
            return GoogleProviderError(
                code=GOOGLE_NETWORK_ERROR,
                message="Service Google Translate indisponible. Réessayez.",
                details={"provider": "google", "error_type": "timeout"},
            )
        return GoogleProviderError(
            code=GOOGLE_NETWORK_ERROR,
            message="Service Google Translate indisponible. Réessayez.",
            details={"provider": "google", "original_error": str(exc)[:100]},
        )

    def _make_api_request(
        self, text: str, source_language: str, target_language: str
    ) -> str:
        """
        Make API request with error mapping.

        The translator call runs in a single-worker executor so that
        ``self.timeout`` can be enforced from the calling thread.

        Raises:
            GoogleProviderError: For any API errors with specific codes
        """
        if len(text) > self.MAX_TEXT_LENGTH:
            raise GoogleProviderError(
                code=GOOGLE_TEXT_TOO_LONG,
                message=(
                    f"Texte trop long (max {self.MAX_TEXT_LENGTH} "
                    "caractères par requête)."
                ),
                details={"text_length": len(text), "max_length": self.MAX_TEXT_LENGTH},
            )

        executor: Optional[ThreadPoolExecutor] = None
        future = None
        try:
            translator = self._get_translator(source_language, target_language)
            # Apply timeout via executor (deep_translator has no timeout parameter)
            executor = ThreadPoolExecutor(max_workers=1)
            future = executor.submit(translator.translate, text)
            return future.result(timeout=self.timeout)
        except Exception as e:
            if future is not None and not future.done():
                # The worker is still running with the cached translator.
                # Drop it so a retry builds a fresh instance instead of
                # sharing one object with the abandoned thread.
                self._discard_translator(source_language, target_language)
            raise self._classify_error(e, target_language) from e
        finally:
            if executor is not None:
                # Do NOT wait: a `with` block would call shutdown(wait=True)
                # and block until the hung request returned, which defeats
                # the timeout applied above.
                executor.shutdown(wait=False)

    def get_name(self) -> str:
        """Return provider name."""
        return self._provider_name

    def is_available(self) -> bool:
        """
        Check if Google Translate is available.

        Uses cached result if available and not expired. The check only
        obtains a translator object for ("auto", "en"); no translate call
        is issued here.
        """
        current_time = time.time()

        with self._health_cache_lock:
            cached = self._health_cache.get("is_available")
            if (
                cached is not None
                and current_time - cached["timestamp"] < self._health_cache_ttl
            ):
                return cached["value"]

        try:
            translator = self._get_translator("auto", "en")
            available = translator is not None
        except Exception as e:
            logger.warning(
                "google_availability_check_failed",
                error=str(e)[:100],
            )
            available = False

        with self._health_cache_lock:
            self._health_cache["is_available"] = {
                "value": available,
                "timestamp": current_time,
            }

        return available

    def translate_text(self, request: TranslationRequest) -> TranslationResponse:
        """
        Translate a single text string using Google Translate.

        API Usage Notes:
        - Google Translate free tier: 500,000 characters/month
        - 5,000 characters max per request
        - Cost: ~$20 per million characters (paid tier)

        Optimization: Skips API call if source == target language.

        Failures are reported through the ``error`` / ``error_code`` /
        ``error_details`` fields of the response (with the original text as
        ``translated_text``) rather than by raising.

        Args:
            request: TranslationRequest with text and language info

        Returns:
            TranslationResponse with translated text
        """
        text = request.text
        target_language = request.target_language
        source_language = request.source_language or "auto"

        # Empty or whitespace-only input is returned unchanged.
        if not text or not text.strip():
            return TranslationResponse(
                translated_text=text,
                provider_name=self._provider_name,
                from_cache=False,
            )

        # Optimization: Skip if source and target are the same
        if source_language != "auto" and source_language == target_language:
            logger.info(
                "google_translation_skip",
                source_target_lang=target_language,
                text_length=len(text),
            )
            return TranslationResponse(
                translated_text=text,
                provider_name=self._provider_name,
                from_cache=False,
                source_language=source_language,
            )

        if self._cache_enabled():
            cached = self._cache.get(
                text, target_language, source_language, self._provider_name
            )
            if cached is not None:
                return TranslationResponse(
                    translated_text=cached,
                    provider_name=self._provider_name,
                    from_cache=True,
                )

        last_error: Optional[GoogleProviderError] = None
        retries = 0

        while retries <= self.max_retries:
            # Only the API call sits inside the try: cache writes and logging
            # on the success path must not be mistaken for API failures.
            try:
                result = self._make_api_request(text, source_language, target_language)
            except GoogleProviderError as e:
                last_error = e
            except Exception as e:
                # Defensive: _make_api_request is expected to raise only
                # GoogleProviderError.
                last_error = GoogleProviderError(
                    code=GOOGLE_NETWORK_ERROR,
                    message="Service Google Translate indisponible. Réessayez.",
                    details={"original_error": str(e)[:100]},
                )
            else:
                self._store_in_cache(text, target_language, source_language, result)

                # Log usage metrics (character count, API call)
                logger.info(
                    "google_translation_success",
                    chars=len(text),
                    source_lang=source_language,
                    target_lang=target_language,
                    retries=retries,
                )

                return TranslationResponse(
                    translated_text=result,
                    provider_name=self._provider_name,
                    from_cache=False,
                )

            if last_error.code not in _RETRYABLE_ERRORS:
                break

            retries += 1
            if retries <= self.max_retries:
                # Exponential backoff: retry_delay, 2x, 4x, ...
                delay = self.retry_delay * (2 ** (retries - 1))
                logger.info(
                    "google_translation_retry",
                    attempt=retries,
                    delay_s=round(delay, 2),
                    error_code=last_error.code,
                    text_length=len(text),
                    source_lang=source_language,
                    target_lang=target_language,
                )
                time.sleep(delay)

        if last_error:
            logger.error(
                "google_translation_failed",
                error_code=last_error.code,
                text_length=len(text),
                source_lang=source_language,
                target_lang=target_language,
                retries=retries,
            )
            return TranslationResponse(
                translated_text=text,
                provider_name=self._provider_name,
                from_cache=False,
                error=last_error.message,
                error_code=last_error.code,
                error_details=last_error.details,
            )

        # Reached only when the loop body never ran (negative max_retries).
        return TranslationResponse(
            translated_text=text,
            provider_name=self._provider_name,
            from_cache=False,
            error="Unknown error",
            error_code=GOOGLE_NETWORK_ERROR,
        )

    def translate_batch(
        self, requests: List[TranslationRequest]
    ) -> List[TranslationResponse]:
        """
        Translate multiple texts with optimized batch processing.

        When all requests share the same source/target languages, delegates to
        services.translation_service.GoogleTranslationProvider.translate_batch
        (batched deep_translator calls). Otherwise, or when the delegated call
        fails or returns a different number of items than requested, falls
        back to per-item translate_text.
        """
        if not requests:
            return []

        tgt0 = requests[0].target_language
        src0 = requests[0].source_language or "auto"
        uniform = all(
            r.target_language == tgt0 and (r.source_language or "auto") == src0
            for r in requests
        )
        if uniform:
            try:
                from services.translation_service import (
                    GoogleTranslationProvider as LegacyGoogle,
                )

                texts = [r.text for r in requests]
                outs = list(LegacyGoogle().translate_batch(texts, tgt0, src0))
                # Results are paired with requests by position, so a short or
                # long result list would silently misalign translations.
                if len(outs) == len(requests):
                    return [
                        TranslationResponse(
                            translated_text=out,
                            provider_name=self._provider_name,
                            from_cache=False,
                        )
                        for out in outs
                    ]
                logger.warning(
                    "google_batch_legacy_fallback",
                    error_type="LengthMismatch",
                )
            except Exception as e:
                logger.warning(
                    "google_batch_legacy_fallback",
                    error_type=type(e).__name__,
                )

        return [self.translate_text(req) for req in requests]

    def health_check(self) -> ProviderHealthStatus:
        """
        Return health status details for the provider.

        Delegates the actual check to is_available() and measures how long
        that call took. The resulting status object is cached for
        ``_health_cache_ttl`` seconds.

        Returns:
            ProviderHealthStatus with availability and latency information
        """
        current_time = time.time()

        with self._health_cache_lock:
            cached = self._health_cache.get("health_check")
            if (
                cached is not None
                and current_time - cached["timestamp"] < self._health_cache_ttl
            ):
                return cached["value"]

        start_time = time.time()
        last_check_iso = datetime.now(timezone.utc).isoformat()

        try:
            available = self.is_available()
            latency_ms = (time.time() - start_time) * 1000

            status = ProviderHealthStatus(
                name=self._provider_name,
                available=available,
                latency_ms=round(latency_ms, 2),
                error=None if available else "Provider not available",
                last_check=last_check_iso,
            )
        except Exception as e:
            latency_ms = (time.time() - start_time) * 1000
            status = ProviderHealthStatus(
                name=self._provider_name,
                available=False,
                latency_ms=round(latency_ms, 2),
                error=str(e)[:100],
                last_check=last_check_iso,
            )

        with self._health_cache_lock:
            self._health_cache["health_check"] = {
                "value": status,
                "timestamp": current_time,
            }

        return status
|
|
|
|
|
|
def register_google_provider():
    """
    Register the shared Google provider under the name "google".

    Fetches the provider via get_google_provider(), registers it in the
    package-level registry, and returns that same provider object.
    """
    from .registry import registry

    google = get_google_provider()
    registry.register("google", google)
    return google
|
|
|
|
|
|
# Lazily created module-wide provider; populated by get_google_provider().
_provider_instance = None


def get_google_provider() -> GoogleTranslationProvider:
    """Return the module-wide Google provider, building it on first use.

    On first call the provider is constructed with caching enabled and with
    timeout / retry settings read from ProvidersConfig; later calls return
    the same object.
    """
    global _provider_instance

    if _provider_instance is not None:
        return _provider_instance

    from .config import ProvidersConfig

    settings = {
        "use_cache": True,
        "timeout": ProvidersConfig.GOOGLE_TRANSLATE_TIMEOUT,
        "max_retries": ProvidersConfig.GOOGLE_TRANSLATE_MAX_RETRIES,
        "retry_delay": ProvidersConfig.GOOGLE_TRANSLATE_RETRY_DELAY,
    }
    _provider_instance = GoogleTranslationProvider(**settings)
    return _provider_instance
|
|
|
|
|
|
class LegacyGoogleAdapter:
    """
    Wraps the shared GoogleTranslationProvider behind the legacy interface
    used by translation_service:

    - translate(text, target_language, source_language) -> str
    - translate_batch(texts, target_language, source_language) -> List[str]

    A response carrying an error is converted into a raised
    TranslationProviderError so the API layer can return 4xx/502.
    """

    def __init__(self):
        self._provider = get_google_provider()
        self.provider_name = "google"

    @staticmethod
    def _raise_if_failed(response) -> None:
        """Raise TranslationProviderError when *response* reports an error."""
        if not response.error:
            return

        from utils.exceptions import TranslationProviderError

        raise TranslationProviderError(
            response.error_code or "UNKNOWN",
            response.error or "Translation failed",
            response.error_details,
        )

    def translate(
        self, text: str, target_language: str, source_language: str = "auto"
    ) -> str:
        """Translate one string and return the translated text.

        Raises:
            TranslationProviderError: If the provider response has an error.
        """
        response = self._provider.translate_text(
            TranslationRequest(
                text=text,
                target_language=target_language,
                source_language=source_language,
            )
        )
        self._raise_if_failed(response)
        return response.translated_text

    def translate_batch(
        self,
        texts: List[str],
        target_language: str,
        source_language: str = "auto",
        batch_size: int = 50,
    ) -> List[str]:
        """Translate several strings, returning translated texts in order.

        ``batch_size`` is part of the legacy signature; this adapter does not
        read it.

        Raises:
            TranslationProviderError: On the first response that has an error.
        """
        requests = []
        for item in texts:
            requests.append(
                TranslationRequest(
                    text=item,
                    target_language=target_language,
                    source_language=source_language,
                )
            )

        translated = []
        for response in self._provider.translate_batch(requests):
            self._raise_if_failed(response)
            translated.append(response.translated_text)
        return translated
|
|
|
|
|
|
def get_legacy_google_adapter() -> LegacyGoogleAdapter:
    """Build a new LegacyGoogleAdapter for use by the legacy translation_service."""
    adapter = LegacyGoogleAdapter()
    return adapter
|