Files
office_translator/services/providers/google_cloud_provider.py
Sepehr Ramezani 26bd096a06 feat: production deployment - full update with providers, admin, glossaries, pricing, tests
Major changes across backend, frontend, infrastructure:
- Provider system with model selection (Google, DeepL, OpenAI, Ollama, Google Cloud)
- Admin panel: user management, pricing, settings
- Glossary system with CSV import/export
- Subscription and tier quota management
- Security hardening (rate limiting, API key auth, path traversal fixes)
- Docker compose for dev, prod, and IONOS deployment
- Alembic migrations for new tables
- Frontend: dashboard, pricing page, landing page, i18n (en/fr)
- Test suite and verification scripts

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-04-25 15:01:47 +02:00

403 lines
15 KiB
Python

"""
Google Cloud Translation Provider — API officielle v2 (Basic).
Utilise l'API REST Cloud Translation v2 avec une clé API Google Cloud.
Contrairement à google_provider.py (deep_translator, accès web non officiel),
ce provider est l'implémentation officielle et facturable.
Tarification (avril 2026) :
- 500 000 caractères/mois gratuits (par projet GCP)
- Au-delà : ~$20 / million de caractères
Prérequis :
1. Activer « Cloud Translation API » dans Google Cloud Console
2. Créer une clé API (restreinte à l'API Cloud Translation)
3. Définir GOOGLE_CLOUD_API_KEY dans .env ou dans les paramètres admin
"""
import time
import threading
from typing import Any, Dict, List, Optional
import requests
from core.logging import get_logger
from .base import TranslationProvider
from .schemas import (
BatchTranslationRequest,
BatchTranslationResponse,
ProviderHealthStatus,
TranslationRequest,
TranslationResponse,
)
# Module-level logger; the calls in this file pass structured keyword fields.
logger = get_logger(__name__)
# REST endpoint of the Cloud Translation v2 "translate" method.
_TRANSLATE_URL = "https://translation.googleapis.com/language/translate/v2"
_MAX_CHARS_PER_REQUEST = 30_000 # Cloud Translation v2 recommends ≤ 30 000 chars
# Internal error codes
GC_QUOTA_EXCEEDED = "GC_QUOTA_EXCEEDED"
GC_INVALID_KEY = "GC_INVALID_KEY"
GC_API_NOT_ENABLED = "GC_API_NOT_ENABLED"
GC_NETWORK_ERROR = "GC_NETWORK_ERROR"
GC_UNSUPPORTED_LANGUAGE = "GC_UNSUPPORTED_LANGUAGE"
GC_TEXT_TOO_LONG = "GC_TEXT_TOO_LONG"
# Error codes for which the provider retries a failed call.
_RETRYABLE_CODES = {GC_QUOTA_EXCEEDED, GC_NETWORK_ERROR}
# Language-code mapping used to align caller codes with the Cloud Translation API
_LANG_MAP: dict[str, str] = {
"auto": "", # Cloud v2: empty source = automatic detection
"iw": "he", # Hebrew
"jv": "jw", # Javanese
"nb": "no", # Norwegian Bokmål
}
def _normalize_lang(code: str) -> str:
if not code or code == "auto":
return "" # Cloud v2 détecte automatiquement si source vide
return _LANG_MAP.get(code.lower(), code)
class GoogleCloudProviderError(Exception):
    """Exception carrying an internal error code, a message and optional details.

    Attributes are set as follows:
      - ``code``: internal error code string (e.g. one of the ``GC_*`` values).
      - ``message``: human-readable message, also used as the exception text.
      - ``details``: dictionary of extra context; never ``None``.
    """

    def __init__(self, code: str, message: str, details: Optional[Dict[str, Any]] = None):
        super().__init__(message)
        self.code = code
        self.message = message
        # A falsy ``details`` (None or an empty dict) is replaced by a fresh dict.
        self.details = details if details else {}
class GoogleCloudTranslationProvider(TranslationProvider):
    """
    Google Cloud Translation API v2 (Basic) provider.

    Sends REST requests authenticated with an API key.
    Intended for the paid plans (Pro, Business, Enterprise).

    Failures are never raised to callers of ``translate_text`` /
    ``translate_batch``: they are reported through the ``error``,
    ``error_code`` and ``error_details`` fields of the returned responses,
    with the original text left in ``translated_text``.
    """

    # Cloud Translation v2 accepts at most 128 strings in the "q" array.
    _MAX_SEGMENTS_PER_REQUEST = 128
    # Substrings (upper-case) of error status/reason values that denote a
    # quota or rate-limit problem, e.g. "dailyLimitExceeded",
    # "userRateLimitExceeded", "RATE_LIMIT_EXCEEDED", "RESOURCE_EXHAUSTED".
    _QUOTA_MARKERS = ("QUOTA", "RATE_LIMIT", "LIMITEXCEEDED", "RESOURCE_EXHAUSTED")

    def __init__(
        self,
        api_key: str,
        timeout: int = 30,
        max_retries: int = 3,
        retry_delay: float = 1.0,
    ):
        """Create a provider bound to one API key.

        Args:
            api_key: Google Cloud API key; must be non-empty.
            timeout: Per-request timeout, in seconds, passed to ``requests``.
            max_retries: Number of retries after the first failed attempt.
            retry_delay: Base delay in seconds; doubled after each retry.

        Raises:
            ValueError: If ``api_key`` is empty.
        """
        if not api_key:
            raise ValueError("GoogleCloudTranslationProvider requiert une clé API.")
        self._api_key = api_key
        self._timeout = timeout
        self._max_retries = max_retries
        self._retry_delay = retry_delay
        self._provider_name = "google_cloud"
        # Cache entries have the shape {"value": ..., "timestamp": float}.
        self._health_cache: Dict[str, Any] = {}
        self._health_cache_ttl = 60
        self._health_cache_lock = threading.Lock()
        self._session = requests.Session()

    def get_name(self) -> str:
        """Return the provider identifier (``"google_cloud"``)."""
        return self._provider_name

    def is_available(self) -> bool:
        """Report whether the API answers successfully with the configured key.

        The result is cached for ``_health_cache_ttl`` seconds. On a cache
        miss a real translation request for the word "hello" is sent, so the
        probe consumes a few characters of quota.

        Returns:
            ``True`` when the probe got a successful HTTP response, ``False``
            on any HTTP error or exception.
        """
        current_time = time.time()
        with self._health_cache_lock:
            cached = self._health_cache.get("is_available")
            if cached and current_time - cached["timestamp"] < self._health_cache_ttl:
                return cached["value"]
        # The network call is made outside the lock so other threads are not
        # blocked for the duration of the request.
        try:
            resp = self._session.post(
                _TRANSLATE_URL,
                params={"key": self._api_key},
                json={"q": "hello", "target": "fr", "format": "text"},
                timeout=self._timeout,
            )
            available = resp.ok
        except Exception as exc:
            # Best-effort probe: never raise, but leave a trace of the cause.
            logger.warning(
                "google_cloud_availability_check_failed",
                error=self._redact(str(exc))[:200],
            )
            available = False
        with self._health_cache_lock:
            self._health_cache["is_available"] = {"value": available, "timestamp": current_time}
        return available

    def _redact(self, text: str) -> str:
        """Return ``text`` with every occurrence of the API key masked.

        The key travels as a URL query parameter, and ``requests`` error
        messages can embed the full URL; masking keeps the key out of logs
        and of the error details handed back to callers.
        """
        return text.replace(self._api_key, "***")

    def _call_api(self, texts: List[str], target_lang: str, source_lang: str) -> List[str]:
        """
        Perform one REST call to Cloud Translation API v2.

        Several texts are sent in a single request (native batching).

        Args:
            texts: Texts to translate, sent as the ``q`` array.
            target_lang: Target language code.
            source_lang: Source language code; empty means auto-detection
                (the ``source`` field is then omitted).

        Returns:
            The ``translatedText`` values, in the order returned by the API.

        Raises:
            GoogleCloudProviderError: On timeout, connection failure or any
                non-200 HTTP response.
        """
        payload: Dict[str, Any] = {
            "q": texts,
            "target": target_lang,
            "format": "text",
        }
        if source_lang:  # empty = automatic detection
            payload["source"] = source_lang
        try:
            resp = self._session.post(
                _TRANSLATE_URL,
                params={"key": self._api_key},
                json=payload,
                timeout=self._timeout,
            )
        except requests.Timeout as exc:
            raise GoogleCloudProviderError(
                GC_NETWORK_ERROR,
                "Délai dépassé pour Google Cloud Translation.",
            ) from exc
        except requests.ConnectionError as exc:
            raise GoogleCloudProviderError(
                GC_NETWORK_ERROR,
                "Impossible de joindre Google Cloud Translation.",
                # Redact before truncating so a partially cut key cannot slip through.
                {"original_error": self._redact(str(exc))[:200]},
            ) from exc
        if resp.status_code == 200:
            translations = resp.json().get("data", {}).get("translations", [])
            return [t.get("translatedText", "") for t in translations]

        # HTTP error handling: extract the message plus every status/reason
        # marker the error body carries.
        err_msg = resp.text[:200]
        markers = ""
        try:
            error = resp.json().get("error", {})
            err_msg = error.get("message", err_msg)
            entries = (error.get("errors") or []) + (error.get("details") or [])
            reasons = [str(e.get("reason", "")) for e in entries if isinstance(e, dict)]
            markers = " ".join([str(error.get("status", "")), *reasons]).upper()
        except (ValueError, AttributeError, TypeError):
            # Non-JSON body or unexpected shape: fall back to the raw text.
            pass

        http_details = {"http_status": resp.status_code}
        # Quota is checked first: quota/rate-limit errors can arrive as HTTP 403
        # and must not be mistaken for an invalid key.
        if resp.status_code == 429 or any(m in markers for m in self._QUOTA_MARKERS):
            raise GoogleCloudProviderError(
                GC_QUOTA_EXCEEDED,
                "Quota Google Cloud Translation dépassé. Réessayez plus tard.",
                http_details,
            )
        if resp.status_code in (401, 403) or "API_KEY" in markers:
            raise GoogleCloudProviderError(
                GC_INVALID_KEY,
                f"Clé API Google Cloud invalide ou API non activée : {err_msg}",
                http_details,
            )
        raise GoogleCloudProviderError(
            GC_NETWORK_ERROR,
            f"Erreur Google Cloud Translation HTTP {resp.status_code}: {err_msg}",
            http_details,
        )

    @staticmethod
    def _is_retryable(error: GoogleCloudProviderError) -> bool:
        """Tell whether retrying after ``error`` can reasonably succeed."""
        if error.code not in _RETRYABLE_CODES:
            return False
        http_status = error.details.get("http_status")
        # A 4xx answer classified as a generic error is deterministic (bad
        # request, unknown language, ...); 408 is the only transient one.
        if (
            error.code == GC_NETWORK_ERROR
            and isinstance(http_status, int)
            and 400 <= http_status < 500
            and http_status != 408
        ):
            return False
        return True

    def _call_api_with_retry(self, texts: List[str], target_lang: str, source_lang: str):
        """Call ``_call_api`` with exponential backoff on retryable failures.

        Returns:
            A ``(translations, retries)`` tuple where ``retries`` is the
            number of retries that were needed before the call succeeded.

        Raises:
            GoogleCloudProviderError: The last error once retries are
                exhausted or the error is not retryable.
        """
        last_error: Optional[GoogleCloudProviderError] = None
        retries = 0
        while retries <= self._max_retries:
            try:
                return self._call_api(texts, target_lang, source_lang), retries
            except GoogleCloudProviderError as exc:
                last_error = exc
            except Exception as exc:
                # Anything unexpected (other requests errors, malformed JSON
                # on a 200 response, ...) is treated as a transient failure.
                last_error = GoogleCloudProviderError(
                    GC_NETWORK_ERROR,
                    "Erreur inattendue Google Cloud Translation.",
                    {"original_error": self._redact(str(exc))[:200]},
                )
            if not self._is_retryable(last_error):
                break
            retries += 1
            if retries <= self._max_retries:
                time.sleep(self._retry_delay * (2 ** (retries - 1)))
        # ``last_error`` is only None when no attempt was made at all
        # (negative ``max_retries``).
        raise last_error or GoogleCloudProviderError(GC_NETWORK_ERROR, "Erreur inconnue")

    def _passthrough(self, text: str, source: str, target: str) -> Optional[TranslationResponse]:
        """Return a response for texts that need no API call, else ``None``.

        Blank texts and requests whose source equals the target are returned
        unchanged.
        """
        if not text or not text.strip():
            return TranslationResponse(
                translated_text=text,
                provider_name=self._provider_name,
                from_cache=False,
            )
        if source and source == target:
            return TranslationResponse(
                translated_text=text,
                provider_name=self._provider_name,
                from_cache=False,
                source_language=source,
            )
        return None

    def _error_response(self, text: str, error: GoogleCloudProviderError) -> TranslationResponse:
        """Build the failure response for ``text``: original text plus error fields."""
        return TranslationResponse(
            translated_text=text,
            provider_name=self._provider_name,
            from_cache=False,
            error=error.message,
            error_code=error.code,
            error_details=error.details,
        )

    def translate_text(self, request: TranslationRequest) -> TranslationResponse:
        """Translate a single text.

        Blank texts and same-language requests are returned unchanged without
        calling the API. On failure the original text is returned together
        with the error fields.
        """
        text = request.text
        target = _normalize_lang(request.target_language)
        source = _normalize_lang(request.source_language or "auto")
        shortcut = self._passthrough(text, source, target)
        if shortcut is not None:
            return shortcut
        try:
            results, retries = self._call_api_with_retry([text], target, source)
        except GoogleCloudProviderError as exc:
            logger.error(
                "google_cloud_translation_failed",
                error_code=exc.code,
                chars=len(text),
                target_lang=target,
            )
            return self._error_response(text, exc)
        translated = results[0] if results else text
        logger.info(
            "google_cloud_translation_success",
            chars=len(text),
            target_lang=target,
            retries=retries,
        )
        return TranslationResponse(
            translated_text=translated,
            provider_name=self._provider_name,
            from_cache=False,
        )

    def _chunk_indices(self, batch: List[TranslationRequest], indices: List[int]) -> List[List[int]]:
        """Split ``indices`` into chunks that fit one API request.

        A chunk holds at most ``_MAX_SEGMENTS_PER_REQUEST`` texts and at most
        ``_MAX_CHARS_PER_REQUEST`` characters; a single text longer than the
        character budget is sent alone.
        """
        chunks: List[List[int]] = []
        current: List[int] = []
        current_chars = 0
        for index in indices:
            size = len(batch[index].text)
            full = len(current) >= self._MAX_SEGMENTS_PER_REQUEST
            if current and (full or current_chars + size > _MAX_CHARS_PER_REQUEST):
                chunks.append(current)
                current, current_chars = [], 0
            current.append(index)
            current_chars += size
        if current:
            chunks.append(current)
        return chunks

    def translate_batch(self, requests: List[TranslationRequest]) -> List[TranslationResponse]:
        """
        Translate several texts using the native batching of Cloud Translation v2.

        Requests sharing the same source/target pair are grouped and sent
        together, split into chunks that respect the per-request limits.
        Blank texts and same-language requests are answered without an API
        call. The returned list always has one response per request, in the
        same order.

        Note: the ``requests`` parameter shadows the ``requests`` module
        inside this method; all HTTP work is delegated to helpers.
        """
        if not requests:
            return []
        responses: List[Optional[TranslationResponse]] = [None] * len(requests)
        # (source, target) -> positions of the requests using that pair.
        groups: Dict[Any, List[int]] = {}
        for index, req in enumerate(requests):
            target = _normalize_lang(req.target_language)
            source = _normalize_lang(req.source_language or "auto")
            shortcut = self._passthrough(req.text, source, target)
            if shortcut is not None:
                responses[index] = shortcut
            else:
                groups.setdefault((source, target), []).append(index)

        for (source, target), indices in groups.items():
            group_error: Optional[GoogleCloudProviderError] = None
            for chunk in self._chunk_indices(requests, indices):
                if group_error is None:
                    texts = [requests[i].text for i in chunk]
                    try:
                        results, _ = self._call_api_with_retry(texts, target, source)
                        if len(results) != len(texts):
                            # Pairing by position would attach translations
                            # to the wrong texts.
                            raise GoogleCloudProviderError(
                                GC_NETWORK_ERROR,
                                "Réponse Google Cloud Translation incomplète.",
                                {"expected": len(texts), "received": len(results)},
                            )
                    except GoogleCloudProviderError as exc:
                        group_error = exc
                        logger.error(
                            "google_cloud_batch_failed",
                            error_code=exc.code,
                            count=len(texts),
                            target_lang=target,
                        )
                    else:
                        logger.info(
                            "google_cloud_batch_success",
                            count=len(texts),
                            target_lang=target,
                        )
                        for i, translated in zip(chunk, results):
                            responses[i] = TranslationResponse(
                                translated_text=translated,
                                provider_name=self._provider_name,
                                from_cache=False,
                            )
                        continue
                # Once a chunk failed, the remaining chunks of the same pair
                # get the same error without further (slow) retries.
                for i in chunk:
                    responses[i] = self._error_response(requests[i].text, group_error)
        return responses

    def health_check(self) -> ProviderHealthStatus:
        """Return a health report (availability, latency, error), cached for the TTL.

        The latency covers the ``is_available`` call, which may itself be
        served from its own cache.
        """
        current_time = time.time()
        with self._health_cache_lock:
            cached = self._health_cache.get("health_check")
            if cached and current_time - cached["timestamp"] < self._health_cache_ttl:
                return cached["value"]
        from datetime import datetime, timezone

        start = time.time()
        last_check_iso = datetime.now(timezone.utc).isoformat()
        try:
            available = self.is_available()
            latency_ms = (time.time() - start) * 1000
            status = ProviderHealthStatus(
                name=self._provider_name,
                available=available,
                latency_ms=round(latency_ms, 2),
                error=None if available else "Clé API invalide ou quota dépassé",
                last_check=last_check_iso,
            )
        except Exception as exc:
            latency_ms = (time.time() - start) * 1000
            status = ProviderHealthStatus(
                name=self._provider_name,
                available=False,
                latency_ms=round(latency_ms, 2),
                error=self._redact(str(exc))[:200],
                last_check=last_check_iso,
            )
        with self._health_cache_lock:
            self._health_cache["health_check"] = {"value": status, "timestamp": current_time}
        return status
class LegacyGoogleCloudAdapter:
    """
    Adapter exposing GoogleCloudTranslationProvider through the legacy
    interface (.translate / .translate_batch) used by translation_service.

    Unlike the provider, which reports failures inside its responses, this
    adapter raises ``TranslationProviderError`` as soon as a response
    carries an error.
    """

    def __init__(self, api_key: str):
        """Build the underlying provider with its default settings."""
        self._provider = GoogleCloudTranslationProvider(api_key=api_key)
        self.provider_name = "google_cloud"

    @staticmethod
    def _raise_if_error(response) -> None:
        """Raise ``TranslationProviderError`` when ``response`` reports a failure."""
        if response.error:
            # Imported lazily, as in the original code paths.
            from utils.exceptions import TranslationProviderError

            raise TranslationProviderError(
                response.error_code or "UNKNOWN", response.error, response.error_details
            )

    def translate(self, text: str, target_language: str, source_language: str = "auto") -> str:
        """Translate one text and return the translated string.

        Raises:
            TranslationProviderError: If the provider reports an error.
        """
        resp = self._provider.translate_text(
            TranslationRequest(text=text, target_language=target_language, source_language=source_language)
        )
        self._raise_if_error(resp)
        return resp.translated_text

    def translate_batch(
        self, texts: List[str], target_language: str, source_language: str = "auto", batch_size: int = 50
    ) -> List[str]:
        """Translate ``texts`` in slices of ``batch_size`` and return the results in order.

        Args:
            texts: Texts to translate.
            target_language: Target language code.
            source_language: Source language code, ``"auto"`` for detection.
            batch_size: Maximum number of texts handed to the provider per
                call; a falsy or non-positive value sends everything at once.

        Raises:
            TranslationProviderError: On the first response reporting an
                error; later slices are then not sent.
        """
        items = list(texts)
        if not items:
            return []
        step = batch_size if batch_size and batch_size > 0 else len(items)
        translated: List[str] = []
        for start in range(0, len(items), step):
            reqs = [
                TranslationRequest(text=t, target_language=target_language, source_language=source_language)
                for t in items[start:start + step]
            ]
            for resp in self._provider.translate_batch(reqs):
                self._raise_if_error(resp)
                translated.append(resp.translated_text)
        return translated