Files
office_translator/services/providers/deepl_provider.py
Sepehr Ramezani 26bd096a06 feat: production deployment - full update with providers, admin, glossaries, pricing, tests
Major changes across backend, frontend, infrastructure:
- Provider system with model selection (Google, DeepL, OpenAI, Ollama, Google Cloud)
- Admin panel: user management, pricing, settings
- Glossary system with CSV import/export
- Subscription and tier quota management
- Security hardening (rate limiting, API key auth, path traversal fixes)
- Docker compose for dev, prod, and IONOS deployment
- Alembic migrations for new tables
- Frontend: dashboard, pricing page, landing page, i18n (en/fr)
- Test suite and verification scripts

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-04-25 15:01:47 +02:00

758 lines
24 KiB
Python

"""
DeepL Provider - Production-ready implementation.
Extends TranslationProvider base class with robust error handling,
retry logic, and health monitoring.
Features:
- Automatic Free/Pro endpoint detection based on API key format
- Specific error codes for all DeepL API errors
- Retry logic with exponential backoff for transient errors
- Timeout configuration
- Health check with caching
- Structlog-compatible logging (no document content in logs)
"""
import os
import socket
import threading
import time
from concurrent.futures import ThreadPoolExecutor, TimeoutError as FuturesTimeoutError
from datetime import datetime, timezone
from typing import Any, Dict, List, Optional
from core.logging import get_logger
# Module-level logger obtained from the project's logging helper.
logger = get_logger(__name__)
# Selects the keyword-argument (structured) call style in the _log_* helpers.
# It is hard-coded to True in this file, so their plain-text fallback branches
# are not taken unless this flag is changed.
_HAS_STRUCTLOG = True
def _log_info(event: str, **kwargs: Any) -> None:
    """
    Emit an info-level log event.

    When ``_HAS_STRUCTLOG`` is true the fields are forwarded to the logger as
    keyword arguments; otherwise they are rendered as space-separated
    ``key=value`` pairs appended to the event name.

    Args:
        event: Event name / message.
        **kwargs: Fields attached to the event.
    """
    if not _HAS_STRUCTLOG:
        rendered = " ".join(f"{name}={value}" for name, value in kwargs.items())
        logger.info(f"{event} {rendered}")
        return
    logger.info(event, **kwargs)
def _log_warning(event: str, **kwargs: Any) -> None:
    """
    Emit a warning-level log event.

    When ``_HAS_STRUCTLOG`` is true the fields are forwarded to the logger as
    keyword arguments; otherwise they are rendered as space-separated
    ``key=value`` pairs appended to the event name.

    Args:
        event: Event name / message.
        **kwargs: Fields attached to the event.
    """
    if not _HAS_STRUCTLOG:
        rendered = " ".join(f"{name}={value}" for name, value in kwargs.items())
        logger.warning(f"{event} {rendered}")
        return
    logger.warning(event, **kwargs)
def _log_error(event: str, **kwargs: Any) -> None:
    """
    Emit an error-level log event.

    When ``_HAS_STRUCTLOG`` is true the fields are forwarded to the logger as
    keyword arguments; otherwise they are rendered as space-separated
    ``key=value`` pairs appended to the event name.

    Args:
        event: Event name / message.
        **kwargs: Fields attached to the event.
    """
    if not _HAS_STRUCTLOG:
        rendered = " ".join(f"{name}={value}" for name, value in kwargs.items())
        logger.error(f"{event} {rendered}")
        return
    logger.error(event, **kwargs)
from .base import TranslationProvider
from .schemas import (
BatchTranslationRequest,
BatchTranslationResponse,
ProviderHealthStatus,
TranslationRequest,
TranslationResponse,
)
# Error codes carried by DeepLProviderError.code and reported to callers in
# TranslationResponse.error_code.
DEEPL_QUOTA_EXCEEDED = "DEEPL_QUOTA_EXCEEDED"
DEEPL_INVALID_KEY = "DEEPL_INVALID_KEY"
DEEPL_NETWORK_ERROR = "DEEPL_NETWORK_ERROR"
DEEPL_UNSUPPORTED_LANGUAGE = "DEEPL_UNSUPPORTED_LANGUAGE"
DEEPL_TEXT_TOO_LONG = "DEEPL_TEXT_TOO_LONG"
# Codes that translate_text retries with exponential backoff; any other code
# ends the retry loop immediately.
# NOTE(review): quota errors are retried here although the user-facing quota
# message tells the user to retry tomorrow — confirm this is intended.
_RETRYABLE_ERRORS = {DEEPL_NETWORK_ERROR, DEEPL_QUOTA_EXCEEDED}
# API-key suffix that _detect_api_type treats as marking a Free-tier key.
DEEPL_FREE_SUFFIX = ":fx"
# Per-request size cap in bytes (128 KiB); _make_api_request compares it with
# the UTF-8 encoded length of the text.
MAX_TEXT_LENGTH = 128 * 1024
# Upper-case language codes this provider accepts; _is_language_supported
# checks normalized codes for membership in this set.
DEEPL_SUPPORTED_LANGUAGES = {
    "BG", "CS", "DA", "DE", "EL", "EN-GB", "EN-US", "ES",
    "ET", "FI", "FR", "HU", "ID", "IT", "JA", "KO",
    "LT", "LV", "NB", "NL", "PL", "PT-BR", "PT-PT", "RO",
    "RU", "SK", "SL", "SV", "TR", "UK", "ZH",
}
class DeepLProviderError(Exception):
    """
    Error raised by the DeepL provider, tagged with a machine-readable code.

    Attributes are plain instance fields:
    ``code`` (error code string), ``message`` (human-readable text, also used
    as the exception message) and ``details`` (dict, empty when none given).
    """

    def __init__(
        self, code: str, message: str, details: Optional[Dict[str, Any]] = None
    ):
        super().__init__(message)
        self.code = code
        self.message = message
        # A missing or empty mapping is replaced by a fresh empty dict.
        self.details = details if details else {}

    def to_dict(self) -> Dict[str, Any]:
        """
        Serialize the error as a dictionary.

        Returns:
            ``{"error": code, "message": message}``, plus a ``"details"``
            entry only when details are non-empty.
        """
        payload = {"error": self.code, "message": self.message}
        if not self.details:
            return payload
        payload["details"] = self.details
        return payload
class DeepLTranslationProvider(TranslationProvider):
    """
    DeepL implementation built on the deep_translator library.

    Features:
    - Free/Pro key detection based on the API key suffix
    - One translator instance per thread and language pair (threading.local)
    - Optional caching through the shared cache from translation_service
    - Batch translation implemented as sequential single-text translations
    - Errors mapped to specific DEEPL_* error codes
    - Retry with exponential backoff for retryable error codes
    - Configurable request timeout
    - Availability and health checks with short-lived result caching
    """
def __init__(
    self,
    api_key: str,
    use_cache: bool = True,
    timeout: int = 30,
    max_retries: int = 3,
    retry_delay: float = 1.0,
):
    """
    Build a provider bound to one DeepL API key.

    Args:
        api_key: DeepL API key (keys ending in ``:fx`` are treated as Free tier).
        use_cache: Whether to bind the shared translation cache (default: True).
        timeout: Request timeout in seconds (default: 30).
        max_retries: Maximum retry attempts for retryable errors (default: 3).
        retry_delay: Initial retry delay in seconds; doubled on each retry
            (default: 1.0).

    Raises:
        ValueError: If ``api_key`` is empty.
    """
    if not api_key:
        raise ValueError("DeepL API key is required")

    # Identity and credentials.
    self._provider_name = "deepl"
    self._api_key = api_key
    self._api_type = self._detect_api_type(api_key)

    # Request behaviour.
    self.timeout = timeout
    self.max_retries = max_retries
    self.retry_delay = retry_delay

    # Per-thread storage for translator instances.
    self._local = threading.local()

    # Health/availability results are cached for this many seconds.
    self._health_cache_ttl = 60
    self._health_cache_lock = threading.Lock()
    self._health_cache: Dict[str, Any] = {}

    # Translation cache: stays None unless caching is requested.
    self._use_cache = use_cache
    self._cache = None
    if use_cache:
        self._init_cache()
def _detect_api_type(self, api_key: str) -> str:
    """
    Classify an API key as Free or Pro from its suffix.

    Args:
        api_key: DeepL API key.

    Returns:
        ``"free"`` when the key ends with ``DEEPL_FREE_SUFFIX``, otherwise
        ``"pro"``.
    """
    is_free_key = api_key.endswith(DEEPL_FREE_SUFFIX)
    return "free" if is_free_key else "pro"
def _get_api_url(self) -> str:
"""
Get correct API URL based on key type.
Note: deep_translator handles this internally, but we log it.
Returns:
API URL for Free or Pro tier
"""
if self._api_type == "free":
return "https://api-free.deepl.com/v2/translate"
return "https://api.deepl.com/v2/translate"
def _init_cache(self) -> None:
    """Bind ``self._cache`` to the shared cache object from translation_service."""
    # Imported at call time rather than at module import time.
    # NOTE(review): presumably deferred to avoid a circular import with
    # services.translation_service — confirm.
    from services.translation_service import _translation_cache
    self._cache = _translation_cache
def _normalize_language_code(self, lang_code: str) -> str:
"""
Normalize language code for DeepL.
DeepL uses uppercase language codes (e.g., "EN-US", "FR").
Args:
lang_code: Input language code (e.g., "en", "en-US", "EN-us")
Returns:
Normalized language code for DeepL
"""
if not lang_code or lang_code.lower() == "auto":
return ""
lang_upper = lang_code.upper()
if lang_upper in DEEPL_SUPPORTED_LANGUAGES:
return lang_upper
base_lang = lang_upper.split("-")[0]
if base_lang == "EN":
return "EN-US"
elif base_lang == "PT":
return "PT-BR"
elif base_lang in {
"BG",
"CS",
"DA",
"DE",
"EL",
"ES",
"ET",
"FI",
"FR",
"HU",
"ID",
"IT",
"JA",
"KO",
"LT",
"LV",
"NB",
"NL",
"PL",
"RO",
"RU",
"SK",
"SL",
"SV",
"TR",
"UK",
"ZH",
}:
return base_lang
return lang_upper
def _is_language_supported(self, lang_code: str) -> bool:
"""
Check if a language code is supported by DeepL.
Args:
lang_code: Language code to check
Returns:
True if supported, False otherwise
"""
if not lang_code:
return True
normalized = self._normalize_language_code(lang_code)
return normalized in DEEPL_SUPPORTED_LANGUAGES
def _get_translator(self, source_language: str, target_language: str):
    """
    Return the calling thread's translator for a language pair, creating it on first use.

    Translators are stored in ``threading.local`` storage, keyed by the
    normalized ``source_target`` pair, so each thread builds its own
    instances.

    Args:
        source_language: Source language code; empty/"auto" selects auto-detect.
        target_language: Target language code.

    Returns:
        A deep_translator DeepL translator instance.

    Raises:
        ImportError: If deep_translator does not provide the translator class.
    """
    # The deep_translator package exports this class as ``DeeplTranslator``.
    # The spelling used previously is kept as a fallback so the import works
    # with whichever name the installed package provides.
    try:
        from deep_translator import DeeplTranslator as translator_cls
    except ImportError:
        from deep_translator import DeepLTranslator as translator_cls

    source_lang = self._normalize_language_code(source_language)
    target_lang = self._normalize_language_code(target_language)
    key = f"{source_lang}_{target_lang}"

    translators = getattr(self._local, "translators", None)
    if translators is None:
        translators = {}
        self._local.translators = translators

    if key not in translators:
        # NOTE(review): codes are passed upper-case and the source may carry a
        # region (e.g. "EN-US") — confirm the installed deep_translator
        # version and the DeepL API accept that form for both fields.
        translators[key] = translator_cls(
            api_key=self._api_key,
            source=source_lang if source_lang else "auto",
            target=target_lang,
            # Select the endpoint explicitly from the detected key type
            # instead of relying on the library default.
            use_free_api=self._api_type == "free",
        )
    return translators[key]
def _make_api_request(
    self, text: str, source_language: str, target_language: str
) -> str:
    """
    Translate one text through deep_translator and map failures to error codes.

    The text size (UTF-8 bytes) and the target language are validated first.
    The translation itself runs in a single worker thread so the call can be
    bounded by ``self.timeout`` seconds.

    Args:
        text: Text to translate.
        source_language: Source language code ("auto" for detection).
        target_language: Target language code.

    Returns:
        The translated text.

    Raises:
        DeepLProviderError: For every failure, with one of the DEEPL_* codes.
            The original exception is attached as ``__cause__``.
    """
    encoded_length = len(text.encode("utf-8"))
    if encoded_length > MAX_TEXT_LENGTH:
        raise DeepLProviderError(
            code=DEEPL_TEXT_TOO_LONG,
            message="Texte trop long (max 128KB par requête).",
            details={
                "text_length": len(text),
                # Byte count is what is actually compared with max_length.
                "text_bytes": encoded_length,
                "max_length": MAX_TEXT_LENGTH,
            },
        )
    if not self._is_language_supported(target_language):
        raise DeepLProviderError(
            code=DEEPL_UNSUPPORTED_LANGUAGE,
            message=f"Langue '{target_language}' non supportée par DeepL.",
            details={"unsupported_language": target_language},
        )

    # The executor is managed by hand: leaving a ``with`` block waits for the
    # worker to finish, which would make the timeout below ineffective.
    executor = ThreadPoolExecutor(max_workers=1)
    try:
        translator = self._get_translator(source_language, target_language)
        future = executor.submit(translator.translate, text)
        return future.result(timeout=self.timeout)
    except Exception as e:
        # Timeouts are recognized by type before any message inspection.
        if isinstance(e, (socket.timeout, TimeoutError, FuturesTimeoutError)):
            raise DeepLProviderError(
                code=DEEPL_NETWORK_ERROR,
                message="Service DeepL indisponible. Réessayez.",
                details={"provider": "deepl", "error_type": "timeout"},
            ) from e

        # Exception text may embed the API key (NOTE(review): deep_translator's
        # authorization error appears to do so — confirm). Mask it so that
        # digits in the key cannot match "429"/"456" and so it never reaches
        # error details.
        raw_error = str(e)
        if self._api_key:
            raw_error = raw_error.replace(self._api_key, "***")
        error_str = raw_error.lower()

        if any(marker in error_str for marker in ("quota", "limit", "429", "456")):
            raise DeepLProviderError(
                code=DEEPL_QUOTA_EXCEEDED,
                message="Quota DeepL dépassé. Réessayez demain.",
                details={"provider": "deepl", "api_type": self._api_type},
            ) from e
        if any(
            marker in error_str
            for marker in ("auth", "key", "invalid", "401", "403")
        ):
            raise DeepLProviderError(
                code=DEEPL_INVALID_KEY,
                message="Clé API DeepL invalide. Contactez l'administrateur.",
                details={"provider": "deepl"},
            ) from e
        if "language" in error_str or "not supported" in error_str:
            raise DeepLProviderError(
                code=DEEPL_UNSUPPORTED_LANGUAGE,
                message=f"Langue '{target_language}' non supportée par DeepL.",
                details={"unsupported_language": target_language},
            ) from e
        if "timeout" in error_str:
            raise DeepLProviderError(
                code=DEEPL_NETWORK_ERROR,
                message="Service DeepL indisponible. Réessayez.",
                details={"provider": "deepl", "error_type": "timeout"},
            ) from e
        # Anything unrecognized is reported as a (retryable) network error.
        raise DeepLProviderError(
            code=DEEPL_NETWORK_ERROR,
            message="Service DeepL indisponible. Réessayez.",
            details={"provider": "deepl", "original_error": raw_error[:100]},
        ) from e
    finally:
        # wait=False: do not block on a worker that is still stuck in a request.
        executor.shutdown(wait=False)
def get_name(self) -> str:
"""Return provider name."""
return self._provider_name
def is_available(self) -> bool:
    """
    Check whether DeepL can currently be reached with the configured key.

    A cached result is returned while it is younger than
    ``self._health_cache_ttl`` seconds. Otherwise a real one-character
    translation request ("a", en -> fr) is issued, bounded by a 5 second
    timeout, and its outcome is cached.

    Returns:
        True if the probe translation completed, False on any failure.
        Failures are logged as warnings, never raised.
    """
    now = time.time()
    with self._health_cache_lock:
        cached = self._health_cache.get("is_available")
        if cached is not None and now - cached["timestamp"] < self._health_cache_ttl:
            return cached["value"]

    available = False
    # The executor is managed by hand: leaving a ``with`` block waits for the
    # worker to finish, which would make the 5 second timeout ineffective.
    executor = ThreadPoolExecutor(max_workers=1)
    try:
        translator = self._get_translator("en", "fr")
        future = executor.submit(translator.translate, "a")
        future.result(timeout=5)
        available = True
    except Exception as e:
        # Exception text may embed the API key; mask it before logging.
        error_text = str(e)
        if self._api_key:
            error_text = error_text.replace(self._api_key, "***")
        _log_warning(
            "deepl_availability_check_failed",
            error=error_text[:100],
            error_type=type(e).__name__,
        )
    finally:
        executor.shutdown(wait=False)

    with self._health_cache_lock:
        self._health_cache["is_available"] = {
            "value": available,
            # Stamp with completion time so a slow probe does not shorten the TTL.
            "timestamp": time.time(),
        }
    return available
def translate_text(self, request: TranslationRequest) -> TranslationResponse:
    """
    Translate a single text with DeepL, using the cache and retrying retryable errors.

    Flow:
    1. Empty or whitespace-only text is returned unchanged.
    2. If the normalized source and target codes are equal, the text is
       returned unchanged without an API call.
    3. A cached translation is returned when caching is enabled and an
       entry exists.
    4. Otherwise the API is called, with up to ``max_retries`` retries for
       error codes in ``_RETRYABLE_ERRORS``, sleeping
       ``retry_delay * 2 ** (attempt - 1)`` seconds between attempts.

    Failures are reported in the response rather than raised: the original
    text is returned together with ``error``, ``error_code`` and
    ``error_details``.

    Args:
        request: TranslationRequest with text and language info.

    Returns:
        TranslationResponse with the translated text, or the original text
        plus error fields on failure.
    """
    text = request.text
    target_language = request.target_language
    source_language = request.source_language or "auto"

    if not text or not text.strip():
        return TranslationResponse(
            translated_text=text,
            provider_name=self._provider_name,
            from_cache=False,
        )

    norm_source = self._normalize_language_code(source_language)
    norm_target = self._normalize_language_code(target_language)
    if norm_source and norm_source == norm_target:
        _log_info(
            "deepl_translation_skip",
            source_target_lang=target_language,
            text_length=len(text),
        )
        return TranslationResponse(
            translated_text=text,
            provider_name=self._provider_name,
            from_cache=False,
            source_language=source_language,
        )

    # Compare with None: a truthiness test would treat an empty cache object
    # that defines __len__ as "no cache" and never fill it.
    cache_enabled = self._use_cache and self._cache is not None

    if cache_enabled:
        cached = self._cache.get(
            text, target_language, source_language, self._provider_name
        )
        if cached is not None:
            return TranslationResponse(
                translated_text=cached,
                provider_name=self._provider_name,
                from_cache=True,
            )

    last_error: Optional[DeepLProviderError] = None
    retries = 0
    while retries <= self.max_retries:
        # Only the API call is guarded, so a failure while storing or logging
        # a successful result can never trigger another (billed) API call.
        try:
            result = self._make_api_request(text, source_language, target_language)
        except DeepLProviderError as e:
            last_error = e
            if e.code not in _RETRYABLE_ERRORS:
                break
            retries += 1
            if retries <= self.max_retries:
                delay = self.retry_delay * (2 ** (retries - 1))
                _log_info(
                    "deepl_translation_retry",
                    attempt=retries,
                    delay_s=round(delay, 2),
                    error_code=e.code,
                    text_length=len(text),
                    source_lang=source_language,
                    target_lang=target_language,
                )
                time.sleep(delay)
            continue
        except Exception as e:
            # Unexpected failure outside the provider's own error mapping.
            last_error = DeepLProviderError(
                code=DEEPL_NETWORK_ERROR,
                message="Service DeepL indisponible. Réessayez.",
                details={"original_error": str(e)[:100]},
            )
            retries += 1
            if retries <= self.max_retries:
                time.sleep(self.retry_delay * (2 ** (retries - 1)))
            continue

        if cache_enabled:
            try:
                self._cache.set(
                    text,
                    target_language,
                    source_language,
                    self._provider_name,
                    result,
                )
            except Exception as cache_error:
                # Caching is best-effort; the translation is still returned.
                _log_warning(
                    "deepl_cache_write_failed",
                    error_type=type(cache_error).__name__,
                )
        _log_info(
            "deepl_translation_success",
            chars=len(text),
            source_lang=source_language,
            target_lang=target_language,
            api_type=self._api_type,
            retries=retries,
        )
        return TranslationResponse(
            translated_text=result,
            provider_name=self._provider_name,
            from_cache=False,
        )

    if last_error:
        _log_error(
            "deepl_translation_failed",
            error_code=last_error.code,
            text_length=len(text),
            source_lang=source_language,
            target_lang=target_language,
            retries=retries,
        )
        return TranslationResponse(
            translated_text=text,
            provider_name=self._provider_name,
            from_cache=False,
            error=last_error.message,
            error_code=last_error.code,
            error_details=last_error.details,
        )
    # Reached only when no attempt was made at all (negative max_retries).
    return TranslationResponse(
        translated_text=text,
        provider_name=self._provider_name,
        from_cache=False,
        error="Unknown error",
        error_code=DEEPL_NETWORK_ERROR,
    )
def translate_batch(
self, requests: List[TranslationRequest]
) -> List[TranslationResponse]:
"""
Translate multiple texts with optimized batch processing.
Args:
requests: List of TranslationRequest objects
Returns:
List of TranslationResponse objects
"""
if not requests:
return []
return [self.translate_text(req) for req in requests]
def health_check(self) -> ProviderHealthStatus:
    """
    Build a health report for the provider, reusing a recent one when possible.

    A cached report younger than ``self._health_cache_ttl`` seconds is
    returned directly. Otherwise ``is_available`` is consulted, the elapsed
    time is recorded as latency, and the new report is cached.

    Returns:
        ProviderHealthStatus with name, availability, latency in
        milliseconds, an error text when unavailable, and the UTC ISO
        timestamp of the check.
    """
    requested_at = time.time()
    with self._health_cache_lock:
        entry = self._health_cache.get("health_check")
        if (
            entry is not None
            and requested_at - entry["timestamp"] < self._health_cache_ttl
        ):
            return entry["value"]

    started_at = time.time()
    last_check_iso = datetime.now(timezone.utc).isoformat()

    def build_status(is_up, problem):
        # Latency is measured up to the moment the report is assembled.
        elapsed_ms = (time.time() - started_at) * 1000
        return ProviderHealthStatus(
            name=self._provider_name,
            available=is_up,
            latency_ms=round(elapsed_ms, 2),
            error=problem,
            last_check=last_check_iso,
        )

    try:
        is_up = self.is_available()
        status = build_status(is_up, None if is_up else "Provider not available")
    except Exception as exc:
        status = build_status(False, str(exc)[:100])

    with self._health_cache_lock:
        self._health_cache["health_check"] = {
            "value": status,
            "timestamp": requested_at,
        }
    return status
def register_deepl_provider():
    """
    Register the shared DeepL provider in the global provider registry.

    The provider comes from ``get_deepl_provider``; when that returns a
    falsy value nothing is registered.

    Returns:
        The provider that was registered, or the falsy value returned by
        ``get_deepl_provider``.
    """
    from .registry import registry

    provider = get_deepl_provider()
    if not provider:
        return provider
    registry.register("deepl", provider)
    return provider
# Module-wide provider instance and the lock guarding its creation.
_provider_instance = None
_provider_instance_lock = threading.Lock()


def get_deepl_provider() -> Optional[DeepLTranslationProvider]:
    """
    Return the shared DeepL provider, creating it on first use.

    Settings are read from ``ProvidersConfig``. Creation happens under a
    lock, with the instance re-checked after the lock is acquired. When no
    API key is configured, None is returned and nothing is stored, so a
    later call checks the configuration again.

    Returns:
        The shared DeepLTranslationProvider, or None without an API key.
    """
    global _provider_instance
    if _provider_instance is not None:
        return _provider_instance

    with _provider_instance_lock:
        # Another thread may have created the instance while we waited.
        if _provider_instance is not None:
            return _provider_instance

        from .config import ProvidersConfig

        api_key = ProvidersConfig.DEEPL_API_KEY
        if not api_key:
            return None
        _provider_instance = DeepLTranslationProvider(
            api_key=api_key,
            use_cache=True,
            timeout=getattr(ProvidersConfig, "DEEPL_TIMEOUT", 30),
            max_retries=getattr(ProvidersConfig, "DEEPL_MAX_RETRIES", 3),
            retry_delay=getattr(ProvidersConfig, "DEEPL_RETRY_DELAY", 1.0),
        )
        return _provider_instance
class LegacyDeepLAdapter:
    """
    Adapter exposing DeepLTranslationProvider through the legacy interface.

    Offers ``translate(text, target_language, source_language) -> str`` and
    ``translate_batch(texts, target_language, source_language) -> List[str]``.
    Failures are raised as ``TranslationProviderError`` (from
    ``utils.exceptions``) instead of being returned in a response object.
    """

    def __init__(self):
        self.provider_name = "deepl"
        # May be None when no DeepL API key is configured.
        self._provider = get_deepl_provider()

    def _require_provider(self):
        """Return the wrapped provider, or raise DEEPL_NOT_CONFIGURED when there is none."""
        if self._provider:
            return self._provider
        from utils.exceptions import TranslationProviderError

        raise TranslationProviderError(
            "DEEPL_NOT_CONFIGURED",
            "DeepL provider not configured. Set DEEPL_API_KEY.",
            None,
        )

    @staticmethod
    def _unwrap(response) -> str:
        """Return the translated text of a response, or raise when it carries an error."""
        if not response.error:
            return response.translated_text
        from utils.exceptions import TranslationProviderError

        raise TranslationProviderError(
            response.error_code or "UNKNOWN",
            response.error or "Translation failed",
            response.error_details,
        )

    def translate(
        self, text: str, target_language: str, source_language: str = "auto"
    ) -> str:
        """
        Translate one text and return the translated string.

        Raises:
            TranslationProviderError: If the provider is not configured or
                the translation response carries an error.
        """
        provider = self._require_provider()
        response = provider.translate_text(
            TranslationRequest(
                text=text,
                target_language=target_language,
                source_language=source_language,
            )
        )
        return self._unwrap(response)

    def translate_batch(
        self,
        texts: List[str],
        target_language: str,
        source_language: str = "auto",
        batch_size: int = 50,
    ) -> List[str]:
        """
        Translate several texts and return the translated strings in order.

        ``batch_size`` is accepted for interface compatibility and is not used.
        All texts are sent to the provider first; the first response that
        carries an error is then raised.

        Raises:
            TranslationProviderError: If the provider is not configured or
                any translation response carries an error.
        """
        provider = self._require_provider()
        batch = [
            TranslationRequest(
                text=item,
                target_language=target_language,
                source_language=source_language,
            )
            for item in texts
        ]
        return [self._unwrap(response) for response in provider.translate_batch(batch)]
def get_legacy_deepl_adapter() -> LegacyDeepLAdapter:
    """
    Create an adapter that lets legacy callers use the DeepL provider.

    Each call constructs a new LegacyDeepLAdapter.
    """
    adapter = LegacyDeepLAdapter()
    return adapter