Major changes across backend, frontend, infrastructure: - Provider system with model selection (Google, DeepL, OpenAI, Ollama, Google Cloud) - Admin panel: user management, pricing, settings - Glossary system with CSV import/export - Subscription and tier quota management - Security hardening (rate limiting, API key auth, path traversal fixes) - Docker compose for dev, prod, and IONOS deployment - Alembic migrations for new tables - Frontend: dashboard, pricing page, landing page, i18n (en/fr) - Test suite and verification scripts Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
600 lines
20 KiB
Python
600 lines
20 KiB
Python
"""
|
|
Ollama Provider - Local LLM translation provider.
|
|
|
|
Extends TranslationProvider base class with robust error handling,
|
|
retry logic, and health monitoring for local Ollama instances.
|
|
|
|
Features:
|
|
- Local LLM translation via Ollama REST API
|
|
- Custom system prompt support
|
|
- Specific error codes for all Ollama API errors
|
|
- Retry logic with exponential backoff for transient errors
|
|
- Timeout configuration (longer for LLM)
|
|
- Health check with caching
|
|
- Structlog-compatible logging (no document content in logs)
|
|
"""
|
|
|
|
import socket
|
|
import threading
|
|
import time
|
|
from datetime import datetime, timezone
|
|
from typing import Any, Dict, List, Optional
|
|
from urllib.parse import urljoin
|
|
|
|
from core.logging import get_logger
|
|
|
|
# Switch read by the _log_* helpers below: when true they forward structured
# key/value fields to the logger as keyword arguments; when false they
# flatten the fields into a single "event k=v ..." message string.
_HAS_STRUCTLOG = True

# Module-level logger obtained from the project's logging helper.
logger = get_logger(__name__)
|
|
|
|
|
|
def _log_info(event: str, **kwargs):
    """Emit an info-level event, structured when possible.

    With ``_HAS_STRUCTLOG`` set, ``kwargs`` are handed to the logger as
    keyword fields. Otherwise they are rendered as space-separated
    ``key=value`` pairs appended to the event name.
    """
    if not _HAS_STRUCTLOG:
        rendered_fields = " ".join(
            f"{key}={value}" for key, value in kwargs.items()
        )
        logger.info(f"{event} " + rendered_fields)
        return
    logger.info(event, **kwargs)
|
|
|
|
|
|
def _log_warning(event: str, **kwargs):
    """Emit a warning-level event, structured when possible.

    With ``_HAS_STRUCTLOG`` set, ``kwargs`` are handed to the logger as
    keyword fields. Otherwise they are rendered as space-separated
    ``key=value`` pairs appended to the event name.
    """
    if not _HAS_STRUCTLOG:
        rendered_fields = " ".join(
            f"{key}={value}" for key, value in kwargs.items()
        )
        logger.warning(f"{event} " + rendered_fields)
        return
    logger.warning(event, **kwargs)
|
|
|
|
|
|
def _log_error(event: str, **kwargs):
    """Emit an error-level event, structured when possible.

    With ``_HAS_STRUCTLOG`` set, ``kwargs`` are handed to the logger as
    keyword fields. Otherwise they are rendered as space-separated
    ``key=value`` pairs appended to the event name.
    """
    if not _HAS_STRUCTLOG:
        rendered_fields = " ".join(
            f"{key}={value}" for key, value in kwargs.items()
        )
        logger.error(f"{event} " + rendered_fields)
        return
    logger.error(event, **kwargs)
|
|
|
|
|
|
import requests
|
|
from requests.exceptions import Timeout, ConnectionError as RequestsConnectionError
|
|
|
|
from .base import TranslationProvider
|
|
from .schemas import (
|
|
ProviderHealthStatus,
|
|
TranslationRequest,
|
|
TranslationResponse,
|
|
)
|
|
|
|
OLLAMA_UNAVAILABLE = "OLLAMA_UNAVAILABLE"
|
|
OLLAMA_MODEL_NOT_FOUND = "OLLAMA_MODEL_NOT_FOUND"
|
|
OLLAMA_TIMEOUT = "OLLAMA_TIMEOUT"
|
|
OLLAMA_GENERATION_ERROR = "OLLAMA_GENERATION_ERROR"
|
|
OLLAMA_CONTEXT_TOO_LONG = "OLLAMA_CONTEXT_TOO_LONG"
|
|
|
|
_RETRYABLE_ERRORS = {OLLAMA_UNAVAILABLE, OLLAMA_TIMEOUT}
|
|
|
|
|
|
class OllamaProviderError(Exception):
    """Exception raised for Ollama API errors.

    Attributes:
        code: Machine-readable error code (one of the ``OLLAMA_*`` strings).
        message: Human-readable message; also used as the exception text.
        details: Extra context for the error; always a dict, empty if none
            was supplied.
    """

    def __init__(
        self, code: str, message: str, details: Optional[Dict[str, Any]] = None
    ):
        self.code = code
        self.message = message
        # Copy so that later mutation of the caller's dict cannot change
        # what this exception reports.
        self.details = dict(details) if details else {}
        super().__init__(message)

    def to_dict(self) -> Dict[str, Any]:
        """Convert the error to a plain dictionary.

        Returns:
            ``{"error": code, "message": message}``, plus a ``"details"``
            entry (a copy of ``self.details``) when details are non-empty.
        """
        result: Dict[str, Any] = {
            "error": self.code,
            "message": self.message,
        }
        if self.details:
            # Hand out a copy so consumers cannot mutate the exception state.
            result["details"] = dict(self.details)
        return result
|
|
|
|
|
|
# Default system prompt. The {source_lang} and {target_lang} placeholders are
# filled with str.format() by _build_system_prompt.
DEFAULT_TRANSLATION_PROMPT = """You are a professional translator. Translate the following text from {source_lang} to {target_lang}.

Rules:
- Translate ONLY the text, do not add explanations or notes
- Preserve the original formatting, line breaks, and structure
- Maintain the original tone and style
- For technical terms, use the standard translation in the target language
- If the text contains proper nouns or brand names, keep them unchanged unless there's a well-known translation"""
|
|
|
|
|
|
def _build_system_prompt(
    source_lang: str, target_lang: str, custom_prompt: Optional[str] = None
) -> str:
    """Return the system prompt to send with a translation request.

    A truthy ``custom_prompt`` is returned verbatim (the language arguments
    are not substituted into it). Otherwise the default prompt is formatted
    with ``source_lang`` and ``target_lang``.
    """
    if not custom_prompt:
        return DEFAULT_TRANSLATION_PROMPT.format(
            source_lang=source_lang, target_lang=target_lang
        )
    return custom_prompt
|
|
|
|
|
|
def _get_language_name(code: str) -> str:
|
|
"""Convert language code to full name for better LLM understanding."""
|
|
language_names = {
|
|
"en": "English",
|
|
"fr": "French",
|
|
"es": "Spanish",
|
|
"de": "German",
|
|
"it": "Italian",
|
|
"pt": "Portuguese",
|
|
"nl": "Dutch",
|
|
"ru": "Russian",
|
|
"zh": "Chinese",
|
|
"ja": "Japanese",
|
|
"ko": "Korean",
|
|
"ar": "Arabic",
|
|
"hi": "Hindi",
|
|
"tr": "Turkish",
|
|
"pl": "Polish",
|
|
"vi": "Vietnamese",
|
|
"th": "Thai",
|
|
"id": "Indonesian",
|
|
"ms": "Malay",
|
|
"uk": "Ukrainian",
|
|
"cs": "Czech",
|
|
"sv": "Swedish",
|
|
"da": "Danish",
|
|
"fi": "Finnish",
|
|
"no": "Norwegian",
|
|
"el": "Greek",
|
|
"he": "Hebrew",
|
|
"ro": "Romanian",
|
|
"hu": "Hungarian",
|
|
"bg": "Bulgarian",
|
|
"sk": "Slovak",
|
|
"hr": "Croatian",
|
|
"sl": "Slovenian",
|
|
"lt": "Lithuanian",
|
|
"lv": "Latvian",
|
|
"et": "Estonian",
|
|
}
|
|
base_code = code.split("-")[0].lower()
|
|
return language_names.get(base_code, code)
|
|
|
|
|
|
class OllamaTranslationProvider(TranslationProvider):
    """
    Translation provider that talks to an Ollama server over its REST API.

    Behaviour implemented by this class:
    - Translations are requested with a non-streaming POST to ``/api/chat``.
    - A caller-supplied system prompt (``request.metadata["custom_prompt"]``)
      replaces the default translation prompt.
    - Failures are reported with ``OllamaProviderError`` codes; codes listed
      in ``_RETRYABLE_ERRORS`` are retried with exponential backoff.
    - Availability and health results are cached for ``_health_cache_ttl``
      seconds behind ``_health_cache_lock``.
    - The list of pulled models is cached for ``_models_cache_ttl`` seconds.
      NOTE(review): that cache is not protected by a lock.
    """

    # Requests whose text is longer than this many characters are rejected
    # with OLLAMA_CONTEXT_TOO_LONG before any HTTP call is made.
    _MAX_TEXT_CHARS = 128000

    def __init__(
        self,
        base_url: str = "http://localhost:11434",
        model: Optional[str] = None,
        timeout: int = 120,
        max_retries: int = 2,
        retry_delay: float = 2.0,
    ):
        """
        Initialize Ollama provider.

        Args:
            base_url: Ollama API base URL (default: http://localhost:11434)
            model: Model name (e.g. llama3, mistral). If None, uses
                OLLAMA_MODEL from config.
            timeout: Request timeout in seconds (default: 120 for LLM)
            max_retries: Maximum retry attempts for transient errors
                (default: 2)
            retry_delay: Initial retry delay in seconds (default: 2.0)
        """
        if model is None:
            # Imported lazily, only when the caller did not pick a model.
            from .config import ProvidersConfig

            model = ProvidersConfig.OLLAMA_MODEL
        self._base_url = base_url.rstrip("/")
        self._model = model
        self._provider_name = "ollama"
        self.timeout = timeout
        self.max_retries = max_retries
        self.retry_delay = retry_delay
        # Keyed by check name ("is_available", "health_check"); each entry is
        # {"value": ..., "timestamp": ...}.
        self._health_cache: Dict[str, Any] = {}
        self._health_cache_ttl = 60
        self._health_cache_lock = threading.Lock()
        self._available_models: Optional[List[str]] = None
        self._models_cache_time: float = 0
        self._models_cache_ttl = 300

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _name_matches(model: str, available: List[str]) -> bool:
        """Return True when ``model`` occurs as a substring of any name.

        NOTE(review): substring matching is deliberately kept from the
        original behaviour; it is loose (``"llama"`` also matches
        ``"codellama:7b"``).
        """
        return any(model in name for name in available)

    def _model_not_found_error(self, details: Dict[str, Any]) -> OllamaProviderError:
        """Build the OLLAMA_MODEL_NOT_FOUND error for the configured model."""
        return OllamaProviderError(
            code=OLLAMA_MODEL_NOT_FOUND,
            message=f"Modèle '{self._model}' non trouvé. Exécutez: ollama pull {self._model}",
            details=details,
        )

    def _unavailable_error(self) -> OllamaProviderError:
        """Build the OLLAMA_UNAVAILABLE error for this provider's base URL."""
        return OllamaProviderError(
            code=OLLAMA_UNAVAILABLE,
            message="Service Ollama indisponible. Vérifiez que Ollama est en cours d'exécution.",
            details={"provider": "ollama", "base_url": self._base_url},
        )

    def _backoff_delay(self, attempt: int) -> float:
        """Return the delay before retry number ``attempt`` (1-based)."""
        return self.retry_delay * (2 ** (attempt - 1))

    def _fetch_available_models(self) -> List[str]:
        """Fetch the list of available (pulled) model names from Ollama.

        A successful result is cached for ``_models_cache_ttl`` seconds.

        Returns:
            The model names, or an empty list when the request fails or
            returns a non-200 status (failures are logged, never raised).
        """
        current_time = time.time()

        if (
            self._available_models is not None
            and current_time - self._models_cache_time < self._models_cache_ttl
        ):
            return self._available_models

        try:
            response = requests.get(f"{self._base_url}/api/tags", timeout=10)
            if response.status_code == 200:
                data = response.json()
                models = [m.get("name", "") for m in data.get("models", [])]
                self._available_models = models
                self._models_cache_time = current_time
                return models
            # Previously ignored silently; log so the empty result is explainable.
            _log_warning(
                "ollama_models_fetch_failed", status_code=response.status_code
            )
        except Exception as e:
            _log_warning("ollama_models_fetch_failed", error=str(e)[:100])

        return []

    def _check_model_available(self, model: str) -> bool:
        """Check if a specific model is available (pulled)."""
        return self._name_matches(model, self._fetch_available_models())

    def _make_api_request(self, text: str, system_prompt: str) -> str:
        """
        Send one chat request to Ollama and return the stripped reply text.

        Empty or whitespace-only ``text`` is returned unchanged without any
        HTTP call.

        Raises:
            OllamaProviderError: For any API errors with specific codes
        """
        if not text or not text.strip():
            return text

        if len(text) > self._MAX_TEXT_CHARS:
            raise OllamaProviderError(
                code=OLLAMA_CONTEXT_TOO_LONG,
                message="Texte trop long pour le modèle (max ~128K caractères).",
                details={"text_length": len(text), "max_chars": self._MAX_TEXT_CHARS},
            )

        # Only reject up front when a model list was actually obtained. An
        # empty list also means "Ollama could not be reached"; in that case
        # fall through so the POST below reports OLLAMA_UNAVAILABLE or
        # OLLAMA_TIMEOUT (both retryable) instead of a misleading
        # "model not found". A truly missing model still yields a 404 below.
        known_models = self._fetch_available_models()
        if known_models and not self._name_matches(self._model, known_models):
            raise self._model_not_found_error(
                {"model": self._model, "provider": "ollama"}
            )

        payload = {
            "model": self._model,
            "messages": [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": text},
            ],
            "stream": False,
            "options": {"temperature": 0.3},
        }

        try:
            response = requests.post(
                f"{self._base_url}/api/chat",
                json=payload,
                timeout=self.timeout,
            )

            if response.status_code == 404:
                raise self._model_not_found_error(
                    {"model": self._model, "status_code": 404}
                )

            if response.status_code != 200:
                error_text = response.text[:200] if response.text else "Unknown error"
                raise OllamaProviderError(
                    code=OLLAMA_GENERATION_ERROR,
                    message=f"Erreur de génération Ollama: {error_text}",
                    details={"status_code": response.status_code, "model": self._model},
                )

            data = response.json()
            message = data.get("message", {})
            content = message.get("content", "")

            if not content:
                raise OllamaProviderError(
                    code=OLLAMA_GENERATION_ERROR,
                    message="Erreur de génération Ollama: réponse vide",
                    details={"model": self._model, "response": str(data)[:200]},
                )

            return content.strip()

        except OllamaProviderError:
            # Already classified above; pass through untouched.
            raise
        except Timeout as exc:
            raise OllamaProviderError(
                code=OLLAMA_TIMEOUT,
                message="Délai d'attente Ollama dépassé. Réessayez avec un texte plus court.",
                details={"provider": "ollama", "timeout_seconds": self.timeout},
            ) from exc
        except RequestsConnectionError as exc:
            raise self._unavailable_error() from exc
        except Exception as exc:
            # Last resort: classify by message text, as connection problems
            # may surface as exception types other than the two above.
            error_str = str(exc).lower()
            if "connection" in error_str or "refused" in error_str:
                raise self._unavailable_error() from exc
            raise OllamaProviderError(
                code=OLLAMA_GENERATION_ERROR,
                message=f"Erreur de génération Ollama: {str(exc)[:100]}",
                details={"provider": "ollama", "original_error": str(exc)[:100]},
            ) from exc

    # ------------------------------------------------------------------
    # Provider interface
    # ------------------------------------------------------------------

    def get_name(self) -> str:
        """Return provider name."""
        return self._provider_name

    def is_available(self) -> bool:
        """
        Check if Ollama is available.

        Returns True when ``GET /api/tags`` answers with status 200. The
        result (positive or negative) is cached for ``_health_cache_ttl``
        seconds.
        """
        current_time = time.time()

        with self._health_cache_lock:
            cached = self._health_cache.get("is_available")
            if (
                cached is not None
                and current_time - cached["timestamp"] < self._health_cache_ttl
            ):
                return cached["value"]

        # The HTTP call is made outside the lock so a slow server does not
        # block other threads reading the cache.
        try:
            response = requests.get(f"{self._base_url}/api/tags", timeout=5)
            available = response.status_code == 200
        except Exception as e:
            _log_warning("ollama_availability_check_failed", error=str(e)[:100])
            available = False

        with self._health_cache_lock:
            self._health_cache["is_available"] = {
                "value": available,
                "timestamp": current_time,
            }

        return available

    def translate_text(self, request: TranslationRequest) -> TranslationResponse:
        """
        Translate a single text string using Ollama LLM.

        Supports custom system prompt via request.metadata["custom_prompt"].
        Errors are never raised to the caller: on failure the response
        carries the original text together with ``error``, ``error_code``
        and ``error_details``.

        Args:
            request: TranslationRequest with text and language info

        Returns:
            TranslationResponse with translated text
        """
        text = request.text
        target_language = request.target_language
        source_language = request.source_language or "auto"

        if not text or not text.strip():
            return TranslationResponse(
                translated_text=text,
                provider_name=self._provider_name,
                from_cache=False,
            )

        source_lang_name = _get_language_name(source_language)
        target_lang_name = _get_language_name(target_language)

        custom_prompt = None
        if request.metadata:
            custom_prompt = request.metadata.get("custom_prompt")

        system_prompt = _build_system_prompt(
            source_lang_name, target_lang_name, custom_prompt
        )

        last_error: Optional[OllamaProviderError] = None
        retries = 0

        while retries <= self.max_retries:
            try:
                start_time = time.time()
                result = self._make_api_request(text, system_prompt)
                latency = time.time() - start_time

                # Only sizes and codes are logged, never document content.
                _log_info(
                    "ollama_translation_success",
                    chars=len(text),
                    source_lang=source_language,
                    target_lang=target_language,
                    model=self._model,
                    latency_ms=round(latency * 1000, 2),
                    retries=retries,
                )

                return TranslationResponse(
                    translated_text=result,
                    provider_name=self._provider_name,
                    from_cache=False,
                    source_language=source_language,
                )

            except OllamaProviderError as e:
                last_error = e

                # Permanent failures (bad model, oversized text, ...) are
                # reported immediately.
                if e.code not in _RETRYABLE_ERRORS:
                    break

                retries += 1
                if retries <= self.max_retries:
                    delay = self._backoff_delay(retries)
                    _log_info(
                        "ollama_translation_retry",
                        attempt=retries,
                        delay_s=round(delay, 2),
                        error_code=e.code,
                        text_length=len(text),
                        source_lang=source_language,
                        target_lang=target_language,
                    )
                    time.sleep(delay)

            except Exception as e:
                last_error = OllamaProviderError(
                    code=OLLAMA_GENERATION_ERROR,
                    message=f"Erreur de génération Ollama: {str(e)[:100]}",
                    details={"original_error": str(e)[:100]},
                )
                retries += 1
                if retries <= self.max_retries:
                    time.sleep(self._backoff_delay(retries))

        if last_error:
            _log_error(
                "ollama_translation_failed",
                error_code=last_error.code,
                text_length=len(text),
                source_lang=source_language,
                target_lang=target_language,
                retries=retries,
            )
            return TranslationResponse(
                translated_text=text,
                provider_name=self._provider_name,
                from_cache=False,
                error=last_error.message,
                error_code=last_error.code,
                error_details=last_error.details,
            )

        # Reached only when the loop body never ran (negative max_retries).
        return TranslationResponse(
            translated_text=text,
            provider_name=self._provider_name,
            from_cache=False,
            error="Unknown error",
            error_code=OLLAMA_GENERATION_ERROR,
        )

    def translate_batch(
        self, requests: List[TranslationRequest]
    ) -> List[TranslationResponse]:
        """
        Translate multiple texts, one ``translate_text`` call per request.

        NOTE: the parameter name shadows the ``requests`` HTTP module inside
        this method only; it is kept because it is part of the interface.

        Args:
            requests: List of TranslationRequest objects

        Returns:
            List of TranslationResponse objects, in input order
        """
        if not requests:
            return []

        return [self.translate_text(req) for req in requests]

    def health_check(self) -> ProviderHealthStatus:
        """
        Return health status details for the provider.

        The provider is reported available when the model list is non-empty
        and the configured model matches one of its entries. The result is
        cached for ``_health_cache_ttl`` seconds.

        Returns:
            ProviderHealthStatus with availability and latency information
        """
        current_time = time.time()

        with self._health_cache_lock:
            cached = self._health_cache.get("health_check")
            if (
                cached is not None
                and current_time - cached["timestamp"] < self._health_cache_ttl
            ):
                return cached["value"]

        start_time = time.time()
        last_check_iso = datetime.now(timezone.utc).isoformat()

        try:
            models = self._fetch_available_models()
            # Reuse the list just fetched: asking again would repeat the
            # HTTP call (and its timeout) whenever the first attempt failed.
            model_available = self._name_matches(self._model, models)
            available = len(models) > 0 and model_available
            latency_ms = (time.time() - start_time) * 1000

            error_msg = None
            if not available and len(models) == 0:
                error_msg = "Service Ollama indisponible. Vérifiez que Ollama est en cours d'exécution."
            elif not model_available:
                error_msg = f"Modèle '{self._model}' non trouvé. Exécutez: ollama pull {self._model}"

            status = ProviderHealthStatus(
                name=self._provider_name,
                available=available,
                latency_ms=round(latency_ms, 2),
                error=error_msg,
                last_check=last_check_iso,
                model=self._model,
                model_available=model_available,
            )
        except Exception as e:
            latency_ms = (time.time() - start_time) * 1000
            status = ProviderHealthStatus(
                name=self._provider_name,
                available=False,
                latency_ms=round(latency_ms, 2),
                error=str(e)[:100],
                last_check=last_check_iso,
                model=self._model,
                model_available=None,
            )

        with self._health_cache_lock:
            self._health_cache["health_check"] = {
                "value": status,
                "timestamp": current_time,
            }

        return status
|
|
|
|
|
|
def register_ollama_provider():
    """
    Register the shared Ollama provider under the name "ollama".

    Obtains the provider from ``get_ollama_provider()``, registers it in the
    global registry, and returns that same provider object.
    """
    from .registry import registry

    instance = get_ollama_provider()
    registry.register("ollama", instance)
    return instance
|
|
|
|
|
|
# Guards creation of the shared provider in get_ollama_provider().
_provider_lock = threading.Lock()

# Shared provider instance; stays None until get_ollama_provider() builds it.
_provider_instance: Optional[OllamaTranslationProvider] = None
|
|
|
|
|
|
def get_ollama_provider() -> OllamaTranslationProvider:
    """Return the shared Ollama provider, creating it on first use.

    The instance is built from ``ProvidersConfig`` (base URL, model, timeout,
    retry count and retry delay) while holding ``_provider_lock``; later
    calls return the already-created object.
    """
    global _provider_instance

    # Fast path: skip the lock once the instance exists.
    if _provider_instance is not None:
        return _provider_instance

    with _provider_lock:
        # Re-check under the lock: another thread may have created the
        # instance while this one was waiting.
        if _provider_instance is None:
            from .config import ProvidersConfig

            settings = {
                "base_url": ProvidersConfig.OLLAMA_BASE_URL,
                "model": ProvidersConfig.OLLAMA_MODEL,
                "timeout": ProvidersConfig.OLLAMA_TIMEOUT,
                "max_retries": ProvidersConfig.OLLAMA_MAX_RETRIES,
                "retry_delay": ProvidersConfig.OLLAMA_RETRY_DELAY,
            }
            _provider_instance = OllamaTranslationProvider(**settings)

    return _provider_instance
|