Files
office_translator/services/providers/ollama_provider.py
Sepehr Ramezani 26bd096a06 feat: production deployment - full update with providers, admin, glossaries, pricing, tests
Major changes across backend, frontend, infrastructure:
- Provider system with model selection (Google, DeepL, OpenAI, Ollama, Google Cloud)
- Admin panel: user management, pricing, settings
- Glossary system with CSV import/export
- Subscription and tier quota management
- Security hardening (rate limiting, API key auth, path traversal fixes)
- Docker compose for dev, prod, and IONOS deployment
- Alembic migrations for new tables
- Frontend: dashboard, pricing page, landing page, i18n (en/fr)
- Test suite and verification scripts

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-04-25 15:01:47 +02:00

600 lines
20 KiB
Python

"""
Ollama Provider - Local LLM translation provider.
Extends TranslationProvider base class with robust error handling,
retry logic, and health monitoring for local Ollama instances.
Features:
- Local LLM translation via Ollama REST API
- Custom system prompt support
- Specific error codes for all Ollama API errors
- Retry logic with exponential backoff for transient errors
- Timeout configuration (longer for LLM)
- Health check with caching
- Structlog-compatible logging (no document content in logs)
"""
import socket
import threading
import time
from datetime import datetime, timezone
from typing import Any, Dict, List, Optional
from urllib.parse import urljoin
from core.logging import get_logger
# Module-level logger obtained from the project's logging helper.
logger = get_logger(__name__)
# Hard-coded to True, so the _log_* helpers below always take the
# keyword-argument branch; their string-formatting fallback is never used.
# NOTE(review): presumably a leftover from an optional-structlog import
# guard - confirm before removing the fallback.
_HAS_STRUCTLOG = True
def _log_info(event: str, **kwargs):
    """Emit an info-level event, with key/value context.

    When ``_HAS_STRUCTLOG`` is truthy the keyword arguments are handed to the
    logger as-is; otherwise they are flattened into ``key=value`` pairs and
    appended to the event name in a single message string.
    """
    if not _HAS_STRUCTLOG:
        rendered = " ".join(f"{key}={value}" for key, value in kwargs.items())
        logger.info(f"{event} " + rendered)
        return
    logger.info(event, **kwargs)
def _log_warning(event: str, **kwargs):
    """Emit a warning-level event, with key/value context.

    When ``_HAS_STRUCTLOG`` is truthy the keyword arguments are handed to the
    logger as-is; otherwise they are flattened into ``key=value`` pairs and
    appended to the event name in a single message string.
    """
    if not _HAS_STRUCTLOG:
        rendered = " ".join(f"{key}={value}" for key, value in kwargs.items())
        logger.warning(f"{event} " + rendered)
        return
    logger.warning(event, **kwargs)
def _log_error(event: str, **kwargs):
    """Emit an error-level event, with key/value context.

    When ``_HAS_STRUCTLOG`` is truthy the keyword arguments are handed to the
    logger as-is; otherwise they are flattened into ``key=value`` pairs and
    appended to the event name in a single message string.
    """
    if not _HAS_STRUCTLOG:
        rendered = " ".join(f"{key}={value}" for key, value in kwargs.items())
        logger.error(f"{event} " + rendered)
        return
    logger.error(event, **kwargs)
import requests
from requests.exceptions import Timeout, ConnectionError as RequestsConnectionError
from .base import TranslationProvider
from .schemas import (
ProviderHealthStatus,
TranslationRequest,
TranslationResponse,
)
# Error codes used by this provider. They are carried in
# OllamaProviderError.code and surfaced as TranslationResponse.error_code.
OLLAMA_UNAVAILABLE = "OLLAMA_UNAVAILABLE"
OLLAMA_MODEL_NOT_FOUND = "OLLAMA_MODEL_NOT_FOUND"
OLLAMA_TIMEOUT = "OLLAMA_TIMEOUT"
OLLAMA_GENERATION_ERROR = "OLLAMA_GENERATION_ERROR"
OLLAMA_CONTEXT_TOO_LONG = "OLLAMA_CONTEXT_TOO_LONG"
# Codes for which translate_text retries with backoff; an OllamaProviderError
# carrying any other code ends the retry loop immediately.
_RETRYABLE_ERRORS = {OLLAMA_UNAVAILABLE, OLLAMA_TIMEOUT}
class OllamaProviderError(Exception):
    """Error raised by the Ollama provider, tagged with a machine-readable code.

    Attributes:
        code: One of the ``OLLAMA_*`` error-code strings.
        message: Human-readable description (also used as the exception text).
        details: Optional extra context; always a dict (empty when omitted).
    """

    def __init__(
        self, code: str, message: str, details: Optional[Dict[str, Any]] = None
    ):
        super().__init__(message)
        self.code = code
        self.message = message
        # Falsy input (None or an empty dict) is normalised to a fresh dict.
        self.details = details if details else {}

    def to_dict(self) -> Dict[str, Any]:
        """Return the error as a plain dict.

        The ``details`` key is only present when there are details to report.
        """
        payload: Dict[str, Any] = {"error": self.code, "message": self.message}
        if not self.details:
            return payload
        payload["details"] = self.details
        return payload
# Default system prompt. The {source_lang} and {target_lang} placeholders are
# filled in with str.format by _build_system_prompt when the caller supplies
# no custom prompt.
DEFAULT_TRANSLATION_PROMPT = """You are a professional translator. Translate the following text from {source_lang} to {target_lang}.
Rules:
- Translate ONLY the text, do not add explanations or notes
- Preserve the original formatting, line breaks, and structure
- Maintain the original tone and style
- For technical terms, use the standard translation in the target language
- If the text contains proper nouns or brand names, keep them unchanged unless there's a well-known translation"""
def _build_system_prompt(
    source_lang: str, target_lang: str, custom_prompt: Optional[str] = None
) -> str:
    """Return the system prompt to send with a translation request.

    Args:
        source_lang: Source language as it should appear in the prompt.
        target_lang: Target language as it should appear in the prompt.
        custom_prompt: Caller-supplied prompt. When truthy it is returned
            verbatim (no placeholder substitution is applied to it).

    Returns:
        ``custom_prompt`` if given, otherwise ``DEFAULT_TRANSLATION_PROMPT``
        with both language placeholders filled in.
    """
    if not custom_prompt:
        return DEFAULT_TRANSLATION_PROMPT.format(
            source_lang=source_lang, target_lang=target_lang
        )
    return custom_prompt
# Primary language subtag -> English language name. Built once at import time
# instead of on every call.
_LANGUAGE_NAMES = {
    "en": "English",
    "fr": "French",
    "es": "Spanish",
    "de": "German",
    "it": "Italian",
    "pt": "Portuguese",
    "nl": "Dutch",
    "ru": "Russian",
    "zh": "Chinese",
    "ja": "Japanese",
    "ko": "Korean",
    "ar": "Arabic",
    "hi": "Hindi",
    "tr": "Turkish",
    "pl": "Polish",
    "vi": "Vietnamese",
    "th": "Thai",
    "id": "Indonesian",
    "ms": "Malay",
    "uk": "Ukrainian",
    "cs": "Czech",
    "sv": "Swedish",
    "da": "Danish",
    "fi": "Finnish",
    "no": "Norwegian",
    "el": "Greek",
    "he": "Hebrew",
    "ro": "Romanian",
    "hu": "Hungarian",
    "bg": "Bulgarian",
    "sk": "Slovak",
    "hr": "Croatian",
    "sl": "Slovenian",
    "lt": "Lithuanian",
    "lv": "Latvian",
    "et": "Estonian",
}


def _get_language_name(code: str) -> str:
    """Convert a language code to its English name for use in an LLM prompt.

    Only the primary subtag is looked up, case-insensitively, so regional
    variants resolve to their base language. Both ``-`` and ``_`` are accepted
    as the region separator (``pt-BR`` and ``pt_BR`` both give "Portuguese"),
    and surrounding whitespace is ignored for the lookup.

    Args:
        code: Language code such as ``"fr"``, ``"en-US"`` or ``"zh_CN"``.

    Returns:
        The English language name, or ``code`` unchanged when the primary
        subtag is not in the table (e.g. ``"auto"``).
    """
    # Normalise the locale-style "_" separator to "-" before taking the
    # primary subtag.
    base_code = code.strip().replace("_", "-").split("-")[0].lower()
    return _LANGUAGE_NAMES.get(base_code, code)
class OllamaTranslationProvider(TranslationProvider):
    """
    Translation provider backed by a local Ollama instance.

    Features:
    - Uses the Ollama REST API (``/api/chat`` for generation, ``/api/tags``
      for model discovery and availability probes)
    - Custom system prompt support via ``request.metadata["custom_prompt"]``
    - Failures are mapped to ``OLLAMA_*`` error codes (``OllamaProviderError``)
    - Retry with exponential backoff for retryable error codes
    - Configurable request timeout (default 120s for LLM generation)
    - Health/availability results are cached for 60 s behind a lock; the
      model list is cached for 300 s

    NOTE(review): the model-list cache is read and written without a lock,
    unlike the health cache - confirm whether concurrent callers matter here.
    """

    def __init__(
        self,
        base_url: str = "http://localhost:11434",
        model: Optional[str] = None,
        timeout: int = 120,
        max_retries: int = 2,
        retry_delay: float = 2.0,
    ):
        """
        Initialize Ollama provider.

        Args:
            base_url: Ollama API base URL (default: http://localhost:11434).
                A trailing slash is stripped.
            model: Model name (e.g. llama3, mistral). If None, uses
                OLLAMA_MODEL from ProvidersConfig.
            timeout: Timeout in seconds for the chat request (default: 120)
            max_retries: Maximum retry attempts after the first try (default: 2)
            retry_delay: Delay in seconds before the first retry; doubled for
                each subsequent retry (default: 2.0)
        """
        if model is None:
            # Imported lazily, only when the caller did not supply a model.
            from .config import ProvidersConfig

            model = ProvidersConfig.OLLAMA_MODEL
        self._base_url = base_url.rstrip("/")
        self._model = model
        self._provider_name = "ollama"
        self.timeout = timeout
        self.max_retries = max_retries
        self.retry_delay = retry_delay
        # Cached results of is_available() / health_check(), keyed by name.
        self._health_cache: Dict[str, Any] = {}
        self._health_cache_ttl = 60
        self._health_cache_lock = threading.Lock()
        # Cached model names from /api/tags; None until the first successful fetch.
        self._available_models: Optional[List[str]] = None
        self._models_cache_time: float = 0
        self._models_cache_ttl = 300

    def _fetch_available_models(self) -> List[str]:
        """Fetch list of available (pulled) models from Ollama.

        A successful response is cached for ``_models_cache_ttl`` seconds.

        Returns:
            The model names reported by ``/api/tags``. An empty list is
            returned both when Ollama reports no models and when the list
            could not be fetched (request error or non-200 status); failures
            are logged and not cached.
        """
        current_time = time.time()
        cached = self._available_models
        if (
            cached is not None
            and current_time - self._models_cache_time < self._models_cache_ttl
        ):
            return cached
        try:
            response = requests.get(f"{self._base_url}/api/tags", timeout=10)
            if response.status_code == 200:
                entries = response.json().get("models") or []
                models = [entry.get("name", "") for entry in entries]
                self._available_models = models
                self._models_cache_time = current_time
                return models
            # Previously swallowed silently; make the failure visible.
            _log_warning(
                "ollama_models_fetch_failed", status_code=response.status_code
            )
        except Exception as e:
            # Best-effort lookup: callers treat an empty list as "unknown".
            _log_warning("ollama_models_fetch_failed", error=str(e)[:100])
        return []

    @staticmethod
    def _model_in_list(model: str, models: List[str]) -> bool:
        """Return True when ``model`` occurs as a substring of any listed name.

        The substring match lets an untagged name such as ``llama3`` match a
        tagged entry such as ``llama3:latest``.
        """
        return any(model in name for name in models)

    def _check_model_available(self, model: str) -> bool:
        """Check if a specific model is available (pulled)."""
        return self._model_in_list(model, self._fetch_available_models())

    def _make_api_request(self, text: str, system_prompt: str) -> str:
        """
        Send one chat request to Ollama and return the generated text.

        Args:
            text: Text to translate, sent as the user message. Empty or
                whitespace-only text is returned unchanged without a request.
            system_prompt: System message sent ahead of the text.

        Returns:
            The response's ``message.content`` with surrounding whitespace
            stripped.

        Raises:
            OllamaProviderError: For any API errors with specific codes
        """
        if not text or not text.strip():
            return text
        if len(text) > 128000:
            raise OllamaProviderError(
                code=OLLAMA_CONTEXT_TOO_LONG,
                message="Texte trop long pour le modèle (max ~128K caractères).",
                details={"text_length": len(text), "max_chars": 128000},
            )
        # Pre-flight model check. An empty list is ambiguous: it is returned
        # when Ollama has no models AND when the list could not be fetched
        # (e.g. Ollama is down). Only a non-empty list lacking the model is
        # conclusive; otherwise fall through to the chat request so that an
        # outage is reported as OLLAMA_UNAVAILABLE/OLLAMA_TIMEOUT (retryable)
        # rather than as a missing model, and a truly missing model is still
        # reported through the 404 branch below.
        known_models = self._fetch_available_models()
        if known_models and not self._model_in_list(self._model, known_models):
            raise OllamaProviderError(
                code=OLLAMA_MODEL_NOT_FOUND,
                message=f"Modèle '{self._model}' non trouvé. Exécutez: ollama pull {self._model}",
                details={"model": self._model, "provider": "ollama"},
            )
        payload = {
            "model": self._model,
            "messages": [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": text},
            ],
            "stream": False,
            "options": {"temperature": 0.3},
        }
        try:
            response = requests.post(
                f"{self._base_url}/api/chat",
                json=payload,
                timeout=self.timeout,
            )
            if response.status_code == 404:
                raise OllamaProviderError(
                    code=OLLAMA_MODEL_NOT_FOUND,
                    message=f"Modèle '{self._model}' non trouvé. Exécutez: ollama pull {self._model}",
                    details={"model": self._model, "status_code": 404},
                )
            if response.status_code != 200:
                error_text = response.text[:200] if response.text else "Unknown error"
                raise OllamaProviderError(
                    code=OLLAMA_GENERATION_ERROR,
                    message=f"Erreur de génération Ollama: {error_text}",
                    details={"status_code": response.status_code, "model": self._model},
                )
            data = response.json()
            message = data.get("message", {})
            content = message.get("content", "")
            if not content:
                raise OllamaProviderError(
                    code=OLLAMA_GENERATION_ERROR,
                    message="Erreur de génération Ollama: réponse vide",
                    details={"model": self._model, "response": str(data)[:200]},
                )
            return content.strip()
        except OllamaProviderError:
            # Already classified above; pass through untouched.
            raise
        except Timeout as exc:
            raise OllamaProviderError(
                code=OLLAMA_TIMEOUT,
                message="Délai d'attente Ollama dépassé. Réessayez avec un texte plus court.",
                details={"provider": "ollama", "timeout_seconds": self.timeout},
            ) from exc
        except RequestsConnectionError as exc:
            raise OllamaProviderError(
                code=OLLAMA_UNAVAILABLE,
                message="Service Ollama indisponible. Vérifiez que Ollama est en cours d'exécution.",
                details={"provider": "ollama", "base_url": self._base_url},
            ) from exc
        except Exception as exc:
            # Anything else (bad JSON, unexpected payload shape, ...): classify
            # by message text as a last resort.
            error_str = str(exc).lower()
            if "connection" in error_str or "refused" in error_str:
                raise OllamaProviderError(
                    code=OLLAMA_UNAVAILABLE,
                    message="Service Ollama indisponible. Vérifiez que Ollama est en cours d'exécution.",
                    details={"provider": "ollama", "base_url": self._base_url},
                ) from exc
            raise OllamaProviderError(
                code=OLLAMA_GENERATION_ERROR,
                message=f"Erreur de génération Ollama: {str(exc)[:100]}",
                details={"provider": "ollama", "original_error": str(exc)[:100]},
            ) from exc

    def get_name(self) -> str:
        """Return provider name."""
        return self._provider_name

    def is_available(self) -> bool:
        """
        Check if Ollama is available.

        Probes ``/api/tags`` (5 s timeout) and treats a 200 response as
        available. Uses the cached result if present and not expired.
        """
        current_time = time.time()
        with self._health_cache_lock:
            cached = self._health_cache.get("is_available")
            if (
                cached is not None
                and current_time - cached["timestamp"] < self._health_cache_ttl
            ):
                return cached["value"]
        # The probe runs outside the lock so a slow request does not block
        # other readers of the cache.
        try:
            response = requests.get(f"{self._base_url}/api/tags", timeout=5)
            available = response.status_code == 200
        except Exception as e:
            _log_warning("ollama_availability_check_failed", error=str(e)[:100])
            available = False
        with self._health_cache_lock:
            self._health_cache["is_available"] = {
                "value": available,
                "timestamp": current_time,
            }
        return available

    def translate_text(self, request: TranslationRequest) -> TranslationResponse:
        """
        Translate a single text string using Ollama LLM.

        Supports custom system prompt via request.metadata["custom_prompt"].
        Provider failures are reported through the response's error fields
        (with the original text as ``translated_text``) rather than raised.
        Errors whose code is in ``_RETRYABLE_ERRORS``, and unexpected
        exceptions, are retried up to ``max_retries`` times with a delay of
        ``retry_delay * 2 ** (attempt - 1)`` seconds.

        Args:
            request: TranslationRequest with text and language info

        Returns:
            TranslationResponse with translated text
        """
        text = request.text
        target_language = request.target_language
        source_language = request.source_language or "auto"
        if not text or not text.strip():
            # Nothing to translate: echo the input back without calling Ollama.
            return TranslationResponse(
                translated_text=text,
                provider_name=self._provider_name,
                from_cache=False,
            )
        # NOTE(review): with no source language the prompt is built with the
        # literal "auto" as the source language name - confirm this is intended.
        source_lang_name = _get_language_name(source_language)
        target_lang_name = _get_language_name(target_language)
        custom_prompt = None
        if request.metadata:
            custom_prompt = request.metadata.get("custom_prompt")
        system_prompt = _build_system_prompt(
            source_lang_name, target_lang_name, custom_prompt
        )
        last_error: Optional[OllamaProviderError] = None
        retries = 0
        while retries <= self.max_retries:
            try:
                start_time = time.time()
                result = self._make_api_request(text, system_prompt)
                latency = time.time() - start_time
                # Only sizes and metadata are logged, never the text itself.
                _log_info(
                    "ollama_translation_success",
                    chars=len(text),
                    source_lang=source_language,
                    target_lang=target_language,
                    model=self._model,
                    latency_ms=round(latency * 1000, 2),
                    retries=retries,
                )
                return TranslationResponse(
                    translated_text=result,
                    provider_name=self._provider_name,
                    from_cache=False,
                    source_language=source_language,
                )
            except OllamaProviderError as e:
                last_error = e
                if e.code not in _RETRYABLE_ERRORS:
                    # Retrying cannot help (missing model, text too long, ...).
                    break
            except Exception as e:
                last_error = OllamaProviderError(
                    code=OLLAMA_GENERATION_ERROR,
                    message=f"Erreur de génération Ollama: {str(e)[:100]}",
                    details={"original_error": str(e)[:100]},
                )
            # Shared backoff for both failure branches above.
            retries += 1
            if retries <= self.max_retries:
                delay = self.retry_delay * (2 ** (retries - 1))
                _log_info(
                    "ollama_translation_retry",
                    attempt=retries,
                    delay_s=round(delay, 2),
                    error_code=last_error.code,
                    text_length=len(text),
                    source_lang=source_language,
                    target_lang=target_language,
                )
                time.sleep(delay)
        if last_error:
            _log_error(
                "ollama_translation_failed",
                error_code=last_error.code,
                text_length=len(text),
                source_lang=source_language,
                target_lang=target_language,
                retries=retries,
            )
            return TranslationResponse(
                translated_text=text,
                provider_name=self._provider_name,
                from_cache=False,
                error=last_error.message,
                error_code=last_error.code,
                error_details=last_error.details,
            )
        # Reached only when the loop body never ran (negative max_retries).
        return TranslationResponse(
            translated_text=text,
            provider_name=self._provider_name,
            from_cache=False,
            error="Unknown error",
            error_code=OLLAMA_GENERATION_ERROR,
        )

    def translate_batch(
        self, requests: List[TranslationRequest]
    ) -> List[TranslationResponse]:
        """
        Translate multiple texts, one request at a time and in order.

        Args:
            requests: List of TranslationRequest objects. Within this method
                the name shadows the ``requests`` HTTP module, which is not
                used here directly.

        Returns:
            List of TranslationResponse objects
        """
        if not requests:
            return []
        return [self.translate_text(req) for req in requests]

    def health_check(self) -> ProviderHealthStatus:
        """
        Return health status details for the provider.

        The provider counts as available when the model list is non-empty and
        contains the configured model. The result is cached for
        ``_health_cache_ttl`` seconds.

        Returns:
            ProviderHealthStatus with availability and latency information
        """
        current_time = time.time()
        with self._health_cache_lock:
            cached = self._health_cache.get("health_check")
            if (
                cached is not None
                and current_time - cached["timestamp"] < self._health_cache_ttl
            ):
                return cached["value"]
        start_time = time.time()
        last_check_iso = datetime.now(timezone.utc).isoformat()
        try:
            # Fetch the list once and reuse it for the model check. A failed
            # fetch is not cached, so fetching twice would wait for two
            # timeouts when Ollama is down.
            models = self._fetch_available_models()
            model_available = self._model_in_list(self._model, models)
            available = len(models) > 0 and model_available
            latency_ms = (time.time() - start_time) * 1000
            error_msg = None
            if not available and len(models) == 0:
                error_msg = "Service Ollama indisponible. Vérifiez que Ollama est en cours d'exécution."
            elif not model_available:
                error_msg = f"Modèle '{self._model}' non trouvé. Exécutez: ollama pull {self._model}"
            status = ProviderHealthStatus(
                name=self._provider_name,
                available=available,
                latency_ms=round(latency_ms, 2),
                error=error_msg,
                last_check=last_check_iso,
                model=self._model,
                model_available=model_available,
            )
        except Exception as e:
            latency_ms = (time.time() - start_time) * 1000
            status = ProviderHealthStatus(
                name=self._provider_name,
                available=False,
                latency_ms=round(latency_ms, 2),
                error=str(e)[:100],
                last_check=last_check_iso,
                model=self._model,
                model_available=None,
            )
        with self._health_cache_lock:
            self._health_cache["health_check"] = {
                "value": status,
                "timestamp": current_time,
            }
        return status
def register_ollama_provider():
    """
    Register the shared Ollama provider under the name "ollama".

    Obtains the provider from get_ollama_provider(), registers it in the
    registry imported from ``.registry``, and returns it to the caller.
    """
    # Imported at call time rather than at module import.
    from .registry import registry

    instance = get_ollama_provider()
    registry.register("ollama", instance)
    return instance
# Lazily created shared provider instance; populated by get_ollama_provider().
_provider_instance: Optional[OllamaTranslationProvider] = None
# Guards creation of the shared instance in get_ollama_provider().
_provider_lock = threading.Lock()
def get_ollama_provider() -> OllamaTranslationProvider:
    """Return the shared Ollama provider, creating it on first use.

    The instance is built from the ``OLLAMA_*`` attributes of
    ``ProvidersConfig``. Creation uses double-checked locking: the instance is
    tested once without the lock and again while holding ``_provider_lock``.
    """
    global _provider_instance
    if _provider_instance is not None:
        return _provider_instance
    with _provider_lock:
        # Re-check under the lock: another thread may have created it meanwhile.
        if _provider_instance is None:
            from .config import ProvidersConfig

            settings = ProvidersConfig
            _provider_instance = OllamaTranslationProvider(
                base_url=settings.OLLAMA_BASE_URL,
                model=settings.OLLAMA_MODEL,
                timeout=settings.OLLAMA_TIMEOUT,
                max_retries=settings.OLLAMA_MAX_RETRIES,
                retry_delay=settings.OLLAMA_RETRY_DELAY,
            )
    return _provider_instance