+>(({ className, ...props }, ref) => (
+ | [role=checkbox]]:translate-y-[2px]",
+ className
+ )}
+ {...props}
+ />
+))
+TableCell.displayName = "TableCell"
+
+const TableCaption = React.forwardRef<
+ HTMLTableCaptionElement,
+ React.HTMLAttributes
+>(({ className, ...props }, ref) => (
+
+))
+TableCaption.displayName = "TableCaption"
+
+export {
+ Table,
+ TableHeader,
+ TableBody,
+ TableFooter,
+ TableHead,
+ TableRow,
+ TableCell,
+ TableCaption,
+}
diff --git a/frontend/src/components/ui/toast.tsx b/frontend/src/components/ui/toast.tsx
index 839a6b8..fd03ab6 100644
--- a/frontend/src/components/ui/toast.tsx
+++ b/frontend/src/components/ui/toast.tsx
@@ -1,3 +1,5 @@
+"use client"
+
import * as React from "react"
import * as ToastPrimitives from "@radix-ui/react-toast"
import { cva, type VariantProps } from "class-variance-authority"
@@ -116,7 +118,7 @@ const Toast = React.forwardRef<
className={cn(
"inline-flex h-8 shrink-0 items-center justify-center rounded-md border bg-transparent px-3 text-sm font-medium transition-colors hover:bg-secondary focus:outline-none focus:ring-2 focus:ring-ring focus:ring-offset-2 disabled:pointer-events-none disabled:opacity-50 group-[.destructive]:border-muted/40 group-[.destructive]:hover:border-destructive/30 group-[.destructive]:hover:bg-destructive group-[.destructive]:hover:text-destructive-foreground group-[.destructive]:focus:ring-destructive"
)}
- alt={typeof action === 'string' ? action : undefined}
+ altText={typeof action === 'string' ? action : 'Action'}
>
{action}
@@ -146,6 +148,21 @@ const ToastAction = React.forwardRef<
))
ToastAction.displayName = ToastPrimitives.Action.displayName
+const ToastClose = React.forwardRef<
+ React.ElementRef,
+ React.ComponentPropsWithoutRef
+>(({ className, ...props }, ref) => (
+
+))
+ToastClose.displayName = ToastPrimitives.Close.displayName
+
const ToastTitle = React.forwardRef<
React.ElementRef,
React.ComponentPropsWithoutRef
@@ -185,7 +202,7 @@ export function useToast() {
description?: string
variant?: VariantProps["variant"]
duration?: number
- action?: ToastActionElement
+ action?: React.ReactNode
icon?: React.ReactNode
}>>([])
@@ -267,7 +284,7 @@ export const ToastContainer = ({ children }: { children: React.ReactNode }) => {
// Individual Toast Component for use in ToastContainer
export const ToastItem = React.forwardRef<
- HTMLDivElement,
+ HTMLLIElement,
{
toast: {
id: string
@@ -275,15 +292,15 @@ export const ToastItem = React.forwardRef<
description?: string
variant?: VariantProps["variant"]
duration?: number
- action?: ToastActionElement
+ action?: React.ReactNode
icon?: React.ReactNode
}
onDismiss: (id: string) => void
}
>(({ toast, onDismiss, ...props }, ref) => {
return (
+
+
)
})
ToastItem.displayName = "ToastItem"
diff --git a/frontend/src/messages/en.json b/frontend/src/messages/en.json
new file mode 100644
index 0000000..cd6e8c3
--- /dev/null
+++ b/frontend/src/messages/en.json
@@ -0,0 +1,112 @@
+{
+ "common": {
+ "login": "Login",
+ "signup": "Sign Up",
+ "getStarted": "Get Started",
+ "getStartedFree": "Get Started Free",
+ "learnMore": "Learn More",
+ "startNow": "Start Now",
+ "tryPro": "Try Pro",
+ "contactSales": "Contact Sales",
+ "seeDemo": "See Demo",
+ "free": "Free",
+ "popular": "Popular",
+ "month": "month",
+ "onRequest": "On Request"
+ },
+ "nav": {
+ "features": "Features",
+ "pricing": "Pricing",
+ "enterprise": "Enterprise"
+ },
+ "hero": {
+ "title": "Translate your Office documents.",
+ "titleHighlight": "Keep the format.",
+ "subtitle": "Instantly translate your Excel, Word, and PowerPoint files while preserving their original layout. Secure, accurate, and powered by AI.",
+ "cta": "Get Started Free",
+ "demoCta": "See Demo",
+ "badge1": "Zero data retention",
+ "badge2": "Deleted in 60 min"
+ },
+ "trustBadges": {
+ "title": "Trusted by professionals worldwide"
+ },
+ "features": {
+ "title": "Professional Translation Power",
+ "subtitle": "Compatible with all your essential office formats, powered by cutting-edge AI for unmatched accuracy.",
+ "excel": {
+ "title": "Excel Expert",
+ "description": "Translate cells and formulas without breaking the spreadsheet. Your data stays intact."
+ },
+ "word": {
+ "title": "Perfect Word",
+ "description": "Paragraphs, fonts, and styles preserved identically. Ready to print."
+ },
+ "powerpoint": {
+ "title": "Intact PowerPoint",
+ "description": "Slide layout preserved pixel by pixel. Perfect for your presentations."
+ },
+ "ai": {
+ "title": "Multi-Model AI",
+ "description": "Choose the engine that fits your content: GPT-4, Claude 3, or Llama 3."
+ },
+ "privacy": {
+ "title": "Ollama Privacy",
+ "description": "Local processing option for maximum privacy. Your data never leaves your server."
+ },
+ "speed": {
+ "title": "Lightning Fast",
+ "description": "Translate large documents in seconds thanks to our optimized infrastructure."
+ }
+ },
+ "pricing": {
+ "title": "Simple and Transparent Pricing",
+ "subtitle": "Start for free, scale as you need.",
+ "free": {
+ "name": "Free",
+ "description": "For occasional needs.",
+ "price": "$0",
+ "features": [
+ "5 documents / month",
+ "Word & Excel only",
+ "Community support"
+ ]
+ },
+ "pro": {
+ "name": "Pro",
+ "description": "For demanding professionals.",
+ "price": "$29",
+ "features": [
+ "50 documents / month",
+ "All formats (PPTX included)",
+ "Advanced AI models (GPT-4)",
+ "Priority support"
+ ]
+ },
+ "enterprise": {
+ "name": "Enterprise",
+ "description": "For large teams.",
+ "features": [
+ "Unlimited documents",
+ "API Access",
+ "SSO & Advanced Security",
+ "Dedicated Account Manager"
+ ]
+ }
+ },
+ "cta": {
+ "title": "Ready to internationalize your documents?",
+ "subtitle": "Join over 10,000 professionals who save hours of work every week with Office Translator.",
+ "button": "Start Now"
+ },
+ "footer": {
+ "privacy": "Privacy",
+ "terms": "Terms",
+ "contact": "Contact",
+ "copyright": "© 2024 Office Translator. All rights reserved."
+ },
+ "landing": {
+ "originalLabel": "Original",
+ "translatedLabel": "Translated"
+ }
+}
diff --git a/frontend/src/messages/fr.json b/frontend/src/messages/fr.json
new file mode 100644
index 0000000..5872658
--- /dev/null
+++ b/frontend/src/messages/fr.json
@@ -0,0 +1,112 @@
+{
+ "common": {
+ "login": "Connexion",
+ "signup": "S'inscrire",
+ "getStarted": "Commencer",
+ "getStartedFree": "Commencer gratuitement",
+ "learnMore": "En savoir plus",
+ "startNow": "Commencer maintenant",
+ "tryPro": "Essayer Pro",
+ "contactSales": "Contacter les ventes",
+ "seeDemo": "Voir la démo",
+ "free": "Gratuit",
+ "popular": "Populaire",
+ "month": "mois",
+ "onRequest": "Sur devis"
+ },
+ "nav": {
+ "features": "Fonctionnalités",
+ "pricing": "Tarifs",
+ "enterprise": "Entreprise"
+ },
+ "hero": {
+ "title": "Traduisez vos documents Office.",
+ "titleHighlight": "Gardez le format.",
+ "subtitle": "Traduisez instantanément vos fichiers Excel, Word et PowerPoint tout en préservant leur mise en page originale. Sécurisé, précis et propulsé par l'IA.",
+ "cta": "Commencer gratuitement",
+ "demoCta": "Voir la démo",
+ "badge1": "0 données stockées",
+ "badge2": "Suppression en 60 min"
+ },
+ "trustBadges": {
+ "title": "Ils nous font confiance"
+ },
+ "features": {
+ "title": "La puissance de la traduction professionnelle",
+ "subtitle": "Compatible avec tous vos formats de bureau essentiels, propulsé par une IA de pointe pour une précision inégalée.",
+ "excel": {
+ "title": "Expert Excel",
+ "description": "Traduction des cellules et formules sans casser le tableur. Vos données restent intactes."
+ },
+ "word": {
+ "title": "Word Parfait",
+ "description": "Paragraphes, polices et styles conservés à l'identique. Prêt à imprimer."
+ },
+ "powerpoint": {
+ "title": "PowerPoint Intact",
+ "description": "Mise en page des diapositives préservée pixel par pixel. Idéal pour vos présentations."
+ },
+ "ai": {
+ "title": "Multi-Modèles IA",
+ "description": "Choisissez le moteur qui convient à votre contenu : GPT-4, Claude 3 ou Llama 3."
+ },
+ "privacy": {
+ "title": "Confidentialité Ollama",
+ "description": "Option de traitement local pour une confidentialité maximale. Vos données ne quittent pas votre serveur."
+ },
+ "speed": {
+ "title": "Vitesse Éclair",
+ "description": "Traduction de documents volumineux en quelques secondes grâce à notre infrastructure optimisée."
+ }
+ },
+ "pricing": {
+ "title": "Tarification simple et transparente",
+ "subtitle": "Commencez gratuitement, évoluez selon vos besoins.",
+ "free": {
+ "name": "Gratuit",
+ "description": "Pour les besoins ponctuels.",
+ "price": "0€",
+ "features": [
+ "5 documents / mois",
+ "Word & Excel uniquement",
+ "Support communautaire"
+ ]
+ },
+ "pro": {
+ "name": "Pro",
+ "description": "Pour les professionnels exigeants.",
+ "price": "29€",
+ "features": [
+ "50 documents / mois",
+ "Tous formats (PPTX inclus)",
+ "Modèles IA avancés (GPT-4)",
+ "Support prioritaire"
+ ]
+ },
+ "enterprise": {
+ "name": "Entreprise",
+ "description": "Pour les grandes équipes.",
+ "features": [
+ "Documents illimités",
+ "Accès API",
+ "SSO & Sécurité avancée",
+ "Manager de compte dédié"
+ ]
+ }
+ },
+ "cta": {
+ "title": "Prêt à internationaliser vos documents ?",
+ "subtitle": "Rejoignez plus de 10 000 professionnels qui gagnent des heures de travail chaque semaine avec Office Translator.",
+ "button": "Commencer maintenant"
+ },
+ "footer": {
+ "privacy": "Confidentialité",
+ "terms": "CGU",
+ "contact": "Contact",
+ "copyright": "© 2024 Office Translator. Tous droits réservés."
+ },
+ "landing": {
+ "originalLabel": "Original",
+ "translatedLabel": "Traduit"
+ }
+}
diff --git a/frontend/src/providers/QueryProvider.tsx b/frontend/src/providers/QueryProvider.tsx
new file mode 100644
index 0000000..537d6be
--- /dev/null
+++ b/frontend/src/providers/QueryProvider.tsx
@@ -0,0 +1,38 @@
+'use client';
+
+import { QueryClient, QueryClientProvider } from '@tanstack/react-query';
+import { useState, type ReactNode, useCallback } from 'react';
+
+const DEFAULT_STALE_TIME_MS = 60 * 1000;
+
+export function QueryProvider({ children }: { children: ReactNode }) {
+ const [queryClient] = useState(
+ () =>
+ new QueryClient({
+ defaultOptions: {
+ queries: {
+ staleTime: DEFAULT_STALE_TIME_MS,
+ retry: 1,
+ refetchOnWindowFocus: false,
+ },
+ mutations: {
+ retry: 1,
+ },
+ },
+ })
+ );
+
+ const handleGlobalError = useCallback((error: unknown) => {
+ if (typeof window !== 'undefined') {
+ console.error('[QueryClient Error]', error);
+ }
+ }, []);
+
+ return (
+
+ {children}
+
+ );
+}
+
+export { DEFAULT_STALE_TIME_MS };
diff --git a/frontend/src/test/constants.test.ts b/frontend/src/test/constants.test.ts
new file mode 100644
index 0000000..6160953
--- /dev/null
+++ b/frontend/src/test/constants.test.ts
@@ -0,0 +1,28 @@
+import { describe, it, expect } from 'vitest';
+import { baseNavItems, proNavItem, getNavItems } from '../app/dashboard/constants';
+
+// Checks the navigation item list returned by getNavItems() for free and pro users.
+describe('getNavItems', () => {
+  it('should return only base items for free users', () => {
+    const freeUserItems = getNavItems(false);
+
+    expect(freeUserItems).toHaveLength(2);
+    expect(freeUserItems).toEqual(baseNavItems);
+  });
+
+  it('should include pro item for pro users', () => {
+    const proUserItems = getNavItems(true);
+
+    expect(proUserItems).toHaveLength(3);
+    expect(proUserItems).toContain(proNavItem);
+  });
+
+  it('should have correct structure for base items', () => {
+    // Every base entry must expose these keys, checked in this order.
+    const requiredKeys = ['label', 'href', 'icon'];
+
+    for (const navItem of baseNavItems) {
+      for (const key of requiredKeys) {
+        expect(navItem).toHaveProperty(key);
+      }
+    }
+  });
+
+  it('should have proOnly flag on proNavItem', () => {
+    const { proOnly } = proNavItem;
+
+    expect(proOnly).toBe(true);
+  });
+});
diff --git a/frontend/src/test/setup.ts b/frontend/src/test/setup.ts
new file mode 100644
index 0000000..bb02c60
--- /dev/null
+++ b/frontend/src/test/setup.ts
@@ -0,0 +1 @@
+import '@testing-library/jest-dom/vitest';
diff --git a/frontend/src/test/utils.test.ts b/frontend/src/test/utils.test.ts
new file mode 100644
index 0000000..dc3d50e
--- /dev/null
+++ b/frontend/src/test/utils.test.ts
@@ -0,0 +1,28 @@
+import { describe, it, expect } from 'vitest';
+import { getInitials } from '../app/dashboard/utils';
+
+// Checks getInitials() against full names, single names, and missing input.
+describe('getInitials', () => {
+  it('should return first two initials for full name', () => {
+    const initials = getInitials('John Doe');
+
+    expect(initials).toBe('JD');
+  });
+
+  it('should return single initial for single name', () => {
+    const initials = getInitials('Jane');
+
+    expect(initials).toBe('J');
+  });
+
+  it('should handle names with multiple spaces', () => {
+    // Only the first two words contribute an initial.
+    const initials = getInitials('John Jacob Jingleheimer Schmidt');
+
+    expect(initials).toBe('JJ');
+  });
+
+  it('should return ? for empty string', () => {
+    const initials = getInitials('');
+
+    expect(initials).toBe('?');
+  });
+
+  it('should return ? for undefined', () => {
+    // Deliberately bypass the type checker to simulate a missing name at runtime.
+    const missingName = undefined as unknown as string;
+
+    expect(getInitials(missingName)).toBe('?');
+  });
+
+  it('should handle lowercase names', () => {
+    const initials = getInitials('john doe');
+
+    expect(initials).toBe('JD');
+  });
+});
diff --git a/frontend/vitest.config.ts b/frontend/vitest.config.ts
new file mode 100644
index 0000000..7625106
--- /dev/null
+++ b/frontend/vitest.config.ts
@@ -0,0 +1,17 @@
+import { defineConfig } from 'vitest/config';
+import react from '@vitejs/plugin-react';
+import path from 'path';
+
+// Vitest configuration for the frontend: React plugin, jsdom test environment,
+// a shared setup file, and the '@' import alias.
+export default defineConfig({
+ plugins: [react()],
+ test: {
+ // Run tests in the jsdom environment.
+ environment: 'jsdom',
+ // Enable Vitest's global test APIs.
+ globals: true,
+ // Setup module listed here is src/test/setup.ts (path relative to this config).
+ setupFiles: ['./src/test/setup.ts'],
+ },
+ resolve: {
+ alias: {
+ // Map the '@' import prefix to the src directory next to this config file.
+ '@': path.resolve(__dirname, './src'),
+ },
+ },
+});
diff --git a/main.py b/main.py
index c70c98d..fecb287 100644
--- a/main.py
+++ b/main.py
@@ -2,162 +2,71 @@
Document Translation API
FastAPI application for translating complex documents while preserving formatting
SaaS-ready with rate limiting, validation, and robust error handling
+
+Story 3.5: API Versioning - All endpoints under /api/v1/ prefix
+Story 3.6: Documentation OpenAPI (Swagger + ReDoc)
"""
-from fastapi import FastAPI, UploadFile, File, Form, HTTPException, Request, Depends, Header
-from fastapi.responses import FileResponse, JSONResponse
+
+from fastapi import (
+ FastAPI,
+ Request,
+)
+from fastapi.openapi.utils import get_openapi
+from starlette.exceptions import HTTPException as StarletteHTTPException
+from fastapi.responses import JSONResponse
from fastapi.middleware.cors import CORSMiddleware
from fastapi.staticfiles import StaticFiles
-from fastapi.security import HTTPBasic, HTTPBasicCredentials
-from pydantic import BaseModel
+from fastapi.exceptions import RequestValidationError
from contextlib import asynccontextmanager
from pathlib import Path
-from typing import Optional
-import asyncio
import logging
import os
-import secrets
-import hashlib
-import time
from config import config
-from translators import excel_translator, word_translator, pptx_translator
-from utils import file_handler, handle_translation_error, DocumentProcessingError
+from translators import (
+ excel_translator,
+ word_translator,
+ pptx_translator,
+ ExcelProcessorError,
+ WordProcessorError,
+ PptxProcessorError,
+)
+from utils import handle_translation_error, DocumentProcessingError
+from services.providers.fallback import AllProvidersFailedError
from services.translation_service import _translation_cache
-# Import auth routes
-from routes.auth_routes import router as auth_router
+from routes.api_v1_router import router as api_v1_router
-# Import SaaS middleware
-from middleware.rate_limiting import RateLimitMiddleware, RateLimitManager, RateLimitConfig
-from middleware.security import SecurityHeadersMiddleware, RequestLoggingMiddleware, ErrorHandlingMiddleware
-from middleware.cleanup import FileCleanupManager, MemoryMonitor, HealthChecker, create_cleanup_manager
-from middleware.validation import FileValidator, LanguageValidator, ProviderValidator, InputSanitizer, ValidationError
+from middleware.rate_limiting import (
+ RateLimitMiddleware,
+ RateLimitManager,
+ RateLimitConfig,
+)
+from middleware.security import (
+ SecurityHeadersMiddleware,
+ RequestLoggingMiddleware,
+)
+from middleware.error_handler import ErrorHandlingMiddleware, format_error_response
+from middleware.cleanup import (
+ MemoryMonitor,
+ HealthChecker,
+ create_cleanup_manager,
+)
+from middleware.validation import ValidationError
+from utils.exceptions import (
+ TranslationError,
+ UnsupportedFileTypeError,
+ FileSizeLimitExceededError,
+ LanguageNotSupportedError,
+ DocumentProcessingError as UtilsDocumentProcessingError,
+)
-# Configure structured logging
logging.basicConfig(
level=getattr(logging, os.getenv("LOG_LEVEL", "INFO")),
- format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
+ format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
)
logger = logging.getLogger(__name__)
-# ============== Admin Authentication ==============
-ADMIN_USERNAME = os.getenv("ADMIN_USERNAME")
-ADMIN_PASSWORD_HASH = os.getenv("ADMIN_PASSWORD_HASH") # SHA256 hash of password (preferred)
-ADMIN_PASSWORD = os.getenv("ADMIN_PASSWORD") # Plain password (use hash in production!)
-ADMIN_TOKEN_SECRET = os.getenv("ADMIN_TOKEN_SECRET", secrets.token_hex(32))
-
-# Validate admin credentials are configured
-if not ADMIN_USERNAME:
- logger.warning("⚠️ ADMIN_USERNAME not set - admin endpoints will be disabled")
-if not ADMIN_PASSWORD_HASH and not ADMIN_PASSWORD:
- logger.warning("⚠️ ADMIN_PASSWORD/ADMIN_PASSWORD_HASH not set - admin endpoints will be disabled")
-
-# Redis connection for sessions (fallback to in-memory if not available)
-REDIS_URL = os.getenv("REDIS_URL", "")
-_redis_client = None
-
-def get_redis_client():
- """Get Redis client for session storage"""
- global _redis_client
- if _redis_client is None and REDIS_URL:
- try:
- import redis
- _redis_client = redis.from_url(REDIS_URL, decode_responses=True)
- _redis_client.ping()
- logger.info("✅ Connected to Redis for session storage")
- except Exception as e:
- logger.warning(f"⚠️ Redis connection failed: {e}. Using in-memory sessions.")
- _redis_client = False # Mark as failed
- return _redis_client if _redis_client else None
-
-# In-memory fallback for sessions (not recommended for production)
-_memory_sessions: dict = {}
-
-def hash_password(password: str) -> str:
- """Hash password with SHA256"""
- return hashlib.sha256(password.encode()).hexdigest()
-
-def verify_admin_password(password: str) -> bool:
- """Verify admin password"""
- if not ADMIN_PASSWORD_HASH and not ADMIN_PASSWORD:
- return False # No credentials configured
- if ADMIN_PASSWORD_HASH:
- return hash_password(password) == ADMIN_PASSWORD_HASH
- return password == ADMIN_PASSWORD
-
-def _get_session_key(token: str) -> str:
- """Get Redis key for session token"""
- return f"admin_session:{token}"
-
-def create_admin_token() -> str:
- """Create a new admin session token with Redis or memory fallback"""
- token = secrets.token_urlsafe(32)
- expiry = int(time.time()) + (24 * 60 * 60) # 24 hours
-
- redis_client = get_redis_client()
- if redis_client:
- try:
- redis_client.setex(_get_session_key(token), 24 * 60 * 60, str(expiry))
- except Exception as e:
- logger.warning(f"Redis session save failed: {e}")
- _memory_sessions[token] = expiry
- else:
- _memory_sessions[token] = expiry
-
- return token
-
-def verify_admin_token(token: str) -> bool:
- """Verify admin token is valid and not expired"""
- redis_client = get_redis_client()
-
- if redis_client:
- try:
- expiry = redis_client.get(_get_session_key(token))
- if expiry and int(expiry) > time.time():
- return True
- return False
- except Exception as e:
- logger.warning(f"Redis session check failed: {e}")
-
- # Fallback to memory
- if token not in _memory_sessions:
- return False
- if time.time() > _memory_sessions[token]:
- del _memory_sessions[token]
- return False
- return True
-
-def delete_admin_token(token: str):
- """Delete an admin session token"""
- redis_client = get_redis_client()
- if redis_client:
- try:
- redis_client.delete(_get_session_key(token))
- except Exception:
- pass
- if token in _memory_sessions:
- del _memory_sessions[token]
-
-async def require_admin(authorization: Optional[str] = Header(None)) -> bool:
- """Dependency to require admin authentication"""
- if not ADMIN_USERNAME or (not ADMIN_PASSWORD_HASH and not ADMIN_PASSWORD):
- raise HTTPException(status_code=503, detail="Admin authentication not configured")
-
- if not authorization:
- raise HTTPException(status_code=401, detail="Authorization header required")
-
- # Expect "Bearer "
- parts = authorization.split(" ")
- if len(parts) != 2 or parts[0].lower() != "bearer":
- raise HTTPException(status_code=401, detail="Invalid authorization format. Use: Bearer ")
-
- token = parts[1]
- if not verify_admin_token(token):
- raise HTTPException(status_code=401, detail="Invalid or expired token")
-
- return True
-
-# Initialize SaaS components
rate_limit_config = RateLimitConfig(
requests_per_minute=int(os.getenv("RATE_LIMIT_PER_MINUTE", "30")),
requests_per_hour=int(os.getenv("RATE_LIMIT_PER_HOUR", "200")),
@@ -168,118 +77,459 @@ rate_limit_config = RateLimitConfig(
rate_limit_manager = RateLimitManager(rate_limit_config)
cleanup_manager = create_cleanup_manager(config)
-memory_monitor = MemoryMonitor(max_memory_percent=float(os.getenv("MAX_MEMORY_PERCENT", "80")))
+memory_monitor = MemoryMonitor(
+ max_memory_percent=float(os.getenv("MAX_MEMORY_PERCENT", "80"))
+)
health_checker = HealthChecker(cleanup_manager, memory_monitor)
-file_validator = FileValidator(
- max_size_mb=config.MAX_FILE_SIZE_MB,
- allowed_extensions=config.SUPPORTED_EXTENSIONS
-)
-
-def build_full_prompt(system_prompt: str, glossary: str) -> str:
- """Combine system prompt and glossary into a single prompt for LLM translation."""
- parts = []
-
- # Add system prompt if provided
- if system_prompt and system_prompt.strip():
- parts.append(system_prompt.strip())
-
- # Add glossary if provided
- if glossary and glossary.strip():
- glossary_section = """
-TECHNICAL GLOSSARY - Use these exact translations for the following terms:
-{}
-
-Always use the translations from this glossary when you encounter these terms.""".format(glossary.strip())
- parts.append(glossary_section)
-
- return "\n\n".join(parts) if parts else ""
-
-
-# Lifespan context manager for startup/shutdown
@asynccontextmanager
async def lifespan(app: FastAPI):
"""Handle startup and shutdown events"""
- # Startup
logger.info("Starting Document Translation API...")
config.ensure_directories()
-
- # Initialize database
+
try:
from database.connection import init_db, check_db_connection
- init_db()
- if check_db_connection():
- logger.info("✅ Database connection verified")
+
+ await init_db()
+ if await check_db_connection():
+ logger.info("Database connection verified")
else:
- logger.warning("⚠️ Database connection check failed")
+ logger.warning("Database connection check failed")
except Exception as e:
- logger.warning(f"⚠️ Database initialization skipped: {e}")
-
+ logger.warning(f"Database initialization skipped: {e}")
+
await cleanup_manager.start()
logger.info("API ready to accept requests")
-
+
yield
-
- # Shutdown
+
logger.info("Shutting down...")
await cleanup_manager.stop()
logger.info("Cleanup completed")
-# Create FastAPI app with lifespan
+# OpenAPI Tags Metadata
+OPENAPI_TAGS = [
+ {
+ "name": "Translation",
+ "description": "Endpoints de traduction de documents. Upload, suivi et téléchargement des traductions.",
+ },
+ {
+ "name": "Authentication",
+ "description": "Authentification utilisateur via JWT. Inscription, connexion, déconnexion et rafraîchissement des tokens.",
+ },
+ {
+ "name": "API Keys",
+ "description": "Gestion des clés API pour l'automatisation (utilisateurs Pro uniquement). Génération, liste et révocation.",
+ },
+ {
+ "name": "Admin",
+ "description": "Endpoints d'administration. Dashboard, gestion utilisateurs, statistiques et configuration.",
+ },
+ {
+ "name": "Health",
+ "description": "Endpoints de santé pour monitoring et probes Kubernetes.",
+ },
+ {
+ "name": "Legacy",
+ "description": "Endpoints utilitaires et de compatibilité. Langues, métriques, configuration Ollama.",
+ },
+]
+
+
+def custom_openapi():
+    """Build, cache, and return the application's custom OpenAPI schema.
+
+    On the first call the schema is generated with ``get_openapi`` from the
+    routes registered on ``app``, then extended with the ``JWT`` and ``APIKey``
+    security schemes plus contact and license information. The result is stored
+    on ``app.openapi_schema`` so that later calls return the cached schema
+    without regenerating it.
+
+    Returns:
+        dict: The OpenAPI schema for the application.
+    """
+    if app.openapi_schema:
+        return app.openapi_schema
+
+    openapi_schema = get_openapi(
+        title="Office Translator API",
+        version="1.0.0",
+        description="""
+API de traduction de documents Office avec préservation parfaite du format.
+
+## 🔐 Authentification
+
+L'API supporte deux méthodes d'authentification:
+
+### 1. JWT (Web Dashboard & Admin)
+Utilisé pour l'interface web et le dashboard admin.
+
+**Obtenir un token:**
+```
+POST /api/v1/auth/login
+{
+ "email": "user@example.com",
+ "password": "password123"
+}
+```
+
+**Utiliser le token:**
+```
+Authorization: Bearer eyJhbGciOiJIUzI1NiIs...
+```
+
+**Détails:**
+- Access token expire en 15 minutes
+- Refresh token expire en 7 jours
+- Utilisez `/api/v1/auth/refresh` pour renouveler l'access token
+
+### 2. API Key (Automation)
+Utilisé pour l'automatisation et l'intégration (Pro users only).
+
+**Obtenir une clé:**
+```
+POST /api/v1/api-keys
+Authorization: Bearer
+```
+
+**Utiliser la clé:**
+```
+X-API-Key: sk_live_abc123def456...
+```
+
+**Détails:**
+- Clé statique, pas d'expiration
+- Peut être révoquée à tout moment
+- Uniquement pour utilisateurs Pro
+
+## 📄 Endpoints Principaux
+
+### Translation
+- `POST /api/v1/translate` - Traduire un document
+- `GET /api/v1/translations/{id}` - Vérifier le statut
+- `GET /api/v1/download/{id}` - Télécharger le fichier traduit
+- `GET /api/v1/languages` - Langues supportées
+
+### Authentication
+- `POST /api/v1/auth/register` - Créer un compte
+- `POST /api/v1/auth/login` - Connexion
+- `POST /api/v1/auth/logout` - Déconnexion
+- `POST /api/v1/auth/refresh` - Renouveler le token
+
+### API Keys (Pro)
+- `POST /api/v1/api-keys` - Générer une clé
+- `GET /api/v1/api-keys` - Lister les clés
+- `DELETE /api/v1/api-keys/{key_id}` - Révoquer une clé
+
+### Admin
+- `POST /api/v1/admin/login` - Connexion admin
+- `GET /api/v1/admin/dashboard` - Dashboard admin
+- `GET /api/v1/admin/users` - Gestion utilisateurs
+- `PATCH /api/v1/admin/users/{user_id}` - Modifier tier utilisateur
+
+## 📋 Format des Réponses
+
+### Succès
+```json
+{
+ "data": {
+ "id": "tr_abc123",
+ "status": "processing",
+ "file_name": "report.xlsx"
+ },
+ "meta": {
+ "rate_limit_remaining": 45
+ }
+}
+```
+
+### Erreur
+```json
+{
+ "error": "INVALID_FORMAT",
+ "message": "Format PDF non supporté. Formats acceptés: .xlsx, .docx, .pptx",
+ "details": {
+ "accepted_formats": [".xlsx", ".docx", ".pptx"]
+ }
+}
+```
+
+## ⚠️ Codes d'Erreur Courants
+
+| Code | HTTP | Description |
+|------|------|-------------|
+| `INVALID_FORMAT` | 400 | Format fichier non supporté |
+| `FILE_TOO_LARGE` | 413 | Fichier > 50 MB |
+| `QUOTA_EXCEEDED` | 429 | Limite quotidienne atteinte |
+| `UNAUTHORIZED` | 401 | Token/API key invalide |
+| `FORBIDDEN` | 403 | Pas les droits requis |
+| `PRO_FEATURE_REQUIRED` | 403 | Feature réservée Pro |
+| `PROVIDER_ERROR` | 502 | Erreur provider externe |
+
+## 📊 Rate Limiting
+
+- **Free**: 5 fichiers par jour
+- **Pro**: Illimité (fair use policy)
+- Rate limit info dans `meta.rate_limit_remaining`
+- Header `Retry-After` si quota dépassé
+
+## 📁 Formats Supportés
+
+- **Excel**: .xlsx
+- **Word**: .docx
+- **PowerPoint**: .pptx
+- Taille max: 50 MB
+
+## 🌐 Langues Supportées
+
+Utilisez `GET /api/v1/languages` pour obtenir la liste complète.
+Codes ISO 639-1 (ex: en, fr, de, es, it, pt, ja, zh, ar, ru...)
+
+## 🔔 Webhooks (Pro)
+
+Spécifiez `webhook_url` dans votre requête pour recevoir une notification POST quand la traduction termine.
+
+Payload envoyé:
+```json
+{
+ "translation_id": "tr_abc123",
+ "status": "completed",
+ "timestamp": "2024-01-15T10:35:00Z",
+ "file_name": "report.xlsx",
+ "error_message": null
+}
+```
+    """,
+        routes=app.routes,
+        tags=OPENAPI_TAGS,
+    )
+
+    # Security scheme configuration.
+    # get_openapi() leaves out the "components" section when the registered
+    # routes define no schemas, so create it on demand instead of assuming it
+    # exists (direct indexing would raise KeyError in that case).
+    components = openapi_schema.setdefault("components", {})
+    components["securitySchemes"] = {
+        "JWT": {
+            "type": "http",
+            "scheme": "bearer",
+            "bearerFormat": "JWT",
+            "description": "JWT token obtenu via /api/v1/auth/login. Format: Bearer "
+        },
+        "APIKey": {
+            "type": "apiKey",
+            "in": "header",
+            "name": "X-API-Key",
+            "description": "Clé API obtenue via /api/v1/api-keys (utilisateurs Pro uniquement). Format: sk_live_..."
+        }
+    }
+
+    # Contact and license info
+    openapi_schema["info"]["contact"] = {
+        "name": "Office Translator Support",
+        "email": "support@office-translator.com",
+    }
+    openapi_schema["info"]["license"] = {
+        "name": "Proprietary",
+    }
+
+    # Cache the schema on the app so subsequent calls take the early return.
+    app.openapi_schema = openapi_schema
+    return app.openapi_schema
+
+
app = FastAPI(
- title=config.API_TITLE,
- version=config.API_VERSION,
- description=config.API_DESCRIPTION,
- lifespan=lifespan
+ title="Office Translator API",
+ version="1.0.0",
+ description="API de traduction de documents Office (Excel, Word, PowerPoint) avec préservation du format.",
+ lifespan=lifespan,
+ docs_url="/docs",
+ redoc_url="/redoc",
+ openapi_url="/openapi.json",
+ contact={
+ "name": "Office Translator Support",
+ "email": "support@office-translator.com",
+ },
+ license_info={
+ "name": "Proprietary",
+ },
)
-# Add middleware (order matters - first added is outermost)
+# Apply custom OpenAPI schema
+app.openapi = custom_openapi
+
app.add_middleware(ErrorHandlingMiddleware)
app.add_middleware(RequestLoggingMiddleware, log_body=False)
-app.add_middleware(SecurityHeadersMiddleware, config={"enable_hsts": os.getenv("ENABLE_HSTS", "false").lower() == "true"})
+app.add_middleware(
+ SecurityHeadersMiddleware,
+ config={"enable_hsts": os.getenv("ENABLE_HSTS", "false").lower() == "true"},
+)
app.add_middleware(RateLimitMiddleware, rate_limit_manager=rate_limit_manager)
-# CORS - configure for production
-# WARNING: Do not use "*" in production! Set CORS_ORIGINS to your actual frontend domains
_cors_env = os.getenv("CORS_ORIGINS", "")
if _cors_env == "*" or not _cors_env:
- logger.warning("⚠️ CORS_ORIGINS not properly configured. Using permissive settings for development only!")
+ logger.warning(
+ "CORS_ORIGINS not properly configured. Using permissive settings for development only!"
+ )
allowed_origins = ["*"]
else:
- allowed_origins = [origin.strip() for origin in _cors_env.split(",") if origin.strip()]
- logger.info(f"✅ CORS configured for origins: {allowed_origins}")
+ allowed_origins = [
+ origin.strip() for origin in _cors_env.split(",") if origin.strip()
+ ]
+ logger.info(f"CORS configured for origins: {allowed_origins}")
app.add_middleware(
CORSMiddleware,
allow_origins=allowed_origins,
- allow_credentials=True if allowed_origins != ["*"] else False, # Can't use credentials with wildcard
- allow_methods=["GET", "POST", "DELETE", "OPTIONS"],
+ allow_credentials=True if allowed_origins != ["*"] else False,
+ allow_methods=["GET", "POST", "PUT", "PATCH", "DELETE", "OPTIONS"],
allow_headers=["*"],
- expose_headers=["X-Request-ID", "X-Original-Filename", "X-File-Size-MB", "X-Target-Language"]
+ expose_headers=[
+ "X-Request-ID",
+ "X-Original-Filename",
+ "X-File-Size-MB",
+ "X-Target-Language",
+ ],
)
-# Mount static files
static_dir = Path(__file__).parent / "static"
if static_dir.exists():
app.mount("/static", StaticFiles(directory=str(static_dir)), name="static")
-# Include auth routes
-app.include_router(auth_router)
+app.include_router(api_v1_router)
-# Custom exception handler for ValidationError
-@app.exception_handler(ValidationError)
-async def validation_error_handler(request: Request, exc: ValidationError):
- """Handle validation errors with user-friendly messages"""
- return JSONResponse(
+@app.exception_handler(TranslationError)
+async def translation_error_handler(request: Request, exc: TranslationError):
+    """Turn a TranslationError into a structured error response.
+
+    The HTTP status is 413 for FileSizeLimitExceededError, 500 for the
+    DocumentProcessingError imported from utils.exceptions, and 400 for every
+    other TranslationError. The request id falls back to "unknown" when it is
+    not set on request.state.
+    """
+    if isinstance(exc, FileSizeLimitExceededError):
+        http_status = 413
+    elif isinstance(exc, UtilsDocumentProcessingError):
+        http_status = 500
+    else:
+        http_status = 400
+
+    return format_error_response(
+        status_code=http_status,
+        message=exc.message,
+        error_code=exc.code,
+        details=exc.details,
+        request_id=getattr(request.state, "request_id", "unknown"),
+    )
+
+
+@app.exception_handler(StarletteHTTPException)
+async def http_exception_handler(request: Request, exc: StarletteHTTPException):
+ """Handle standard FastAPI/Starlette HTTP exceptions (like 404, 405)."""
+ request_id = getattr(request.state, "request_id", "unknown")
+ headers = getattr(exc, "headers", None)
+ detail = exc.detail if hasattr(exc, "detail") else "An error occurred"
+ if isinstance(detail, dict):
+ return format_error_response(
+ status_code=exc.status_code,
+ message=detail.get("message", "An error occurred"),
+ error_code=detail.get("error"),
+ request_id=request_id,
+ headers=headers,
+ )
+ return format_error_response(
+ status_code=exc.status_code,
+ message=str(detail),
+ request_id=request_id,
+ headers=headers,
+ )
+
+
+@app.exception_handler(RequestValidationError)
+async def validation_exception_handler(request: Request, exc: RequestValidationError):
+ """Handle Pydantic validation errors (422) and convert them to structured 400."""
+ request_id = getattr(request.state, "request_id", "unknown")
+ error_details = []
+ for err in exc.errors():
+ loc = ".".join(str(p) for p in err.get("loc", []))
+ msg = err.get("msg", "Validation error")
+ error_details.append({"field": loc, "message": msg})
+
+ return format_error_response(
status_code=400,
- content={
- "error": exc.code,
- "message": exc.message,
- "details": exc.details
- }
+ message="Erreur de validation des donnees transmises.",
+ error_code="INVALID_FORMAT",
+ details={"validation_errors": error_details},
+ request_id=request_id,
+ )
+
+
+@app.exception_handler(ValidationError)
+async def custom_validation_error_handler(request: Request, exc: ValidationError):
+ """Handle custom validation errors with user-friendly messages."""
+ request_id = getattr(request.state, "request_id", "unknown")
+ return format_error_response(
+ status_code=400,
+ message=exc.message,
+ error_code=exc.code,
+ details=exc.details,
+ request_id=request_id,
+ )
+
+
+@app.exception_handler(AllProvidersFailedError)
+async def all_providers_failed_handler(request: Request, exc: AllProvidersFailedError):
+ """Return 502 with structured JSON when all providers in the chain fail."""
+ request_id = getattr(request.state, "request_id", "unknown")
+ return format_error_response(
+ status_code=502,
+ message=exc.message,
+ error_code=exc.code,
+ details={
+ "providers_tried": exc.providers_tried,
+ "error_count": len(exc.errors),
+ },
+ request_id=request_id,
+ )
+
+
+@app.exception_handler(ExcelProcessorError)
+async def excel_processor_error_handler(request: Request, exc: ExcelProcessorError):
+ """Handle Excel processing errors with structured JSON response."""
+ status_code = 400
+ if exc.code == ExcelProcessorError.EXCEL_WRITE_ERROR:
+ status_code = 500
+ elif exc.code == ExcelProcessorError.EXCEL_TOO_LARGE:
+ status_code = 413
+
+ request_id = getattr(request.state, "request_id", "unknown")
+ return format_error_response(
+ status_code=status_code,
+ message=exc.message,
+ error_code=exc.code,
+ details=exc.details,
+ request_id=request_id,
+ )
+
+
+@app.exception_handler(WordProcessorError)
+async def word_processor_error_handler(request: Request, exc: WordProcessorError):
+ """Handle Word processing errors with structured JSON response."""
+ status_code = 400
+ if exc.code == WordProcessorError.DOCX_WRITE_ERROR:
+ status_code = 500
+ elif exc.code == WordProcessorError.DOCX_TOO_LARGE:
+ status_code = 413
+
+ request_id = getattr(request.state, "request_id", "unknown")
+ return format_error_response(
+ status_code=status_code,
+ message=exc.message,
+ error_code=exc.code,
+ details=exc.details,
+ request_id=request_id,
+ )
+
+
+@app.exception_handler(PptxProcessorError)
+async def pptx_processor_error_handler(request: Request, exc: PptxProcessorError):
+ """Handle PowerPoint processing errors with structured JSON response."""
+ status_code = 400
+ if exc.code == PptxProcessorError.PPTX_WRITE_ERROR:
+ status_code = 500
+ elif exc.code == PptxProcessorError.PPTX_TOO_LARGE:
+ status_code = 413
+
+ request_id = getattr(request.state, "request_id", "unknown")
+ return format_error_response(
+ status_code=status_code,
+ message=exc.message,
+ error_code=exc.code,
+ details=exc.details,
+ request_id=request_id,
)
@@ -290,44 +540,43 @@ async def root():
"name": config.API_TITLE,
"version": config.API_VERSION,
"status": "operational",
+ "docs": "/docs",
+ "redoc": "/redoc",
+ "api_base": "/api/v1",
"supported_formats": list(config.SUPPORTED_EXTENSIONS),
- "endpoints": {
- "translate": "/translate",
- "health": "/health",
- "supported_languages": "/languages"
- }
}
-@app.get("/health")
+@app.get("/health", tags=["Health"])
async def health_check():
- """Health check endpoint with detailed system status"""
+ """Health check endpoint with detailed system status (Kubernetes liveness probe)"""
+ REDIS_URL = os.getenv("REDIS_URL", "")
+
health_status = await health_checker.check_health()
status_code = 200 if health_status.get("status") == "healthy" else 503
-
- # Check database connection
+
db_status = {"status": "not_configured"}
try:
from database.connection import check_db_connection
+
if check_db_connection():
db_status = {"status": "healthy"}
else:
db_status = {"status": "unhealthy"}
except Exception as e:
db_status = {"status": "error", "error": str(e)}
-
- # Check Redis connection
+
redis_status = {"status": "not_configured"}
- redis_client = get_redis_client()
- if redis_client:
+ if REDIS_URL:
try:
+ import redis
+
+ redis_client = redis.from_url(REDIS_URL, decode_responses=True)
redis_client.ping()
redis_status = {"status": "healthy"}
except Exception as e:
redis_status = {"status": "unhealthy", "error": str(e)}
- elif redis_client is False:
- redis_status = {"status": "connection_failed"}
-
+
return JSONResponse(
status_code=status_code,
content={
@@ -342,920 +591,44 @@ async def health_check():
"requests_per_minute": rate_limit_config.requests_per_minute,
"translations_per_minute": rate_limit_config.translations_per_minute,
},
- "translation_cache": _translation_cache.stats()
- }
+ "translation_cache": _translation_cache.stats(),
+ },
)
-@app.get("/ready")
+@app.get("/ready", tags=["Health"])
async def readiness_check():
"""Kubernetes readiness probe - check if app can serve traffic"""
+ REDIS_URL = os.getenv("REDIS_URL", "")
issues = []
-
- # Check database
+
try:
from database.connection import check_db_connection, DATABASE_URL
- if DATABASE_URL: # Only check if configured
+
+ if DATABASE_URL:
if not check_db_connection():
issues.append("database_unavailable")
except ImportError:
- pass # Database module not available - OK for development
+ pass
except Exception as e:
issues.append(f"database_error: {str(e)}")
-
- # Check Redis (optional but log if configured and unavailable)
+
if REDIS_URL:
- redis_client = get_redis_client()
- if redis_client:
- try:
- redis_client.ping()
- except Exception:
- issues.append("redis_unavailable")
- elif redis_client is False:
- issues.append("redis_connection_failed")
-
- if issues:
- return JSONResponse(
- status_code=503,
- content={"ready": False, "issues": issues}
- )
-
- return {"ready": True}
-
-
-@app.get("/languages")
-async def get_supported_languages():
- """Get list of supported language codes"""
- return {
- "supported_languages": {
- "es": "Spanish",
- "fr": "French",
- "de": "German",
- "it": "Italian",
- "pt": "Portuguese",
- "ru": "Russian",
- "zh": "Chinese (Simplified)",
- "ja": "Japanese",
- "ko": "Korean",
- "ar": "Arabic",
- "hi": "Hindi",
- "nl": "Dutch",
- "pl": "Polish",
- "tr": "Turkish",
- "sv": "Swedish",
- "da": "Danish",
- "no": "Norwegian",
- "fi": "Finnish",
- "cs": "Czech",
- "el": "Greek",
- "th": "Thai",
- "vi": "Vietnamese",
- "id": "Indonesian",
- "uk": "Ukrainian",
- "ro": "Romanian",
- "hu": "Hungarian"
- },
- "note": "Supported languages may vary depending on the translation service configured"
- }
-
-
-@app.post("/translate")
-async def translate_document(
- request: Request,
- file: UploadFile = File(..., description="Document file to translate (.xlsx, .docx, or .pptx)"),
- target_language: str = Form(..., description="Target language code (e.g., 'es', 'fr', 'de')"),
- source_language: str = Form(default="auto", description="Source language code (default: auto-detect)"),
- provider: str = Form(default="openrouter", description="Translation provider (openrouter, google, ollama, deepl, libre, openai)"),
- translate_images: bool = Form(default=False, description="Translate images with multimodal Ollama/OpenAI model"),
- ollama_model: str = Form(default="", description="Ollama model to use (also used for vision if multimodal)"),
- system_prompt: str = Form(default="", description="Custom system prompt with context or instructions for LLM translation"),
- glossary: str = Form(default="", description="Technical glossary (format: source=target, one per line)"),
- libre_url: str = Form(default="https://libretranslate.com", description="LibreTranslate server URL"),
- openai_api_key: str = Form(default="", description="OpenAI API key"),
- openai_model: str = Form(default="gpt-4o-mini", description="OpenAI model to use (gpt-4o-mini is cheapest with vision)"),
- openrouter_api_key: str = Form(default="", description="OpenRouter API key"),
- openrouter_model: str = Form(default="deepseek/deepseek-chat", description="OpenRouter model (deepseek/deepseek-chat is best value)"),
- cleanup: bool = Form(default=True, description="Delete input file after translation")
-):
- """
- Translate a document while preserving all formatting, layout, and embedded media
-
- **Supported File Types:**
- - Excel (.xlsx) - Preserves formulas, merged cells, styling, and images
- - Word (.docx) - Preserves headings, tables, images, headers/footers
- - PowerPoint (.pptx) - Preserves layouts, animations, and media
-
- **Parameters:**
- - **file**: The document file to translate
- - **target_language**: Target language code (e.g., 'es' for Spanish, 'fr' for French)
- - **source_language**: Source language code (optional, default: auto-detect)
- - **cleanup**: Whether to delete the uploaded file after translation (default: True)
-
- **Returns:**
- - Translated document file with preserved formatting
- """
- input_path = None
- output_path = None
- request_id = getattr(request.state, 'request_id', 'unknown')
-
- try:
- # Validate inputs
- sanitized_language = InputSanitizer.sanitize_language_code(target_language)
- LanguageValidator.validate(sanitized_language)
- ProviderValidator.validate(provider)
-
- # Validate file before processing
- validation_result = await file_validator.validate_async(file)
- if not validation_result.is_valid:
- raise ValidationError(
- message=f"File validation failed: {'; '.join(validation_result.errors)}",
- code="INVALID_FILE",
- details={"errors": validation_result.errors, "warnings": validation_result.warnings}
- )
-
- # Log any warnings
- if validation_result.warnings:
- logger.warning(f"[{request_id}] File validation warnings: {validation_result.warnings}")
-
- # Reset file position after validation read
- await file.seek(0)
-
- # Check rate limit for translations
- client_ip = request.client.host if request.client else "unknown"
- if not await rate_limit_manager.check_translation_limit(client_ip):
- raise HTTPException(
- status_code=429,
- detail="Translation rate limit exceeded. Please try again later."
- )
-
- # Validate file extension
- file_extension = file_handler.validate_file_extension(file.filename)
- logger.info(f"[{request_id}] Processing {file_extension} file: {file.filename}")
-
- # Validate file size
- file_handler.validate_file_size(file)
-
- # Generate unique filenames
- input_filename = file_handler.generate_unique_filename(file.filename, "input")
- output_filename = file_handler.generate_unique_filename(file.filename, "translated")
-
- # Save uploaded file
- input_path = config.UPLOAD_DIR / input_filename
- output_path = config.OUTPUT_DIR / output_filename
-
- await file_handler.save_upload_file(file, input_path)
- logger.info(f"[{request_id}] Saved input file to: {input_path}")
-
- # Track file for cleanup
- await cleanup_manager.track_file(input_path, ttl_minutes=30)
- await cleanup_manager.track_file(output_path, ttl_minutes=60)
-
- # Configure translation provider
- from services.translation_service import GoogleTranslationProvider, DeepLTranslationProvider, LibreTranslationProvider, OllamaTranslationProvider, OpenAITranslationProvider, OpenRouterTranslationProvider, translation_service
-
- if provider.lower() == "openrouter":
- api_key = openrouter_api_key.strip() if openrouter_api_key else os.getenv("OPENROUTER_API_KEY", "")
- if not api_key:
- raise HTTPException(status_code=400, detail="OpenRouter API key not provided. Get one at https://openrouter.ai/keys")
- model_to_use = openrouter_model.strip() if openrouter_model else "deepseek/deepseek-chat"
- custom_prompt = build_full_prompt(system_prompt, glossary)
- logger.info(f"Using OpenRouter model: {model_to_use}")
- if custom_prompt:
- logger.info(f"Custom system prompt provided ({len(custom_prompt)} chars)")
- translation_provider = OpenRouterTranslationProvider(api_key, model_to_use, custom_prompt)
- elif provider.lower() == "deepl":
- if not config.DEEPL_API_KEY:
- raise HTTPException(status_code=400, detail="DeepL API key not configured")
- translation_provider = DeepLTranslationProvider(config.DEEPL_API_KEY)
- elif provider.lower() == "libre":
- libre_server = libre_url.strip() if libre_url else "https://libretranslate.com"
- logger.info(f"Using LibreTranslate server: {libre_server}")
- translation_provider = LibreTranslationProvider(libre_server)
- elif provider.lower() == "openai":
- api_key = openai_api_key.strip() if openai_api_key else ""
- if not api_key:
- raise HTTPException(status_code=400, detail="OpenAI API key not provided")
- model_to_use = openai_model.strip() if openai_model else "gpt-4o-mini"
- # Combine system prompt and glossary
- custom_prompt = build_full_prompt(system_prompt, glossary)
- logger.info(f"Using OpenAI model: {model_to_use}")
- if custom_prompt:
- logger.info(f"Custom system prompt provided ({len(custom_prompt)} chars)")
- translation_provider = OpenAITranslationProvider(api_key, model_to_use, custom_prompt)
- elif provider.lower() == "ollama":
- # Use the same model for text and vision (multimodal models like gemma3, qwen3-vl)
- model_to_use = ollama_model.strip() if ollama_model else config.OLLAMA_MODEL
- # Combine system prompt and glossary
- custom_prompt = build_full_prompt(system_prompt, glossary)
- logger.info(f"Using Ollama model: {model_to_use} (text + vision)")
- if custom_prompt:
- logger.info(f"Custom system prompt provided ({len(custom_prompt)} chars)")
- translation_provider = OllamaTranslationProvider(config.OLLAMA_BASE_URL, model_to_use, model_to_use, custom_prompt)
- elif provider.lower() == "google":
- translation_provider = GoogleTranslationProvider()
- else:
- # Default to OpenRouter with DeepSeek (best value)
- api_key = openrouter_api_key.strip() if openrouter_api_key else os.getenv("OPENROUTER_API_KEY", "")
- if api_key:
- translation_provider = OpenRouterTranslationProvider(api_key, "deepseek/deepseek-chat", build_full_prompt(system_prompt, glossary))
- else:
- translation_provider = GoogleTranslationProvider()
-
- # Update the global translation service
- translation_service.provider = translation_provider
-
- # Store translate_images flag for translators to access
- translation_service.translate_images = translate_images
-
- # Translate based on file type
- if file_extension == ".xlsx":
- logger.info("Translating Excel file...")
- excel_translator.translate_file(input_path, output_path, target_language)
- elif file_extension == ".docx":
- logger.info("Translating Word document...")
- word_translator.translate_file(input_path, output_path, target_language)
- elif file_extension == ".pptx":
- logger.info("Translating PowerPoint presentation...")
- pptx_translator.translate_file(input_path, output_path, target_language)
- else:
- raise DocumentProcessingError(f"Unsupported file type: {file_extension}")
-
- logger.info(f"Translation completed: {output_path}")
-
- # Get file info
- output_info = file_handler.get_file_info(output_path)
-
- # Cleanup input file if requested
- if cleanup and input_path:
- file_handler.cleanup_file(input_path)
- logger.info(f"Cleaned up input file: {input_path}")
-
- # Return the translated file
- return FileResponse(
- path=output_path,
- filename=f"translated_{file.filename}",
- media_type="application/octet-stream",
- headers={
- "X-Original-Filename": file.filename,
- "X-File-Size-MB": str(output_info.get("size_mb", 0)),
- "X-Target-Language": target_language
- }
- )
-
- except HTTPException:
- # Re-raise HTTP exceptions
- raise
- except Exception as e:
- logger.error(f"Translation error: {str(e)}", exc_info=True)
-
- # Cleanup files on error
- if input_path:
- file_handler.cleanup_file(input_path)
- if output_path:
- file_handler.cleanup_file(output_path)
-
- raise handle_translation_error(e)
-
-
-@app.delete("/cleanup/{filename}")
-async def cleanup_translated_file(filename: str):
- """
- Cleanup a translated file after download
-
- **Parameters:**
- - **filename**: Name of the file to delete from the outputs directory
- """
- try:
- file_path = config.OUTPUT_DIR / filename
-
- if not file_path.exists():
- raise HTTPException(status_code=404, detail="File not found")
-
- file_handler.cleanup_file(file_path)
-
- return {"message": f"File {filename} deleted successfully"}
-
- except HTTPException:
- raise
- except Exception as e:
- logger.error(f"Cleanup error: {str(e)}")
- raise HTTPException(status_code=500, detail="Error cleaning up file")
-
-
-@app.post("/translate-batch")
-async def translate_batch_documents(
- files: list[UploadFile] = File(..., description="Multiple document files to translate"),
- target_language: str = Form(..., description="Target language code"),
- source_language: str = Form(default="auto", description="Source language code")
-):
- """
- Translate multiple documents in batch
-
- **Note:** This endpoint processes files sequentially. For large batches, consider
- calling the single file endpoint multiple times with concurrent requests.
- """
- results = []
-
- for file in files:
try:
- # Process each file using the same logic as single file translation
- file_extension = file_handler.validate_file_extension(file.filename)
- file_handler.validate_file_size(file)
-
- input_filename = file_handler.generate_unique_filename(file.filename, "input")
- output_filename = file_handler.generate_unique_filename(file.filename, "translated")
-
- input_path = config.UPLOAD_DIR / input_filename
- output_path = config.OUTPUT_DIR / output_filename
-
- await file_handler.save_upload_file(file, input_path)
-
- # Translate based on file type
- if file_extension == ".xlsx":
- excel_translator.translate_file(input_path, output_path, target_language)
- elif file_extension == ".docx":
- word_translator.translate_file(input_path, output_path, target_language)
- elif file_extension == ".pptx":
- pptx_translator.translate_file(input_path, output_path, target_language)
-
- # Cleanup input file
- file_handler.cleanup_file(input_path)
-
- results.append({
- "filename": file.filename,
- "status": "success",
- "output_file": output_filename,
- "download_url": f"/download/{output_filename}"
- })
-
- except Exception as e:
- logger.error(f"Error processing {file.filename}: {str(e)}")
- results.append({
- "filename": file.filename,
- "status": "error",
- "error": str(e)
- })
-
- return {
- "total_files": len(files),
- "successful": len([r for r in results if r["status"] == "success"]),
- "failed": len([r for r in results if r["status"] == "error"]),
- "results": results
- }
+ import redis
+ redis_client = redis.from_url(REDIS_URL, decode_responses=True)
+ redis_client.ping()
+ except Exception:
+ issues.append("redis_unavailable")
-@app.get("/download/{filename}")
-async def download_file(filename: str):
- """
- Download a translated file by filename
-
- **Parameters:**
- - **filename**: Name of the file to download from the outputs directory
- """
- file_path = config.OUTPUT_DIR / filename
-
- if not file_path.exists():
- raise HTTPException(status_code=404, detail="File not found")
-
- return FileResponse(
- path=file_path,
- filename=filename,
- media_type="application/octet-stream"
- )
+ if issues:
+ return JSONResponse(status_code=503, content={"ready": False, "issues": issues})
-
-@app.get("/ollama/models")
-async def list_ollama_models(base_url: Optional[str] = None):
- """
- List available Ollama models
-
- **Parameters:**
- - **base_url**: Ollama server URL (default: from config)
- """
- from services.translation_service import OllamaTranslationProvider
-
- url = base_url or config.OLLAMA_BASE_URL
- models = OllamaTranslationProvider.list_models(url)
-
- return {
- "ollama_url": url,
- "models": models,
- "count": len(models)
- }
-
-
-@app.post("/ollama/configure")
-async def configure_ollama(base_url: str = Form(...), model: str = Form(...)):
- """
- Configure Ollama settings
-
- **Parameters:**
- - **base_url**: Ollama server URL (e.g., http://localhost:11434)
- - **model**: Model name to use for translation (e.g., llama3, mistral)
- """
- config.OLLAMA_BASE_URL = base_url
- config.OLLAMA_MODEL = model
-
- return {
- "status": "success",
- "message": "Ollama configuration updated",
- "ollama_url": base_url,
- "model": model
- }
-
-
-@app.post("/extract-texts")
-async def extract_texts_from_document(
- file: UploadFile = File(..., description="Document file to extract texts from"),
-):
- """
- Extract all translatable texts from a document for client-side translation (WebLLM).
- Returns a list of texts and a session ID to use for reconstruction.
-
- **Parameters:**
- - **file**: The document file to extract texts from
-
- **Returns:**
- - session_id: Unique ID to reference this extraction
- - texts: Array of texts to translate
- - file_type: Type of the document
- """
- import uuid
- import json
-
- try:
- # Validate file extension
- file_extension = file_handler.validate_file_extension(file.filename)
- logger.info(f"Extracting texts from {file_extension} file: {file.filename}")
-
- # Validate file size
- file_handler.validate_file_size(file)
-
- # Generate session ID
- session_id = str(uuid.uuid4())
-
- # Save uploaded file
- input_filename = f"session_{session_id}{file_extension}"
- input_path = config.UPLOAD_DIR / input_filename
- await file_handler.save_upload_file(file, input_path)
-
- # Extract texts based on file type
- texts = []
-
- if file_extension == ".xlsx":
- from openpyxl import load_workbook
- wb = load_workbook(input_path)
- for sheet in wb.worksheets:
- for row in sheet.iter_rows():
- for cell in row:
- if cell.value and isinstance(cell.value, str) and cell.value.strip():
- texts.append({
- "id": f"{sheet.title}!{cell.coordinate}",
- "text": cell.value
- })
- wb.close()
- elif file_extension == ".docx":
- from docx import Document
- doc = Document(input_path)
- para_idx = 0
- for para in doc.paragraphs:
- if para.text.strip():
- texts.append({
- "id": f"para_{para_idx}",
- "text": para.text
- })
- para_idx += 1
- # Also extract from tables
- table_idx = 0
- for table in doc.tables:
- for row_idx, row in enumerate(table.rows):
- for cell_idx, cell in enumerate(row.cells):
- if cell.text.strip():
- texts.append({
- "id": f"table_{table_idx}_r{row_idx}_c{cell_idx}",
- "text": cell.text
- })
- table_idx += 1
- elif file_extension == ".pptx":
- from pptx import Presentation
- prs = Presentation(input_path)
- for slide_idx, slide in enumerate(prs.slides):
- for shape_idx, shape in enumerate(slide.shapes):
- if shape.has_text_frame:
- for para_idx, para in enumerate(shape.text_frame.paragraphs):
- for run_idx, run in enumerate(para.runs):
- if run.text.strip():
- texts.append({
- "id": f"slide_{slide_idx}_shape_{shape_idx}_para_{para_idx}_run_{run_idx}",
- "text": run.text
- })
-
- # Save session metadata
- session_data = {
- "original_filename": file.filename,
- "file_extension": file_extension,
- "input_path": str(input_path),
- "text_count": len(texts)
- }
- session_file = config.UPLOAD_DIR / f"session_{session_id}.json"
- with open(session_file, "w", encoding="utf-8") as f:
- json.dump(session_data, f)
-
- logger.info(f"Extracted {len(texts)} texts from {file.filename}, session: {session_id}")
-
- return {
- "session_id": session_id,
- "texts": texts,
- "file_type": file_extension,
- "text_count": len(texts)
- }
-
- except HTTPException:
- raise
- except Exception as e:
- logger.error(f"Text extraction error: {str(e)}", exc_info=True)
- raise HTTPException(status_code=500, detail=f"Failed to extract texts: {str(e)}")
-
-
-@app.post("/reconstruct-document")
-async def reconstruct_document(
- session_id: str = Form(..., description="Session ID from extract-texts"),
- translations: str = Form(..., description="JSON array of {id, translated_text} objects"),
- target_language: str = Form(..., description="Target language code"),
-):
- """
- Reconstruct a document with translated texts.
-
- **Parameters:**
- - **session_id**: The session ID from extract-texts
- - **translations**: JSON array of translations with matching IDs
- - **target_language**: Target language for filename
-
- **Returns:**
- - Translated document file
- """
- import json
-
- try:
- # Load session data
- session_file = config.UPLOAD_DIR / f"session_{session_id}.json"
- if not session_file.exists():
- raise HTTPException(status_code=404, detail="Session not found or expired")
-
- with open(session_file, "r", encoding="utf-8") as f:
- session_data = json.load(f)
-
- input_path = Path(session_data["input_path"])
- file_extension = session_data["file_extension"]
- original_filename = session_data["original_filename"]
-
- if not input_path.exists():
- raise HTTPException(status_code=404, detail="Source file not found or expired")
-
- # Parse translations
- translation_list = json.loads(translations)
- translation_map = {t["id"]: t["translated_text"] for t in translation_list}
-
- # Generate output path
- output_filename = file_handler.generate_unique_filename(original_filename, "translated")
- output_path = config.OUTPUT_DIR / output_filename
-
- # Reconstruct based on file type
- if file_extension == ".xlsx":
- from openpyxl import load_workbook
- import shutil
- shutil.copy(input_path, output_path)
- wb = load_workbook(output_path)
- for sheet in wb.worksheets:
- for row in sheet.iter_rows():
- for cell in row:
- cell_id = f"{sheet.title}!{cell.coordinate}"
- if cell_id in translation_map:
- cell.value = translation_map[cell_id]
- wb.save(output_path)
- wb.close()
-
- elif file_extension == ".docx":
- from docx import Document
- import shutil
- shutil.copy(input_path, output_path)
- doc = Document(output_path)
- para_idx = 0
- for para in doc.paragraphs:
- para_id = f"para_{para_idx}"
- if para_id in translation_map and para.text.strip():
- # Replace text while keeping formatting
- for run in para.runs:
- run.text = ""
- if para.runs:
- para.runs[0].text = translation_map[para_id]
- else:
- para.text = translation_map[para_id]
- para_idx += 1
- # Also handle tables
- table_idx = 0
- for table in doc.tables:
- for row_idx, row in enumerate(table.rows):
- for cell_idx, cell in enumerate(row.cells):
- cell_id = f"table_{table_idx}_r{row_idx}_c{cell_idx}"
- if cell_id in translation_map:
- # Clear and set new text
- for para in cell.paragraphs:
- for run in para.runs:
- run.text = ""
- if cell.paragraphs and cell.paragraphs[0].runs:
- cell.paragraphs[0].runs[0].text = translation_map[cell_id]
- elif cell.paragraphs:
- cell.paragraphs[0].text = translation_map[cell_id]
- table_idx += 1
- doc.save(output_path)
-
- elif file_extension == ".pptx":
- from pptx import Presentation
- import shutil
- shutil.copy(input_path, output_path)
- prs = Presentation(output_path)
- for slide_idx, slide in enumerate(prs.slides):
- for shape_idx, shape in enumerate(slide.shapes):
- if shape.has_text_frame:
- for para_idx, para in enumerate(shape.text_frame.paragraphs):
- for run_idx, run in enumerate(para.runs):
- run_id = f"slide_{slide_idx}_shape_{shape_idx}_para_{para_idx}_run_{run_idx}"
- if run_id in translation_map:
- run.text = translation_map[run_id]
- prs.save(output_path)
-
- # Cleanup session files
- file_handler.cleanup_file(input_path)
- file_handler.cleanup_file(session_file)
-
- logger.info(f"Reconstructed document: {output_path}")
-
- return FileResponse(
- path=output_path,
- filename=f"translated_{original_filename}",
- media_type="application/octet-stream"
- )
-
- except HTTPException:
- raise
- except Exception as e:
- logger.error(f"Reconstruction error: {str(e)}", exc_info=True)
- raise HTTPException(status_code=500, detail=f"Failed to reconstruct document: {str(e)}")
-
-
-# ============== SaaS Management Endpoints ==============
-
-class AdminLoginRequest(BaseModel):
- password: str
-
-@app.post("/admin/login")
-async def admin_login(request: AdminLoginRequest):
- """
- Admin login endpoint
- Returns a bearer token for authenticated admin access
- """
- if not verify_admin_password(request.password):
- logger.warning(f"Failed admin login attempt - wrong password")
- raise HTTPException(status_code=401, detail="Invalid credentials")
-
- token = create_admin_token()
- logger.info(f"Admin login successful")
-
- return {
- "status": "success",
- "access_token": token,
- "token_type": "bearer",
- "expires_in": 86400, # 24 hours in seconds
- "message": "Login successful"
- }
-
-
-@app.post("/admin/logout")
-async def admin_logout(authorization: Optional[str] = Header(None)):
- """Logout and invalidate admin token"""
- if authorization:
- parts = authorization.split(" ")
- if len(parts) == 2 and parts[0].lower() == "bearer":
- token = parts[1]
- delete_admin_token(token)
- logger.info("Admin logout successful")
-
- return {"status": "success", "message": "Logged out"}
-
-
-@app.get("/admin/verify")
-async def verify_admin_session(is_admin: bool = Depends(require_admin)):
- """Verify admin token is still valid"""
- return {"status": "valid", "authenticated": True}
-
-
-@app.get("/admin/dashboard")
-async def get_admin_dashboard(is_admin: bool = Depends(require_admin)):
- """Get comprehensive admin dashboard data"""
- health_status = await health_checker.check_health()
- cleanup_stats = cleanup_manager.get_stats()
- rate_limit_stats = rate_limit_manager.get_stats()
- tracked_files = cleanup_manager.get_tracked_files()
-
- return {
- "timestamp": health_status.get("timestamp"),
- "uptime": health_status.get("uptime_human"),
- "status": health_status.get("status"),
- "issues": health_status.get("issues", []),
- "system": {
- "memory": health_status.get("memory", {}),
- "disk": health_status.get("disk", {}),
- },
- "translations": health_status.get("translations", {}),
- "cleanup": {
- **cleanup_stats,
- "tracked_files_count": len(tracked_files)
- },
- "rate_limits": rate_limit_stats,
- "config": {
- "max_file_size_mb": config.MAX_FILE_SIZE_MB,
- "supported_extensions": list(config.SUPPORTED_EXTENSIONS),
- "translation_service": config.TRANSLATION_SERVICE,
- "rate_limit_per_minute": rate_limit_config.requests_per_minute,
- "translations_per_minute": rate_limit_config.translations_per_minute
- }
- }
-
-
-@app.get("/metrics")
-async def get_metrics():
- """Get system metrics and statistics for monitoring"""
- health_status = await health_checker.check_health()
- cleanup_stats = cleanup_manager.get_stats()
- rate_limit_stats = rate_limit_manager.get_stats()
-
- return {
- "system": {
- "memory": health_status.get("memory", {}),
- "disk": health_status.get("disk", {}),
- "status": health_status.get("status", "unknown")
- },
- "cleanup": cleanup_stats,
- "rate_limits": rate_limit_stats,
- "config": {
- "max_file_size_mb": config.MAX_FILE_SIZE_MB,
- "supported_extensions": list(config.SUPPORTED_EXTENSIONS),
- "translation_service": config.TRANSLATION_SERVICE
- }
- }
-
-
-@app.get("/rate-limit/status")
-async def get_rate_limit_status(request: Request):
- """Get current rate limit status for the requesting client"""
- client_ip = request.client.host if request.client else "unknown"
- status = await rate_limit_manager.get_client_status(client_ip)
-
- return {
- "client_ip": client_ip,
- "limits": {
- "requests_per_minute": rate_limit_config.requests_per_minute,
- "requests_per_hour": rate_limit_config.requests_per_hour,
- "translations_per_minute": rate_limit_config.translations_per_minute,
- "translations_per_hour": rate_limit_config.translations_per_hour
- },
- "current_usage": status
- }
-
-
-@app.post("/admin/cleanup/trigger")
-async def trigger_cleanup(is_admin: bool = Depends(require_admin)):
- """Trigger manual cleanup of expired files (requires admin auth)"""
- try:
- cleaned = await cleanup_manager.cleanup_expired()
- return {
- "status": "success",
- "files_cleaned": cleaned,
- "message": f"Cleaned up {cleaned} expired files"
- }
- except Exception as e:
- logger.error(f"Manual cleanup failed: {str(e)}")
- raise HTTPException(status_code=500, detail=f"Cleanup failed: {str(e)}")
-
-
-@app.get("/admin/files/tracked")
-async def get_tracked_files(is_admin: bool = Depends(require_admin)):
- """Get list of currently tracked files (requires admin auth)"""
- tracked = cleanup_manager.get_tracked_files()
- return {
- "count": len(tracked),
- "files": tracked
- }
-
-
-@app.get("/admin/users")
-async def get_admin_users(is_admin: bool = Depends(require_admin)):
- """Get all users with their usage stats (requires admin auth)"""
- from services.auth_service import load_users
- from models.subscription import PLANS
-
- users_data = load_users()
- users_list = []
-
- for user_id, user_data in users_data.items():
- plan = user_data.get("plan", "free")
- plan_info = PLANS.get(plan, PLANS["free"])
-
- users_list.append({
- "id": user_id,
- "email": user_data.get("email", ""),
- "name": user_data.get("name", ""),
- "plan": plan,
- "subscription_status": user_data.get("subscription_status", "active"),
- "docs_translated_this_month": user_data.get("docs_translated_this_month", 0),
- "pages_translated_this_month": user_data.get("pages_translated_this_month", 0),
- "extra_credits": user_data.get("extra_credits", 0),
- "created_at": user_data.get("created_at", ""),
- "plan_limits": {
- "docs_per_month": plan_info.get("docs_per_month", 0),
- "max_pages_per_doc": plan_info.get("max_pages_per_doc", 0),
- }
- })
-
- # Sort by created_at descending (newest first)
- users_list.sort(key=lambda x: x.get("created_at", ""), reverse=True)
-
- return {
- "total": len(users_list),
- "users": users_list
- }
-
-
-@app.get("/admin/stats")
-async def get_admin_stats(is_admin: bool = Depends(require_admin)):
- """Get comprehensive admin statistics (requires admin auth)"""
- from services.auth_service import load_users
- from models.subscription import PLANS
-
- users_data = load_users()
-
- # Calculate stats
- total_users = len(users_data)
- plan_distribution = {}
- total_docs_translated = 0
- total_pages_translated = 0
- active_users = 0 # Users who translated something this month
-
- for user_data in users_data.values():
- plan = user_data.get("plan", "free")
- plan_distribution[plan] = plan_distribution.get(plan, 0) + 1
-
- docs = user_data.get("docs_translated_this_month", 0)
- pages = user_data.get("pages_translated_this_month", 0)
- total_docs_translated += docs
- total_pages_translated += pages
-
- if docs > 0:
- active_users += 1
-
- # Get cache stats
- cache_stats = _translation_cache.get_stats()
-
- return {
- "users": {
- "total": total_users,
- "active_this_month": active_users,
- "by_plan": plan_distribution
- },
- "translations": {
- "docs_this_month": total_docs_translated,
- "pages_this_month": total_pages_translated
- },
- "cache": cache_stats,
- "config": {
- "translation_service": config.TRANSLATION_SERVICE,
- "max_file_size_mb": config.MAX_FILE_SIZE_MB,
- "supported_extensions": list(config.SUPPORTED_EXTENSIONS)
- }
- }
-
-
-@app.post("/admin/config/provider")
-async def update_default_provider(
- provider: str = Form(...),
- is_admin: bool = Depends(require_admin)
-):
- """Update the default translation provider (requires admin auth)"""
- valid_providers = ["google", "openrouter", "ollama", "deepl", "libre", "openai"]
- if provider not in valid_providers:
- raise HTTPException(status_code=400, detail=f"Invalid provider. Must be one of: {valid_providers}")
-
- # Update config (in production, this would persist to database/env)
- config.TRANSLATION_SERVICE = provider
-
- return {
- "status": "success",
- "message": f"Default provider updated to {provider}",
- "provider": provider
- }
+ return {"ready": True}
if __name__ == "__main__":
import uvicorn
- uvicorn.run("main:app", host="0.0.0.0", port=8000, reload=True)
\ No newline at end of file
+
+ uvicorn.run("main:app", host="0.0.0.0", port=8000, reload=True)
diff --git a/middleware/__init__.py b/middleware/__init__.py
index 2d3c558..64e3490 100644
--- a/middleware/__init__.py
+++ b/middleware/__init__.py
@@ -27,6 +27,9 @@ from .validation import (
from .security import (
SecurityHeadersMiddleware,
RequestLoggingMiddleware,
+)
+
+from .error_handler import (
ErrorHandlingMiddleware,
)
@@ -37,7 +40,25 @@ from .cleanup import (
create_cleanup_manager,
)
+from .api_key_auth import (
+ APIKeyError,
+ get_user_from_api_key,
+ get_authenticated_user,
+ get_authenticated_user_optional,
+ get_current_user_optional,
+ require_authenticated_user,
+ require_api_key,
+)
+
__all__ = [
+ # API Key Authentication
+ "APIKeyError",
+ "get_user_from_api_key",
+ "get_authenticated_user",
+ "get_authenticated_user_optional",
+ "get_current_user_optional",
+ "require_authenticated_user",
+ "require_api_key",
# Rate limiting
"RateLimitConfig",
"RateLimitManager",
diff --git a/middleware/api_key_auth.py b/middleware/api_key_auth.py
new file mode 100644
index 0000000..9d7aa74
--- /dev/null
+++ b/middleware/api_key_auth.py
@@ -0,0 +1,222 @@
+"""
+API Key Authentication Middleware
+
+Provides reusable dependencies for API key authentication across all endpoints.
+Story 3.4: Authentification API via X-API-Key
+"""
+
+from typing import Optional, Any, Union
+from fastapi import Header, Depends
+from fastapi.responses import JSONResponse
+from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer
+
+security = HTTPBearer(auto_error=False)
+
+
+class APIKeyError(Exception):
+    """Authentication failure carrying a machine-readable code and a client-facing message.
+
+    Known codes are the class constants below; ERROR_MESSAGES holds their default text.
+    """
+
+    INVALID_API_KEY = "INVALID_API_KEY"
+    API_KEY_REVOKED = "API_KEY_REVOKED"
+    API_KEY_EXPIRED = "API_KEY_EXPIRED"
+    MISSING_API_KEY = "MISSING_API_KEY"
+    UNAUTHORIZED = "UNAUTHORIZED"
+
+    # Default message used when the raiser does not supply one.
+    ERROR_MESSAGES = {
+        INVALID_API_KEY: "Clé API invalide ou non reconnue.",
+        API_KEY_REVOKED: "Cette clé API a été révoquée.",
+        API_KEY_EXPIRED: "Cette clé API a expiré.",
+        MISSING_API_KEY: "Clé API requise pour cet endpoint.",
+        UNAUTHORIZED: "Authentification requise. Utilisez X-API-Key ou Authorization: Bearer.",
+    }
+
+    def __init__(self, code: str, message: Optional[str] = None):
+        # An explicit message wins; otherwise the default for the code, then a generic text.
+        resolved = message or self.ERROR_MESSAGES.get(code, "Erreur d'authentification")
+        super().__init__(resolved)
+        self.code, self.message = code, resolved
+
+    def to_response(self, status_code: int = 401) -> JSONResponse:
+        """Build the JSON error response ({"error", "message"}) with the given status."""
+        payload = {"error": self.code, "message": self.message}
+        return JSONResponse(status_code=status_code, content=payload)
+
+
+def _raise_api_key_error(code: str, message: Optional[str] = None) -> None:
+    """Raise APIKeyError(code, message); always raises, so it never returns normally."""
+    raise APIKeyError(code, message)
+
+
+async def get_user_from_api_key(
+    x_api_key: Optional[str] = Header(None, alias="X-API-Key"),
+) -> Optional[Any]:
+    """
+    Resolve the user that owns the key sent in the X-API-Key header.
+
+    Returns:
+        User object when the key resolves to a user.
+        None when no key (or an empty one) was sent; try another auth method.
+
+    Raises:
+        APIKeyError: API_KEY_REVOKED / API_KEY_EXPIRED when the lookup reports
+            that state; INVALID_API_KEY for any other failure, including a
+            key that matches no user.
+    """
+    if not x_api_key:
+        return None
+
+    # The lookup reports revoked/expired keys through the ValueError message.
+    known_states = (APIKeyError.API_KEY_REVOKED, APIKeyError.API_KEY_EXPIRED)
+
+    try:
+        from services.auth_service import get_user_by_api_key
+
+        user = get_user_by_api_key(x_api_key)
+    except ValueError as e:
+        state = str(e)
+        code = state if state in known_states else APIKeyError.INVALID_API_KEY
+        raise APIKeyError(code) from e
+    except Exception as e:
+        # Unexpected failure: report the key as invalid, keep the cause chained.
+        raise APIKeyError(APIKeyError.INVALID_API_KEY) from e
+
+    if not user:
+        # A key was sent but matched nobody: must not look like "no key sent".
+        raise APIKeyError(APIKeyError.INVALID_API_KEY)
+    return user
+
+
+async def get_current_user_optional(
+    credentials: Optional[HTTPAuthorizationCredentials] = Depends(security),
+) -> Optional[Any]:
+    """Resolve the Bearer credentials to a user; None when absent or rejected."""
+    if credentials:
+        try:
+            from routes.auth_routes import get_current_user
+
+            return await get_current_user(credentials)
+        except Exception:
+            # Best-effort lookup: any failure means "not authenticated".
+            pass
+    return None
+
+
+async def get_authenticated_user_optional(
+    credentials: Optional[HTTPAuthorizationCredentials] = Depends(security),
+    x_api_key: Optional[str] = Header(None, alias="X-API-Key"),
+) -> Optional[Any]:
+    """
+    Best-effort authentication: resolve a user from the API key or the JWT.
+
+    Order of attempts:
+        1. X-API-Key header (automation users)
+        2. JWT Bearer token (web users)
+
+    A rejected API key is not an error here; the JWT is simply tried next.
+
+    Returns:
+        User object if either method succeeds, None otherwise. Authentication
+        failures are swallowed rather than raised.
+    """
+    api_user = None
+    if x_api_key:
+        try:
+            api_user = await get_user_from_api_key(x_api_key)
+        except APIKeyError:
+            # Invalid, revoked or expired key: leave api_user unset.
+            api_user = None
+    if api_user:
+        return api_user
+
+    # No usable API key: fall back to the Bearer token.
+    if not credentials:
+        return None
+    try:
+        from routes.auth_routes import get_current_user
+
+        return await get_current_user(credentials)
+    except Exception:
+        return None
+
+
+async def get_authenticated_user(
+    credentials: Optional[HTTPAuthorizationCredentials] = Depends(security),
+    x_api_key: Optional[str] = Header(None, alias="X-API-Key"),
+) -> Optional[Any]:
+    """
+    Resolve the caller from the API key or, failing that, the JWT.
+
+    Order of attempts:
+        1. X-API-Key header (automation users)
+        2. JWT Bearer token (web users)
+
+    When an API key is sent it is authoritative: the JWT is never consulted
+    and a key that does not resolve to a user is an error.
+
+    Returns:
+        User object if authenticated
+        None if no key was sent and the JWT is absent or rejected
+
+    Raises:
+        APIKeyError: If API key is provided but invalid/revoked/expired
+    """
+    if not x_api_key:
+        # No key sent: the Bearer token is the only remaining option.
+        if not credentials:
+            return None
+        try:
+            from routes.auth_routes import get_current_user
+
+            return await get_current_user(credentials)
+        except Exception:
+            # A rejected token is reported as "not authenticated", not raised.
+            return None
+
+    # get_user_from_api_key raises APIKeyError itself for keys it rejects.
+    key_owner = await get_user_from_api_key(x_api_key)
+    if not key_owner:
+        # Defensive: a key was sent but the lookup produced no user.
+        raise APIKeyError("INVALID_API_KEY", "Clé API invalide ou non reconnue.")
+    return key_owner
+
+
+async def require_authenticated_user(
+    credentials: Optional[HTTPAuthorizationCredentials] = Depends(security),
+    x_api_key: Optional[str] = Header(None, alias="X-API-Key"),
+) -> Any:
+    """
+    Dependency for endpoints that need an authenticated caller (API key or JWT).
+
+    Returns:
+        User object resolved by get_authenticated_user (never None)
+
+    Raises:
+        APIKeyError: if no user could be resolved, or if a supplied API key
+            is rejected by get_authenticated_user
+    """
+    resolved = await get_authenticated_user(credentials, x_api_key)
+    if resolved:
+        return resolved
+    # NOTE(review): code is MISSING_API_KEY but the text is the UNAUTHORIZED message - confirm.
+    raise APIKeyError("MISSING_API_KEY", "Authentification requise. Utilisez X-API-Key ou Authorization: Bearer.")
+
+
+async def require_api_key(
+    x_api_key: str = Header(..., alias="X-API-Key"),
+) -> Any:
+    """
+    Require API key authentication (no JWT fallback), e.g. for automation endpoints.
+    Returns the user resolved from the key; never None.
+    Raises APIKeyError: MISSING_API_KEY for an empty header value, INVALID_API_KEY
+    when the key resolves to no user, else whatever get_user_from_api_key raises.
+    """
+    if not x_api_key:
+        raise APIKeyError(APIKeyError.MISSING_API_KEY)
+    user = await get_user_from_api_key(x_api_key)
+    if user:
+        return user
+    raise APIKeyError(APIKeyError.INVALID_API_KEY)
\ No newline at end of file
diff --git a/middleware/cleanup.py b/middleware/cleanup.py
index ad32e91..1ae3b41 100644
--- a/middleware/cleanup.py
+++ b/middleware/cleanup.py
@@ -2,6 +2,7 @@
Cleanup and Resource Management for SaaS robustness
Automatic cleanup of temporary files and resources
"""
+
import os
import time
import asyncio
@@ -10,77 +11,84 @@ from pathlib import Path
from datetime import datetime, timedelta
from typing import Optional, Set
import logging
+import json
+from services.storage_tracker import _get_async_redis, KEY_PREFIX
-logger = logging.getLogger(__name__)
+try:
+ import structlog
+
+ logger = structlog.get_logger(__name__)
+ _HAS_STRUCTLOG = True
+except ImportError:
+ logger = logging.getLogger(__name__)
+ _HAS_STRUCTLOG = False
class FileCleanupManager:
"""Manages automatic cleanup of temporary and output files"""
-
+
def __init__(
self,
upload_dir: Path,
output_dir: Path,
temp_dir: Path,
- max_file_age_hours: int = 1,
- cleanup_interval_minutes: int = 10,
- max_total_size_gb: float = 10.0
+ max_file_age_minutes: int = 60,
+ cleanup_interval_minutes: int = 5,
+ max_total_size_gb: float = 10.0,
):
self.upload_dir = Path(upload_dir)
self.output_dir = Path(output_dir)
self.temp_dir = Path(temp_dir)
- self.max_file_age_seconds = max_file_age_hours * 3600
+ self.max_file_age_seconds = max_file_age_minutes * 60
self.cleanup_interval = cleanup_interval_minutes * 60
self.max_total_size_bytes = int(max_total_size_gb * 1024 * 1024 * 1024)
-
+
self._running = False
self._task: Optional[asyncio.Task] = None
self._protected_files: Set[str] = set()
self._tracked_files: dict = {} # filepath -> {created, ttl_minutes}
self._lock = threading.Lock()
- self._stats = {
- "files_cleaned": 0,
- "bytes_freed": 0,
- "cleanup_runs": 0
- }
-
+ self._stats = {"files_cleaned": 0, "bytes_freed": 0, "cleanup_runs": 0}
+
async def track_file(self, filepath: Path, ttl_minutes: int = 60):
"""Track a file for automatic cleanup after TTL expires"""
with self._lock:
self._tracked_files[str(filepath)] = {
"created": time.time(),
"ttl_minutes": ttl_minutes,
- "expires_at": time.time() + (ttl_minutes * 60)
+ "expires_at": time.time() + (ttl_minutes * 60),
}
-
+
def get_tracked_files(self) -> list:
"""Get list of currently tracked files with their status"""
now = time.time()
result = []
-
+
with self._lock:
for filepath, info in self._tracked_files.items():
remaining = info["expires_at"] - now
- result.append({
- "path": filepath,
- "exists": Path(filepath).exists(),
- "expires_in_seconds": max(0, int(remaining)),
- "ttl_minutes": info["ttl_minutes"]
- })
-
+ result.append(
+ {
+ "path": filepath,
+ "exists": Path(filepath).exists(),
+ "expires_in_seconds": max(0, int(remaining)),
+ "ttl_minutes": info["ttl_minutes"],
+ }
+ )
+
return result
-
+
async def cleanup_expired(self) -> int:
"""Cleanup expired tracked files"""
now = time.time()
cleaned = 0
to_remove = []
-
+
with self._lock:
for filepath, info in list(self._tracked_files.items()):
if now > info["expires_at"]:
to_remove.append(filepath)
-
+
for filepath in to_remove:
try:
path = Path(filepath)
@@ -91,55 +99,57 @@ class FileCleanupManager:
self._stats["files_cleaned"] += 1
self._stats["bytes_freed"] += size
logger.info(f"Cleaned expired file: {filepath}")
-
+
with self._lock:
self._tracked_files.pop(filepath, None)
-
+
except Exception as e:
logger.warning(f"Failed to clean expired file {filepath}: {e}")
-
+
return cleaned
-
+
def get_stats(self) -> dict:
"""Get cleanup statistics"""
disk_usage = self.get_disk_usage()
-
+
with self._lock:
tracked_count = len(self._tracked_files)
-
+
return {
"files_cleaned_total": self._stats["files_cleaned"],
- "bytes_freed_total_mb": round(self._stats["bytes_freed"] / (1024 * 1024), 2),
+ "bytes_freed_total_mb": round(
+ self._stats["bytes_freed"] / (1024 * 1024), 2
+ ),
"cleanup_runs": self._stats["cleanup_runs"],
"tracked_files": tracked_count,
"disk_usage": disk_usage,
- "is_running": self._running
+ "is_running": self._running,
}
-
+
def protect_file(self, filepath: Path):
"""Mark a file as protected (being processed)"""
with self._lock:
self._protected_files.add(str(filepath))
-
+
def unprotect_file(self, filepath: Path):
"""Remove protection from a file"""
with self._lock:
self._protected_files.discard(str(filepath))
-
+
def is_protected(self, filepath: Path) -> bool:
"""Check if a file is protected"""
with self._lock:
return str(filepath) in self._protected_files
-
+
async def start(self):
"""Start the cleanup background task"""
if self._running:
return
-
+
self._running = True
self._task = asyncio.create_task(self._cleanup_loop())
logger.info("File cleanup manager started")
-
+
async def stop(self):
"""Stop the cleanup background task"""
self._running = False
@@ -150,7 +160,7 @@ class FileCleanupManager:
except asyncio.CancelledError:
pass
logger.info("File cleanup manager stopped")
-
+
async def _cleanup_loop(self):
"""Background loop for periodic cleanup"""
while self._running:
@@ -160,88 +170,146 @@ class FileCleanupManager:
self._stats["cleanup_runs"] += 1
except Exception as e:
logger.error(f"Cleanup error: {e}")
-
+
await asyncio.sleep(self.cleanup_interval)
-
+
async def cleanup(self) -> dict:
- """Perform cleanup of old files"""
+ """Perform cleanup of old files and orphans"""
stats = {
"files_deleted": 0,
"bytes_freed": 0,
- "errors": []
+ "orphaned_deleted": 0,
+ "errors": [],
}
-
+
now = time.time()
-
- # Cleanup each directory
+
+ # Get tracked paths from Redis to identify orphans
+ tracked_paths = set()
+ redis_client = _get_async_redis()
+ redis_available = redis_client is not None
+ if redis_client:
+ try:
+ keys = await redis_client.keys(f"{KEY_PREFIX}:*")
+ for key in keys:
+ data = await redis_client.get(key)
+ if data:
+ metadata = json.loads(data)
+ if "file_path" in metadata:
+ # Normalize path to absolute string for comparison
+ path_str = str(Path(metadata["file_path"]).absolute())
+ tracked_paths.add(path_str)
+ except Exception as e:
+ logger.warning(f"Failed to fetch tracked paths from Redis: {e}")
+ redis_available = False
+ else:
+ logger.warning(
+ "Redis unavailable - orphan detection disabled, using age-based cleanup only"
+ )
+
+ # Cleanup each directory (collect files first to avoid race condition)
for directory in [self.upload_dir, self.output_dir, self.temp_dir]:
if not directory.exists():
continue
-
- for filepath in directory.iterdir():
+
+ try:
+ files_to_check = list(directory.iterdir())
+ except OSError as e:
+ logger.warning(f"Failed to list directory {directory}: {e}")
+ continue
+
+ for filepath in files_to_check:
if not filepath.is_file():
continue
-
+
# Skip protected files
if self.is_protected(filepath):
continue
-
+
try:
+ # Check if it's an orphan (only if Redis is available)
+ abs_path = str(filepath.absolute())
+ is_orphan = redis_available and abs_path not in tracked_paths
+
# Check file age
file_age = now - filepath.stat().st_mtime
-
- if file_age > self.max_file_age_seconds:
+
+ should_delete = False
+ reason = ""
+
+ if is_orphan:
+ should_delete = True
+ reason = "orphan"
+ elif file_age > self.max_file_age_seconds:
+ should_delete = True
+ reason = "expired"
+
+ if should_delete:
file_size = filepath.stat().st_size
filepath.unlink()
stats["files_deleted"] += 1
stats["bytes_freed"] += file_size
- logger.debug(f"Deleted old file: {filepath}")
-
+ if reason == "orphan":
+ stats["orphaned_deleted"] += 1
+ logger.info(f"Deleted {reason} file: {filepath}")
+
except Exception as e:
stats["errors"].append(str(e))
logger.warning(f"Failed to delete {filepath}: {e}")
-
+
# Force cleanup if total size exceeds limit
await self._enforce_size_limit(stats)
-
- if stats["files_deleted"] > 0:
- mb_freed = stats["bytes_freed"] / (1024 * 1024)
- logger.info(f"Cleanup: deleted {stats['files_deleted']} files, freed {mb_freed:.2f}MB")
-
+
+ mb_freed = stats["bytes_freed"] / (1024 * 1024)
+ cleanup_timestamp = datetime.now().isoformat()
+
+ # Structured logging (AC: #5)
+ log_data = {
+ "files_deleted": stats["files_deleted"],
+ "bytes_freed_mb": round(mb_freed, 2),
+ "orphaned_deleted": stats["orphaned_deleted"],
+ "cleanup_run_timestamp": cleanup_timestamp,
+ }
+
+ if _HAS_STRUCTLOG:
+ logger.info("cleanup_completed", **log_data)
+ else:
+ logger.info(f"Cleanup completed: {log_data}")
+
return stats
-
+
async def _enforce_size_limit(self, stats: dict):
"""Delete oldest files if total size exceeds limit"""
files_with_mtime = []
total_size = 0
-
+
for directory in [self.upload_dir, self.output_dir, self.temp_dir]:
if not directory.exists():
continue
-
+
for filepath in directory.iterdir():
if not filepath.is_file() or self.is_protected(filepath):
continue
-
+
try:
stat = filepath.stat()
files_with_mtime.append((filepath, stat.st_mtime, stat.st_size))
total_size += stat.st_size
except Exception:
pass
-
+
# If under limit, nothing to do
if total_size <= self.max_total_size_bytes:
return
-
+
# Sort by modification time (oldest first)
files_with_mtime.sort(key=lambda x: x[1])
-
+
# Delete oldest files until under limit
for filepath, _, size in files_with_mtime:
if total_size <= self.max_total_size_bytes:
break
-
+
try:
filepath.unlink()
total_size -= size
@@ -250,16 +318,16 @@ class FileCleanupManager:
logger.info(f"Deleted file to free space: {filepath}")
except Exception as e:
stats["errors"].append(str(e))
-
+
def get_disk_usage(self) -> dict:
"""Get current disk usage statistics"""
total_files = 0
total_size = 0
-
+
for directory in [self.upload_dir, self.output_dir, self.temp_dir]:
if not directory.exists():
continue
-
+
for filepath in directory.iterdir():
if filepath.is_file():
total_files += 1
@@ -267,55 +335,60 @@ class FileCleanupManager:
total_size += filepath.stat().st_size
except Exception:
pass
-
+
return {
"total_files": total_files,
"total_size_mb": round(total_size / (1024 * 1024), 2),
"max_size_gb": self.max_total_size_bytes / (1024 * 1024 * 1024),
- "usage_percent": round((total_size / self.max_total_size_bytes) * 100, 1) if self.max_total_size_bytes > 0 else 0,
+ "usage_percent": round((total_size / self.max_total_size_bytes) * 100, 1)
+ if self.max_total_size_bytes > 0
+ else 0,
"directories": {
"uploads": str(self.upload_dir),
"outputs": str(self.output_dir),
- "temp": str(self.temp_dir)
- }
+ "temp": str(self.temp_dir),
+ },
}
class MemoryMonitor:
"""Monitors memory usage and triggers cleanup if needed"""
-
+
def __init__(self, max_memory_percent: float = 80.0):
self.max_memory_percent = max_memory_percent
self._high_memory_callbacks = []
-
+
def get_memory_usage(self) -> dict:
"""Get current memory usage"""
try:
import psutil
+
process = psutil.Process()
memory_info = process.memory_info()
system_memory = psutil.virtual_memory()
-
+
return {
"process_rss_mb": round(memory_info.rss / (1024 * 1024), 2),
"process_vms_mb": round(memory_info.vms / (1024 * 1024), 2),
"system_total_gb": round(system_memory.total / (1024 * 1024 * 1024), 2),
- "system_available_gb": round(system_memory.available / (1024 * 1024 * 1024), 2),
- "system_percent": system_memory.percent
+ "system_available_gb": round(
+ system_memory.available / (1024 * 1024 * 1024), 2
+ ),
+ "system_percent": system_memory.percent,
}
except ImportError:
return {"error": "psutil not installed"}
except Exception as e:
return {"error": str(e)}
-
+
def check_memory(self) -> bool:
"""Check if memory usage is within limits"""
usage = self.get_memory_usage()
if "error" in usage:
return True # Can't check, assume OK
-
+
return usage.get("system_percent", 0) < self.max_memory_percent
-
+
def on_high_memory(self, callback):
"""Register callback for high memory situations"""
self._high_memory_callbacks.append(callback)
@@ -323,67 +396,75 @@ class MemoryMonitor:
class HealthChecker:
"""Comprehensive health checking for the application"""
-
- def __init__(self, cleanup_manager: FileCleanupManager, memory_monitor: MemoryMonitor):
+
+ def __init__(
+ self, cleanup_manager: FileCleanupManager, memory_monitor: MemoryMonitor
+ ):
self.cleanup_manager = cleanup_manager
self.memory_monitor = memory_monitor
self.start_time = datetime.now()
self._translation_count = 0
self._error_count = 0
self._lock = threading.Lock()
-
+
def record_translation(self, success: bool = True):
"""Record a translation attempt"""
with self._lock:
self._translation_count += 1
if not success:
self._error_count += 1
-
+
async def check_health(self) -> dict:
"""Get comprehensive health status (async version)"""
return self.get_health()
-
+
def get_health(self) -> dict:
"""Get comprehensive health status"""
memory = self.memory_monitor.get_memory_usage()
disk = self.cleanup_manager.get_disk_usage()
-
+
# Determine overall status
status = "healthy"
issues = []
-
+
if "error" not in memory:
if memory.get("system_percent", 0) > 90:
status = "degraded"
issues.append("High memory usage")
elif memory.get("system_percent", 0) > 80:
issues.append("Memory usage elevated")
-
+
if disk.get("usage_percent", 0) > 90:
status = "degraded"
issues.append("High disk usage")
elif disk.get("usage_percent", 0) > 80:
issues.append("Disk usage elevated")
-
+
uptime = datetime.now() - self.start_time
-
+
return {
"status": status,
"issues": issues,
"uptime_seconds": int(uptime.total_seconds()),
- "uptime_human": str(uptime).split('.')[0],
+ "uptime_human": str(uptime).split(".")[0],
"translations": {
"total": self._translation_count,
"errors": self._error_count,
"success_rate": round(
- ((self._translation_count - self._error_count) / self._translation_count * 100)
- if self._translation_count > 0 else 100, 1
- )
+ (
+ (self._translation_count - self._error_count)
+ / self._translation_count
+ * 100
+ )
+ if self._translation_count > 0
+ else 100,
+ 1,
+ ),
},
"memory": memory,
"disk": disk,
"cleanup_service": self.cleanup_manager.get_stats(),
- "timestamp": datetime.now().isoformat()
+ "timestamp": datetime.now().isoformat(),
}
@@ -394,7 +475,7 @@ def create_cleanup_manager(config) -> FileCleanupManager:
upload_dir=config.UPLOAD_DIR,
output_dir=config.OUTPUT_DIR,
temp_dir=config.TEMP_DIR,
- max_file_age_hours=getattr(config, 'MAX_FILE_AGE_HOURS', 1),
- cleanup_interval_minutes=getattr(config, 'CLEANUP_INTERVAL_MINUTES', 10),
- max_total_size_gb=getattr(config, 'MAX_TOTAL_SIZE_GB', 10.0)
+ max_file_age_minutes=getattr(config, "FILE_TTL_MINUTES", 60),
+ cleanup_interval_minutes=getattr(config, "CLEANUP_INTERVAL_MINUTES", 5),
+ max_total_size_gb=getattr(config, "MAX_TOTAL_SIZE_GB", 10.0),
)
diff --git a/middleware/error_handler.py b/middleware/error_handler.py
new file mode 100644
index 0000000..010e017
--- /dev/null
+++ b/middleware/error_handler.py
@@ -0,0 +1,107 @@
+"""
+Global Error Handling Middleware
+Catches all unhandled exceptions and standardizes API error responses.
+"""
+
+import logging
+from starlette.middleware.base import BaseHTTPMiddleware
+from starlette.requests import Request
+from starlette.responses import Response, JSONResponse
+from fastapi import HTTPException
+from starlette.exceptions import HTTPException as StarletteHTTPException
+
+# Import APIKeyError for handling
+from middleware.api_key_auth import APIKeyError
+
+try:
+ import structlog
+
+ logger = structlog.get_logger(__name__)
+except ImportError:
+ logger = logging.getLogger(__name__)
+
+
+def format_error_response(
+    status_code: int,
+    message: str,
+    error_code: str = None,
+    details: dict = None,
+    request_id: str = "unknown",
+    headers: dict = None,
+) -> JSONResponse:
+    """
+    Build the standard error payload {error, message, details} as a JSONResponse.
+
+    error_code defaults to the code mapped from status_code. details is copied,
+    never mutated, and gains "request_id" unless the caller already set one.
+    """
+    if not error_code:
+        error_code = _map_http_status_to_code(status_code)
+
+    # Copy first: inserting request_id must not leak into the caller's dict.
+    payload_details = dict(details or {})
+    payload_details.setdefault("request_id", request_id)
+    content = {"error": error_code, "message": message, "details": payload_details}
+    return JSONResponse(status_code=status_code, content=content, headers=headers)
+
+
+def _map_http_status_to_code(status_code: int) -> str:
+    """Return the API error code for an HTTP status; unmapped statuses give INTERNAL_ERROR."""
+    known_codes = (
+        (400, "INVALID_FORMAT"), (401, "UNAUTHORIZED"),
+        (403, "FORBIDDEN"), (404, "NOT_FOUND"),
+        (405, "METHOD_NOT_ALLOWED"), (413, "FILE_TOO_LARGE"),
+        (422, "VALIDATION_ERROR"), (429, "QUOTA_EXCEEDED"),
+        (502, "PROVIDER_ERROR"), (503, "SERVICE_UNAVAILABLE"),
+    )
+    # Linear scan over the fixed table; the first matching status wins.
+    for http_status, api_code in known_codes:
+        if http_status == status_code:
+            return api_code
+    # Anything not listed above is reported as a generic internal error.
+    return "INTERNAL_ERROR"
+
+
+class ErrorHandlingMiddleware(BaseHTTPMiddleware):
+    """
+    Safety net: turns exceptions escaping call_next into standard JSON errors.
+
+    APIKeyError keeps its own payload; a leaked HTTPException keeps its status,
+    detail and headers; anything else is logged and reported as a generic 500.
+    Note: HTTPException is often caught by FastAPI handlers before reaching here.
+    """
+
+    async def dispatch(self, request: Request, call_next) -> Response:
+        try:
+            return await call_next(request)
+        except APIKeyError as e:
+            request_id = getattr(request.state, "request_id", "unknown")
+            logger.info(f"[{request_id}] API Key authentication error: {e.code}")
+            return e.to_response()
+        except (HTTPException, StarletteHTTPException) as e:
+            request_id = getattr(request.state, "request_id", "unknown")
+            detail = getattr(e, "detail", None) or {}
+            structured = isinstance(detail, dict)
+            if structured:
+                message = detail.get("message", "Une erreur s'est produite.")
+            else:
+                message = str(detail)
+            return format_error_response(
+                status_code=e.status_code,
+                message=message,
+                error_code=detail.get("error") if structured else None,
+                request_id=request_id,
+                # Keep headers such as WWW-Authenticate or Retry-After set by the raiser.
+                headers=getattr(e, "headers", None),
+            )
+        except Exception as e:
+            request_id = getattr(request.state, "request_id", "unknown")
+            # Log the full stack trace for internal debugging
+            logger.exception(f"[{request_id}] Unhandled internal exception: {str(e)}")
+            # Return generic error in French to user (AC4, AC5)
+            return format_error_response(
+                status_code=500,
+                message="Une erreur inattendue s'est produite. Veuillez réessayer plus tard.",
+                error_code="INTERNAL_ERROR",
+                request_id=request_id,
+            )
diff --git a/middleware/security.py b/middleware/security.py
index 271e778..4b266ef 100644
--- a/middleware/security.py
+++ b/middleware/security.py
@@ -116,27 +116,3 @@ class RequestLoggingMiddleware(BaseHTTPMiddleware):
return request.client.host
return "unknown"
-
-
-class ErrorHandlingMiddleware(BaseHTTPMiddleware):
- """Catch all unhandled exceptions and return proper error responses"""
-
- async def dispatch(self, request: Request, call_next) -> Response:
- from starlette.responses import JSONResponse
-
- try:
- return await call_next(request)
-
- except Exception as e:
- request_id = getattr(request.state, 'request_id', 'unknown')
- logger.exception(f"[{request_id}] Unhandled exception: {str(e)}")
-
- # Don't expose internal errors in production
- return JSONResponse(
- status_code=500,
- content={
- "error": "internal_server_error",
- "message": "An unexpected error occurred. Please try again later.",
- "request_id": request_id
- }
- )
diff --git a/middleware/tier_quota.py b/middleware/tier_quota.py
new file mode 100644
index 0000000..1ff880f
--- /dev/null
+++ b/middleware/tier_quota.py
@@ -0,0 +1,180 @@
+"""
+Tier-based daily translation quota (Story 1.6).
+Uses a Redis fixed-window daily counter per user (one key per UTC date); falls back to an in-memory counter when Redis is unavailable.
+Coexists with IP-based rate limiting in rate_limiting.py.
+
+Source of truth: Redis (key per user per UTC date) is the authority for quota enforcement.
+User.daily_translation_count in DB is kept in sync on each successful translation for
+reporting/analytics; reset at midnight UTC is automatic in Redis (new key per day). DB
+reset can be done by a scheduled job at midnight UTC if needed.
+"""
+from __future__ import annotations
+
+import os
+import logging
+from dataclasses import dataclass
+from datetime import datetime, timezone, timedelta
+from typing import Optional
+
+logger = logging.getLogger(__name__)
+
+# Free tier: 5 translations per day (UTC). Pro (and equivalent) tiers: no daily cap.
+FREE_TIER_DAILY_LIMIT = 5
+KEY_PREFIX = "rate_limit:daily" # Redis key namespace; full key is "{KEY_PREFIX}:{user_id}:{YYYY-MM-DD}"
+
+
+def _utc_date_str(dt: Optional[datetime] = None) -> str:
+    """Return the UTC calendar date of ``dt`` (default: now) as YYYY-MM-DD.
+
+    Args:
+        dt: Moment to format. Timezone-aware values are converted to UTC
+            first; naive values are taken as already expressed in UTC.
+
+    Returns:
+        The date string used as the per-day component of quota keys.
+    """
+    moment = dt or datetime.now(timezone.utc)
+    # An aware datetime in another zone may fall on a different UTC day,
+    # so normalise before formatting.
+    if moment.utcoffset() is not None:
+        moment = moment.astimezone(timezone.utc)
+    return moment.strftime("%Y-%m-%d")
+
+
+def _next_midnight_utc(dt: Optional[datetime] = None) -> datetime:
+    """Return the first midnight UTC strictly after ``dt`` (default: now).
+
+    Args:
+        dt: Reference moment. Timezone-aware values are converted to UTC
+            first; naive values are taken as already expressed in UTC.
+
+    Returns:
+        A timezone-aware datetime at 00:00:00 UTC of the following UTC day.
+    """
+    now = dt or datetime.now(timezone.utc)
+    # Without this conversion, an aware non-UTC input would use its local
+    # calendar date and could yield a midnight that is already in the past.
+    if now.utcoffset() is not None:
+        now = now.astimezone(timezone.utc)
+    tomorrow = now.date() + timedelta(days=1)
+    return datetime(tomorrow.year, tomorrow.month, tomorrow.day, tzinfo=timezone.utc)
+
+
+def _seconds_until_midnight_utc(dt: Optional[datetime] = None) -> int:
+    """Whole seconds from ``dt`` (default: now) to the next midnight UTC, never negative."""
+    reference = datetime.now(timezone.utc) if not dt else dt
+    gap = _next_midnight_utc(reference) - reference
+    whole_seconds = int(gap.total_seconds())
+    return whole_seconds if whole_seconds > 0 else 0
+
+
+@dataclass
+class QuotaResult:
+ """Result of a quota check, as built by TierQuotaService.check_quota."""
+ allowed: bool # True when one more translation may be performed
+ remaining: int # -1 for pro (unlimited)
+ reset_at_utc: datetime # next midnight UTC, as computed by _next_midnight_utc
+ current_usage: int = 0 # count read for today's UTC date; check_quota reports 0 for unlimited tiers
+ limit: int = FREE_TIER_DAILY_LIMIT # daily cap; check_quota reports 0 for unlimited tiers
+
+
+# ---------------------------------------------------------------------------
+# Redis backend
+# ---------------------------------------------------------------------------
+
+_async_redis = None
+
+
+def _get_async_redis():
+    """Return the process-wide async Redis client, or None when Redis is not usable.
+
+    The module-level ``_async_redis`` caches the outcome: ``None`` means not
+    tried yet, ``False`` means Redis was ruled out (no REDIS_URL, or client
+    creation failed), anything else is the shared client.
+    """
+    global _async_redis
+    if _async_redis is False:
+        return None
+    if _async_redis is not None:
+        return _async_redis
+
+    redis_url = os.getenv("REDIS_URL", "").strip()
+    if redis_url == "":
+        # Nothing configured: remember that so the env is not re-read each call.
+        _async_redis = False
+        return None
+
+    try:
+        import redis.asyncio as redis_async
+        _async_redis = redis_async.Redis.from_url(redis_url, decode_responses=True)
+        logger.info("Tier quota: using Redis for daily quota")
+        return _async_redis
+    except Exception as exc:
+        logger.warning("Tier quota: Redis unavailable (%s), using in-memory fallback", exc)
+        _async_redis = False
+        return None
+
+
+# ---------------------------------------------------------------------------
+# In-memory fallback (per process; not shared across workers). Documented as fallback.
+# ---------------------------------------------------------------------------
+
+_memory_usage: dict[tuple[str, str], int] = {} # (user_id, date_utc_str) -> count
+
+
+def _memory_get(user_id: str, date_str: str) -> int:
+    """Return the in-process count stored for ``user_id`` on ``date_str`` (0 when absent)."""
+    try:
+        return _memory_usage[(user_id, date_str)]
+    except KeyError:
+        return 0
+
+
+def _memory_incr(user_id: str, date_str: str) -> int:
+    """Increment and return the in-process count for ``user_id`` on ``date_str``.
+
+    Args:
+        user_id: Identifier of the user being counted.
+        date_str: UTC date as YYYY-MM-DD (ISO dates sort chronologically as text).
+
+    Returns:
+        The count after the increment.
+    """
+    key = (user_id, date_str)
+    if key not in _memory_usage:
+        # First hit for this user on this date: drop counters of earlier dates
+        # so the fallback dict does not grow forever in a long-lived process.
+        for stale in [k for k in _memory_usage if k[1] < date_str]:
+            del _memory_usage[stale]
+    _memory_usage[key] = _memory_usage.get(key, 0) + 1
+    return _memory_usage[key]
+
+
+# ---------------------------------------------------------------------------
+# TierQuotaService
+# ---------------------------------------------------------------------------
+
+class TierQuotaService:
+    """
+    Daily translation quota per user by tier.
+
+    Redis key pattern: rate_limit:daily:{user_id}:{YYYY-MM-DD}, TTL 25h.
+    If Redis is unavailable, uses in-memory dict (documented fallback).
+    """
+
+    # Tiers that are never subject to the daily cap.
+    UNLIMITED_TIERS = frozenset({"pro", "business", "enterprise", "starter"})
+    # 25h so a day's key outlives its own UTC day and then expires by itself.
+    KEY_TTL_SECONDS = 25 * 3600
+
+    def __init__(self):
+        self._redis = None  # Lazy init on first use
+
+    def _redis_client(self):
+        """Return the shared async Redis client, or None when Redis is not usable."""
+        if self._redis is None:
+            self._redis = _get_async_redis()
+        return self._redis
+
+    def _date_str(self, dt: Optional[datetime] = None) -> str:
+        """UTC date (YYYY-MM-DD) used as the per-day part of the counter key."""
+        return _utc_date_str(dt)
+
+    async def check_quota(self, user_id: str, tier: str) -> QuotaResult:
+        """
+        Check if user has quota for one more translation today (UTC).
+
+        Args:
+            user_id: Identifier used in the per-user counter key.
+            tier: Plan name; empty/None is treated as "free". Tiers listed in
+                UNLIMITED_TIERS are always allowed (remaining=-1, limit=0).
+
+        Returns:
+            QuotaResult for the current UTC day. This method only reads the
+            counter; call increment_on_success to record a translation.
+        """
+        # Read the clock once so the counter key and the advertised reset time
+        # always describe the same UTC day, even for a call made at midnight.
+        now = datetime.now(timezone.utc)
+        reset_at = _next_midnight_utc(now)
+        tier_lower = (tier or "free").lower()
+        if tier_lower in self.UNLIMITED_TIERS:
+            return QuotaResult(
+                allowed=True,
+                remaining=-1,
+                reset_at_utc=reset_at,
+                current_usage=0,
+                limit=0,
+            )
+        # Free tier
+        date_str = self._date_str(now)
+        count = None
+        redis_client = self._redis_client()
+        if redis_client:
+            try:
+                key = f"{KEY_PREFIX}:{user_id}:{date_str}"
+                count = int(await redis_client.get(key) or 0)
+            except Exception as e:
+                # Best effort: a Redis outage must not block translations.
+                logger.warning("Tier quota Redis get failed: %s, using in-memory", e)
+        if count is None:
+            count = _memory_get(user_id, date_str)
+        remaining = max(0, FREE_TIER_DAILY_LIMIT - count)
+        return QuotaResult(
+            allowed=count < FREE_TIER_DAILY_LIMIT,
+            remaining=remaining,
+            reset_at_utc=reset_at,
+            current_usage=count,
+            limit=FREE_TIER_DAILY_LIMIT,
+        )
+
+    async def increment_on_success(self, user_id: str) -> None:
+        """Increment daily translation count for user (call after successful translation)."""
+        date_str = self._date_str()
+        redis_client = self._redis_client()
+        if redis_client:
+            try:
+                key = f"{KEY_PREFIX}:{user_id}:{date_str}"
+                pipe = redis_client.pipeline()
+                pipe.incr(key)
+                pipe.expire(key, self.KEY_TTL_SECONDS)
+                await pipe.execute()
+                return
+            except Exception as e:
+                logger.warning("Tier quota Redis increment failed: %s, using in-memory", e)
+        # Reached when Redis is not configured or the pipeline failed.
+        _memory_incr(user_id, date_str)
+
+    def seconds_until_reset(self) -> int:
+        """Seconds until next midnight UTC (for Retry-After header)."""
+        return _seconds_until_midnight_utc()
+
+
+# Singleton for app use
+tier_quota_service = TierQuotaService()
diff --git a/middleware/validation.py b/middleware/validation.py
index d532be2..469e62c 100644
--- a/middleware/validation.py
+++ b/middleware/validation.py
@@ -2,10 +2,14 @@
Input Validation Module for SaaS robustness
Validates all user inputs before processing
"""
+
import re
import magic
+import ipaddress
+import socket
from pathlib import Path
-from typing import Optional, List, Set
+from urllib.parse import urlparse
+from typing import Optional, List, Set, Tuple
from fastapi import UploadFile, HTTPException
import logging
@@ -14,7 +18,13 @@ logger = logging.getLogger(__name__)
class ValidationError(Exception):
"""Custom validation error with user-friendly messages"""
- def __init__(self, message: str, code: str = "validation_error", details: Optional[dict] = None):
+
+ def __init__(
+ self,
+ message: str,
+ code: str = "validation_error",
+ details: Optional[dict] = None,
+ ):
self.message = message
self.code = code
self.details = details or {}
@@ -23,37 +33,46 @@ class ValidationError(Exception):
class ValidationResult:
"""Result of a validation check"""
- def __init__(self, is_valid: bool = True, errors: List[str] = None, warnings: List[str] = None, data: dict = None):
+
+ def __init__(
+ self,
+ is_valid: bool = True,
+ errors: Optional[List[str]] = None,
+ warnings: Optional[List[str]] = None,
+ data: Optional[dict] = None,
+ error_code: Optional[str] = None,
+ ):
self.is_valid = is_valid
self.errors = errors or []
self.warnings = warnings or []
self.data = data or {}
+ self.error_code = error_code
class FileValidator:
"""Validates uploaded files for security and compatibility"""
-
+
# Allowed MIME types mapped to extensions
ALLOWED_MIME_TYPES = {
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": ".xlsx",
"application/vnd.openxmlformats-officedocument.wordprocessingml.document": ".docx",
"application/vnd.openxmlformats-officedocument.presentationml.presentation": ".pptx",
}
-
+
# Magic bytes for Office Open XML files (ZIP format)
OFFICE_MAGIC_BYTES = b"PK\x03\x04"
-
+
def __init__(
self,
max_size_mb: int = 50,
- allowed_extensions: Set[str] = None,
- scan_content: bool = True
+ allowed_extensions: Optional[Set[str]] = None,
+ scan_content: bool = True,
):
self.max_size_bytes = max_size_mb * 1024 * 1024
self.max_size_mb = max_size_mb
self.allowed_extensions = allowed_extensions or {".xlsx", ".docx", ".pptx"}
self.scan_content = scan_content
-
+
async def validate_async(self, file: UploadFile) -> ValidationResult:
"""
Validate an uploaded file asynchronously
@@ -62,77 +81,105 @@ class FileValidator:
errors = []
warnings = []
data = {}
-
+
try:
# Validate filename
if not file.filename:
- errors.append("Filename is required")
- return ValidationResult(is_valid=False, errors=errors)
-
+ errors.append("Le nom de fichier est requis")
+ return ValidationResult(
+ is_valid=False, errors=errors, error_code="missing_filename"
+ )
+
# Sanitize filename
try:
safe_filename = self._sanitize_filename(file.filename)
data["safe_filename"] = safe_filename
except ValidationError as e:
errors.append(str(e.message))
- return ValidationResult(is_valid=False, errors=errors)
-
+ return ValidationResult(
+ is_valid=False, errors=errors, error_code=e.code
+ )
+
# Validate extension
try:
extension = self._validate_extension(safe_filename)
data["extension"] = extension
except ValidationError as e:
errors.append(str(e.message))
- return ValidationResult(is_valid=False, errors=errors)
-
+ return ValidationResult(
+ is_valid=False, errors=errors, error_code=e.code
+ )
+
# Read file content for validation
content = await file.read()
await file.seek(0) # Reset for later processing
-
+
# Validate file size
file_size = len(content)
data["size_bytes"] = file_size
- data["size_mb"] = round(file_size / (1024*1024), 2)
-
+ data["size_mb"] = round(file_size / (1024 * 1024), 2)
+
if file_size > self.max_size_bytes:
- errors.append(f"File too large. Maximum size is {self.max_size_mb}MB, got {file_size / (1024*1024):.1f}MB")
- return ValidationResult(is_valid=False, errors=errors, data=data)
-
+ errors.append(
+ f"Fichier trop volumineux. La taille maximale est de {self.max_size_mb}Mo, "
+ f"vous avez envoye {file_size / (1024 * 1024):.1f}Mo"
+ )
+ return ValidationResult(
+ is_valid=False,
+ errors=errors,
+ data=data,
+ error_code="file_too_large",
+ )
+
if file_size == 0:
- errors.append("File is empty")
- return ValidationResult(is_valid=False, errors=errors, data=data)
-
+ errors.append("Le fichier est vide")
+ return ValidationResult(
+ is_valid=False, errors=errors, data=data, error_code="empty_file"
+ )
+
# Warn about large files
if file_size > self.max_size_bytes * 0.8:
- warnings.append(f"File is {data['size_mb']}MB, approaching the {self.max_size_mb}MB limit")
-
+ warnings.append(
+ f"Le fichier fait {data['size_mb']}Mo, approchant la limite de {self.max_size_mb}Mo"
+ )
+
# Validate magic bytes
if self.scan_content:
try:
self._validate_magic_bytes(content, extension)
except ValidationError as e:
errors.append(str(e.message))
- return ValidationResult(is_valid=False, errors=errors, data=data)
-
+ return ValidationResult(
+ is_valid=False, errors=errors, data=data, error_code=e.code
+ )
+
# Validate MIME type
try:
mime_type = self._detect_mime_type(content)
data["mime_type"] = mime_type
self._validate_mime_type(mime_type, extension)
except ValidationError as e:
- warnings.append(f"MIME type warning: {e.message}")
+ warnings.append(f"Avertissement MIME: {e.message}")
except Exception:
- warnings.append("Could not verify MIME type")
-
+ warnings.append("Impossible de verifier le type MIME")
+
data["original_filename"] = file.filename
-
- return ValidationResult(is_valid=True, errors=errors, warnings=warnings, data=data)
-
+
+ return ValidationResult(
+ is_valid=True, errors=errors, warnings=warnings, data=data
+ )
+
except Exception as e:
logger.error(f"Validation error: {str(e)}")
- errors.append(f"Validation failed: {str(e)}")
- return ValidationResult(is_valid=False, errors=errors, warnings=warnings, data=data)
-
+ errors.append(f"Erreur de validation: {str(e)}")
+ return ValidationResult(
+ is_valid=False,
+ errors=errors,
+ warnings=warnings,
+ data=data,
+ error_code="validation_error",
+ )
+
async def validate(self, file: UploadFile) -> dict:
"""
Validate an uploaded file
@@ -141,106 +188,107 @@ class FileValidator:
# Validate filename
if not file.filename:
raise ValidationError(
- "Filename is required",
- code="missing_filename"
+ "Le nom de fichier est requis", code="missing_filename"
)
-
+
# Sanitize filename
safe_filename = self._sanitize_filename(file.filename)
-
+
# Validate extension
extension = self._validate_extension(safe_filename)
-
+
# Read file content for validation
content = await file.read()
await file.seek(0) # Reset for later processing
-
+
# Validate file size
file_size = len(content)
if file_size > self.max_size_bytes:
raise ValidationError(
- f"File too large. Maximum size is {self.max_size_mb}MB, got {file_size / (1024*1024):.1f}MB",
+ f"Fichier trop volumineux. La taille maximale est de {self.max_size_mb}Mo, "
+ f"vous avez envoye {file_size / (1024 * 1024):.1f}Mo",
code="file_too_large",
- details={"max_mb": self.max_size_mb, "actual_mb": round(file_size / (1024*1024), 2)}
+ details={
+ "max_mb": self.max_size_mb,
+ "actual_mb": round(file_size / (1024 * 1024), 2),
+ },
)
-
+
if file_size == 0:
- raise ValidationError(
- "File is empty",
- code="empty_file"
- )
-
+ raise ValidationError("Le fichier est vide", code="empty_file")
+
# Validate magic bytes (file signature)
if self.scan_content:
self._validate_magic_bytes(content, extension)
-
+
# Validate MIME type
mime_type = self._detect_mime_type(content)
self._validate_mime_type(mime_type, extension)
-
+
return {
"original_filename": file.filename,
"safe_filename": safe_filename,
"extension": extension,
"size_bytes": file_size,
- "size_mb": round(file_size / (1024*1024), 2),
- "mime_type": mime_type
+ "size_mb": round(file_size / (1024 * 1024), 2),
+ "mime_type": mime_type,
}
-
+
def _sanitize_filename(self, filename: str) -> str:
"""Sanitize filename to prevent path traversal and other attacks"""
# Remove path components
filename = Path(filename).name
-
+
# Remove null bytes and control characters
- filename = re.sub(r'[\x00-\x1f\x7f-\x9f]', '', filename)
-
+ filename = re.sub(r"[\x00-\x1f\x7f-\x9f]", "", filename)
+
# Remove potentially dangerous characters
- filename = re.sub(r'[<>:"/\\|?*]', '_', filename)
-
+ filename = re.sub(r'[<>:"/\\|?*]', "_", filename)
+
# Limit length
if len(filename) > 255:
- name, ext = filename.rsplit('.', 1) if '.' in filename else (filename, '')
- filename = name[:250] + ('.' + ext if ext else '')
-
+ name, ext = filename.rsplit(".", 1) if "." in filename else (filename, "")
+ filename = name[:250] + ("." + ext if ext else "")
+
# Ensure not empty after sanitization
- if not filename or filename.strip() == '':
- raise ValidationError(
- "Invalid filename",
- code="invalid_filename"
- )
-
+ if not filename or filename.strip() == "":
+ raise ValidationError("Nom de fichier invalide", code="invalid_filename")
+
return filename
-
+
def _validate_extension(self, filename: str) -> str:
"""Validate and return the file extension"""
- if '.' not in filename:
+ if "." not in filename:
raise ValidationError(
- f"File must have an extension. Supported: {', '.join(self.allowed_extensions)}",
+ f"Le fichier doit avoir une extension. Formats supportes : {', '.join(self.allowed_extensions)}",
code="missing_extension",
- details={"allowed_extensions": list(self.allowed_extensions)}
+ details={"allowed_extensions": list(self.allowed_extensions)},
)
-
- extension = '.' + filename.rsplit('.', 1)[1].lower()
-
+
+ extension = "." + filename.rsplit(".", 1)[1].lower()
+
if extension not in self.allowed_extensions:
raise ValidationError(
- f"File type '{extension}' not supported. Supported types: {', '.join(self.allowed_extensions)}",
+ f"Format de fichier '{extension}' non supporte. Formats acceptes : {', '.join(self.allowed_extensions)}",
code="unsupported_file_type",
- details={"extension": extension, "allowed_extensions": list(self.allowed_extensions)}
+ details={
+ "extension": extension,
+ "allowed_extensions": list(self.allowed_extensions),
+ },
)
-
+
return extension
-
+
def _validate_magic_bytes(self, content: bytes, extension: str):
"""Validate file magic bytes match expected format"""
# All supported formats are Office Open XML (ZIP-based)
if not content.startswith(self.OFFICE_MAGIC_BYTES):
raise ValidationError(
- "File content does not match expected format. The file may be corrupted or not a valid Office document.",
- code="invalid_file_content"
+ "Le contenu du fichier ne correspond pas au format attendu. "
+ "Le fichier est peut-etre corrompu ou n'est pas un document Office valide.",
+ code="invalid_file_content",
)
-
+
def _detect_mime_type(self, content: bytes) -> str:
"""Detect MIME type from file content"""
try:
@@ -251,77 +299,198 @@ class FileValidator:
if content.startswith(self.OFFICE_MAGIC_BYTES):
return "application/zip"
return "application/octet-stream"
-
+
def _validate_mime_type(self, mime_type: str, extension: str):
"""Validate MIME type matches extension"""
# Office Open XML files may be detected as ZIP
- allowed_mimes = list(self.ALLOWED_MIME_TYPES.keys()) + ["application/zip", "application/octet-stream"]
-
+ allowed_mimes = list(self.ALLOWED_MIME_TYPES.keys()) + [
+ "application/zip",
+ "application/octet-stream",
+ ]
+
if mime_type not in allowed_mimes:
raise ValidationError(
- f"Invalid file type detected. Expected Office document, got: {mime_type}",
+ f"Type de fichier invalide detecte. Document Office attendu, recu : {mime_type}",
code="invalid_mime_type",
- details={"detected_mime": mime_type}
+ details={"detected_mime": mime_type},
)
class LanguageValidator:
"""Validates language codes"""
-
+
SUPPORTED_LANGUAGES = {
# ISO 639-1 codes
- "af", "sq", "am", "ar", "hy", "az", "eu", "be", "bn", "bs",
- "bg", "ca", "ceb", "zh", "zh-CN", "zh-TW", "co", "hr", "cs",
- "da", "nl", "en", "eo", "et", "fi", "fr", "fy", "gl", "ka",
- "de", "el", "gu", "ht", "ha", "haw", "he", "hi", "hmn", "hu",
- "is", "ig", "id", "ga", "it", "ja", "jv", "kn", "kk", "km",
- "rw", "ko", "ku", "ky", "lo", "la", "lv", "lt", "lb", "mk",
- "mg", "ms", "ml", "mt", "mi", "mr", "mn", "my", "ne", "no",
- "ny", "or", "ps", "fa", "pl", "pt", "pa", "ro", "ru", "sm",
- "gd", "sr", "st", "sn", "sd", "si", "sk", "sl", "so", "es",
- "su", "sw", "sv", "tl", "tg", "ta", "tt", "te", "th", "tr",
- "tk", "uk", "ur", "ug", "uz", "vi", "cy", "xh", "yi", "yo",
- "zu", "auto"
+ "af",
+ "sq",
+ "am",
+ "ar",
+ "hy",
+ "az",
+ "eu",
+ "be",
+ "bn",
+ "bs",
+ "bg",
+ "ca",
+ "ceb",
+ "zh",
+ "zh-CN",
+ "zh-TW",
+ "co",
+ "hr",
+ "cs",
+ "da",
+ "nl",
+ "en",
+ "eo",
+ "et",
+ "fi",
+ "fr",
+ "fy",
+ "gl",
+ "ka",
+ "de",
+ "el",
+ "gu",
+ "ht",
+ "ha",
+ "haw",
+ "he",
+ "hi",
+ "hmn",
+ "hu",
+ "is",
+ "ig",
+ "id",
+ "ga",
+ "it",
+ "ja",
+ "jv",
+ "kn",
+ "kk",
+ "km",
+ "rw",
+ "ko",
+ "ku",
+ "ky",
+ "lo",
+ "la",
+ "lv",
+ "lt",
+ "lb",
+ "mk",
+ "mg",
+ "ms",
+ "ml",
+ "mt",
+ "mi",
+ "mr",
+ "mn",
+ "my",
+ "ne",
+ "no",
+ "ny",
+ "or",
+ "ps",
+ "fa",
+ "pl",
+ "pt",
+ "pa",
+ "ro",
+ "ru",
+ "sm",
+ "gd",
+ "sr",
+ "st",
+ "sn",
+ "sd",
+ "si",
+ "sk",
+ "sl",
+ "so",
+ "es",
+ "su",
+ "sw",
+ "sv",
+ "tl",
+ "tg",
+ "ta",
+ "tt",
+ "te",
+ "th",
+ "tr",
+ "tk",
+ "uk",
+ "ur",
+ "ug",
+ "uz",
+ "vi",
+ "cy",
+ "xh",
+ "yi",
+ "yo",
+ "zu",
+ "auto",
}
-
+
LANGUAGE_NAMES = {
- "en": "English", "es": "Spanish", "fr": "French", "de": "German",
- "it": "Italian", "pt": "Portuguese", "ru": "Russian", "zh": "Chinese",
- "zh-CN": "Chinese (Simplified)", "zh-TW": "Chinese (Traditional)",
- "ja": "Japanese", "ko": "Korean", "ar": "Arabic", "hi": "Hindi",
- "nl": "Dutch", "pl": "Polish", "tr": "Turkish", "sv": "Swedish",
- "da": "Danish", "no": "Norwegian", "fi": "Finnish", "cs": "Czech",
- "el": "Greek", "th": "Thai", "vi": "Vietnamese", "id": "Indonesian",
- "uk": "Ukrainian", "ro": "Romanian", "hu": "Hungarian", "auto": "Auto-detect"
+ "en": "English",
+ "es": "Spanish",
+ "fr": "French",
+ "de": "German",
+ "it": "Italian",
+ "pt": "Portuguese",
+ "ru": "Russian",
+ "zh": "Chinese",
+ "zh-CN": "Chinese (Simplified)",
+ "zh-TW": "Chinese (Traditional)",
+ "ja": "Japanese",
+ "ko": "Korean",
+ "ar": "Arabic",
+ "hi": "Hindi",
+ "nl": "Dutch",
+ "pl": "Polish",
+ "tr": "Turkish",
+ "sv": "Swedish",
+ "da": "Danish",
+ "no": "Norwegian",
+ "fi": "Finnish",
+ "cs": "Czech",
+ "el": "Greek",
+ "th": "Thai",
+ "vi": "Vietnamese",
+ "id": "Indonesian",
+ "uk": "Ukrainian",
+ "ro": "Romanian",
+ "hu": "Hungarian",
+ "auto": "Auto-detect",
}
-
+
@classmethod
def validate(cls, language_code: str, field_name: str = "language") -> str:
"""Validate and normalize language code"""
if not language_code:
- raise ValidationError(
- f"{field_name} is required",
- code="missing_language"
- )
-
+ raise ValidationError(f"{field_name} est requis", code="missing_language")
+
# Normalize
normalized = language_code.strip().lower()
-
+
# Handle common variations
if normalized in ["chinese", "cn"]:
normalized = "zh-CN"
elif normalized in ["chinese-traditional", "tw"]:
normalized = "zh-TW"
-
+
if normalized not in cls.SUPPORTED_LANGUAGES:
raise ValidationError(
- f"Unsupported language code: '{language_code}'. See /languages for supported codes.",
+ f"Code langue non supporte: '{language_code}'. Consultez /languages pour les codes supportes.",
code="unsupported_language",
- details={"language": language_code}
+ details={"language": language_code},
)
-
+
return normalized
-
+
@classmethod
def get_language_name(cls, code: str) -> str:
"""Get human-readable language name"""
@@ -330,104 +499,116 @@ class LanguageValidator:
class ProviderValidator:
"""Validates translation provider configuration"""
-
- SUPPORTED_PROVIDERS = {"google", "ollama", "deepl", "libre", "openai", "webllm", "openrouter"}
-
+
+ SUPPORTED_PROVIDERS = {
+ "google",
+ "ollama",
+ "deepl",
+ "libre",
+ "openai",
+ "webllm",
+ "openrouter",
+ "classic",
+ "llm",
+ }
+
@classmethod
def validate(cls, provider: str, **kwargs) -> dict:
"""Validate provider and its required configuration"""
if not provider:
raise ValidationError(
- "Translation provider is required",
- code="missing_provider"
+ "Le fournisseur de traduction est requis", code="missing_provider"
)
-
+
normalized = provider.strip().lower()
-
+
if normalized not in cls.SUPPORTED_PROVIDERS:
raise ValidationError(
- f"Unsupported provider: '{provider}'. Supported: {', '.join(cls.SUPPORTED_PROVIDERS)}",
+ f"Fournisseur non supporte: '{provider}'. Supportes: {', '.join(cls.SUPPORTED_PROVIDERS)}",
code="unsupported_provider",
- details={"provider": provider, "supported": list(cls.SUPPORTED_PROVIDERS)}
+ details={
+ "provider": provider,
+ "supported": list(cls.SUPPORTED_PROVIDERS),
+ },
)
-
+
# Provider-specific validation
if normalized == "deepl":
if not kwargs.get("deepl_api_key"):
raise ValidationError(
- "DeepL API key is required when using DeepL provider",
- code="missing_deepl_key"
+ "La cle API DeepL est requise pour utiliser le fournisseur DeepL",
+ code="missing_deepl_key",
)
-
+
elif normalized == "openai":
if not kwargs.get("openai_api_key"):
raise ValidationError(
- "OpenAI API key is required when using OpenAI provider",
- code="missing_openai_key"
+ "La cle API OpenAI est requise pour utiliser le fournisseur OpenAI",
+ code="missing_openai_key",
)
-
+
elif normalized == "ollama":
# Ollama doesn't require API key but may need model
model = kwargs.get("ollama_model", "")
if not model:
logger.warning("No Ollama model specified, will use default")
-
+
return {"provider": normalized, "validated": True}
class InputSanitizer:
"""Sanitizes user inputs to prevent injection attacks"""
-
+
@staticmethod
def sanitize_text(text: str, max_length: int = 10000) -> str:
"""Sanitize text input"""
if not text:
return ""
-
+
# Remove null bytes
- text = text.replace('\x00', '')
-
+ text = text.replace("\x00", "")
+
# Limit length
if len(text) > max_length:
text = text[:max_length]
-
+
return text.strip()
-
+
@staticmethod
def sanitize_language_code(code: str) -> str:
"""Sanitize and normalize language code"""
if not code:
return "auto"
-
+
# Remove dangerous characters, keep only alphanumeric and hyphen
- code = re.sub(r'[^a-zA-Z0-9\-]', '', code.strip())
-
+ code = re.sub(r"[^a-zA-Z0-9\-]", "", code.strip())
+
# Limit length
if len(code) > 10:
code = code[:10]
-
+
return code.lower() if code else "auto"
-
+
@staticmethod
def sanitize_url(url: str) -> str:
"""Sanitize URL input"""
if not url:
return ""
-
+
url = url.strip()
-
+
# Basic URL validation
- if not re.match(r'^https?://', url, re.IGNORECASE):
+ if not re.match(r"^https?://", url, re.IGNORECASE):
raise ValidationError(
- "Invalid URL format. Must start with http:// or https://",
- code="invalid_url"
+ "Format d'URL invalide. Doit commencer par http:// ou https://",
+ code="invalid_url",
)
-
+
# Remove trailing slashes
- url = url.rstrip('/')
-
+ url = url.rstrip("/")
+
return url
-
+
@staticmethod
def sanitize_api_key(key: str) -> str:
"""Sanitize API key (just trim, no logging)"""
@@ -436,5 +617,117 @@ class InputSanitizer:
return key.strip()
+class WebhookURLValidator:
+    """
+    Validator for webhook URLs with security checks.
+
+    Prevents SSRF attacks by blocking private IPs and localhost.
+    Story 3.7: Webhook - Spécification URL
+
+    NOTE: the DNS answers inspected here are those seen at validation time;
+    a hostname may resolve differently when the webhook is actually sent.
+    """
+
+    # Allowed URL schemes
+    ALLOWED_SCHEMES = ("http", "https")
+
+    # Blocked hostnames
+    BLOCKED_HOSTNAMES = {"localhost", "127.0.0.1", "::1", "0.0.0.0"}
+
+    def __init__(
+        self,
+        allowed_schemes: Tuple[str, ...] = ALLOWED_SCHEMES,
+        block_private_ips: bool = True,
+    ):
+        self.allowed_schemes = allowed_schemes
+        self.block_private_ips = block_private_ips
+
+    def validate(self, url: Optional[str]) -> Tuple[bool, Optional[str], Optional[dict]]:
+        """
+        Validate webhook URL format and security.
+
+        Args:
+            url: The webhook URL to validate (can be None or empty for optional parameter)
+
+        Returns:
+            Tuple of (is_valid, error_message, details)
+        """
+        # Empty or None URLs are valid (optional parameter)
+        if not url:
+            return True, None, None
+
+        try:
+            parsed = urlparse(url)
+
+            # Check scheme
+            if parsed.scheme.lower() not in self.allowed_schemes:
+                return False, (
+                    f"L'URL doit utiliser {' ou '.join(self.allowed_schemes)}"
+                ), {
+                    "field": "webhook_url",
+                    "allowed_schemes": list(self.allowed_schemes),
+                    "detected_scheme": parsed.scheme or "none",
+                }
+
+            # Check for credentials in URL
+            if parsed.username or parsed.password:
+                return False, (
+                    "L'URL ne doit pas contenir d'identifiants (credentials)"
+                ), {"field": "webhook_url", "reason": "credentials_in_url"}
+
+            # Check hostname; a trailing dot ("localhost.") names the same host
+            hostname = (parsed.hostname or "").lower().rstrip(".")
+            if not hostname:
+                return False, (
+                    "URL invalide: nom d'hôte manquant"
+                ), {"field": "webhook_url", "reason": "missing_hostname"}
+
+            # Block localhost and common local addresses
+            if hostname in self.BLOCKED_HOSTNAMES:
+                return False, (
+                    "Les URLs localhost ne sont pas autorisées"
+                ), {"field": "webhook_url", "reason": "localhost_blocked"}
+
+            # Check for private IPs (SSRF protection): reject when ANY address
+            # the host denotes is blocked, not just the first IPv4 answer.
+            if self.block_private_ips and any(
+                self._is_blocked_ip(ip) for ip in self._candidate_ips(hostname)
+            ):
+                return False, (
+                    "Les adresses IP privées ne sont pas autorisées"
+                ), {"field": "webhook_url", "reason": "private_ip_blocked"}
+
+            return True, None, None
+
+        except Exception as e:
+            return False, (
+                f"Format d'URL invalide: {str(e)}"
+            ), {"field": "webhook_url", "error": str(e)}
+
+    def _candidate_ips(self, hostname: str) -> list:
+        """Return every IP the hostname denotes: the literal itself, or all DNS answers (IPv4 and IPv6)."""
+        try:
+            return [ipaddress.ip_address(hostname)]
+        except ValueError:
+            pass  # Not an IP literal, resolve it below
+
+        try:
+            answers = socket.getaddrinfo(hostname, None)
+        except (OSError, UnicodeError):
+            # DNS resolution failed - let it through
+            # Will fail at webhook send time
+            return []
+
+        addresses = []
+        for answer in answers:
+            # sockaddr[0] is the address text; strip any IPv6 zone id ("%eth0")
+            text = str(answer[4][0]).split("%", 1)[0]
+            try:
+                addresses.append(ipaddress.ip_address(text))
+            except ValueError:
+                continue
+        return addresses
+
+    def _is_blocked_ip(self, ip: "ipaddress.IPv4Address | ipaddress.IPv6Address") -> bool:
+        """Check if IP is private, loopback, link-local, reserved, multicast or unspecified."""
+        return (
+            ip.is_private
+            or ip.is_loopback
+            or ip.is_link_local
+            or ip.is_reserved
+            or ip.is_multicast
+            or ip.is_unspecified
+        )
+
+
# Default validators
file_validator = FileValidator()
+webhook_validator = WebhookURLValidator()
diff --git a/models/subscription.py b/models/subscription.py
index dd407bf..c26a2c3 100644
--- a/models/subscription.py
+++ b/models/subscription.py
@@ -1,10 +1,12 @@
"""
Subscription and User models for the monetization system
"""
-from pydantic import BaseModel, EmailStr, Field
+
+from pydantic import BaseModel, EmailStr, Field, field_validator
from typing import Optional, List, Dict, Any
-from datetime import datetime
+from datetime import datetime, timezone
from enum import Enum
+import re
class PlanType(str, Enum):
@@ -22,157 +24,209 @@ class SubscriptionStatus(str, Enum):
TRIALING = "trialing"
PAUSED = "paused"
+
import os
-# Plan definitions with limits
-# NOTE: Stripe price IDs should be set via environment variables in production
-# Create products and prices in Stripe Dashboard: https://dashboard.stripe.com/products
+# Plan definitions — Pricing reviewed March 2026
+# NOTE: Stripe price IDs must be set via env vars in production.
+# Create products/prices in Stripe Dashboard: https://dashboard.stripe.com/products
+#
+# LLM models used (via OpenRouter — March 2026):
+# Essentielle : deepseek/deepseek-v3.2 ($0.25 / $0.38 per 1M tokens)
+# Premium : anthropic/claude-3.5-haiku ($0.25 / $1.25 per 1M tokens)
+# or google/gemini-3-flash ($0.15 / $0.60)
PLANS = {
PlanType.FREE: {
- "name": "Free",
+ "name": "Gratuit",
"price_monthly": 0,
"price_yearly": 0,
- "docs_per_month": 3,
- "max_pages_per_doc": 10,
+ "docs_per_month": 5,
+ "max_pages_per_doc": 15,
"max_file_size_mb": 5,
- "providers": ["ollama"], # Only self-hosted
+ "max_chars_per_month": 50_000,
+ "providers": ["google"],
"features": [
- "3 documents per day",
- "Up to 10 pages per document",
- "Ollama (self-hosted) only",
- "Basic support via community",
+ "5 documents / mois",
+ "Jusqu'à 15 pages par document",
+ "Google Traduction inclus",
+ "Toutes les langues (130+)",
+ "Support communautaire",
],
+ "ai_translation": False,
"api_access": False,
"priority_processing": False,
"stripe_price_id_monthly": None,
"stripe_price_id_yearly": None,
+ "highlight": None,
+ "description": "Parfait pour découvrir l'application",
+ "badge": None,
},
PlanType.STARTER: {
"name": "Starter",
- "price_monthly": 12, # Updated pricing
- "price_yearly": 120, # 2 months free
+ "price_monthly": 7.99,
+ "price_yearly": 76.70, # -20 %
"docs_per_month": 50,
"max_pages_per_doc": 50,
- "max_file_size_mb": 25,
- "providers": ["ollama", "google", "libre"],
+ "max_file_size_mb": 10,
+ "max_chars_per_month": 500_000,
+ "providers": ["google", "deepl"],
"features": [
- "50 documents per month",
- "Up to 50 pages per document",
- "Google Translate included",
- "LibreTranslate included",
- "Email support",
+ "50 documents / mois",
+ "Jusqu'à 50 pages par document",
+ "Google Traduction + DeepL",
+ "Fichiers jusqu'à 10 Mo",
+ "Support par e-mail",
+ "Historique 30 jours",
],
+ "ai_translation": False,
"api_access": False,
"priority_processing": False,
"stripe_price_id_monthly": os.getenv("STRIPE_PRICE_STARTER_MONTHLY", ""),
"stripe_price_id_yearly": os.getenv("STRIPE_PRICE_STARTER_YEARLY", ""),
+ "highlight": None,
+ "description": "Pour les particuliers et petits projets",
+ "badge": None,
},
PlanType.PRO: {
"name": "Pro",
- "price_monthly": 39, # Updated pricing
- "price_yearly": 390, # 2 months free
+ "price_monthly": 19.99,
+ "price_yearly": 191.90, # -20 %
"docs_per_month": 200,
"max_pages_per_doc": 200,
- "max_file_size_mb": 100,
- "providers": ["ollama", "google", "deepl", "openai", "libre", "openrouter"],
+ "max_file_size_mb": 25,
+ "max_chars_per_month": 2_000_000,
+ "providers": ["google", "deepl", "openrouter"],
+ "ai_model_essential": "deepseek/deepseek-v3.2",
"features": [
- "200 documents per month",
- "Up to 200 pages per document",
- "All translation providers",
- "DeepL & OpenAI included",
- "API access (1000 calls/month)",
- "Priority email support",
+ "200 documents / mois",
+ "Jusqu'à 200 pages par document",
+ "Traduction IA Essentielle incluse (DeepSeek V3.2)",
+ "Google Traduction + DeepL",
+ "Fichiers jusqu'à 25 Mo",
+ "Glossaires personnalisés",
+ "Support prioritaire par e-mail",
+ "Historique 90 jours",
],
- "api_access": True,
- "api_calls_per_month": 1000,
+ "ai_translation": True,
+ "ai_tier": "essential",
+ "api_access": False,
"priority_processing": True,
"stripe_price_id_monthly": os.getenv("STRIPE_PRICE_PRO_MONTHLY", ""),
"stripe_price_id_yearly": os.getenv("STRIPE_PRICE_PRO_YEARLY", ""),
+ "highlight": "Le plus populaire",
+ "description": "Pour les professionnels et équipes en croissance",
+ "badge": "POPULAIRE",
},
PlanType.BUSINESS: {
"name": "Business",
- "price_monthly": 99, # Updated pricing
- "price_yearly": 990, # 2 months free
+ "price_monthly": 49.99,
+ "price_yearly": 479.90, # -20 %
"docs_per_month": 1000,
"max_pages_per_doc": 500,
- "max_file_size_mb": 250,
- "providers": ["ollama", "google", "deepl", "openai", "libre", "openrouter", "azure"],
+ "max_file_size_mb": 50,
+ "max_chars_per_month": 10_000_000,
+ "providers": ["google", "deepl", "openrouter", "openrouter_premium", "openai", "zai"],
+ "ai_model_essential": "deepseek/deepseek-v3.2",
+ "ai_model_premium": "anthropic/claude-3.5-haiku",
"features": [
- "1000 documents per month",
- "Up to 500 pages per document",
- "All translation providers",
- "Azure Translator included",
- "Unlimited API access",
- "Priority processing queue",
- "Dedicated support",
- "Team management (up to 5 users)",
+ "1 000 documents / mois",
+ "Jusqu'à 500 pages par document",
+ "Traduction IA Essentielle + Premium (Claude Haiku)",
+ "Tous les fournisseurs de traduction",
+ "Fichiers jusqu'à 50 Mo",
+ "Accès API (10 000 appels/mois)",
+ "Webhooks de notification",
+ "Glossaires + Prompts personnalisés",
+ "Support dédié",
+ "Historique 1 an",
+ "Analytiques avancées",
],
+ "ai_translation": True,
+ "ai_tier": "premium",
"api_access": True,
- "api_calls_per_month": -1, # Unlimited
+ "api_calls_per_month": 10_000,
"priority_processing": True,
"team_seats": 5,
"stripe_price_id_monthly": os.getenv("STRIPE_PRICE_BUSINESS_MONTHLY", ""),
"stripe_price_id_yearly": os.getenv("STRIPE_PRICE_BUSINESS_YEARLY", ""),
+ "highlight": None,
+ "description": "Pour les équipes et organisations",
+ "badge": None,
},
PlanType.ENTERPRISE: {
- "name": "Enterprise",
- "price_monthly": -1, # Custom
+ "name": "Entreprise",
+ "price_monthly": -1,
"price_yearly": -1,
- "docs_per_month": -1, # Unlimited
+ "docs_per_month": -1,
"max_pages_per_doc": -1,
"max_file_size_mb": -1,
- "providers": ["ollama", "google", "deepl", "openai", "libre", "openrouter", "azure", "custom"],
+ "max_chars_per_month": -1,
+ "providers": ["google", "deepl", "openrouter", "openrouter_premium", "openai", "zai", "custom"],
"features": [
- "Unlimited documents",
- "Unlimited pages",
- "Custom integrations",
- "On-premise deployment",
- "SLA guarantee",
- "24/7 dedicated support",
- "Custom AI models",
- "White-label option",
+ "Documents illimités",
+ "Tous les modèles IA (GPT-5, Claude Opus 4.6...)",
+ "Déploiement on-premise ou cloud dédié",
+ "SLA 99,9 % garanti",
+ "Support 24/7 dédié",
+ "Modèles IA personnalisés",
+ "Marque blanche (white-label)",
+ "Équipes illimitées",
+ "Intégrations sur mesure",
],
+ "ai_translation": True,
+ "ai_tier": "custom",
"api_access": True,
"api_calls_per_month": -1,
"priority_processing": True,
- "team_seats": -1, # Unlimited
- "stripe_price_id_monthly": None, # Contact sales
+ "team_seats": -1,
+ "stripe_price_id_monthly": None,
"stripe_price_id_yearly": None,
+ "highlight": None,
+ "description": "Solutions sur mesure pour grandes organisations",
+ "badge": "SUR DEVIS",
},
}
+def _utc_now() -> datetime:
+ """Return current UTC datetime."""
+ return datetime.now(timezone.utc)
+
+
class User(BaseModel):
id: str
email: EmailStr
name: str
password_hash: str
- created_at: datetime = Field(default_factory=datetime.utcnow)
- updated_at: datetime = Field(default_factory=datetime.utcnow)
+ created_at: datetime = Field(default_factory=_utc_now)
+ updated_at: datetime = Field(default_factory=_utc_now)
email_verified: bool = False
avatar_url: Optional[str] = None
-
+
# Subscription info
plan: PlanType = PlanType.FREE
subscription_status: SubscriptionStatus = SubscriptionStatus.ACTIVE
stripe_customer_id: Optional[str] = None
stripe_subscription_id: Optional[str] = None
subscription_ends_at: Optional[datetime] = None
-
+
# Usage tracking
docs_translated_this_month: int = 0
pages_translated_this_month: int = 0
api_calls_this_month: int = 0
- usage_reset_date: datetime = Field(default_factory=datetime.utcnow)
-
+ daily_translation_count: int = (
+ 0 # Daily count (reset at midnight UTC; synced with tier quota)
+ )
+ usage_reset_date: datetime = Field(default_factory=_utc_now)
+
# Extra credits (purchased separately)
extra_credits: int = 0 # Each credit = 1 page
-
+
# Settings
default_source_lang: str = "auto"
default_target_lang: str = "en"
default_provider: str = "google"
-
+
# Ollama self-hosted config
ollama_endpoint: Optional[str] = None
ollama_model: Optional[str] = None
@@ -180,8 +234,22 @@ class User(BaseModel):
class UserCreate(BaseModel):
email: EmailStr
- name: str
- password: str
+ name: str = Field(..., min_length=1, max_length=100)
+ password: str = Field(..., min_length=8)
+
+ @field_validator("password")
+ @classmethod
+ def validate_password_strength(cls, v: str) -> str:
+ """Validate password meets minimum security requirements."""
+ if len(v) < 8:
+ raise ValueError("Le mot de passe doit contenir au moins 8 caractères")
+ if not re.search(r"[A-Z]", v):
+ raise ValueError("Le mot de passe doit contenir au moins une majuscule")
+ if not re.search(r"[a-z]", v):
+ raise ValueError("Le mot de passe doit contenir au moins une minuscule")
+ if not re.search(r"[0-9]", v):
+ raise ValueError("Le mot de passe doit contenir au moins un chiffre")
+ return v
class UserLogin(BaseModel):
@@ -195,14 +263,14 @@ class UserResponse(BaseModel):
name: str
avatar_url: Optional[str] = None
plan: PlanType
+ tier: PlanType
subscription_status: SubscriptionStatus
docs_translated_this_month: int
pages_translated_this_month: int
api_calls_this_month: int
extra_credits: int
created_at: datetime
-
- # Plan limits for display
+
plan_limits: Dict[str, Any] = {}
@@ -216,7 +284,7 @@ class Subscription(BaseModel):
current_period_start: datetime
current_period_end: datetime
cancel_at_period_end: bool = False
- created_at: datetime = Field(default_factory=datetime.utcnow)
+ created_at: datetime = Field(default_factory=_utc_now)
class UsageRecord(BaseModel):
@@ -230,24 +298,51 @@ class UsageRecord(BaseModel):
provider: str
processing_time_seconds: float
credits_used: int
- created_at: datetime = Field(default_factory=datetime.utcnow)
+ created_at: datetime = Field(default_factory=_utc_now)
class CreditPurchase(BaseModel):
"""For buying extra credits (pay-per-use)"""
+
id: str
user_id: str
credits_amount: int
price_paid: float # in cents
stripe_payment_id: str
- created_at: datetime = Field(default_factory=datetime.utcnow)
+ created_at: datetime = Field(default_factory=_utc_now)
# Credit packages for purchase
CREDIT_PACKAGES = [
- {"credits": 50, "price": 5.00, "price_per_credit": 0.10, "stripe_price_id": "price_credits_50"},
- {"credits": 100, "price": 9.00, "price_per_credit": 0.09, "stripe_price_id": "price_credits_100", "popular": True},
- {"credits": 250, "price": 20.00, "price_per_credit": 0.08, "stripe_price_id": "price_credits_250"},
- {"credits": 500, "price": 35.00, "price_per_credit": 0.07, "stripe_price_id": "price_credits_500"},
- {"credits": 1000, "price": 60.00, "price_per_credit": 0.06, "stripe_price_id": "price_credits_1000"},
+ {
+ "credits": 50,
+ "price": 5.00,
+ "price_per_credit": 0.10,
+ "stripe_price_id": "price_credits_50",
+ },
+ {
+ "credits": 100,
+ "price": 9.00,
+ "price_per_credit": 0.09,
+ "stripe_price_id": "price_credits_100",
+ "popular": True,
+ },
+ {
+ "credits": 250,
+ "price": 20.00,
+ "price_per_credit": 0.08,
+ "stripe_price_id": "price_credits_250",
+ },
+ {
+ "credits": 500,
+ "price": 35.00,
+ "price_per_credit": 0.07,
+ "stripe_price_id": "price_credits_500",
+ },
+ {
+ "credits": 1000,
+ "price": 60.00,
+ "price_per_credit": 0.06,
+ "stripe_price_id": "price_credits_1000",
+ },
]
diff --git a/requirements.txt b/requirements.txt
index 10eda51..fbff7db 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -9,6 +9,7 @@ python-dotenv==1.0.0
pydantic==2.5.3
pydantic[email]==2.5.3
aiofiles==23.2.1
+httpx>=0.27.0
Pillow==10.2.0
matplotlib==3.8.2
pandas==2.1.4
@@ -16,19 +17,23 @@ requests==2.31.0
ipykernel==6.27.1
openai>=1.0.0
-# SaaS robustness dependencies
psutil==5.9.8
-python-magic-bin==0.4.14 # For Windows, use python-magic on Linux
+python-magic-bin==0.4.14
-# Authentication & Payments
PyJWT==2.8.0
passlib[bcrypt]==1.7.4
stripe==7.0.0
-# Session storage & caching (optional but recommended for production)
redis==5.0.1
-# Database (recommended for production)
+structlog>=24.1.0
+
sqlalchemy==2.0.25
-psycopg2-binary==2.9.9 # PostgreSQL driver
-alembic==1.13.1 # Database migrations
+alembic==1.13.1
+
+aiosqlite>=0.19.0
+asyncpg>=0.29.0
+greenlet>=3.0.0
+
+pytest>=7.0.0
+pytest-asyncio>=0.21.0
diff --git a/routes/admin_routes.py b/routes/admin_routes.py
new file mode 100644
index 0000000..52b5438
--- /dev/null
+++ b/routes/admin_routes.py
@@ -0,0 +1,977 @@
+"""
+Admin API v1 Endpoints
+All admin endpoints under /api/v1/admin/
+Story 3.5: API Versioning - Migrated from main.py
+"""
+
+import os
+import secrets
+import time
+import logging
+from datetime import datetime, timezone
+from typing import Optional, Literal
+
+from fastapi import APIRouter, Depends, Header, HTTPException, Form, Request, Query
+from fastapi.responses import JSONResponse
+from passlib.context import CryptContext
+from pydantic import BaseModel
+
+from config import config
+from models.subscription import PlanType, PLANS
+
+pwd_context = CryptContext(schemes=["bcrypt"], deprecated="auto")
+
+logger = logging.getLogger(__name__)
+
+router = APIRouter(prefix="/api/v1/admin", tags=["Admin"])
+
+ADMIN_USERNAME = os.getenv("ADMIN_USERNAME")
+ADMIN_PASSWORD_HASH = os.getenv("ADMIN_PASSWORD_HASH")
+ADMIN_PASSWORD = os.getenv("ADMIN_PASSWORD")
+if not ADMIN_PASSWORD_HASH and not ADMIN_PASSWORD:
+ ADMIN_PASSWORD = os.getenv("ADMIN_DEV_DEFAULT")
+
+_admin_token_secret = os.getenv("ADMIN_TOKEN_SECRET")
+if not _admin_token_secret:
+ _admin_token_secret = secrets.token_hex(32)
+ logger.critical(
+ "SECURITY: ADMIN_TOKEN_SECRET is not configured! Using an ephemeral random key. "
+ "ALL ADMIN SESSIONS WILL BE INVALIDATED ON EVERY RESTART. "
+ "Set ADMIN_TOKEN_SECRET in your .env file immediately."
+ )
+ADMIN_TOKEN_SECRET = _admin_token_secret
+
+REDIS_URL = os.getenv("REDIS_URL", "")
+_redis_client = None
+_memory_sessions: dict = {}
+
+# Brute-force protection: IP → (failed_count, first_fail_ts)
+_login_attempts: dict[str, tuple[int, float]] = {}
+_MAX_LOGIN_ATTEMPTS = 5
+_LOCKOUT_SECONDS = 300 # 5 minutes
+
+
+def get_redis_client():
+ global _redis_client
+ if _redis_client is None and REDIS_URL:
+ try:
+ import redis
+
+ _redis_client = redis.from_url(REDIS_URL, decode_responses=True)
+ _redis_client.ping()
+ logger.info("Connected to Redis for session storage")
+ except Exception as e:
+ logger.warning(f"Redis connection failed: {e}. Using in-memory sessions.")
+ _redis_client = False
+ return _redis_client if _redis_client else None
+
+
+def hash_password(password: str) -> str:
+ return pwd_context.hash(password)
+
+
+def verify_admin_password(password: str) -> bool:
+    """Return True if ``password`` matches ADMIN_PASSWORD_HASH (preferred) or, failing that, ADMIN_PASSWORD."""
+    candidate = (password or "").strip()
+    if ADMIN_PASSWORD_HASH:
+        try:
+            return pwd_context.verify(candidate, ADMIN_PASSWORD_HASH)
+        except Exception:
+            return False  # malformed hash / backend error: reject instead of crashing the login route
+    expected = (ADMIN_PASSWORD or "").strip()  # unset or whitespace-only config must never authenticate
+    return bool(expected) and secrets.compare_digest(candidate.encode("utf-8"), expected.encode("utf-8"))
+
+
+def _get_session_key(token: str) -> str:
+ return f"admin_session:{token}"
+
+
+def create_admin_token() -> str:
+ token = secrets.token_urlsafe(32)
+ expiry = int(time.time()) + (24 * 60 * 60)
+ redis_client = get_redis_client()
+ if redis_client:
+ try:
+ redis_client.setex(_get_session_key(token), 24 * 60 * 60, str(expiry))
+ except Exception as e:
+ logger.warning(f"Redis session save failed: {e}")
+ _memory_sessions[token] = expiry
+ else:
+ _memory_sessions[token] = expiry
+ return token
+
+
+def verify_admin_token(token: str) -> bool:
+    """Return True if ``token`` is an unexpired admin session in Redis or in the in-memory store."""
+    redis_client = get_redis_client()
+    if redis_client:
+        try:
+            expiry = redis_client.get(_get_session_key(token))
+            if expiry and int(expiry) > time.time():
+                return True
+            # Redis miss: fall through, create_admin_token() stores in memory when the Redis write fails.
+        except Exception as e:
+            logger.warning(f"Redis session check failed: {e}")
+    memory_expiry = _memory_sessions.get(token)
+    if memory_expiry is not None and time.time() > memory_expiry:
+        _memory_sessions.pop(token, None)  # evict the expired in-memory session
+        return False
+    return memory_expiry is not None
+
+
+def delete_admin_token(token: str):
+    """Invalidate ``token`` in Redis (when a client is available) and in the in-memory store."""
+    redis_client = get_redis_client()
+    if redis_client:
+        try:
+            redis_client.delete(_get_session_key(token))
+        except Exception as e:
+            logger.warning(f"Redis session delete failed: {e}")
+    _memory_sessions.pop(token, None)
+
+
+async def require_admin(authorization: Optional[str] = Header(None)) -> str:
+    """FastAPI dependency: validate the ``Authorization: Bearer <token>`` header and return ADMIN_USERNAME."""
+    if not ADMIN_USERNAME or (not ADMIN_PASSWORD_HASH and not ADMIN_PASSWORD):
+        raise HTTPException(status_code=503, detail="Admin authentication not configured")
+    challenge = {"WWW-Authenticate": "Bearer"}  # RFC 7235: a 401 response carries a challenge
+    if not authorization:
+        raise HTTPException(status_code=401, detail="Authorization header required", headers=challenge)
+    parts = authorization.split(" ")
+    if len(parts) != 2 or parts[0].lower() != "bearer":
+        raise HTTPException(
+            status_code=401, detail="Invalid authorization format. Use: Bearer <token>", headers=challenge
+        )
+    token = parts[1]
+    if not verify_admin_token(token):
+        raise HTTPException(status_code=401, detail="Invalid or expired token", headers=challenge)
+    return ADMIN_USERNAME
+
+
+class AdminLoginRequest(BaseModel):
+ password: str
+
+
+class AdminUpdateUserTierRequest(BaseModel):
+ plan: Literal["free", "starter", "pro", "business", "enterprise"]
+
+
+@router.post("/login")
+async def admin_login(request: AdminLoginRequest, req: Request):
+ """Admin login endpoint - Returns a bearer token for authenticated admin access"""
+ client_ip = req.client.host if req.client else "unknown"
+
+ # Brute-force protection
+ now = time.time()
+ attempts, first_fail = _login_attempts.get(client_ip, (0, now))
+ if attempts >= _MAX_LOGIN_ATTEMPTS:
+ elapsed = now - first_fail
+ if elapsed < _LOCKOUT_SECONDS:
+ remaining = int(_LOCKOUT_SECONDS - elapsed)
+ logger.warning(f"Admin login blocked (brute-force) for IP {client_ip}")
+ raise HTTPException(
+ status_code=429,
+ detail=f"Too many failed attempts. Try again in {remaining}s.",
+ headers={"Retry-After": str(remaining)},
+ )
+ else:
+ _login_attempts.pop(client_ip, None)
+
+ if not verify_admin_password(request.password):
+ count, first = _login_attempts.get(client_ip, (0, now))
+ _login_attempts[client_ip] = (count + 1, first if count > 0 else now)
+ logger.warning(f"Failed admin login attempt from {client_ip} ({count + 1}/{_MAX_LOGIN_ATTEMPTS})")
+ raise HTTPException(status_code=401, detail="Invalid credentials")
+
+ _login_attempts.pop(client_ip, None)
+ token = create_admin_token()
+ logger.info(f"Admin login successful from {client_ip}")
+ return {
+ "status": "success",
+ "access_token": token,
+ "token_type": "bearer",
+ "expires_in": 86400,
+ "message": "Login successful",
+ }
+
+
+@router.post("/logout")
+async def admin_logout(authorization: Optional[str] = Header(None)):
+ """Logout and invalidate admin token"""
+ if authorization:
+ parts = authorization.split(" ")
+ if len(parts) == 2 and parts[0].lower() == "bearer":
+ token = parts[1]
+ delete_admin_token(token)
+ logger.info("Admin logout successful")
+ return {"status": "success", "message": "Logged out"}
+
+
+@router.get("/verify")
+async def verify_admin_session(is_admin: bool = Depends(require_admin)):
+ """Verify admin token is still valid"""
+ return {"status": "valid", "authenticated": True}
+
+
+@router.get("/dashboard")
+async def get_admin_dashboard(is_admin: bool = Depends(require_admin)):
+ """Get comprehensive admin dashboard data"""
+ from middleware.cleanup import create_cleanup_manager
+ from middleware.rate_limiting import RateLimitManager, RateLimitConfig
+ from services.translation_service import _translation_cache
+
+ cleanup_manager = create_cleanup_manager(config)
+ rate_limit_config = RateLimitConfig(
+ requests_per_minute=int(os.getenv("RATE_LIMIT_PER_MINUTE", "30")),
+ requests_per_hour=int(os.getenv("RATE_LIMIT_PER_HOUR", "200")),
+ translations_per_minute=int(os.getenv("TRANSLATIONS_PER_MINUTE", "10")),
+ translations_per_hour=int(os.getenv("TRANSLATIONS_PER_HOUR", "50")),
+ max_concurrent_translations=int(os.getenv("MAX_CONCURRENT_TRANSLATIONS", "5")),
+ )
+ rate_limit_manager = RateLimitManager(rate_limit_config)
+
+ health_status = {
+ "status": "healthy",
+ "timestamp": datetime.now(timezone.utc).isoformat(),
+ }
+ cleanup_stats = cleanup_manager.get_stats()
+ rate_limit_stats = rate_limit_manager.get_stats()
+ tracked_files = cleanup_manager.get_tracked_files()
+
+ providers_status = {}
+ try:
+ from services.providers.google_provider import get_google_provider
+
+ google_health = get_google_provider().health_check()
+ providers_status["google"] = google_health.model_dump()
+ except Exception as e:
+ providers_status["google"] = {
+ "name": "google",
+ "available": False,
+ "error": str(e)[:100],
+ "last_check": None,
+ }
+
+ return {
+ "timestamp": health_status.get("timestamp"),
+ "status": health_status.get("status"),
+ "system": {"memory": {}, "disk": {}},
+ "providers": providers_status,
+ "cleanup": {**cleanup_stats, "tracked_files_count": len(tracked_files)},
+ "rate_limits": rate_limit_stats,
+ "config": {
+ "max_file_size_mb": config.MAX_FILE_SIZE_MB,
+ "supported_extensions": list(config.SUPPORTED_EXTENSIONS),
+ "translation_service": config.TRANSLATION_SERVICE,
+ },
+ }
+
+
+@router.get("/users")
+async def get_admin_users(is_admin: bool = Depends(require_admin)):
+ """Get all users with their usage stats"""
+ from services.auth_service import USE_DATABASE, DATABASE_AVAILABLE, load_users
+ from database.connection import get_sync_session
+ from database.models import ApiKey
+
+ users_list = []
+
+ with get_sync_session() as session:
+ if USE_DATABASE and DATABASE_AVAILABLE:
+ from database.models import User as DBUser
+
+ db_users = session.query(DBUser).order_by(DBUser.created_at.desc()).all()
+ for db_user in db_users:
+ plan = db_user.plan or "free"
+ plan_info = PLANS.get(plan, PLANS["free"])
+
+ active_api_keys = (
+ session.query(ApiKey)
+ .filter(ApiKey.user_id == db_user.id, ApiKey.is_active == True)
+ .all()
+ )
+ users_list.append(
+ {
+ "id": str(db_user.id),
+ "email": db_user.email or "",
+ "name": db_user.name or "",
+ "plan": plan,
+ "subscription_status": db_user.subscription_status or "active",
+ "docs_translated_this_month": db_user.docs_translated_this_month or 0,
+ "pages_translated_this_month": db_user.pages_translated_this_month or 0,
+ "extra_credits": db_user.extra_credits or 0,
+ "created_at": db_user.created_at.isoformat() if db_user.created_at else "",
+ "plan_limits": {
+ "docs_per_month": plan_info.get("docs_per_month", 0),
+ "max_pages_per_doc": plan_info.get("max_pages_per_doc", 0),
+ },
+ "api_keys_count": len(active_api_keys),
+ "api_key_ids": [key.id for key in active_api_keys],
+ }
+ )
+ else:
+ users_data = load_users()
+ for user_id, user_data in users_data.items():
+ plan = user_data.get("plan", "free")
+ plan_info = PLANS.get(plan, PLANS["free"])
+
+ active_api_keys = (
+ session.query(ApiKey)
+ .filter(ApiKey.user_id == user_id, ApiKey.is_active == True)
+ .all()
+ )
+ users_list.append(
+ {
+ "id": user_id,
+ "email": user_data.get("email", ""),
+ "name": user_data.get("name", ""),
+ "plan": plan,
+ "subscription_status": user_data.get("subscription_status", "active"),
+ "docs_translated_this_month": user_data.get("docs_translated_this_month", 0),
+ "pages_translated_this_month": user_data.get("pages_translated_this_month", 0),
+ "extra_credits": user_data.get("extra_credits", 0),
+ "created_at": user_data.get("created_at", ""),
+ "plan_limits": {
+ "docs_per_month": plan_info.get("docs_per_month", 0),
+ "max_pages_per_doc": plan_info.get("max_pages_per_doc", 0),
+ },
+ "api_keys_count": len(active_api_keys),
+ "api_key_ids": [key.id for key in active_api_keys],
+ }
+ )
+ users_list.sort(key=lambda x: x.get("created_at", ""), reverse=True)
+
+ return {"total": len(users_list), "users": users_list}
+
+
+@router.patch("/users/{user_id}")
+async def patch_admin_user_tier(
+ user_id: str,
+ body: AdminUpdateUserTierRequest,
+ is_admin: bool = Depends(require_admin),
+):
+ """Update a user's plan/tier - Admin only"""
+ from services.auth_service import get_user_by_id, update_user_plan
+
+ user = get_user_by_id(user_id)
+ if not user:
+ return JSONResponse(
+ status_code=404,
+ content={"error": "NOT_FOUND", "message": "User not found"},
+ )
+
+ updated = update_user_plan(user_id, body.plan)
+ if not updated:
+ return JSONResponse(
+ status_code=400,
+ content={
+ "error": "INVALID_PLAN",
+ "message": "Invalid plan. Allowed: free, starter, pro, business, enterprise",
+ "details": {
+ "allowed": ["free", "starter", "pro", "business", "enterprise"]
+ },
+ },
+ )
+
+ plan_value = (
+ updated.plan.value if hasattr(updated.plan, "value") else str(updated.plan)
+ )
+ new_tier = (
+ "pro"
+ if updated.plan in (PlanType.PRO, PlanType.BUSINESS, PlanType.ENTERPRISE)
+ else "free"
+ )
+
+ logger.info(
+ "admin_tier_change",
+ extra={
+ "event": "admin_tier_change",
+ "target_user_id": user_id,
+ "new_tier": new_tier,
+ "new_plan": plan_value,
+ "admin_id": "admin_session",
+ "timestamp": datetime.now(timezone.utc).isoformat(),
+ },
+ )
+
+ return {
+ "data": {
+ "id": updated.id,
+ "email": updated.email,
+ "name": getattr(updated, "name", ""),
+ "plan": plan_value,
+ "tier": new_tier,
+ },
+ "meta": {},
+ }
+
+
+@router.get("/stats")
+async def get_admin_stats(is_admin: bool = Depends(require_admin)):
+ """Get comprehensive admin statistics"""
+ from services.auth_service import load_users
+ from services.translation_service import _translation_cache
+
+ users_data = load_users()
+
+ total_users = len(users_data)
+ plan_distribution = {}
+ total_docs_translated = 0
+ total_pages_translated = 0
+ active_users = 0
+
+ for user_data in users_data.values():
+ plan = user_data.get("plan", "free")
+ plan_distribution[plan] = plan_distribution.get(plan, 0) + 1
+
+ docs = user_data.get("docs_translated_this_month", 0)
+ pages = user_data.get("pages_translated_this_month", 0)
+ total_docs_translated += docs
+ total_pages_translated += pages
+
+ if docs > 0:
+ active_users += 1
+
+ cache_stats = _translation_cache.get_stats()
+
+ return {
+ "users": {
+ "total": total_users,
+ "active_this_month": active_users,
+ "by_plan": plan_distribution,
+ },
+ "translations": {
+ "docs_this_month": total_docs_translated,
+ "pages_this_month": total_pages_translated,
+ },
+ "cache": cache_stats,
+ "config": {
+ "translation_service": config.TRANSLATION_SERVICE,
+ "max_file_size_mb": config.MAX_FILE_SIZE_MB,
+ "supported_extensions": list(config.SUPPORTED_EXTENSIONS),
+ },
+ }
+
+
+@router.post("/cleanup/trigger")
+async def trigger_cleanup(is_admin: bool = Depends(require_admin)):
+ """Trigger manual cleanup of expired files"""
+ from middleware.cleanup import create_cleanup_manager
+
+ cleanup_manager = create_cleanup_manager(config)
+ try:
+ cleaned = await cleanup_manager.cleanup_expired()
+ return {
+ "status": "success",
+ "files_cleaned": cleaned,
+ "message": f"Cleaned up {cleaned} expired files",
+ }
+ except Exception as e:
+ logger.error(f"Manual cleanup failed: {str(e)}")
+ raise HTTPException(status_code=500, detail=f"Cleanup failed: {str(e)}")
+
+
+@router.get("/files/tracked")
+async def get_tracked_files(is_admin: bool = Depends(require_admin)):
+ """Get list of currently tracked files"""
+ from middleware.cleanup import create_cleanup_manager
+
+ cleanup_manager = create_cleanup_manager(config)
+ tracked = cleanup_manager.get_tracked_files()
+ return {"count": len(tracked), "files": tracked}
+
+
+@router.post("/config/provider")
+async def update_default_provider(
+ provider: str = Form(...),
+ is_admin: bool = Depends(require_admin),
+):
+ """Update the default translation provider"""
+ valid_providers = [
+ "google",
+ "deepl",
+ "openai",
+ "ollama",
+ "openrouter",
+ "zai",
+ "libre",
+ "classic",
+ "llm",
+ ]
+ if provider not in valid_providers:
+ raise HTTPException(
+ status_code=400,
+ detail=f"Invalid provider. Must be one of: {valid_providers}",
+ )
+
+ config.TRANSLATION_SERVICE = provider
+
+ return {
+ "status": "success",
+ "message": f"Default provider updated to {provider}",
+ "provider": provider,
+ }
+
+
+class AdminRevokeApiKeyRequest(BaseModel):
+ reason: Optional[str] = None
+
+
+@router.delete("/api-keys/{key_id}")
+async def admin_revoke_api_key(
+    key_id: str,
+    body: Optional[AdminRevokeApiKeyRequest] = None,
+    admin_id: str = Depends(require_admin),
+):
+    """Revoke any user's API key - Admin only. Responds 404 when no active key has this id."""
+    from database.connection import get_sync_session
+    from database.models import ApiKey
+
+    revoke_reason = body.reason if body else None
+    revoked_at = datetime.now(timezone.utc)  # one timestamp shared by the DB row, audit log and response
+    with get_sync_session() as session:
+        api_key = (
+            session.query(ApiKey)
+            .filter(ApiKey.id == key_id, ApiKey.is_active == True)
+            .first()
+        )
+
+        if not api_key:
+            return JSONResponse(
+                status_code=404,
+                content={
+                    "error": "API_KEY_NOT_FOUND",
+                    "message": "Clé API non trouvée ou déjà révoquée",
+                },
+            )
+
+        owner_user_id = api_key.user_id
+        api_key_id = api_key.id  # read before commit: the row may be expired/detached once the session ends
+        api_key.is_active = False
+        api_key.revoked_at = revoked_at
+        session.commit()
+
+    logger.info(
+        "admin_api_key_revoked",
+        extra={
+            "admin_id": admin_id,
+            "key_id": key_id,
+            "owner_user_id": owner_user_id,
+            "reason": revoke_reason,
+            "timestamp": revoked_at.isoformat(),
+        },
+    )
+
+    return JSONResponse(
+        status_code=200,
+        content={
+            "data": {
+                "id": api_key_id,
+                "revoked": True,
+                "revoked_at": revoked_at.isoformat(),
+                "owner_user_id": owner_user_id,
+                "reason": revoke_reason,
+            },
+            "meta": {},
+        },
+    )
+
+
+def _extract_error_code(error_message: Optional[str]) -> Optional[str]:
+    """Return the first whole-word UPPER_CASE code (3+ chars) found in the message, else the
+    first word upper-cased and capped at 20 chars; None for a missing or blank message."""
+    import re
+
+    text = (error_message or "").strip()
+    if not text:
+        return None
+    code_match = re.search(r"\b([A-Z][A-Z0-9_]{2,})\b", error_message)
+    if code_match is not None:
+        return code_match.group(1)
+    return text.split()[0].upper()[:20]
+
+
+@router.get("/logs")
+def get_admin_logs(
+ is_admin: str = Depends(require_admin),
+ level: str = Query(default="all", pattern="^(all|error|warning|info)$"),
+ search: str = Query(default="", max_length=200),
+ page: int = Query(default=1, ge=1),
+ per_page: int = Query(default=50, ge=1, le=200),
+):
+ """Get admin error logs from failed translations. No document content or original_filename exposed (NFR11, NFR16).
+ Search matches user_id and error_message (error codes typically appear in error_message)."""
+ from database.connection import get_sync_session
+ from database.models import Translation
+ from sqlalchemy import or_, desc
+
+ if level == "warning" or level == "info":
+ return {
+ "data": {
+ "logs": [],
+ "total": 0,
+ "page": page,
+ "per_page": per_page,
+ },
+ "meta": {"generated_at": datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")},
+ }
+
+ with get_sync_session() as session:
+ base = session.query(Translation).filter(Translation.status == "failed")
+
+ if search and search.strip():
+ term = f"%{search.strip()}%"
+ base = base.filter(
+ or_(
+ Translation.user_id.ilike(term),
+ Translation.error_message.ilike(term),
+ )
+ )
+
+ total = base.count()
+ rows = (
+ base.order_by(desc(Translation.created_at))
+ .offset((page - 1) * per_page)
+ .limit(per_page)
+ .all()
+ )
+
+ def _ts(created_at):
+ if not created_at:
+ return ""
+ s = created_at.isoformat()
+ return s.replace("+00:00", "Z") if "+00:00" in s else s + "Z"
+
+ logs = [
+ {
+ "timestamp": _ts(t.created_at),
+ "level": "error",
+ "message": (t.error_message or "Translation failed").strip()[:500],
+ "user_id": t.user_id,
+ "error_code": _extract_error_code(t.error_message),
+ "provider": t.provider,
+ "file_type": t.file_type,
+ }
+ for t in rows
+ ]
+
+ return {
+ "data": {
+ "logs": logs,
+ "total": total,
+ "page": page,
+ "per_page": per_page,
+ },
+ "meta": {"generated_at": datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")},
+ }
+
+
+SETTINGS_FILE = "data/provider_settings.json"
+
+
+# Configuration of a single translation provider, as stored in SETTINGS_FILE.
+class ProviderSettings(BaseModel):
+ # Whether the provider is switched on; off unless a default says otherwise.
+ enabled: bool = False
+ # API key stored in the JSON file; None when no key has been saved there.
+ api_key: Optional[str] = None
+ # Custom endpoint; None means "use the env var or code default" (see get_settings).
+ base_url: Optional[str] = None
+ # Model identifier; None means "use the env var or code default" (see get_settings).
+ model: Optional[str] = None
+ # NOTE(review): presumably a request timeout in seconds — confirm against the consumers.
+ timeout: int = 30
+ # NOTE(review): presumably the number of retry attempts — confirm against the consumers.
+ max_retries: int = 3
+
+
+# Top-level settings document: one ProviderSettings per provider plus the
+# comma-separated fallback chains. Only "google" is enabled by default.
+class SettingsConfig(BaseModel):
+ google: ProviderSettings = ProviderSettings(enabled=True)
+ deepl: ProviderSettings = ProviderSettings()
+ openai: ProviderSettings = ProviderSettings()
+ ollama: ProviderSettings = ProviderSettings() # dev-only in UI
+ openrouter: ProviderSettings = ProviderSettings() # "Traduction IA Essentielle"
+ openrouter_premium: ProviderSettings = ProviderSettings() # "Traduction IA Premium"
+ zai: ProviderSettings = ProviderSettings()
+ # Comma-separated provider names; NOTE(review): presumably tried in this order — confirm in the consumer.
+ fallback_chain: str = "google,deepl,openai,ollama,openrouter,openrouter_premium,zai"
+ # Same format, restricted to the non-LLM providers.
+ fallback_chain_classic: str = "google,deepl"
+ # Same format, restricted to the LLM-backed providers.
+ fallback_chain_llm: str = "openrouter,openrouter_premium,openai,zai,ollama"
+
+
+def load_settings() -> SettingsConfig:
+    """Load the provider settings stored in SETTINGS_FILE.
+
+    Returns:
+        The parsed SettingsConfig when the file exists and is valid. Default
+        settings are returned when the file is missing, or when reading,
+        decoding or validating it fails (the failure is logged as a warning).
+    """
+    import json
+    from pathlib import Path
+
+    try:
+        path = Path(SETTINGS_FILE)
+        if path.exists():
+            stored = json.loads(path.read_text())
+            return SettingsConfig(**stored)
+    except Exception as exc:
+        # Best effort: a broken settings file must not take the admin API down.
+        logger.warning(f"Failed to load settings: {exc}")
+    return SettingsConfig()
+
+
+def save_settings(settings: SettingsConfig):
+    """Persist ``settings`` to SETTINGS_FILE as indented JSON.
+
+    The document is written to a sibling ``*.tmp`` file first and then renamed
+    over the real file. An interrupted write therefore never leaves truncated
+    JSON behind, which load_settings() would silently replace with defaults.
+
+    Args:
+        settings: The complete settings document to store.
+
+    Raises:
+        OSError: If the directory or file cannot be created or written.
+
+    NOTE(review): API keys contained in ``settings`` are stored in plain text.
+    """
+    import json
+    from pathlib import Path
+
+    settings_path = Path(SETTINGS_FILE)
+    # parents=True so a nested SETTINGS_FILE location also works.
+    settings_path.parent.mkdir(parents=True, exist_ok=True)
+
+    tmp_path = settings_path.with_name(settings_path.name + ".tmp")
+    try:
+        with open(tmp_path, "w", encoding="utf-8") as f:
+            json.dump(settings.model_dump(), f, indent=2)
+        # Path.replace overwrites the destination in a single rename.
+        tmp_path.replace(settings_path)
+    except Exception:
+        # Do not leave a half-written temp file around; the real file is untouched.
+        if tmp_path.exists():
+            tmp_path.unlink()
+        raise
+
+
+@router.get("/settings")
+async def get_settings(admin_id: str = Depends(require_admin)):
+    """Return the saved provider settings for the admin UI.
+
+    Empty ``model`` / ``base_url`` fields are filled from environment variables
+    or code defaults; a value saved in the JSON file always wins. ``env_info``
+    tells the frontend, as booleans only, which providers have their
+    environment variable set.
+
+    NOTE(review): ``api_key`` values saved in the JSON file are part of
+    ``model_dump()`` and are returned unchanged; only keys that live in
+    environment variables stay hidden. Confirm this is intended.
+    """
+    settings = load_settings()
+
+    def _merge_env(
+        provider_settings: ProviderSettings,
+        key_env: str = "",
+        model_env: str = "",
+        url_env: str = "",
+        default_model: str = "",
+        default_url: str = "",
+    ) -> dict:
+        # Precedence per field: JSON value > env var > code default.
+        # key_env is accepted by the signature but never read: keys are not merged.
+        merged = provider_settings.model_dump()
+        for field, env_name, fallback in (
+            ("model", model_env, default_model),
+            ("base_url", url_env, default_url),
+        ):
+            if merged.get(field):
+                continue  # explicitly saved by an admin
+            if env_name:
+                merged[field] = os.getenv(env_name, "").strip() or fallback or None
+            elif fallback:
+                merged[field] = fallback
+        return merged
+
+    env_sources = {
+        # Essential tier: DeepSeek V3.2 — best quality/price ratio (March 2026)
+        "openrouter": {
+            "key_env": "OPENROUTER_API_KEY",
+            "model_env": "OPENROUTER_MODEL",
+            "default_model": "deepseek/deepseek-v3.2",
+        },
+        # Premium tier: Claude 3.5 Haiku — maximum accuracy on complex documents
+        "openrouter_premium": {
+            "key_env": "OPENROUTER_API_KEY",
+            "model_env": "OPENROUTER_PREMIUM_MODEL",
+            "default_model": "anthropic/claude-3.5-haiku",
+        },
+        "openai": {
+            "key_env": "OPENAI_API_KEY",
+            "model_env": "OPENAI_MODEL",
+            "default_model": "gpt-4o-mini",
+        },
+        "deepl": {"key_env": "DEEPL_API_KEY"},
+        "zai": {
+            "key_env": "ZAI_API_KEY",
+            "model_env": "ZAI_MODEL",
+            "url_env": "ZAI_BASE_URL",
+            "default_model": "grok-2-1212",
+            "default_url": "https://api.x.ai/v1",
+        },
+        "ollama": {
+            "url_env": "OLLAMA_BASE_URL",
+            "model_env": "OLLAMA_MODEL",
+            "default_url": "http://localhost:11434",
+            "default_model": "llama3",
+        },
+    }
+
+    payload = settings.model_dump()
+    for provider_name, sources in env_sources.items():
+        payload[provider_name] = _merge_env(getattr(settings, provider_name), **sources)
+
+    def _env_is_set(var_name: str) -> bool:
+        return bool(os.getenv(var_name, "").strip())
+
+    # Booleans only: the actual values are never sent to the frontend.
+    has_openrouter = _env_is_set("OPENROUTER_API_KEY")
+    env_info = {
+        "deepl": _env_is_set("DEEPL_API_KEY"),
+        "openai": _env_is_set("OPENAI_API_KEY"),
+        "openrouter": has_openrouter,
+        "openrouter_premium": has_openrouter,  # same key, different model
+        "zai": _env_is_set("ZAI_API_KEY"),
+        "ollama": _env_is_set("OLLAMA_BASE_URL"),
+    }
+    return JSONResponse(
+        status_code=200,
+        content={"data": payload, "env_info": env_info, "meta": {}},
+    )
+
+
+@router.put("/settings")
+async def update_settings(
+    settings: SettingsConfig, admin_id: str = Depends(require_admin)
+):
+    """Replace the stored provider settings with the submitted document and
+    echo the saved document back."""
+    save_settings(settings)
+    logger.info(f"admin_settings_updated by {admin_id}")
+
+    body = {"data": settings.model_dump(), "meta": {}}
+    return JSONResponse(status_code=200, content=body)
+
+
+@router.post("/providers/{provider}/test")
+async def test_provider(provider: str, admin_id: str = Depends(require_admin)):
+    """Test a provider connection. Works even when provider is disabled.
+    Always falls back to env vars when the JSON api_key is empty.
+
+    Success responses carry ``{"available": True, ...}``. Failures carry
+    ``{"available": False, "error": ...}`` with status 400 (no API key), 401
+    (xAI key rejected) or 500 (upstream or unexpected error). An unknown
+    provider name yields 404.
+    """
+    settings = load_settings()
+
+    # `provider` comes verbatim from the URL. Accept it only when it names a
+    # ProviderSettings field: a bare getattr() also matches non-provider
+    # attributes such as "fallback_chain" or "model_dump".
+    provider_config = getattr(settings, provider, None)
+    if not isinstance(provider_config, ProviderSettings):
+        return JSONResponse(
+            status_code=404,
+            content={
+                "error": "PROVIDER_NOT_FOUND",
+                "message": f"Provider {provider} not found",
+            },
+        )
+
+    def _key(json_val: Optional[str], env_var: str) -> str:
+        # API key: JSON config first, then the environment variable.
+        return (json_val or "").strip() or os.getenv(env_var, "").strip()
+
+    def _base_url(json_val: Optional[str], env_var: str, default: str) -> str:
+        # Base URL: JSON config, then a non-empty env var, then the default.
+        # An env var that is set but empty must not yield an empty URL.
+        return (json_val or "").strip() or os.getenv(env_var, "").strip() or default
+
+    def _missing_key(label: str) -> JSONResponse:
+        return JSONResponse(
+            status_code=400,
+            content={
+                "available": False,
+                "error": f"Aucune clé API {label} trouvée (JSON ou .env)",
+            },
+        )
+
+    try:
+        if provider == "google":
+            from deep_translator import GoogleTranslator
+
+            result = GoogleTranslator(source="auto", target="en").translate("bonjour")
+            return JSONResponse(
+                status_code=200, content={"available": True, "test_result": result}
+            )
+
+        elif provider == "deepl":
+            api_key = _key(provider_config.api_key, "DEEPL_API_KEY")
+            if not api_key:
+                return _missing_key("DeepL")
+            import deepl
+
+            translator = deepl.Translator(api_key)
+            usage = translator.get_usage()
+            return JSONResponse(
+                status_code=200, content={"available": True, "usage": str(usage)}
+            )
+
+        elif provider == "openai":
+            api_key = _key(provider_config.api_key, "OPENAI_API_KEY")
+            if not api_key:
+                return _missing_key("OpenAI")
+            import openai as _openai
+
+            client = _openai.OpenAI(api_key=api_key)
+            models = list(client.models.list())
+            return JSONResponse(
+                status_code=200,
+                content={"available": True, "models_count": len(models)},
+            )
+
+        elif provider == "ollama":
+            import requests as _requests
+
+            # Strip a trailing slash so the request never targets "//api/tags".
+            base_url = _base_url(
+                provider_config.base_url, "OLLAMA_BASE_URL", "http://localhost:11434"
+            ).rstrip("/")
+            resp = _requests.get(f"{base_url}/api/tags", timeout=5)
+            if resp.ok:
+                return JSONResponse(
+                    status_code=200,
+                    content={
+                        "available": True,
+                        "models": resp.json().get("models", []),
+                    },
+                )
+            return JSONResponse(
+                status_code=500, content={"available": False, "error": str(resp.text)}
+            )
+
+        elif provider in ("openrouter", "openrouter_premium"):
+            # Both tiers use OPENROUTER_API_KEY; _key() already falls back to it.
+            api_key = _key(provider_config.api_key, "OPENROUTER_API_KEY")
+            if not api_key:
+                return _missing_key("OpenRouter")
+            import requests as _requests
+
+            resp = _requests.get(
+                "https://openrouter.ai/api/v1/auth/key",
+                headers={"Authorization": f"Bearer {api_key}"},
+                timeout=10,
+            )
+            if resp.ok:
+                data = resp.json()
+                return JSONResponse(
+                    status_code=200,
+                    content={"available": True, "label": data.get("data", {}).get("label", "OK")},
+                )
+            return JSONResponse(
+                status_code=500,
+                content={"available": False, "error": f"HTTP {resp.status_code}: {resp.text[:200]}"},
+            )
+
+        elif provider == "zai":
+            api_key = _key(provider_config.api_key, "ZAI_API_KEY")
+            if not api_key:
+                return _missing_key("xAI")
+            import openai as _openai
+
+            base_url = _base_url(
+                provider_config.base_url, "ZAI_BASE_URL", "https://api.x.ai/v1"
+            )
+            client = _openai.OpenAI(api_key=api_key, base_url=base_url)
+            try:
+                models = list(client.models.list())
+                return JSONResponse(
+                    status_code=200,
+                    content={
+                        "available": True,
+                        "models_count": len(models),
+                        "sample_models": [m.id for m in models[:5]],
+                    },
+                )
+            except _openai.AuthenticationError:
+                return JSONResponse(
+                    status_code=401,
+                    content={"available": False, "error": "Clé xAI invalide"},
+                )
+
+        else:
+            # Reached only for a ProviderSettings field that has no test branch yet.
+            return JSONResponse(
+                status_code=404,
+                content={"available": False, "error": "Provider inconnu"},
+            )
+
+    except Exception as e:
+        logger.error(f"Provider test failed: {e}")
+        return JSONResponse(
+            status_code=500, content={"available": False, "error": str(e)}
+        )
+
+
+@router.get("/providers/ollama/models")
+async def list_ollama_models(admin_id: str = Depends(require_admin)):
+    """List available models from Ollama server"""
+    import requests
+    from config import config as app_config
+
+    # Base URL precedence: saved settings, then application config, then localhost.
+    ollama_settings = load_settings().ollama
+    base_url = (
+        ollama_settings.base_url
+        or app_config.OLLAMA_BASE_URL
+        or "http://localhost:11434"
+    )
+
+    try:
+        response = requests.get(f"{base_url}/api/tags", timeout=5)
+        if not response.ok:
+            return JSONResponse(
+                status_code=500,
+                content={
+                    "error": "OLLAMA_UNAVAILABLE",
+                    "message": f"Ollama returned: {response.text}",
+                },
+            )
+
+        # Keep only the fields the admin UI needs, with neutral fallbacks.
+        models = [
+            {
+                "name": entry.get("name", ""),
+                "size": entry.get("size", 0),
+                "modified_at": entry.get("modified_at", ""),
+            }
+            for entry in response.json().get("models", [])
+        ]
+        return JSONResponse(status_code=200, content={"data": models, "meta": {}})
+    except requests.exceptions.ConnectionError:
+        return JSONResponse(
+            status_code=503,
+            content={
+                "error": "OLLAMA_CONNECTION_ERROR",
+                "message": f"Cannot connect to Ollama at {base_url}",
+            },
+        )
+    except Exception as exc:
+        logger.error(f"List Ollama models failed: {exc}")
+        return JSONResponse(
+            status_code=500,
+            content={"error": "INTERNAL_ERROR", "message": str(exc)},
+        )
diff --git a/routes/api_key_routes.py b/routes/api_key_routes.py
new file mode 100644
index 0000000..d3e2116
--- /dev/null
+++ b/routes/api_key_routes.py
@@ -0,0 +1,330 @@
+"""
+API Key management routes for Pro users
+Story 3.1: Modèle API Key & Génération
+"""
+
+import hashlib
+import logging
+import secrets
+from datetime import datetime, timezone
+from typing import Optional
+
+logger = logging.getLogger(__name__)
+
+from fastapi import APIRouter, Depends, Request, HTTPException
+from fastapi.responses import JSONResponse
+from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
+from pydantic import BaseModel, Field
+
+from services.auth_service import verify_token, get_user_by_id
+from database.connection import get_sync_session
+from database.models import ApiKey
+
+# All routes in this module are mounted under /api/v1/api-keys.
+router = APIRouter(prefix="/api/v1/api-keys", tags=["API Keys v1"])
+# auto_error=False: _require_auth handles missing credentials itself and
+# raises its own 401 payload.
+security = HTTPBearer(auto_error=False)
+
+# Upper bound on ACTIVE keys per user, enforced in create_api_key.
+MAX_API_KEYS_PER_USER = 10
+
+
+# Optional request body of POST /api/v1/api-keys.
+class ApiKeyCreateRequest(BaseModel):
+ # Display name of the key (at most 100 characters).
+ name: Optional[str] = Field(default="Default API Key", max_length=100)
+
+
+# Shape of a freshly created key. NOTE(review): no handler in this module
+# references this model; create_api_key builds the same fields as a plain dict.
+class ApiKeyResponse(BaseModel):
+ id: str
+ # Raw secret key.
+ key: str
+ name: str
+ key_prefix: str
+ created_at: str
+
+
+class ProUser:
+    """Wrapper for authenticated Pro user with tier info.
+
+    Attributes:
+        id: Copied from ``user.id``.
+        email: Copied from ``user.email``; None when the user has no such attribute.
+    """
+
+    # Plan names that grant Pro-level access.
+    _PAID_PLANS = ("pro", "business", "enterprise")
+
+    def __init__(self, user):
+        self._user = user
+        self.id = user.id
+        self.email = getattr(user, "email", None)
+        self._tier = None  # resolved lazily by the ``tier`` property
+
+    @property
+    def tier(self) -> str:
+        """Effective tier of the wrapped user, computed once and cached.
+
+        A truthy ``user.tier`` is returned as-is. Otherwise ``user.plan`` is
+        mapped to "pro" for pro/business/enterprise and to "free" for anything
+        else. The plan may be an enum-like object exposing ``.value`` or a
+        plain string.
+        """
+        if self._tier is None:
+            explicit_tier = getattr(self._user, "tier", None)
+            if explicit_tier:
+                self._tier = explicit_tier
+            else:
+                plan = getattr(self._user, "plan", None)
+                # Unwrap enum-like plans; plain strings are used directly.
+                plan_name = getattr(plan, "value", plan)
+                self._tier = "pro" if plan_name in self._PAID_PLANS else "free"
+        return self._tier
+
+
+def _require_auth(
+    credentials: Optional[HTTPAuthorizationCredentials] = Depends(security),
+):
+    """Dependency that resolves the bearer token to a user (any authenticated user).
+
+    Returns:
+        The user returned by ``get_user_by_id`` for the token's ``sub`` claim.
+
+    Raises:
+        HTTPException: 401 ``UNAUTHORIZED`` when credentials are missing, the
+            token does not verify, ``sub`` is absent or not a string, or no
+            user matches.
+    """
+
+    def _unauthorized(message: str) -> HTTPException:
+        return HTTPException(
+            status_code=401,
+            detail={"error": "UNAUTHORIZED", "message": message},
+        )
+
+    if not credentials:
+        raise _unauthorized("Authentification requise")
+
+    payload = verify_token(credentials.credentials)
+    if not payload:
+        raise _unauthorized("Token invalide ou expiré")
+
+    subject = payload.get("sub")
+    if not (subject and isinstance(subject, str)):
+        raise _unauthorized("Token invalide")
+
+    user = get_user_by_id(subject)
+    if not user:
+        raise _unauthorized("Utilisateur non trouvé")
+
+    return user
+
+
+def _require_pro_user(user=Depends(_require_auth)) -> ProUser:
+    """Dependency that admits only users whose resolved tier is "pro".
+
+    Raises:
+        HTTPException: 403 ``PRO_FEATURE_REQUIRED`` for every other tier.
+    """
+    candidate = ProUser(user)
+    if candidate.tier == "pro":
+        return candidate
+
+    raise HTTPException(
+        status_code=403,
+        detail={
+            "error": "PRO_FEATURE_REQUIRED",
+            "message": "Cette fonctionnalité nécessite un abonnement Pro",
+        },
+    )
+
+
+def _generate_api_key() -> tuple[str, str, str]:
+    """
+    Create a new random API key of the form ``sk_live_<url-safe token>``.
+
+    Returns:
+        tuple: (raw_key, key_hash, key_prefix), where key_hash is the
+        hexadecimal SHA-256 digest of the raw key and key_prefix is its first
+        8 characters.
+
+    NOTE(review): the first 8 characters are always the constant "sk_live_",
+    so key_prefix cannot tell two keys apart. Confirm whether a longer prefix
+    was intended (and whether the column can hold one).
+    """
+    raw_key = "sk_live_" + secrets.token_urlsafe(32)
+    digest = hashlib.sha256(raw_key.encode()).hexdigest()
+    return raw_key, digest, raw_key[:8]
+
+
+@router.post("")
+async def create_api_key(
+    request: Request,
+    body: Optional[ApiKeyCreateRequest] = None,
+    user: ProUser = Depends(_require_pro_user),
+):
+    """
+    Issue a new API key to the authenticated Pro user.
+
+    Only the SHA-256 hash and the prefix are stored; the raw key appears in
+    this response and nowhere else.
+
+    Returns:
+        201: API key created successfully (key shown ONCE)
+        401: Authentication required
+        403: Pro subscription required
+        429: Maximum number of active API keys reached
+    """
+    requested_name = body.name if body else None
+    key_name = requested_name or "Default API Key"
+
+    raw_key, key_hash, key_prefix = _generate_api_key()
+
+    with get_sync_session() as session:
+        # Revoked keys do not count towards the limit.
+        active_keys = session.query(ApiKey).filter(
+            ApiKey.user_id == user.id,
+            ApiKey.is_active == True,
+        )
+        if active_keys.count() >= MAX_API_KEYS_PER_USER:
+            return JSONResponse(
+                status_code=429,
+                content={
+                    "error": "API_KEY_LIMIT_REACHED",
+                    "message": f"Maximum de {MAX_API_KEYS_PER_USER} clés API atteint. Supprimez une clé existante.",
+                },
+            )
+
+        api_key = ApiKey(
+            user_id=user.id,
+            name=key_name,
+            key_hash=key_hash,
+            key_prefix=key_prefix,
+            is_active=True,
+            scopes=["translate"],
+            created_at=datetime.now(timezone.utc),
+        )
+        session.add(api_key)
+        session.commit()
+        session.refresh(api_key)
+
+        created_at = api_key.created_at
+        key_data = {
+            "id": api_key.id,
+            "key": raw_key,
+            "name": api_key.name,
+            "key_prefix": api_key.key_prefix,
+            "created_at": created_at.isoformat() if created_at else None,
+        }
+
+    return JSONResponse(status_code=201, content={"data": key_data, "meta": {}})
+
+
+@router.get("")
+async def list_api_keys(
+    request: Request,
+    user: ProUser = Depends(_require_pro_user),
+):
+    """
+    Return every API key of the authenticated Pro user, newest first.
+
+    Active and revoked keys are both listed. The secret is never included,
+    only the stored prefix.
+
+    Returns:
+        200: List of API keys
+        401: Authentication required
+        403: Pro subscription required
+    """
+
+    def _iso(moment):
+        return moment.isoformat() if moment else None
+
+    with get_sync_session() as session:
+        owned_keys = (
+            session.query(ApiKey)
+            .filter(ApiKey.user_id == user.id)
+            .order_by(ApiKey.created_at.desc())
+            .all()
+        )
+
+        items = []
+        for key in owned_keys:
+            items.append(
+                {
+                    "id": key.id,
+                    "name": key.name,
+                    "key_prefix": key.key_prefix,
+                    "is_active": key.is_active,
+                    "last_used_at": _iso(key.last_used_at),
+                    "usage_count": key.usage_count,
+                    "created_at": _iso(key.created_at),
+                }
+            )
+
+    return JSONResponse(
+        status_code=200,
+        content={"data": items, "meta": {"total": len(items)}},
+    )
+
+
+@router.delete("/{key_id}")
+async def revoke_api_key(
+    key_id: str,
+    user: ProUser = Depends(_require_pro_user),
+):
+    """
+    Revoke one of the authenticated Pro user's API keys.
+
+    The key is soft-deleted: is_active becomes False and revoked_at is set.
+    The lookup is restricted to active keys owned by the caller.
+
+    Returns:
+        200: API key revoked successfully
+        400: key_id is not a valid UUID
+        401: Authentication required
+        403: Pro subscription required
+        404: API key not found, not owned by the caller, or already revoked
+    """
+    import uuid as uuid_module
+
+    # Reject malformed identifiers before touching the database.
+    try:
+        uuid_module.UUID(key_id)
+    except ValueError:
+        return JSONResponse(
+            status_code=400,
+            content={
+                "error": "INVALID_KEY_ID",
+                "message": "Format d'identifiant de clé API invalide.",
+            },
+        )
+
+    with get_sync_session() as session:
+        # Security: owner and active-state are part of the query itself.
+        target = (
+            session.query(ApiKey)
+            .filter(
+                ApiKey.id == key_id,
+                ApiKey.user_id == user.id,
+                ApiKey.is_active == True,
+            )
+            .first()
+        )
+
+        if not target:
+            return JSONResponse(
+                status_code=404,
+                content={
+                    "error": "API_KEY_NOT_FOUND",
+                    "message": "Clé API non trouvée, n'appartient pas à l'utilisateur ou déjà révoquée.",
+                },
+            )
+
+        revoked_at = datetime.now(timezone.utc)
+        target.is_active = False
+        target.revoked_at = revoked_at
+        session.commit()
+        revoked_id = target.id
+
+    logger.info(f"API key {key_id} revoked by user {user.id}")
+
+    return JSONResponse(
+        status_code=200,
+        content={
+            "data": {
+                "id": revoked_id,
+                "revoked": True,
+                "revoked_at": revoked_at.isoformat(),
+            },
+            "meta": {},
+        },
+    )
diff --git a/routes/api_v1_router.py b/routes/api_v1_router.py
new file mode 100644
index 0000000..844c582
--- /dev/null
+++ b/routes/api_v1_router.py
@@ -0,0 +1,25 @@
+"""
+Main API v1 Router
+Aggregates all v1 sub-routers under /api/v1 prefix
+Story 3.5: API Versioning
+"""
+
+from fastapi import APIRouter
+
+# The aggregate router declares no prefix of its own.
+# NOTE(review): the /api/v1 paths presumably come from the prefixes declared by
+# each sub-router (e.g. routes.api_key_routes uses "/api/v1/api-keys") — confirm.
+router = APIRouter(tags=["API v1"])
+
+# NOTE(review): these imports sit below the router creation, presumably to
+# sidestep circular imports — confirm before moving them to the top.
+from routes.translate_routes import router_v1 as translate_router
+from routes.auth_routes import router_v1 as auth_router
+from routes.api_key_routes import router as api_key_router
+from routes.admin_routes import router as admin_router
+from routes.legacy_routes import router as legacy_router
+from routes.glossary_routes import router as glossary_router
+from routes.prompt_routes import router as prompt_router
+
+# Mount every sub-router; the tag passed here labels its routes in the OpenAPI docs.
+router.include_router(translate_router, tags=["Translation"])
+router.include_router(auth_router, tags=["Authentication"])
+router.include_router(api_key_router, tags=["API Keys"])
+router.include_router(admin_router, tags=["Admin"])
+router.include_router(legacy_router, tags=["Legacy"])
+router.include_router(glossary_router, tags=["Glossaries"])
+router.include_router(prompt_router, tags=["Prompts"])
diff --git a/routes/auth_routes.py b/routes/auth_routes.py
index eaa1941..374aec1 100644
--- a/routes/auth_routes.py
+++ b/routes/auth_routes.py
@@ -1,25 +1,48 @@
"""
Authentication and User API routes
+Story 3.6: Documentation OpenAPI complète avec exemples et codes d'erreur
"""
+
+import os
+from datetime import timedelta
from fastapi import APIRouter, HTTPException, Depends, Header, Request
+from fastapi.responses import JSONResponse
from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
-from pydantic import BaseModel, EmailStr
+from pydantic import BaseModel, EmailStr, ValidationError as PydanticValidationError
from typing import Optional, Dict, Any
-from models.subscription import UserCreate, UserLogin, UserResponse, PlanType, PLANS, CREDIT_PACKAGES
+from models.subscription import (
+ UserCreate,
+ UserLogin,
+ UserResponse,
+ PlanType,
+ PLANS,
+ CREDIT_PACKAGES,
+)
from services.auth_service import (
- create_user, authenticate_user, get_user_by_id,
- create_access_token, create_refresh_token, verify_token,
- check_usage_limits, update_user
+ create_user,
+ authenticate_user,
+ get_user_by_id,
+ create_access_token,
+ create_refresh_token,
+ verify_token,
+ revoke_token_jti,
+ check_usage_limits,
+ update_user,
+ get_user_by_email,
+ verify_password,
+ PASSLIB_AVAILABLE,
)
from services.payment_service import (
- create_checkout_session, create_credits_checkout,
- cancel_subscription, get_billing_portal_url,
- handle_webhook, is_stripe_configured
+ create_checkout_session,
+ create_credits_checkout,
+ cancel_subscription,
+ get_billing_portal_url,
+ handle_webhook,
+ is_stripe_configured,
)
-router = APIRouter(prefix="/api/auth", tags=["Authentication"])
security = HTTPBearer(auto_error=False)
@@ -44,31 +67,26 @@ class CreditsCheckoutRequest(BaseModel):
package_index: int
-# Dependency to get current user
-async def get_current_user(credentials: Optional[HTTPAuthorizationCredentials] = Depends(security)):
+async def get_current_user(
+ credentials: Optional[HTTPAuthorizationCredentials] = Depends(security),
+):
+ # Optional-auth dependency: yields None when no credentials are sent or
+ # verify_token rejects the token; otherwise whatever get_user_by_id returns
+ # for the token's "sub" claim.
if not credentials:
return None
-
payload = verify_token(credentials.credentials)
if not payload:
return None
-
- user = get_user_by_id(payload.get("sub"))
- return user
+ return get_user_by_id(payload.get("sub"))
async def require_user(credentials: HTTPAuthorizationCredentials = Depends(security)):
+ # Mandatory-auth dependency: raises HTTPException 401 when credentials are
+ # missing, the token does not verify, or no user matches the "sub" claim.
if not credentials:
raise HTTPException(status_code=401, detail="Not authenticated")
-
payload = verify_token(credentials.credentials)
if not payload:
raise HTTPException(status_code=401, detail="Invalid or expired token")
-
user = get_user_by_id(payload.get("sub"))
if not user:
raise HTTPException(status_code=401, detail="User not found")
-
return user
@@ -81,6 +99,7 @@ def user_to_response(user) -> UserResponse:
name=user.name,
avatar_url=user.avatar_url,
plan=user.plan,
+ tier=user.plan,
subscription_status=user.subscription_status,
docs_translated_this_month=user.docs_translated_this_month,
pages_translated_this_month=user.pages_translated_this_month,
@@ -94,188 +113,610 @@ def user_to_response(user) -> UserResponse:
"providers": plan_limits["providers"],
"features": plan_limits["features"],
"api_access": plan_limits.get("api_access", False),
- }
+ },
)
-# Auth endpoints
-@router.post("/register", response_model=TokenResponse)
-async def register(user_create: UserCreate):
- """Register a new user"""
+# ============== API v1 Router ==============
+
+router_v1 = APIRouter(prefix="/api/v1/auth", tags=["Authentication v1"])
+
+
+def _has_email_validation_error(exc: PydanticValidationError) -> bool:
+    """Tell whether any error reported by ``exc`` is located on the ``email`` field.
+
+    An error counts only when its ``loc`` is a list or tuple that contains "email".
+    """
+    locations = (error.get("loc", ()) for error in exc.errors())
+    return any(
+        isinstance(location, (list, tuple)) and "email" in location
+        for location in locations
+    )
+
+
+# NOTE(review): the handler also returns WEAK_PASSWORD and REGISTRATION_FAILED
+# (400) and AUTH_HASHING_UNAVAILABLE (503); the description and `responses`
+# below do not list them.
+@router_v1.post(
+ "/register",
+ status_code=201,
+ summary="Inscription d'un nouvel utilisateur",
+ description="""
+Créer un nouveau compte utilisateur.
+
+**Paramètres requis:**
+- `email`: Adresse email valide (sera utilisée pour la connexion)
+- `password`: Mot de passe (minimum 8 caractères)
+- `name`: Nom complet (optionnel)
+
+**Réponse:**
+- HTTP 201 avec les données de l'utilisateur créé
+- Le `tier` par défaut est "free"
+
+**Codes d'erreur:**
+- `INVALID_EMAIL`: Format d'email invalide
+- `EMAIL_EXISTS`: Un compte existe déjà avec cet email
+- `INVALID_REQUEST`: Données d'inscription invalides
+ """,
+ responses={
+ 201: {
+ "description": "Utilisateur créé avec succès",
+ "content": {
+ "application/json": {
+ "example": {
+ "data": {
+ "id": "usr_abc123def456",
+ "email": "utilisateur@exemple.com",
+ "tier": "free",
+ },
+ "meta": {},
+ }
+ }
+ },
+ },
+ 400: {
+ "description": "Erreur de validation",
+ "content": {
+ "application/json": {
+ "examples": {
+ "INVALID_EMAIL": {
+ "summary": "Format d'email invalide",
+ "value": {
+ "error": "INVALID_EMAIL",
+ "message": "Format d'email invalide",
+ },
+ },
+ "EMAIL_EXISTS": {
+ "summary": "Email déjà utilisé",
+ "value": {
+ "error": "EMAIL_EXISTS",
+ "message": "Un compte existe déjà avec cette adresse email",
+ },
+ },
+ }
+ }
+ },
+ },
+ },
+)
+async def register_v1(request: Request):
+ """Register a new user (API v1); returns 201 with the created user's data."""
+ # The body is parsed by hand so malformed JSON and validation failures map
+ # to this API's own error codes.
+ try:
+ body = await request.json()
+ except Exception:
+ return JSONResponse(
+ status_code=400,
+ content={
+ "error": "INVALID_REQUEST",
+ "message": "Corps de requete JSON invalide",
+ },
+ )
+
+ try:
+ user_create = UserCreate.model_validate(body)
+ except PydanticValidationError as exc:
+ # Email errors take precedence over password errors.
+ if _has_email_validation_error(exc):
+ return JSONResponse(
+ status_code=400,
+ content={
+ "error": "INVALID_EMAIL",
+ "message": "Format d'email invalide",
+ },
+ )
+ # Check if it's a password validation error
+ for error in exc.errors():
+ loc = error.get("loc", ())
+ if "password" in loc:
+ msg = error.get("msg", "")
+ # If password is missing entirely, return INVALID_REQUEST
+ if "Field required" in msg or "required" in msg.lower():
+ break
+ # Otherwise it's a weak password
+ # Translate common pydantic messages to French
+ if "at least 8 characters" in msg.lower() or "8 caractères" in msg:
+ msg = "Le mot de passe doit contenir au moins 8 caractères"
+ return JSONResponse(
+ status_code=400,
+ content={
+ "error": "WEAK_PASSWORD",
+ "message": msg,
+ },
+ )
+ return JSONResponse(
+ status_code=400,
+ content={
+ "error": "INVALID_REQUEST",
+ "message": "Données d'inscription invalides",
+ },
+ )
+
+ # In production, registration must rely on passlib[bcrypt] hashing.
+ if (
+ os.getenv("ENVIRONMENT", "development").lower() == "production"
+ and not PASSLIB_AVAILABLE
+ ):
+ return JSONResponse(
+ status_code=503,
+ content={
+ "error": "AUTH_HASHING_UNAVAILABLE",
+ "message": "Service d'inscription temporairement indisponible",
+ },
+ )
+
try:
user = create_user(user_create)
- except ValueError as e:
- raise HTTPException(status_code=400, detail=str(e))
-
- access_token = create_access_token(user.id)
- refresh_token = create_refresh_token(user.id)
-
- return TokenResponse(
- access_token=access_token,
- refresh_token=refresh_token,
- user=user_to_response(user)
+ except ValueError as exc:
+ # create_user reports failures as ValueError; a duplicate email is
+ # recognised by its message text.
+ msg = str(exc).strip().lower()
+ if "email already registered" in msg or "email already" in msg:
+ return JSONResponse(
+ status_code=400,
+ content={
+ "error": "EMAIL_EXISTS",
+ "message": "Un compte existe déjà avec cette adresse email",
+ },
+ )
+ return JSONResponse(
+ status_code=400,
+ content={
+ "error": "REGISTRATION_FAILED",
+ "message": "Impossible de créer le compte avec les données fournies",
+ },
+ )
+
+ # No tokens are issued here; the response carries only the new user's data.
+ return JSONResponse(
+ status_code=201,
+ content={
+ "data": {
+ "id": user.id,
+ "email": user.email,
+ "tier": user.plan.value,
+ },
+ "meta": {},
+ },
)
-@router.post("/login", response_model=TokenResponse)
-async def login(credentials: UserLogin):
- """Login with email and password"""
- user = authenticate_user(credentials.email, credentials.password)
+@router_v1.post(
+ "/logout",
+ summary="Déconnexion utilisateur",
+ description="""
+Déconnecte l'utilisateur en révoquant son token d'accès.
+
+**Authentification requise:** Bearer token dans le header Authorization
+
+**Paramètres optionnels:**
+- `refresh_token`: Peut être fourni pour révoquer également le refresh token
+
+**Réponse:**
+- HTTP 200 avec message de confirmation
+
+**Codes d'erreur:**
+- `TOKEN_MISSING`: Token d'authentification manquant
+- `TOKEN_INVALID`: Token invalide ou expiré
+ """,
+ responses={
+ 200: {
+ "description": "Déconnexion réussie",
+ "content": {
+ "application/json": {
+ "example": {"data": {"message": "Déconnexion réussie"}, "meta": {}}
+ }
+ },
+ },
+ 401: {
+ "description": "Erreur d'authentification",
+ "content": {
+ "application/json": {
+ "examples": {
+ "TOKEN_MISSING": {
+ "summary": "Token manquant",
+ "value": {
+ "error": "TOKEN_MISSING",
+ "message": "Token d'authentification requis",
+ },
+ },
+ "TOKEN_INVALID": {
+ "summary": "Token invalide",
+ "value": {
+ "error": "TOKEN_INVALID",
+ "message": "Token invalide ou expiré",
+ },
+ },
+ }
+ }
+ },
+ },
+ },
+)
+async def logout_v1(request: Request):
+ """Log the user out (API v1): revokes the access token and, optionally, the refresh token."""
+ auth_header = request.headers.get("Authorization", "")
+ if not auth_header.startswith("Bearer "):
+ return JSONResponse(
+ status_code=401,
+ content={
+ "error": "TOKEN_MISSING",
+ "message": "Token d'authentification requis",
+ },
+ )
+ # 7 == len("Bearer "): keep only the token part of the header.
+ access_token = auth_header[7:]
+
+ payload = verify_token(access_token)
+ if not payload:
+ return JSONResponse(
+ status_code=401,
+ content={
+ "error": "TOKEN_INVALID",
+ "message": "Token invalide ou expiré",
+ },
+ )
+
+ # A token without a "jti" claim cannot be revoked and is skipped.
+ jti = payload.get("jti")
+ if jti:
+ revoke_token_jti(jti, float(payload.get("exp", 0)))
+
+ # The body is optional. Any failure while handling the refresh token
+ # (no body, invalid JSON, non-object payload) is deliberately ignored.
+ try:
+ body = await request.json()
+ refresh_token = body.get("refresh_token")
+ if refresh_token:
+ refresh_payload = verify_token(refresh_token)
+ if refresh_payload and refresh_payload.get("jti"):
+ revoke_token_jti(
+ refresh_payload["jti"],
+ float(refresh_payload.get("exp", 0)),
+ )
+ except Exception:
+ pass
+
+ return JSONResponse(
+ status_code=200,
+ content={"data": {"message": "Déconnexion réussie"}, "meta": {}},
+ )
+
+
+@router_v1.post(
+ "/login",
+ summary="Connexion utilisateur",
+ description="""
+Authentifie un utilisateur et retourne les tokens JWT.
+
+**Paramètres requis:**
+- `email`: Adresse email de l'utilisateur
+- `password`: Mot de passe
+
+**Réponse:**
+- HTTP 200 avec `access_token` (expire dans 15 min) et `refresh_token` (expire dans 7 jours)
+
+**Codes d'erreur:**
+- `INVALID_REQUEST`: Corps de requête JSON invalide
+- `INVALID_EMAIL`: Format d'email invalide
+- `INVALID_CREDENTIALS`: Email ou mot de passe incorrect
+ """,
+ responses={
+ 200: {
+ "description": "Connexion réussie",
+ "content": {
+ "application/json": {
+ "example": {
+ "data": {
+ "access_token": "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9...",
+ "refresh_token": "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9...",
+ "token_type": "bearer",
+ },
+ "meta": {},
+ }
+ }
+ },
+ },
+ 400: {
+ "description": "Erreur de validation",
+ "content": {
+ "application/json": {
+ "examples": {
+ "INVALID_REQUEST": {
+ "summary": "Corps invalide",
+ "value": {
+ "error": "INVALID_REQUEST",
+ "message": "Corps de requete JSON invalide",
+ },
+ },
+ "INVALID_EMAIL": {
+ "summary": "Email invalide",
+ "value": {
+ "error": "INVALID_EMAIL",
+ "message": "Format d'email invalide",
+ },
+ },
+ }
+ }
+ },
+ },
+ 401: {
+ "description": "Identifiants invalides",
+ "content": {
+ "application/json": {
+ "example": {
+ "error": "INVALID_CREDENTIALS",
+ "message": "Email ou mot de passe incorrect",
+ }
+ }
+ },
+ },
+ },
+)
+async def login_v1(request: Request):
+ """Log a user in (API v1): returns an access_token (15 min) and a refresh_token (7 days)."""
+ # The body is parsed by hand so malformed JSON and validation failures map
+ # to this API's own error codes.
+ try:
+ body = await request.json()
+ except Exception:
+ return JSONResponse(
+ status_code=400,
+ content={
+ "error": "INVALID_REQUEST",
+ "message": "Corps de requete JSON invalide",
+ },
+ )
+
+ try:
+ user_login = UserLogin.model_validate(body)
+ except PydanticValidationError as exc:
+ if _has_email_validation_error(exc):
+ return JSONResponse(
+ status_code=400,
+ content={
+ "error": "INVALID_EMAIL",
+ "message": "Format d'email invalide",
+ },
+ )
+ # Login-specific wording; this message previously reused the
+ # registration text.
+ return JSONResponse(
+ status_code=400,
+ content={
+ "error": "INVALID_REQUEST",
+ "message": "Données de connexion invalides",
+ },
+ )
+
+ user = get_user_by_email(user_login.email)
if not user:
- raise HTTPException(status_code=401, detail="Invalid email or password")
-
- access_token = create_access_token(user.id)
- refresh_token = create_refresh_token(user.id)
-
- return TokenResponse(
- access_token=access_token,
- refresh_token=refresh_token,
- user=user_to_response(user)
+ # Constant-time dummy verify to prevent user enumeration via response time
+ verify_password("__dummy__", "$2b$12$mBw4RxPJBaaS1FtEZcT/E.E35YUCk1Zx0ICzIzNUSdzHmQmko1.WW")
+ return JSONResponse(
+ status_code=401,
+ content={
+ "error": "INVALID_CREDENTIALS",
+ "message": "Email ou mot de passe incorrect",
+ },
+ )
+
+ # Same 401 payload as for an unknown email, so the two cases look alike.
+ if not verify_password(user_login.password, user.password_hash):
+ return JSONResponse(
+ status_code=401,
+ content={
+ "error": "INVALID_CREDENTIALS",
+ "message": "Email ou mot de passe incorrect",
+ },
+ )
+
+ access_token = create_access_token(
+ user.id, tier=user.plan.value, expires_delta=timedelta(minutes=15)
+ )
+ refresh_token = create_refresh_token(user.id, expires_delta=timedelta(days=7))
+
+ return JSONResponse(
+ status_code=200,
+ content={
+ "data": {
+ "access_token": access_token,
+ "refresh_token": refresh_token,
+ "token_type": "bearer",
+ },
+ "meta": {},
+ },
)
-@router.post("/refresh", response_model=TokenResponse)
-async def refresh_tokens(request: RefreshRequest):
- """Refresh access token"""
- payload = verify_token(request.refresh_token)
- if not payload or payload.get("type") != "refresh":
- raise HTTPException(status_code=401, detail="Invalid refresh token")
-
+@router_v1.post("/refresh")
+async def refresh_v1(request: Request):
+    """Refresh tokens (API v1) — accepte refresh_token en corps, retourne nouvel access_token et refresh_token."""
+    try:
+        body = await request.json()
+    except Exception:  # the request body could not be read as JSON
+        return JSONResponse(
+            status_code=400,
+            content={
+                "error": "INVALID_REQUEST",
+                "message": "Corps de requete JSON invalide",
+            },
+        )
+
+    if not isinstance(body, dict):  # parsed JSON that is not an object gets the same 400 body
+        return JSONResponse(
+            status_code=400,
+            content={
+                "error": "INVALID_REQUEST",
+                "message": "Corps de requete JSON invalide",
+            },
+        )
+
+    refresh_token = body.get("refresh_token")
+    if (  # reject a missing, non-string or whitespace-only refresh_token
+        not refresh_token
+        or not isinstance(refresh_token, str)
+        or not refresh_token.strip()
+    ):
+        return JSONResponse(
+            status_code=400,
+            content={
+                "error": "INVALID_REQUEST",
+                "message": "Refresh token requis",
+            },
+        )
+
+    payload = verify_token(refresh_token)
+    if not payload:  # verify_token returned a falsy payload
+        return JSONResponse(
+            status_code=401,
+            content={
+                "error": "TOKEN_EXPIRED",
+                "message": "Token invalide ou expiré",
+            },
+        )
+
+    if payload.get("type") != "refresh":  # only payloads typed "refresh" may be exchanged
+        return JSONResponse(
+            status_code=401,
+            content={
+                "error": "TOKEN_EXPIRED",
+                "message": "Token invalide ou expiré",
+            },
+        )
+
     user = get_user_by_id(payload.get("sub"))
     if not user:
-        raise HTTPException(status_code=401, detail="User not found")
-
-    access_token = create_access_token(user.id)
-    refresh_token = create_refresh_token(user.id)
-
-    return TokenResponse(
-        access_token=access_token,
-        refresh_token=refresh_token,
-        user=user_to_response(user)
+        return JSONResponse(  # unknown user: same 401 body as an invalid token
+            status_code=401,
+            content={
+                "error": "TOKEN_EXPIRED",
+                "message": "Token invalide ou expiré",
+            },
+        )
+
+    access_token = create_access_token(
+        user.id, tier=user.plan.value, expires_delta=timedelta(minutes=15)
+    )
+    new_refresh_token = create_refresh_token(user.id, expires_delta=timedelta(days=7))  # a new refresh token is issued on each call
+
+    return JSONResponse(
+        status_code=200,
+        content={
+            "data": {
+                "access_token": access_token,
+                "refresh_token": new_refresh_token,
+                "token_type": "bearer",
+            },
+            "meta": {},
+        },
     )
-@router.get("/me", response_model=UserResponse)
-async def get_me(user=Depends(require_user)):
- """Get current user info"""
- return user_to_response(user)
-
-
-@router.get("/usage")
-async def get_usage(user=Depends(require_user)):
- """Get current usage and limits"""
- return check_usage_limits(user)
-
-
-@router.put("/settings")
-async def update_settings(settings: Dict[str, Any], user=Depends(require_user)):
- """Update user settings"""
- allowed_fields = [
- "default_source_lang", "default_target_lang", "default_provider",
- "ollama_endpoint", "ollama_model", "name"
- ]
-
- updates = {k: v for k, v in settings.items() if k in allowed_fields}
- updated_user = update_user(user.id, updates)
-
- if not updated_user:
- raise HTTPException(status_code=400, detail="Failed to update settings")
-
- return user_to_response(updated_user)
-
-
-# Plans endpoint (public)
-@router.get("/plans")
-async def get_plans():
- """Get all available plans"""
- plans = []
- for plan_type, config in PLANS.items():
- plans.append({
- "id": plan_type.value,
- "name": config["name"],
- "price_monthly": config["price_monthly"],
- "price_yearly": config["price_yearly"],
- "features": config["features"],
- "docs_per_month": config["docs_per_month"],
- "max_pages_per_doc": config["max_pages_per_doc"],
- "max_file_size_mb": config["max_file_size_mb"],
- "providers": config["providers"],
- "api_access": config.get("api_access", False),
- "popular": plan_type == PlanType.PRO,
- })
- return {"plans": plans, "credit_packages": CREDIT_PACKAGES}
-
-
-# Payment endpoints
-@router.post("/checkout/subscription")
-async def checkout_subscription(request: CheckoutRequest, user=Depends(require_user)):
- """Create Stripe checkout session for subscription"""
- if not is_stripe_configured():
- # Demo mode - just upgrade the user
- update_user(user.id, {"plan": request.plan.value})
- return {"demo_mode": True, "message": "Upgraded in demo mode", "plan": request.plan.value}
-
- result = await create_checkout_session(
- user.id,
- request.plan,
- request.billing_period
+@router_v1.get(
+    "/me",
+    summary="Informations utilisateur",
+    description="Retourne les informations de l'utilisateur authentifié.",
+)
+async def get_me_v1(user=Depends(require_user)):  # `user` is supplied by the require_user dependency
+    return JSONResponse(  # v1 envelope: {"data": <user_to_response(user) dumped in JSON mode>, "meta": {}}
+        status_code=200,
+        content={"data": user_to_response(user).model_dump(mode="json"), "meta": {}},
     )
-
- if "error" in result:
- raise HTTPException(status_code=400, detail=result["error"])
-
- return result
-@router.post("/checkout/credits")
-async def checkout_credits(request: CreditsCheckoutRequest, user=Depends(require_user)):
- """Create Stripe checkout session for credits"""
- if not is_stripe_configured():
- # Demo mode - add credits directly
- from services.auth_service import add_credits
- credits = CREDIT_PACKAGES[request.package_index]["credits"]
- add_credits(user.id, credits)
- return {"demo_mode": True, "message": f"Added {credits} credits in demo mode"}
-
- result = await create_credits_checkout(user.id, request.package_index)
-
- if "error" in result:
- raise HTTPException(status_code=400, detail=result["error"])
-
- return result
+@router_v1.get(
+    "/plans",
+    summary="Liste des forfaits disponibles",
+    description="Retourne tous les forfaits et packs de crédits disponibles (endpoint public).",
+)
+async def get_plans_v1():
+    """Return every entry of ``PLANS`` and ``CREDIT_PACKAGES`` as a 200 ``{"data", "meta"}`` JSON body.
+
+    Optional plan keys fall back to ``None`` or ``False`` (``-1`` for ``max_chars_per_month``).
+    """
+    from models.subscription import PLANS, CREDIT_PACKAGES
+
+    plan_entries = [
+        {
+            "id": plan_type.value,
+            "name": plan["name"],
+            "price_monthly": plan["price_monthly"],
+            "price_yearly": plan["price_yearly"],
+            "docs_per_month": plan["docs_per_month"],
+            "max_pages_per_doc": plan["max_pages_per_doc"],
+            "max_file_size_mb": plan["max_file_size_mb"],
+            "max_chars_per_month": plan.get("max_chars_per_month", -1),
+            "providers": plan["providers"],
+            "features": plan["features"],
+            "ai_translation": plan.get("ai_translation", False),
+            "ai_tier": plan.get("ai_tier"),
+            "api_access": plan.get("api_access", False),
+            "priority_processing": plan.get("priority_processing", False),
+            "team_seats": plan.get("team_seats"),
+            "highlight": plan.get("highlight"),
+            "description": plan.get("description"),
+            "badge": plan.get("badge"),
+            "popular": plan.get("badge") == "POPULAIRE",
+        }
+        for plan_type, plan in PLANS.items()
+    ]
+
+    package_entries = [
+        {
+            "credits": pkg["credits"],
+            "price": pkg["price"],
+            "price_per_credit": round(pkg["price"] / pkg["credits"], 4),
+            "popular": pkg.get("popular", False),
+        }
+        for pkg in CREDIT_PACKAGES
+    ]
+
+    envelope = {"data": {"plans": plan_entries, "credit_packages": package_entries}, "meta": {}}
+    return JSONResponse(status_code=200, content=envelope)
-@router.post("/subscription/cancel")
-async def cancel_user_subscription(user=Depends(require_user)):
- """Cancel subscription"""
- result = await cancel_subscription(user.id)
-
- if "error" in result:
- raise HTTPException(status_code=400, detail=result["error"])
-
- return result
+@router_v1.get(
+    "/usage",
+    summary="Utilisation et limites",
+    description="Retourne l'utilisation actuelle et les limites du plan de l'utilisateur.",
+)
+async def get_usage_v1(user=Depends(require_user)):
+    """Wrap the result of ``check_usage_limits(user)`` in the v1 ``{"data", "meta"}`` envelope (HTTP 200)."""
+    usage = check_usage_limits(user)
+    envelope = {"data": usage, "meta": {}}
+    return JSONResponse(status_code=200, content=envelope)
-@router.get("/billing-portal")
-async def get_billing_portal(user=Depends(require_user)):
- """Get Stripe billing portal URL"""
+@router_v1.get(
+    "/billing-portal",
+    summary="Portail de facturation",
+    description="Retourne l'URL du portail de facturation Stripe.",
+)
+async def get_billing_portal_v1(user=Depends(require_user)):  # `user` is supplied by the require_user dependency
     url = await get_billing_portal_url(user.id)
-
     if not url:
-        raise HTTPException(status_code=400, detail="Billing portal not available")
-
-    return {"url": url}
+        return JSONResponse(  # a falsy URL is reported as a structured 400 body instead of an HTTPException
+            status_code=400,
+            content={
+                "error": "BILLING_UNAVAILABLE",
+                "message": "Portail de facturation non disponible",
+            },
+        )
+    return JSONResponse(status_code=200, content={"data": {"url": url}, "meta": {}})
-# Stripe webhook
-@router.post("/webhook/stripe")
+# ============== Stripe webhook (versioned) ==============
+
+
+@router_v1.post("/webhook/stripe")
async def stripe_webhook(request: Request, stripe_signature: str = Header(None)):
"""Handle Stripe webhooks"""
payload = await request.body()
-
+
result = await handle_webhook(payload, stripe_signature or "")
-
+
if "error" in result:
raise HTTPException(status_code=400, detail=result["error"])
-
+
return result
diff --git a/routes/deps.py b/routes/deps.py
new file mode 100644
index 0000000..82229b1
--- /dev/null
+++ b/routes/deps.py
@@ -0,0 +1,106 @@
+"""
+Shared authentication dependencies for routes.
+Story 3.9: Extracted from api_key_routes.py and glossary_routes.py
+"""
+
+import logging
+from typing import Optional
+
+from fastapi import Depends, HTTPException
+from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
+
+from services.auth_service import verify_token, get_user_by_id
+
+logger = logging.getLogger(__name__)
+
+security = HTTPBearer(auto_error=False)
+
+
+class ProUser:
+    """Wraps an authenticated user, exposing ``id``, ``email`` and a lazily resolved ``tier``."""
+
+    def __init__(self, user):
+        self._user = user
+        self.id = user.id
+        self.email = getattr(user, "email", None)
+        self._tier = None  # filled in by the first read of ``tier``
+
+    @property
+    def tier(self) -> str:
+        """Tier of the wrapped user, resolved on first access and then cached.
+        A truthy ``tier`` attribute on the user is returned as-is; otherwise the result is
+        "pro" when ``plan.value`` is "pro", "business" or "enterprise", and "free" if not.
+        """
+        if self._tier is not None:
+            return self._tier
+        explicit = getattr(self._user, "tier", None)
+        if explicit:
+            self._tier = explicit
+        else:
+            plan = getattr(self._user, "plan", None)
+            paid = bool(plan) and hasattr(plan, "value") and plan.value in ("pro", "business", "enterprise")
+            self._tier = "pro" if paid else "free"
+        return self._tier
+
+
+def require_auth(
+    credentials: Optional[HTTPAuthorizationCredentials] = Depends(security),
+):
+    """Dependency returning the user of a valid, non-refresh bearer JWT; raises HTTPException 401 otherwise."""
+    if not credentials:  # `security` uses auto_error=False, so a missing header arrives here as None
+        raise HTTPException(
+            status_code=401,
+            detail={
+                "error": "UNAUTHORIZED",
+                "message": "Authentification requise",
+            },
+        )
+
+    payload = verify_token(credentials.credentials)
+    if not payload or payload.get("type") == "refresh":  # a refresh token must not authenticate API calls
+        raise HTTPException(
+            status_code=401,
+            detail={
+                "error": "UNAUTHORIZED",
+                "message": "Token invalide ou expiré",
+            },
+        )
+
+    sub = payload.get("sub")
+    if not sub or not isinstance(sub, str):  # the subject claim must be a non-empty string
+        raise HTTPException(
+            status_code=401,
+            detail={
+                "error": "UNAUTHORIZED",
+                "message": "Token invalide",
+            },
+        )
+
+    user = get_user_by_id(sub)
+    if not user:
+        raise HTTPException(
+            status_code=401,
+            detail={
+                "error": "UNAUTHORIZED",
+                "message": "Utilisateur non trouvé",
+            },
+        )
+
+    return user
+
+
+def require_pro_user(user=Depends(require_auth)) -> ProUser:
+    """Wrap the authenticated user in ``ProUser`` and require its tier to be "pro".
+
+    Raises ``HTTPException`` 403 (``PRO_FEATURE_REQUIRED``), carrying the
+    caller's current tier in ``details``, when the tier is anything else.
+    """
+    wrapped = ProUser(user)
+    if wrapped.tier == "pro":
+        return wrapped
+    denial = {
+        "error": "PRO_FEATURE_REQUIRED",
+        "message": "Cette fonctionnalité nécessite un abonnement Pro.",
+        "details": {"current_tier": wrapped.tier},
+    }
+    raise HTTPException(status_code=403, detail=denial)
diff --git a/routes/glossary_routes.py b/routes/glossary_routes.py
new file mode 100644
index 0000000..8b1eec7
--- /dev/null
+++ b/routes/glossary_routes.py
@@ -0,0 +1,625 @@
+"""
+Glossary CRUD routes for Pro users
+Story 3.9: Glossaires - Endpoint CRUD
+"""
+
+import json
+import logging
+from datetime import datetime, timezone
+from pathlib import Path
+from typing import Optional
+
+from fastapi import APIRouter, Depends, HTTPException, Query
+from fastapi.responses import JSONResponse
+from sqlalchemy.orm import joinedload
+
+from routes.deps import require_pro_user, ProUser
+from services.auth_service import verify_token, get_user_by_id
+from database.connection import get_sync_session
+from database.models import Glossary, GlossaryTerm
+from schemas.glossary_schemas import (
+ GlossaryCreate,
+ GlossaryUpdate,
+ GlossaryResponse,
+ GlossaryListItem,
+ GlossaryListResponse,
+ GlossaryDetailResponse,
+)
+
+logger = logging.getLogger(__name__)
+
+GLOSSARIES_DIR = Path(__file__).parent.parent / "data/glossaries"  # holds index.json and the template files
+
+router = APIRouter(prefix="/api/v1/glossaries", tags=["Glossaries v1"])
+
+# Maximum number of terms accepted in one glossary (create, update and template import)
+MAX_TERMS_PER_GLOSSARY = 500
+
+# Pagination: default and upper bound of the ``per_page`` query parameter
+DEFAULT_PAGE_SIZE = 50
+MAX_PAGE_SIZE = 100
+
+
+def _format_term(term: GlossaryTerm) -> dict:
+    """Serialise a ``GlossaryTerm`` into a JSON-ready dict.
+
+    ``created_at`` is rendered with ``isoformat()``, or ``None`` when it is unset.
+    """
+    stamp = term.created_at
+    created_at = stamp.isoformat() if stamp else None
+    return {"id": term.id, "source": term.source, "target": term.target, "created_at": created_at}
+
+
+def _format_glossary(glossary: Glossary) -> dict:
+    """Serialise a ``Glossary`` together with its terms; unset timestamps become ``None``."""
+    formatted = {"id": glossary.id, "name": glossary.name}
+    formatted["terms"] = list(map(_format_term, glossary.terms)) if glossary.terms else []
+    # Both timestamps are rendered the same way, in the original key order.
+    for field in ("created_at", "updated_at"):
+        moment = getattr(glossary, field)
+        formatted[field] = moment.isoformat() if moment else None
+    return formatted
+
+
+@router.post(
+    "",
+    response_model=GlossaryDetailResponse,
+    status_code=201,
+    summary="Créer un glossaire",
+    description="""
+    Crée un nouveau glossaire avec une liste de termes source→cible.
+
+    **Restriction:** Uniquement disponible pour les utilisateurs Pro.
+
+    **Exemple de requête:**
+    ```json
+    {
+        "name": "Glossaire Technique FR-EN",
+        "terms": [
+            {"source": "serveur", "target": "server"},
+            {"source": "base de données", "target": "database"}
+        ]
+    }
+    ```
+    """,
+)
+async def create_glossary(
+    body: GlossaryCreate,
+    user: ProUser = Depends(require_pro_user),
+):
+    """Create a glossary owned by the authenticated Pro user; answers 201, or a 400/500 error body."""
+    # Reject the request before touching the database when it carries too many terms
+    if len(body.terms) > MAX_TERMS_PER_GLOSSARY:
+        return JSONResponse(
+            status_code=400,
+            content={
+                "error": "TERMS_LIMIT_EXCEEDED",
+                "message": f"Maximum {MAX_TERMS_PER_GLOSSARY} terms per glossary allowed.",
+            },
+        )
+
+    try:
+        with get_sync_session() as session:
+            glossary = Glossary(
+                user_id=user.id,
+                name=body.name,
+                created_at=datetime.now(timezone.utc),
+                updated_at=datetime.now(timezone.utc),
+            )
+
+            for term_data in body.terms:
+                term = GlossaryTerm(
+                    glossary=glossary,  # linked through the relationship rather than by glossary_id
+                    source=term_data.source,
+                    target=term_data.target,
+                    created_at=datetime.now(timezone.utc),
+                )
+                session.add(term)
+
+            session.add(glossary)
+            session.commit()
+            session.refresh(glossary)  # reload the row before it is serialised below
+
+            logger.info(
+                f"Glossary created: id={glossary.id}, user_id={user.id}, "
+                f"name={glossary.name}, terms_count={len(body.terms)}"
+            )
+
+            return JSONResponse(  # built inside the session block, while `glossary` is still attached
+                status_code=201,
+                content={
+                    "data": _format_glossary(glossary),
+                    "meta": {},
+                },
+            )
+    except Exception as e:  # any failure is logged and reported as a generic 500
+        logger.error(f"Failed to create glossary for user {user.id}: {e}")
+        return JSONResponse(
+            status_code=500,
+            content={
+                "error": "DATABASE_ERROR",
+                "message": "Une erreur est survenue lors de la création du glossaire.",
+            },
+        )
+
+
+@router.get(
+    "",
+    response_model=GlossaryListResponse,
+    summary="Lister les glossaires",
+    description="Retourne la liste paginée des glossaires de l'utilisateur.",
+)
+async def list_glossaries(
+    page: int = Query(1, ge=1, description="Page number"),
+    per_page: int = Query(
+        DEFAULT_PAGE_SIZE, ge=1, le=MAX_PAGE_SIZE, description="Items per page"
+    ),
+    user: ProUser = Depends(require_pro_user),
+):
+    """List the authenticated Pro user's glossaries, newest first, one page at a time."""
+    try:
+        with get_sync_session() as session:
+            # Total number of glossaries owned by the user, independent of the page
+            total_count = (
+                session.query(Glossary).filter(Glossary.user_id == user.id).count()
+            )
+
+            # Get paginated results with eager loading of terms (fixes N+1)
+            offset = (page - 1) * per_page  # `page` is 1-based
+            glossaries = (
+                session.query(Glossary)
+                .options(joinedload(Glossary.terms))
+                .filter(Glossary.user_id == user.id)
+                .order_by(Glossary.created_at.desc())
+                .offset(offset)
+                .limit(per_page)
+                .all()
+            )
+
+            items = [
+                GlossaryListItem(
+                    id=g.id,
+                    name=g.name,
+                    terms_count=len(g.terms) if g.terms else 0,  # the terms are loaded only to be counted
+                    created_at=g.created_at,
+                )
+                for g in glossaries
+            ]
+
+            total_pages = (total_count + per_page - 1) // per_page  # ceiling division; 0 when there are no rows
+
+            return JSONResponse(
+                status_code=200,
+                content={
+                    "data": [item.model_dump(mode="json") for item in items],
+                    "meta": {
+                        "total": total_count,
+                        "page": page,
+                        "per_page": per_page,
+                        "total_pages": total_pages,
+                    },
+                },
+            )
+    except Exception as e:  # any failure is logged and reported as a generic 500
+        logger.error(f"Failed to list glossaries for user {user.id}: {e}")
+        return JSONResponse(
+            status_code=500,
+            content={
+                "error": "DATABASE_ERROR",
+                "message": "Une erreur est survenue lors de la récupération des glossaires.",
+            },
+        )
+
+
+@router.get(
+    "/{glossary_id}",
+    response_model=GlossaryDetailResponse,
+    summary="Détail d'un glossaire",
+    description="Retourne les détails d'un glossaire avec tous ses termes.",
+)
+async def get_glossary(
+    glossary_id: str,
+    user: ProUser = Depends(require_pro_user),
+):
+    """Return one of the caller's glossaries with its terms; 400 for a malformed id, 404 when not found."""
+    # Validate UUID format
+    try:
+        import uuid
+
+        uuid.UUID(glossary_id)  # only parsed for validation; the raw string is used in the query
+    except ValueError:
+        return JSONResponse(
+            status_code=400,
+            content={
+                "error": "INVALID_GLOSSARY_ID",
+                "message": "Format d'identifiant de glossaire invalide.",
+            },
+        )
+
+    try:
+        with get_sync_session() as session:
+            glossary = (
+                session.query(Glossary)
+                .options(joinedload(Glossary.terms))
+                .filter(Glossary.id == glossary_id, Glossary.user_id == user.id)  # scoped to the caller
+                .first()
+            )
+
+            if not glossary:  # unknown id, or a glossary owned by another user: same 404
+                return JSONResponse(
+                    status_code=404,
+                    content={
+                        "error": "GLOSSARY_NOT_FOUND",
+                        "message": "Glossaire introuvable ou vous n'avez pas accès à cette ressource.",
+                    },
+                )
+
+            return JSONResponse(
+                status_code=200,
+                content={
+                    "data": _format_glossary(glossary),
+                    "meta": {},
+                },
+            )
+    except Exception as e:  # any failure is logged and reported as a generic 500
+        logger.error(f"Failed to get glossary {glossary_id}: {e}")
+        return JSONResponse(
+            status_code=500,
+            content={
+                "error": "DATABASE_ERROR",
+                "message": "Une erreur est survenue.",
+            },
+        )
+
+
+@router.patch(
+    "/{glossary_id}",
+    response_model=GlossaryDetailResponse,
+    summary="Mettre à jour un glossaire",
+    description="Met à jour le nom et/ou les termes d'un glossaire existant.",
+)
+async def update_glossary(
+    glossary_id: str,
+    body: GlossaryUpdate,
+    user: ProUser = Depends(require_pro_user),
+):
+    """Update the name and/or replace the whole term list of one of the caller's glossaries."""
+    # Validate UUID format
+    try:
+        import uuid
+
+        uuid.UUID(glossary_id)  # only parsed for validation; the raw string is used in the query
+    except ValueError:
+        return JSONResponse(
+            status_code=400,
+            content={
+                "error": "INVALID_GLOSSARY_ID",
+                "message": "Format d'identifiant de glossaire invalide.",
+            },
+        )
+
+    # Validate max terms if provided
+    if body.terms is not None and len(body.terms) > MAX_TERMS_PER_GLOSSARY:
+        return JSONResponse(
+            status_code=400,
+            content={
+                "error": "TERMS_LIMIT_EXCEEDED",
+                "message": f"Maximum {MAX_TERMS_PER_GLOSSARY} terms per glossary allowed.",
+            },
+        )
+
+    try:
+        with get_sync_session() as session:
+            glossary = (
+                session.query(Glossary)
+                .options(joinedload(Glossary.terms))
+                .filter(Glossary.id == glossary_id, Glossary.user_id == user.id)  # scoped to the caller
+                .first()
+            )
+
+            if not glossary:  # unknown id, or a glossary owned by another user: same 404
+                return JSONResponse(
+                    status_code=404,
+                    content={
+                        "error": "GLOSSARY_NOT_FOUND",
+                        "message": "Glossaire introuvable ou vous n'avez pas accès à cette ressource.",
+                    },
+                )
+
+            old_name = glossary.name  # kept only for the log line below
+
+            if body.name is not None:
+                glossary.name = body.name
+
+            if body.terms is not None:  # None leaves the terms untouched; a list (even empty) replaces them all
+                # Delete existing terms
+                session.query(GlossaryTerm).filter(
+                    GlossaryTerm.glossary_id == glossary.id
+                ).delete()
+
+                # Add new terms
+                for term_data in body.terms:
+                    term = GlossaryTerm(
+                        glossary_id=glossary.id,
+                        source=term_data.source,
+                        target=term_data.target,
+                        created_at=datetime.now(timezone.utc),
+                    )
+                    session.add(term)
+
+            glossary.updated_at = datetime.now(timezone.utc)  # bumped even when the body changes nothing
+            session.commit()
+            session.refresh(glossary)  # NOTE(review): presumably reloads `terms` after the bulk delete; confirm against the session settings
+
+            logger.info(
+                f"Glossary updated: id={glossary.id}, user_id={user.id}, "
+                f"old_name={old_name}, new_name={glossary.name}"
+            )
+
+            return JSONResponse(
+                status_code=200,
+                content={
+                    "data": _format_glossary(glossary),
+                    "meta": {},
+                },
+            )
+    except Exception as e:  # any failure is logged and reported as a generic 500
+        logger.error(f"Failed to update glossary {glossary_id}: {e}")
+        return JSONResponse(
+            status_code=500,
+            content={
+                "error": "DATABASE_ERROR",
+                "message": "Une erreur est survenue lors de la mise à jour.",
+            },
+        )
+
+
+@router.delete(
+    "/{glossary_id}",
+    status_code=204,
+    summary="Supprimer un glossaire",
+    description="Supprime un glossaire et tous ses termes associés.",
+)
+async def delete_glossary(
+    glossary_id: str,
+    user: ProUser = Depends(require_pro_user),
+):
+    """Delete one of the caller's glossaries; answers 204 with an empty body, or a 400/404/500 error body."""
+    # Validate UUID format
+    try:
+        import uuid
+
+        uuid.UUID(glossary_id)  # only parsed for validation; the raw string is used in the query
+    except ValueError:
+        return JSONResponse(
+            status_code=400,
+            content={
+                "error": "INVALID_GLOSSARY_ID",
+                "message": "Format d'identifiant de glossaire invalide.",
+            },
+        )
+
+    try:
+        with get_sync_session() as session:
+            glossary = (
+                session.query(Glossary)
+                .filter(Glossary.id == glossary_id, Glossary.user_id == user.id)  # scoped to the caller
+                .first()
+            )
+
+            if not glossary:  # unknown id, or a glossary owned by another user: same 404
+                return JSONResponse(
+                    status_code=404,
+                    content={
+                        "error": "GLOSSARY_NOT_FOUND",
+                        "message": "Glossaire introuvable ou vous n'avez pas accès à cette ressource.",
+                    },
+                )
+
+            glossary_name = glossary.name  # captured before the delete, for the log line
+            session.delete(glossary)
+            session.commit()
+
+            logger.info(
+                f"Glossary deleted: id={glossary_id}, user_id={user.id}, "
+                f"name={glossary_name}"
+            )
+
+            # 204 No Content must not carry a body: JSONResponse(content=None) would send
+            # the JSON text "null", so a plain empty Response is returned instead.
+            from fastapi import Response
+            return Response(status_code=204)
+    except Exception as e:  # any failure is logged and reported as a generic 500
+        logger.error(f"Failed to delete glossary {glossary_id}: {e}")
+        return JSONResponse(
+            status_code=500,
+            content={
+                "error": "DATABASE_ERROR",
+                "message": "Une erreur est survenue lors de la suppression.",
+            },
+        )
+
+
+@router.get(
+    "/templates/list",
+    summary="Lister les templates de glossaires",
+    description="""
+    Retourne la liste des glossaires pré-définis disponibles (templates).
+
+    Ces templates couvrent différents domaines : juridique, technologie, finance, médical, marketing, RH, scientifique, e-commerce.
+
+    Utilisez ensuite `POST /glossaries/import` pour importer un template dans votre compte.
+    """,
+)
+async def list_glossary_templates():
+    """Return the template catalogue described by ``index.json`` in ``GLOSSARIES_DIR``.
+
+    Each entry of the index's ``categories`` mapping becomes one item, with defaults
+    for missing keys. Answers 404 when the index file does not exist and 500 on any
+    other failure.
+    """
+    try:
+        index_path = GLOSSARIES_DIR / "index.json"
+        if not index_path.exists():
+            return JSONResponse(
+                status_code=404,
+                content={
+                    "error": "TEMPLATES_NOT_FOUND",
+                    "message": "Les templates de glossaires ne sont pas disponibles.",
+                },
+            )
+
+        with open(index_path, "r", encoding="utf-8") as handle:
+            catalogue = json.load(handle).get("categories", {})
+
+        templates = [
+            {
+                "id": key,
+                "name": entry.get("name", key),
+                "description": entry.get("description", ""),
+                "source_lang": entry.get("source_lang", "fr"),
+                "target_lang": entry.get("target_lang", "en"),
+                "terms_count": entry.get("terms_count", 0),
+                "file": entry.get("file", f"{key}_fr_en.json"),
+            }
+            for key, entry in catalogue.items()
+        ]
+        return JSONResponse(
+            status_code=200,
+            content={"data": templates, "meta": {"total": len(templates)}},
+        )
+    except Exception as e:
+        logger.error(f"Failed to list glossary templates: {e}")
+        return JSONResponse(
+            status_code=500,
+            content={
+                "error": "TEMPLATES_ERROR",
+                "message": "Une erreur est survenue lors de la récupération des templates.",
+            },
+        )
+
+
+@router.post(
+    "/import",
+    response_model=GlossaryDetailResponse,
+    status_code=201,
+    summary="Importer un template de glossaire",
+    description="""
+    Importe un glossaire pré-défini dans votre compte.
+
+    **Paramètres de requête (query string):**
+    - `template_id`: L'identifiant du template (ex: "legal", "tech", "finance", "medical", "marketing", "hr", "scientific", "ecommerce")
+    - `name` (optionnel): Nom personnalisé pour le glossaire. Si non fourni, le nom du template sera utilisé.
+
+    **Exemple:**
+    ```
+    POST /api/v1/glossaries/import?template_id=legal&name=Mon%20glossaire%20juridique
+    ```
+
+    Les paramètres sont lus dans la chaîne de requête (query string) ;
+    aucun corps JSON n'est attendu.
+    """,
+)
+async def import_glossary_template(
+    template_id: str = Query(..., description="ID du template à importer"),
+    name: Optional[str] = Query(None, description="Nom personnalisé pour le glossaire"),
+    user: ProUser = Depends(require_pro_user),
+):
+    """Copy a template from ``GLOSSARIES_DIR`` into a new glossary owned by the caller (query parameters only)."""
+    try:
+        index_path = GLOSSARIES_DIR / "index.json"
+        if not index_path.exists():
+            return JSONResponse(
+                status_code=404,
+                content={
+                    "error": "TEMPLATES_NOT_FOUND",
+                    "message": "Les templates de glossaires ne sont pas disponibles.",
+                },
+            )
+
+        with open(index_path, "r", encoding="utf-8") as f:
+            index_data = json.load(f)
+
+        categories = index_data.get("categories", {})
+        if template_id not in categories:  # only ids listed in the index are accepted
+            return JSONResponse(
+                status_code=404,
+                content={
+                    "error": "TEMPLATE_NOT_FOUND",
+                    "message": f"Template '{template_id}' introuvable. Templates disponibles: {', '.join(categories.keys())}",
+                },
+            )
+
+        template_info = categories[template_id]
+        template_file = template_info.get("file", f"{template_id}_fr_en.json")  # file name comes from the index entry
+        template_path = GLOSSARIES_DIR / template_file
+
+        if not template_path.exists():
+            return JSONResponse(
+                status_code=404,
+                content={
+                    "error": "TEMPLATE_FILE_NOT_FOUND",
+                    "message": f"Le fichier de template '{template_file}' est introuvable.",
+                },
+            )
+
+        with open(template_path, "r", encoding="utf-8") as f:
+            template_data = json.load(f)
+
+        terms = template_data.get("terms", [])
+        if len(terms) > MAX_TERMS_PER_GLOSSARY:  # same cap as for user-created glossaries
+            return JSONResponse(
+                status_code=400,
+                content={
+                    "error": "TERMS_LIMIT_EXCEEDED",
+                    "message": f"Le template contient {len(terms)} termes, ce qui dépasse la limite de {MAX_TERMS_PER_GLOSSARY}.",
+                },
+            )
+
+        glossary_name = name or template_data.get("name", template_info.get("name", template_id))  # caller's name wins
+
+        with get_sync_session() as session:
+            glossary = Glossary(
+                user_id=user.id,
+                name=glossary_name,
+                created_at=datetime.now(timezone.utc),
+                updated_at=datetime.now(timezone.utc),
+            )
+
+            for term_data in terms:
+                term = GlossaryTerm(
+                    glossary=glossary,  # linked through the relationship rather than by glossary_id
+                    source=term_data.get("source", ""),
+                    target=term_data.get("target", ""),
+                    created_at=datetime.now(timezone.utc),
+                )
+                session.add(term)
+
+            session.add(glossary)
+            session.commit()
+            session.refresh(glossary)  # reload the row before it is serialised below
+
+            logger.info(
+                f"Glossary template imported: id={glossary.id}, user_id={user.id}, "
+                f"template={template_id}, name={glossary_name}, terms_count={len(terms)}"
+            )
+
+            return JSONResponse(
+                status_code=201,
+                content={
+                    "data": _format_glossary(glossary),
+                    "meta": {
+                        "template_id": template_id,
+                        "imported_terms": len(terms),
+                    },
+                },
+            )
+    except Exception as e:  # file, JSON and database failures are all reported as a generic 500
+        logger.error(f"Failed to import glossary template {template_id}: {e}")
+        return JSONResponse(
+            status_code=500,
+            content={
+                "error": "IMPORT_ERROR",
+                "message": "Une erreur est survenue lors de l'import du template.",
+            },
+        )
diff --git a/routes/legacy_routes.py b/routes/legacy_routes.py
new file mode 100644
index 0000000..8a371dd
--- /dev/null
+++ b/routes/legacy_routes.py
@@ -0,0 +1,661 @@
+"""
+Legacy API v1 Endpoints
+Endpoints migrated from main.py that don't fit in other routers
+Story 3.5: API Versioning
+"""
+
+import logging
+import os
+from pathlib import Path
+from typing import Optional
+
+from fastapi import APIRouter, File, Form, UploadFile, HTTPException, Request
+from fastapi.responses import FileResponse, JSONResponse
+
+from config import config
+from utils import file_handler
+
+logger = logging.getLogger(__name__)
+
+router = APIRouter(prefix="/api/v1", tags=["Legacy"])
+
+
+def _resolve_model(
+    cfg_model: Optional[str],
+    model_env: str,
+    default: str,
+) -> str:
+    """Pick the model name to use for a provider.
+
+    Precedence: the value from the settings JSON, then the environment
+    variable named by ``model_env``, then ``default``. Blank or
+    whitespace-only values count as "not set" at every step.
+    """
+    from_config = cfg_model.strip() if cfg_model else ""
+    if from_config:
+        return from_config
+
+    from_env = os.getenv(model_env, "").strip()
+    if from_env:
+        return from_env
+
+    return default
+
+
+@router.get("/providers/available")
+async def get_available_providers():
+    """List the translation providers that can currently be offered.
+
+    A provider is listed when it is enabled in the admin settings JSON or
+    when its API-key / base-URL environment variable is non-empty.
+
+    Rules:
+    - Google Translate is always listed.
+    - Ollama is listed only in dev mode (APP_ENV=development or
+      SHOW_OLLAMA=true).
+    - "openrouter" is presented as "Traduction IA Essentielle".
+    - "openrouter_premium" is presented as "Traduction IA Premium".
+
+    Returns:
+        ``{"providers": [...]}`` where each entry has ``id``, ``label``,
+        ``description``, ``mode`` and ``tier``; LLM entries also carry the
+        resolved ``model``.
+    """
+    from routes.admin_routes import load_settings
+
+    settings = load_settings()
+    app_env = os.getenv("APP_ENV", "production").lower()
+    show_ollama = os.getenv("SHOW_OLLAMA", "false").lower()
+    is_dev = app_env == "development" or show_ollama == "true"
+
+    def _env_set(var_name: str) -> bool:
+        # True when the environment variable holds a non-blank value.
+        return bool(os.getenv(var_name, "").strip())
+
+    def _is_enabled(name: str, key_var: str = "", url_var: str = "") -> bool:
+        # Settings JSON wins; otherwise a configured key or URL is enough.
+        cfg = getattr(settings, name, None)
+        if cfg and cfg.enabled:
+            return True
+        return bool(
+            (key_var and _env_set(key_var)) or (url_var and _env_set(url_var))
+        )
+
+    # Google Translate — always available
+    available = [
+        {
+            "id": "google",
+            "label": "Google Traduction",
+            "description": "Traduction rapide, 130+ langues, fiable",
+            "mode": "classic",
+            "tier": "free",
+        }
+    ]
+
+    # DeepL — if configured
+    if _is_enabled("deepl", key_var="DEEPL_API_KEY"):
+        available.append(
+            {
+                "id": "deepl",
+                "label": "DeepL",
+                "description": "Traduction professionnelle haute qualité (langues européennes)",
+                "mode": "classic",
+                "tier": "pro",
+            }
+        )
+
+    # LLM-backed providers, in display order. Each row is:
+    # (id, key env var, url env var, model env var, default model,
+    #  label, description, tier, dev-only flag)
+    llm_providers = (
+        (
+            "openrouter", "OPENROUTER_API_KEY", "",
+            "OPENROUTER_MODEL", "deepseek/deepseek-v3.2",
+            "Traduction IA Essentielle",
+            "IA rapide et économique — idéale pour documents techniques",
+            "pro", False,
+        ),
+        (
+            "openrouter_premium", "OPENROUTER_API_KEY", "",
+            "OPENROUTER_PREMIUM_MODEL", "anthropic/claude-3.5-haiku",
+            "Traduction IA Premium",
+            "IA haute précision (GPT-4, Claude) — meilleure qualité littéraire",
+            "business", False,
+        ),
+        (
+            "openai", "OPENAI_API_KEY", "",
+            "OPENAI_MODEL", "gpt-4o-mini",
+            "OpenAI GPT",
+            "Traduction IA via OpenAI directement",
+            "business", False,
+        ),
+        (
+            "zai", "ZAI_API_KEY", "",
+            "ZAI_MODEL", "grok-2-1212",
+            "Grok (xAI)",
+            "IA Grok par xAI — traduction avancée",
+            "business", False,
+        ),
+        (
+            "ollama", "", "OLLAMA_BASE_URL",
+            "OLLAMA_MODEL", "llama3",
+            "Ollama (Local)",
+            "Modèle LLM local — développement uniquement",
+            "dev", True,
+        ),
+    )
+
+    for (
+        provider_id, key_var, url_var, model_env, default_model,
+        label, description, tier, dev_only,
+    ) in llm_providers:
+        if dev_only and not is_dev:
+            continue
+        if not _is_enabled(provider_id, key_var=key_var, url_var=url_var):
+            continue
+
+        provider_cfg = getattr(settings, provider_id, None)
+        model = _resolve_model(
+            provider_cfg.model if provider_cfg else None,
+            model_env,
+            default_model,
+        )
+        available.append(
+            {
+                "id": provider_id,
+                "label": label,
+                "description": description,
+                "mode": "llm",
+                "tier": tier,
+                "model": model,
+            }
+        )
+
+    return {"providers": available}
+
+
+@router.get("/languages")
+async def get_supported_languages():
+    """Return the supported language codes and their English names.
+
+    The mapping is ordered by internet popularity; the order of the pairs
+    below is the order of the keys in the response.
+    """
+    ranked_languages = (
+        # Top 5 — dominant on the internet
+        ("en", "English"),
+        ("es", "Spanish"),
+        ("de", "German"),
+        ("fr", "French"),
+        ("ja", "Japanese"),
+        # Top 6-15
+        ("pt", "Portuguese"),
+        ("ru", "Russian"),
+        ("it", "Italian"),
+        ("zh-CN", "Chinese (Simplified)"),
+        ("zh-TW", "Chinese (Traditional)"),
+        ("pl", "Polish"),
+        ("nl", "Dutch"),
+        ("tr", "Turkish"),
+        ("ko", "Korean"),
+        ("ar", "Arabic"),
+        # Top 16-25
+        ("fa", "Persian (Farsi)"),
+        ("vi", "Vietnamese"),
+        ("id", "Indonesian"),
+        ("uk", "Ukrainian"),
+        ("sv", "Swedish"),
+        ("cs", "Czech"),
+        ("el", "Greek"),
+        ("he", "Hebrew"),
+        ("hi", "Hindi"),
+        ("ro", "Romanian"),
+        # Others
+        ("da", "Danish"),
+        ("fi", "Finnish"),
+        ("no", "Norwegian"),
+        ("hu", "Hungarian"),
+        ("th", "Thai"),
+        ("sk", "Slovak"),
+        ("bg", "Bulgarian"),
+        ("hr", "Croatian"),
+        ("ca", "Catalan"),
+        ("ms", "Malay"),
+    )
+
+    response = {
+        "supported_languages": dict(ranked_languages),
+        "note": "Supported languages may vary depending on the translation service configured",
+    }
+    return response
+
+
+@router.post("/translate-batch")
+async def translate_batch_documents(
+    files: list[UploadFile] = File(
+        ..., description="Multiple document files to translate"
+    ),
+    target_language: str = Form(..., description="Target language code"),
+    source_language: str = Form(default="auto", description="Source language code"),
+):
+    """Translate several uploaded documents, one after another.
+
+    Each file is validated, saved to the upload directory, translated by the
+    translator matching its extension, and reported individually. A failure
+    on one file is recorded in its result entry and does not stop the batch.
+
+    Args:
+        files: Uploaded documents (.xlsx, .docx or .pptx).
+        target_language: Target language code.
+        source_language: Source language code, "auto" by default.
+
+    Returns:
+        A dict with ``total_files``, ``successful``, ``failed`` and the
+        per-file ``results`` list.
+    """
+    from translators import excel_translator, word_translator, pptx_translator
+
+    translators_by_extension = {
+        ".xlsx": excel_translator,
+        ".docx": word_translator,
+        ".pptx": pptx_translator,
+    }
+
+    results = []
+
+    for file in files:
+        input_path = None
+        try:
+            file_extension = file_handler.validate_file_extension(file.filename)
+            file_handler.validate_file_size(file)
+
+            # An extension without a translator must be reported as a failure;
+            # otherwise the entry would claim success with no output file.
+            translator = translators_by_extension.get(file_extension)
+            if translator is None:
+                raise ValueError(f"Unsupported file extension: {file_extension}")
+
+            input_filename = file_handler.generate_unique_filename(
+                file.filename, "input"
+            )
+            output_filename = file_handler.generate_unique_filename(
+                file.filename, "translated"
+            )
+
+            input_path = config.UPLOAD_DIR / input_filename
+            output_path = config.OUTPUT_DIR / output_filename
+
+            file_handler.save_upload_file(file, input_path)
+
+            translator.translate_file(
+                input_path, output_path, target_language, source_language
+            )
+
+            results.append(
+                {
+                    "filename": file.filename,
+                    "status": "success",
+                    "output_file": output_filename,
+                    "download_url": f"/api/v1/download/{output_filename}",
+                }
+            )
+
+        except Exception:
+            logger.exception(f"Error processing {file.filename}")
+            results.append(
+                {
+                    "filename": file.filename,
+                    "status": "error",
+                    "error": "INTERNAL_ERROR",
+                    "message": "Erreur lors du traitement du fichier.",
+                    "details": {},
+                }
+            )
+        finally:
+            # Remove the uploaded copy on success AND on failure so failed
+            # translations do not pile up in the upload directory. Cleanup is
+            # best-effort: a problem here must not abort the whole batch.
+            if input_path is not None:
+                try:
+                    if input_path.exists():
+                        file_handler.cleanup_file(input_path)
+                except Exception:
+                    logger.warning(
+                        f"Could not remove temporary upload {input_path}",
+                        exc_info=True,
+                    )
+
+    return {
+        "total_files": len(files),
+        "successful": sum(1 for r in results if r["status"] == "success"),
+        "failed": sum(1 for r in results if r["status"] == "error"),
+        "results": results,
+    }
+
+
+@router.get("/download/{filename}")
+async def download_file(filename: str):
+    """Download a translated file from the output directory.
+
+    Args:
+        filename: Name of a file stored directly inside ``config.OUTPUT_DIR``.
+
+    Returns:
+        A ``FileResponse`` serving the file as ``application/octet-stream``.
+
+    Raises:
+        HTTPException: 404 when the name does not designate a regular file
+            located directly inside the output directory.
+    """
+    output_dir = Path(config.OUTPUT_DIR).resolve()
+    file_path = (output_dir / filename).resolve()
+
+    # The name comes straight from the URL: refuse anything that resolves
+    # outside the output directory (".." or backslash traversal) and anything
+    # that is not a regular file (a directory would pass a bare exists()).
+    if file_path.parent != output_dir or not file_path.is_file():
+        raise HTTPException(status_code=404, detail="File not found")
+
+    return FileResponse(
+        path=file_path,
+        filename=filename,
+        media_type="application/octet-stream",
+    )
+
+
+@router.delete("/cleanup/{filename}")
+async def cleanup_translated_file(filename: str):
+    """Delete a translated file from the output directory after download.
+
+    Args:
+        filename: Name of a file stored directly inside ``config.OUTPUT_DIR``.
+
+    Returns:
+        A dict with a confirmation ``message``.
+
+    Raises:
+        HTTPException: 404 when the name does not designate a regular file
+            located directly inside the output directory; 500 when the
+            deletion itself fails.
+    """
+    try:
+        output_dir = Path(config.OUTPUT_DIR).resolve()
+        file_path = (output_dir / filename).resolve()
+
+        # This endpoint deletes files, so the URL-supplied name must never be
+        # allowed to point outside the output directory or at a directory.
+        if file_path.parent != output_dir or not file_path.is_file():
+            raise HTTPException(status_code=404, detail="File not found")
+
+        file_handler.cleanup_file(file_path)
+
+        return {"message": f"File {filename} deleted successfully"}
+
+    except HTTPException:
+        # Deliberate HTTP errors (the 404 above) pass through unchanged.
+        raise
+    except Exception:
+        logger.exception("Cleanup error")
+        raise HTTPException(
+            status_code=500, detail="Erreur lors de la suppression du fichier."
+        )
+
+
+@router.post("/extract-texts")
+async def extract_texts_from_document(
+ file: UploadFile = File(..., description="Document file to extract texts from"),
+):
+ """Extract all translatable texts from a document for client-side translation.
+
+ The upload is validated, saved in the upload directory under a name derived
+ from a fresh session UUID, and scanned for text. A JSON session file is
+ written next to it so the document can be rebuilt later from that session ID.
+
+ Returns a dict with ``session_id``, ``texts`` (a list of ``{"id", "text"}``
+ items), ``file_type`` and ``text_count``. HTTPException is re-raised; any
+ other error is logged and answered with a 500 JSON body.
+
+ NOTE(review): the indentation of this block was lost in this diff, so it is
+ not visible here whether ``para_idx`` / ``table_idx`` advance on every
+ element or only on non-empty ones — confirm against the real file.
+ NOTE(review): no cleanup of the saved upload is visible on the error path.
+ """
+ import uuid
+ import json
+
+ try:
+ file_extension = file_handler.validate_file_extension(file.filename)
+ logger.info(f"Extracting texts from {file_extension} file: {file.filename}")
+
+ file_handler.validate_file_size(file)
+
+ # The session ID ties the saved upload and its JSON metadata together.
+ session_id = str(uuid.uuid4())
+
+ input_filename = f"session_{session_id}{file_extension}"
+ input_path = config.UPLOAD_DIR / input_filename
+ file_handler.save_upload_file(file, input_path)
+
+ texts = []
+
+ if file_extension == ".xlsx":
+ from openpyxl import load_workbook
+
+ # Excel: one entry per non-blank string cell, keyed "<sheet title>!<coordinate>".
+ wb = load_workbook(input_path)
+ for sheet in wb.worksheets:
+ for row in sheet.iter_rows():
+ for cell in row:
+ if (
+ cell.value
+ and isinstance(cell.value, str)
+ and cell.value.strip()
+ ):
+ texts.append(
+ {
+ "id": f"{sheet.title}!{cell.coordinate}",
+ "text": cell.value,
+ }
+ )
+ wb.close()
+ elif file_extension == ".docx":
+ from docx import Document
+
+ # Word: body paragraphs keyed "para_<n>", then table cells keyed
+ # "table_<t>_r<row>_c<col>"; only non-blank text is collected.
+ doc = Document(input_path)
+ para_idx = 0
+ for para in doc.paragraphs:
+ if para.text.strip():
+ texts.append({"id": f"para_{para_idx}", "text": para.text})
+ para_idx += 1
+ table_idx = 0
+ for table in doc.tables:
+ for row_idx, row in enumerate(table.rows):
+ for cell_idx, cell in enumerate(row.cells):
+ if cell.text.strip():
+ texts.append(
+ {
+ "id": f"table_{table_idx}_r{row_idx}_c{cell_idx}",
+ "text": cell.text,
+ }
+ )
+ table_idx += 1
+ elif file_extension == ".pptx":
+ from pptx import Presentation
+
+ # PowerPoint: one entry per non-blank run, keyed by slide, shape,
+ # paragraph and run index.
+ prs = Presentation(input_path)
+ for slide_idx, slide in enumerate(prs.slides):
+ for shape_idx, shape in enumerate(slide.shapes):
+ if shape.has_text_frame:
+ for para_idx, para in enumerate(shape.text_frame.paragraphs):
+ for run_idx, run in enumerate(para.runs):
+ if run.text.strip():
+ texts.append(
+ {
+ "id": f"slide_{slide_idx}_shape_{shape_idx}_para_{para_idx}_run_{run_idx}",
+ "text": run.text,
+ }
+ )
+
+ # Persist what is needed to find the saved upload again from the session ID.
+ session_data = {
+ "original_filename": file.filename,
+ "file_extension": file_extension,
+ "input_path": str(input_path),
+ "text_count": len(texts),
+ }
+ session_file = config.UPLOAD_DIR / f"session_{session_id}.json"
+ with open(session_file, "w", encoding="utf-8") as f:
+ json.dump(session_data, f)
+
+ logger.info(
+ f"Extracted {len(texts)} texts from {file.filename}, session: {session_id}"
+ )
+
+ return {
+ "session_id": session_id,
+ "texts": texts,
+ "file_type": file_extension,
+ "text_count": len(texts),
+ }
+
+ except HTTPException:
+ # Deliberate HTTP errors pass through unchanged.
+ raise
+ except Exception as e:
+ # Anything else is logged with its traceback and hidden behind a generic 500.
+ logger.exception("Text extraction error")
+ return JSONResponse(
+ status_code=500,
+ content={
+ "error": "INTERNAL_ERROR",
+ "message": "Erreur lors de l'extraction des textes. Veuillez reessayer.",
+ },
+ )
+
+
+@router.post("/reconstruct-document")
+async def reconstruct_document(
+ session_id: str = Form(..., description="Session ID from extract-texts"),
+ translations: str = Form(
+ ..., description="JSON array of {id, translated_text} objects"
+ ),
+ target_language: str = Form(..., description="Target language code"),
+):
+ """Reconstruct a document with translated texts.
+
+ Loads the session JSON for ``session_id``, copies the saved source document
+ to the output directory, writes each translated text back at the location
+ named by its ID, removes the source and session files, and returns the
+ result as a file download.
+
+ Raises HTTPException 404 when the session file or the source document is
+ missing. Any other error is logged and answered with a 500 JSON body.
+
+ NOTE(review): ``target_language`` is not used in the visible body.
+ NOTE(review): ``session_id`` is interpolated into a file name without any
+ format check visible here — confirm it is validated upstream.
+ NOTE(review): malformed ``translations`` JSON, or items missing "id" /
+ "translated_text", end up in the generic 500 handler.
+ NOTE(review): the indentation of this block was lost in this diff, so it is
+ not visible here where ``para_idx`` / ``table_idx`` are incremented.
+ """
+ import json
+
+ try:
+ session_file = config.UPLOAD_DIR / f"session_{session_id}.json"
+ if not session_file.exists():
+ raise HTTPException(status_code=404, detail="Session not found or expired")
+
+ with open(session_file, "r", encoding="utf-8") as f:
+ session_data = json.load(f)
+
+ input_path = Path(session_data["input_path"])
+ file_extension = session_data["file_extension"]
+ original_filename = session_data["original_filename"]
+
+ if not input_path.exists():
+ raise HTTPException(
+ status_code=404, detail="Source file not found or expired"
+ )
+
+ # Index the client's translations by text ID for direct lookup.
+ translation_list = json.loads(translations)
+ translation_map = {t["id"]: t["translated_text"] for t in translation_list}
+
+ output_filename = file_handler.generate_unique_filename(
+ original_filename, "translated"
+ )
+ output_path = config.OUTPUT_DIR / output_filename
+
+ # Each branch edits a copy of the source, so the original stays untouched
+ # until the cleanup below.
+ if file_extension == ".xlsx":
+ from openpyxl import load_workbook
+ import shutil
+
+ shutil.copy(input_path, output_path)
+ wb = load_workbook(output_path)
+ for sheet in wb.worksheets:
+ for row in sheet.iter_rows():
+ for cell in row:
+ cell_id = f"{sheet.title}!{cell.coordinate}"
+ if cell_id in translation_map:
+ cell.value = translation_map[cell_id]
+ wb.save(output_path)
+ wb.close()
+
+ elif file_extension == ".docx":
+ from docx import Document
+ import shutil
+
+ shutil.copy(input_path, output_path)
+ doc = Document(output_path)
+ para_idx = 0
+ for para in doc.paragraphs:
+ para_id = f"para_{para_idx}"
+ if para_id in translation_map and para.text.strip():
+ # Blank every run, then put the whole translation in the first run
+ # (or in the paragraph itself when it has no runs).
+ for run in para.runs:
+ run.text = ""
+ if para.runs:
+ para.runs[0].text = translation_map[para_id]
+ else:
+ para.text = translation_map[para_id]
+ para_idx += 1
+ table_idx = 0
+ for table in doc.tables:
+ for row_idx, row in enumerate(table.rows):
+ for cell_idx, cell in enumerate(row.cells):
+ cell_id = f"table_{table_idx}_r{row_idx}_c{cell_idx}"
+ if cell_id in translation_map:
+ # Same approach for a table cell: clear all runs, then write
+ # the translation into the cell's first paragraph.
+ for para in cell.paragraphs:
+ for run in para.runs:
+ run.text = ""
+ if cell.paragraphs and cell.paragraphs[0].runs:
+ cell.paragraphs[0].runs[0].text = translation_map[
+ cell_id
+ ]
+ elif cell.paragraphs:
+ cell.paragraphs[0].text = translation_map[cell_id]
+ table_idx += 1
+ doc.save(output_path)
+
+ elif file_extension == ".pptx":
+ from pptx import Presentation
+ import shutil
+
+ shutil.copy(input_path, output_path)
+ prs = Presentation(output_path)
+ for slide_idx, slide in enumerate(prs.slides):
+ for shape_idx, shape in enumerate(slide.shapes):
+ if shape.has_text_frame:
+ for para_idx, para in enumerate(shape.text_frame.paragraphs):
+ for run_idx, run in enumerate(para.runs):
+ run_id = f"slide_{slide_idx}_shape_{shape_idx}_para_{para_idx}_run_{run_idx}"
+ if run_id in translation_map:
+ run.text = translation_map[run_id]
+ prs.save(output_path)
+
+ # The session's source document and metadata are removed once used.
+ file_handler.cleanup_file(input_path)
+ file_handler.cleanup_file(session_file)
+
+ logger.info(f"Reconstructed document: {output_path}")
+
+ return FileResponse(
+ path=output_path,
+ filename=f"translated_{original_filename}",
+ media_type="application/octet-stream",
+ )
+
+ except HTTPException:
+ # Deliberate HTTP errors (the 404s above) pass through unchanged.
+ raise
+ except Exception as e:
+ # Anything else is logged with its traceback and hidden behind a generic 500.
+ logger.exception("Reconstruction error")
+ return JSONResponse(
+ status_code=500,
+ content={
+ "error": "INTERNAL_ERROR",
+ "message": "Erreur lors de la reconstruction du document. Veuillez reessayer.",
+ },
+ )
+
+
+@router.get("/ollama/models")
+async def list_ollama_models(base_url: Optional[str] = None):
+    """Report the models offered by an Ollama server.
+
+    Args:
+        base_url: Server to query; falls back to ``config.OLLAMA_BASE_URL``
+            when omitted or empty.
+
+    Returns:
+        A dict with the queried ``ollama_url``, the ``models`` list and its
+        ``count``.
+    """
+    from services.translation_service import OllamaTranslationProvider
+
+    target_url = base_url if base_url else config.OLLAMA_BASE_URL
+    found_models = OllamaTranslationProvider.list_models(target_url)
+
+    return dict(
+        ollama_url=target_url,
+        models=found_models,
+        count=len(found_models),
+    )
+
+
+@router.post("/ollama/configure")
+async def configure_ollama(base_url: str = Form(...), model: str = Form(...)):
+    """Overwrite the Ollama base URL and model on the shared ``config`` object.
+
+    Args:
+        base_url: New Ollama server URL.
+        model: New Ollama model name.
+
+    Returns:
+        A confirmation dict echoing the values that were applied.
+    """
+    for attribute, value in (
+        ("OLLAMA_BASE_URL", base_url),
+        ("OLLAMA_MODEL", model),
+    ):
+        setattr(config, attribute, value)
+
+    confirmation = {
+        "status": "success",
+        "message": "Ollama configuration updated",
+        "ollama_url": base_url,
+        "model": model,
+    }
+    return confirmation
+
+
+@router.get("/metrics")
+async def get_metrics():
+    """Return monitoring data: cleanup stats, rate-limit stats and key config.
+
+    The ``system`` section is a placeholder: empty ``memory`` and ``disk``
+    dicts and a constant "healthy" status.
+
+    NOTE(review): a new cleanup manager and a new RateLimitManager are built
+    on every call, so the stats presumably describe those fresh objects rather
+    than the ones serving live traffic — confirm.
+    """
+    from middleware.cleanup import create_cleanup_manager
+    from middleware.rate_limiting import RateLimitManager, RateLimitConfig
+
+    cleaner = create_cleanup_manager(config)
+    limiter = RateLimitManager(
+        RateLimitConfig(
+            requests_per_minute=config.RATE_LIMIT_PER_MINUTE,
+            requests_per_hour=config.RATE_LIMIT_PER_HOUR,
+            translations_per_minute=config.TRANSLATIONS_PER_MINUTE,
+            translations_per_hour=config.TRANSLATIONS_PER_HOUR,
+            max_concurrent_translations=config.MAX_CONCURRENT_TRANSLATIONS,
+        )
+    )
+
+    system_section = {"memory": {}, "disk": {}, "status": "healthy"}
+    config_section = {
+        "max_file_size_mb": config.MAX_FILE_SIZE_MB,
+        "supported_extensions": list(config.SUPPORTED_EXTENSIONS),
+        "translation_service": config.TRANSLATION_SERVICE,
+    }
+
+    return {
+        "system": system_section,
+        "cleanup": cleaner.get_stats(),
+        "rate_limits": limiter.get_stats(),
+        "config": config_section,
+    }
+
+
+@router.get("/rate-limit/status")
+async def get_rate_limit_status(request: Request):
+    """Return the configured rate limits and the caller's current usage.
+
+    The client is identified by ``request.client.host``, or "unknown" when
+    the request carries no client information.
+
+    NOTE(review): a new RateLimitManager is built on every call, so the usage
+    presumably comes from that fresh object rather than the one serving live
+    traffic — confirm.
+    """
+    from middleware.rate_limiting import RateLimitManager, RateLimitConfig
+
+    limits_cfg = RateLimitConfig(
+        requests_per_minute=config.RATE_LIMIT_PER_MINUTE,
+        requests_per_hour=config.RATE_LIMIT_PER_HOUR,
+        translations_per_minute=config.TRANSLATIONS_PER_MINUTE,
+        translations_per_hour=config.TRANSLATIONS_PER_HOUR,
+        max_concurrent_translations=config.MAX_CONCURRENT_TRANSLATIONS,
+    )
+    manager = RateLimitManager(limits_cfg)
+
+    if request.client:
+        client_ip = request.client.host
+    else:
+        client_ip = "unknown"
+
+    usage = await manager.get_client_status(client_ip)
+
+    reported_limits = (
+        "requests_per_minute",
+        "requests_per_hour",
+        "translations_per_minute",
+        "translations_per_hour",
+    )
+    return {
+        "client_ip": client_ip,
+        "limits": {name: getattr(limits_cfg, name) for name in reported_limits},
+        "current_usage": usage,
+    }
diff --git a/routes/prompt_routes.py b/routes/prompt_routes.py
new file mode 100644
index 0000000..b647bb8
--- /dev/null
+++ b/routes/prompt_routes.py
@@ -0,0 +1,377 @@
+"""
+Custom Prompt CRUD routes for Pro users
+Story 3.11: Custom Prompts - Endpoint CRUD
+"""
+
+import logging
+from datetime import datetime, timezone
+from typing import Optional
+
+from fastapi import APIRouter, Depends, Query
+from fastapi.responses import JSONResponse
+
+from routes.deps import require_pro_user, ProUser
+from database.connection import get_sync_session
+from database.models import CustomPrompt
+from schemas.prompt_schemas import (
+ PromptCreate,
+ PromptUpdate,
+ PromptResponse,
+ PromptListItem,
+ PromptListResponse,
+ PromptDetailResponse,
+)
+
+logger = logging.getLogger(__name__)
+
+router = APIRouter(prefix="/api/v1/prompts", tags=["Prompts v1"])
+
+DEFAULT_PAGE_SIZE = 50
+MAX_PAGE_SIZE = 100
+
+
+def _validate_uuid(prompt_id: str) -> tuple[bool, dict | None]:
+    """Check that ``prompt_id`` parses as a UUID.
+
+    Returns:
+        ``(True, None)`` when the string is accepted by ``uuid.UUID``,
+        otherwise ``(False, error_body)`` where ``error_body`` is the JSON
+        payload to send back to the client.
+    """
+    import uuid as uuid_lib
+
+    try:
+        uuid_lib.UUID(prompt_id)
+    except ValueError:
+        error_body = {
+            "error": "INVALID_PROMPT_ID",
+            "message": "Format d'identifiant de prompt invalide.",
+        }
+        return False, error_body
+
+    return True, None
+
+
+def _format_prompt(prompt: CustomPrompt) -> dict:
+    """Build the full JSON representation of a prompt.
+
+    Timestamps are rendered with ``isoformat()``; a missing timestamp
+    becomes ``None``.
+    """
+
+    def _iso_or_none(moment):
+        # Falsy timestamps (typically None) are passed through as None.
+        return moment.isoformat() if moment else None
+
+    formatted = {
+        "id": prompt.id,
+        "name": prompt.name,
+        "content": prompt.content,
+    }
+    formatted["created_at"] = _iso_or_none(prompt.created_at)
+    formatted["updated_at"] = _iso_or_none(prompt.updated_at)
+    return formatted
+
+
+def _format_prompt_list_item(prompt: CustomPrompt) -> dict:
+    """Build the list-view representation of a prompt.
+
+    Instead of the full content, the result carries ``content_preview``:
+    the first 100 characters of the content, or an empty string when there
+    is none. Timestamps are rendered with ``isoformat()`` or ``None``.
+    """
+    if prompt.content:
+        preview = prompt.content[:100]
+    else:
+        preview = ""
+
+    created = prompt.created_at.isoformat() if prompt.created_at else None
+    updated = prompt.updated_at.isoformat() if prompt.updated_at else None
+
+    return dict(
+        id=prompt.id,
+        name=prompt.name,
+        content_preview=preview,
+        created_at=created,
+        updated_at=updated,
+    )
+
+
+@router.post(
+    "",
+    response_model=PromptDetailResponse,
+    status_code=201,
+    summary="Créer un prompt",
+    description="""
+ Crée un nouveau prompt système personnalisé.
+
+ **Restriction:** Uniquement disponible pour les utilisateurs Pro.
+
+ **Exemple de requête:**
+ ```json
+ {
+ "name": "Prompt Technique FR-EN",
+ "content": "Tu es un traducteur technique expert. Traduis en préservant la terminologie technique..."
+ }
+ ```
+ """,
+)
+async def create_prompt(
+    body: PromptCreate,
+    user: ProUser = Depends(require_pro_user),
+):
+    """Store a new custom prompt owned by the authenticated Pro user.
+
+    Returns:
+        201 with ``{"data": <prompt>, "meta": {}}`` on success, or 500 with
+        a ``DATABASE_ERROR`` body when anything goes wrong.
+    """
+    try:
+        with get_sync_session() as db:
+            record = CustomPrompt(
+                user_id=user.id,
+                name=body.name,
+                content=body.content,
+                created_at=datetime.now(timezone.utc),
+                updated_at=datetime.now(timezone.utc),
+            )
+
+            db.add(record)
+            db.commit()
+            # Reload the row so generated values (such as the id) are populated.
+            db.refresh(record)
+
+            logger.info(
+                f"Prompt created: id={record.id}, user_id={user.id}, name={record.name}"
+            )
+
+            payload = {"data": _format_prompt(record), "meta": {}}
+            return JSONResponse(status_code=201, content=payload)
+    except Exception as e:
+        logger.error(f"Failed to create prompt for user {user.id}: {e}")
+        failure = {
+            "error": "DATABASE_ERROR",
+            "message": "Une erreur est survenue lors de la création du prompt.",
+        }
+        return JSONResponse(status_code=500, content=failure)
+
+
+@router.get(
+    "",
+    response_model=PromptListResponse,
+    summary="Lister les prompts",
+    description="Retourne la liste paginée des prompts de l'utilisateur.",
+)
+async def list_prompts(
+    page: int = Query(1, ge=1, description="Numéro de page"),
+    per_page: int = Query(
+        DEFAULT_PAGE_SIZE, ge=1, le=MAX_PAGE_SIZE, description="Éléments par page"
+    ),
+    user: ProUser = Depends(require_pro_user),
+):
+    """Return one page of the authenticated Pro user's prompts, newest first.
+
+    Args:
+        page: 1-based page number.
+        per_page: Page size, between 1 and ``MAX_PAGE_SIZE``.
+
+    Returns:
+        200 with ``data`` (list items carrying a 100-character content
+        preview) and ``meta`` (``total``, ``page``, ``per_page``,
+        ``total_pages``), or 500 with a ``DATABASE_ERROR`` body on failure.
+    """
+    try:
+        with get_sync_session() as db:
+            owned = db.query(CustomPrompt).filter(CustomPrompt.user_id == user.id)
+
+            total_count = owned.count()
+
+            skip = (page - 1) * per_page
+            rows = (
+                owned.order_by(CustomPrompt.created_at.desc())
+                .offset(skip)
+                .limit(per_page)
+                .all()
+            )
+
+            serialized = []
+            for row in rows:
+                item = PromptListItem(
+                    id=row.id,
+                    name=row.name,
+                    content_preview=row.content[:100] if row.content else "",
+                    created_at=row.created_at,
+                    updated_at=row.updated_at,
+                )
+                serialized.append(item.model_dump(mode="json"))
+
+            # Ceiling division: a partially filled last page still counts.
+            full_pages, leftover = divmod(total_count, per_page)
+            total_pages = full_pages + (1 if leftover else 0)
+
+            pagination = {
+                "total": total_count,
+                "page": page,
+                "per_page": per_page,
+                "total_pages": total_pages,
+            }
+            return JSONResponse(
+                status_code=200,
+                content={"data": serialized, "meta": pagination},
+            )
+    except Exception as e:
+        logger.error(f"Failed to list prompts for user {user.id}: {e}")
+        failure = {
+            "error": "DATABASE_ERROR",
+            "message": "Une erreur est survenue lors de la récupération des prompts.",
+        }
+        return JSONResponse(status_code=500, content=failure)
+
+
+@router.get(
+    "/{prompt_id}",
+    response_model=PromptDetailResponse,
+    summary="Détail d'un prompt",
+    description="Retourne les détails d'un prompt spécifique.",
+)
+async def get_prompt(
+    prompt_id: str,
+    user: ProUser = Depends(require_pro_user),
+):
+    """Fetch one prompt owned by the authenticated Pro user.
+
+    Returns:
+        400 when ``prompt_id`` is not a UUID, 404 when no prompt with that id
+        belongs to the user, 200 with ``{"data": <prompt>, "meta": {}}``
+        otherwise, or 500 with a ``DATABASE_ERROR`` body on failure.
+    """
+    id_ok, id_error = _validate_uuid(prompt_id)
+    if not id_ok:
+        return JSONResponse(status_code=400, content=id_error)
+
+    try:
+        with get_sync_session() as db:
+            # Filtering on the owner keeps other users' prompts invisible.
+            found = (
+                db.query(CustomPrompt)
+                .filter(CustomPrompt.id == prompt_id, CustomPrompt.user_id == user.id)
+                .first()
+            )
+
+            if found:
+                return JSONResponse(
+                    status_code=200,
+                    content={"data": _format_prompt(found), "meta": {}},
+                )
+
+            not_found = {
+                "error": "PROMPT_NOT_FOUND",
+                "message": "Prompt introuvable ou vous n'avez pas accès à cette ressource.",
+            }
+            return JSONResponse(status_code=404, content=not_found)
+    except Exception as e:
+        logger.error(f"Failed to get prompt {prompt_id}: {e}")
+        failure = {
+            "error": "DATABASE_ERROR",
+            "message": "Une erreur est survenue.",
+        }
+        return JSONResponse(status_code=500, content=failure)
+
+
+@router.patch(
+    "/{prompt_id}",
+    response_model=PromptDetailResponse,
+    summary="Mettre à jour un prompt",
+    description="Met à jour le nom et/ou le contenu d'un prompt existant.",
+)
+async def update_prompt(
+    prompt_id: str,
+    body: PromptUpdate,
+    user: ProUser = Depends(require_pro_user),
+):
+    """Change the name and/or content of one of the user's prompts.
+
+    Fields left as ``None`` in the body are not modified; ``updated_at`` is
+    refreshed whenever the update goes through.
+
+    Returns:
+        400 when the body carries no update or ``prompt_id`` is not a UUID,
+        404 when no prompt with that id belongs to the user, 200 with
+        ``{"data": <prompt>, "meta": {}}`` otherwise, or 500 with a
+        ``DATABASE_ERROR`` body on failure.
+    """
+    if not body.has_updates():
+        nothing_to_do = {
+            "error": "NO_UPDATE_FIELDS",
+            "message": "Au moins un champ (name ou content) doit être fourni.",
+        }
+        return JSONResponse(status_code=400, content=nothing_to_do)
+
+    id_ok, id_error = _validate_uuid(prompt_id)
+    if not id_ok:
+        return JSONResponse(status_code=400, content=id_error)
+
+    try:
+        with get_sync_session() as db:
+            target = (
+                db.query(CustomPrompt)
+                .filter(CustomPrompt.id == prompt_id, CustomPrompt.user_id == user.id)
+                .first()
+            )
+
+            if not target:
+                not_found = {
+                    "error": "PROMPT_NOT_FOUND",
+                    "message": "Prompt introuvable ou vous n'avez pas accès à cette ressource.",
+                }
+                return JSONResponse(status_code=404, content=not_found)
+
+            # Kept for the audit log line below.
+            previous_name = target.name
+
+            if body.name is not None:
+                target.name = body.name
+
+            if body.content is not None:
+                target.content = body.content
+
+            target.updated_at = datetime.now(timezone.utc)
+            db.commit()
+            db.refresh(target)
+
+            logger.info(
+                f"Prompt updated: id={target.id}, user_id={user.id}, "
+                f"old_name={previous_name}, new_name={target.name}"
+            )
+
+            payload = {"data": _format_prompt(target), "meta": {}}
+            return JSONResponse(status_code=200, content=payload)
+    except Exception as e:
+        logger.error(f"Failed to update prompt {prompt_id}: {e}")
+        failure = {
+            "error": "DATABASE_ERROR",
+            "message": "Une erreur est survenue lors de la mise à jour.",
+        }
+        return JSONResponse(status_code=500, content=failure)
+
+
+@router.delete(
+    "/{prompt_id}",
+    status_code=204,
+    summary="Supprimer un prompt",
+    description="Supprime un prompt.",
+)
+async def delete_prompt(
+    prompt_id: str,
+    user: ProUser = Depends(require_pro_user),
+):
+    """Delete one of the authenticated Pro user's prompts.
+
+    Returns:
+        400 when ``prompt_id`` is not a UUID, 404 when no prompt with that id
+        belongs to the user, an empty 204 response once the prompt is
+        deleted, or 500 with a ``DATABASE_ERROR`` body on failure.
+    """
+    # Imported here because the module only imports JSONResponse.
+    from fastapi import Response
+
+    is_valid, error = _validate_uuid(prompt_id)
+    if not is_valid:
+        return JSONResponse(status_code=400, content=error)
+
+    try:
+        with get_sync_session() as session:
+            prompt = (
+                session.query(CustomPrompt)
+                .filter(CustomPrompt.id == prompt_id, CustomPrompt.user_id == user.id)
+                .first()
+            )
+
+            if not prompt:
+                return JSONResponse(
+                    status_code=404,
+                    content={
+                        "error": "PROMPT_NOT_FOUND",
+                        "message": "Prompt introuvable ou vous n'avez pas accès à cette ressource.",
+                    },
+                )
+
+            # Read the name before deleting: it is only needed for the log line.
+            prompt_name = prompt.name
+            session.delete(prompt)
+            session.commit()
+
+            logger.info(
+                f"Prompt deleted: id={prompt_id}, user_id={user.id}, name={prompt_name}"
+            )
+
+            # A 204 must not carry a body. JSONResponse(content=None) would
+            # serialise the JSON literal "null", so send a bare Response.
+            return Response(status_code=204)
+    except Exception as e:
+        logger.error(f"Failed to delete prompt {prompt_id}: {e}")
+        return JSONResponse(
+            status_code=500,
+            content={
+                "error": "DATABASE_ERROR",
+                "message": "Une erreur est survenue lors de la suppression.",
+            },
+        )
diff --git a/routes/translate_routes.py b/routes/translate_routes.py
new file mode 100644
index 0000000..c32597b
--- /dev/null
+++ b/routes/translate_routes.py
@@ -0,0 +1,1314 @@
+"""
+API v1 Translate Endpoint (Story 2.10, 2.11, 2.12, 3.6)
+POST /api/v1/translate - Submit document for translation
+GET /api/v1/translations/{id} - Get translation status with real-time progress
+GET /api/v1/download/{id} - Download translated file
+
+Story 3.6: Documentation OpenAPI complète avec exemples et codes d'erreur
+"""
+
+import os
+import re
+import uuid
+import time
+import socket
+import asyncio
+import ipaddress
+import logging
+from datetime import datetime, timezone
+from pathlib import Path
+from typing import Optional, Any, Literal, Dict
+from urllib.parse import urlparse, unquote
+
+import aiofiles
+
+JOB_ID_PATTERN = re.compile(r"^tr_[a-zA-Z0-9_\-]+$")
+
+import httpx
+from fastapi import (
+ APIRouter,
+ File,
+ Form,
+ Header,
+ HTTPException,
+ Request,
+ UploadFile,
+ Depends,
+)
+from fastapi.responses import JSONResponse, FileResponse
+from starlette.background import BackgroundTask
+from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer
+from pydantic import BaseModel, Field, field_validator
+from typing_extensions import Annotated
+
+from config import config
+from models.subscription import PlanType
+from middleware.tier_quota import tier_quota_service
+from middleware.validation import FileValidator, ValidationError, LanguageValidator, webhook_validator
+from middleware.api_key_auth import get_authenticated_user, get_user_from_api_key
+from utils import file_handler
+
+# Import models from schemas (Story 3.6 - DRY principle)
+from schemas.translation import (
+ TranslateResponseData,
+ TranslateResponseMeta,
+ TranslateResponse,
+ TranslationStatusData,
+ TranslationStatusMeta,
+ TranslationStatusResponse,
+)
+from schemas.errors import ErrorResponse
+from utils.file_handler import FileHandler
+from services.progress_tracker import ProgressTracker
+from services.storage_tracker import storage_tracker
+from services.glossary_service import get_glossary_terms, validate_glossary_access, build_full_prompt
+from services.prompt_service import get_prompt_content, validate_prompt_access
+from utils.exceptions import GlossaryNotFoundError, PromptNotFoundError
+
+# Module-level logger, named after this module.
+logger = logging.getLogger(__name__)
+
+# Router for the versioned translation endpoints; every route below is mounted under /api/v1.
+router_v1 = APIRouter(prefix="/api/v1", tags=["Translation v1"])
+# Bearer-token security scheme, created with auto_error disabled.
+security = HTTPBearer(auto_error=False)
+
+
+# Upper bound, in megabytes, applied to uploaded and URL-downloaded files.
+MAX_FILE_SIZE_MB = 50
+# First four bytes expected at the start of an accepted file (checked by validate_file_content).
+OFFICE_MAGIC_BYTES = b"PK\x03\x04"
+# Lower-case file suffixes accepted for translation.
+ACCEPTED_EXTENSIONS = {".xlsx", ".docx", ".pptx"}
+
+
+class TranslateEndpointError(Exception):
+    """Error raised by the translate endpoint, carrying a machine-readable code.
+
+    Attributes:
+        code: One of the class-level code constants (or any other string).
+        message: Human-readable text; defaults to the entry of
+            ``ERROR_MESSAGES`` for ``code`` or "Erreur inconnue".
+        details: Extra context as a dict; empty when none was supplied.
+    """
+
+    INVALID_FORMAT = "INVALID_FORMAT"
+    CORRUPTED_FILE = "CORRUPTED_FILE"
+    FILE_TOO_LARGE = "FILE_TOO_LARGE"
+    QUOTA_EXCEEDED = "QUOTA_EXCEEDED"
+    URL_DOWNLOAD_FAILED = "URL_DOWNLOAD_FAILED"
+    URL_UNREACHABLE = "URL_UNREACHABLE"
+    UNAUTHORIZED = "UNAUTHORIZED"
+    MISSING_FILE = "MISSING_FILE"
+    PRO_FEATURE_REQUIRED = "PRO_FEATURE_REQUIRED"
+
+    # Default user-facing text for each known code.
+    ERROR_MESSAGES = {
+        INVALID_FORMAT: "Format de fichier non supporte. Formats acceptes : .xlsx, .docx, .pptx",
+        CORRUPTED_FILE: "Le fichier semble corrompu ou n'est pas un document Office valide.",
+        FILE_TOO_LARGE: f"Le fichier est trop volumineux (max {MAX_FILE_SIZE_MB} Mo).",
+        QUOTA_EXCEEDED: "Limite quotidienne atteinte.",
+        URL_DOWNLOAD_FAILED: "Impossible de telecharger le fichier depuis l'URL.",
+        URL_UNREACHABLE: "URL inaccessible.",
+        UNAUTHORIZED: "Authentification requise.",
+        MISSING_FILE: "Fichier ou URL requis.",
+        PRO_FEATURE_REQUIRED: "Cette fonctionnalite necessite un abonnement Pro.",
+    }
+
+    def __init__(
+        self, code: str, message: Optional[str] = None, details: Optional[dict] = None
+    ):
+        # An empty or missing message falls back to the catalogue text for the code.
+        if not message:
+            message = self.ERROR_MESSAGES.get(code, "Erreur inconnue")
+        self.code = code
+        self.message = message
+        self.details = details if details else {}
+        super().__init__(self.message)
+
+    def to_dict(self) -> dict:
+        """Return the JSON-serializable payload; ``details`` is included only when non-empty."""
+        payload = {"error": self.code, "message": self.message}
+        return {**payload, "details": self.details} if self.details else payload
+
+
+# NOTE: Response models are now imported from schemas/ module (DRY principle)
+# TranslateResponseData, TranslateResponseMeta, TranslateResponse,
+# TranslationStatusData, TranslationStatusMeta, TranslationStatusResponse, ErrorResponse
+
+# Shared upload validator configured with this module's size limit and accepted suffixes.
+file_validator = FileValidator(
+ max_size_mb=MAX_FILE_SIZE_MB, allowed_extensions=ACCEPTED_EXTENSIONS
+)
+# Shared FileHandler instance used below for unique filenames, saving uploads, hashing and cleanup.
+file_handler_util = FileHandler()
+
+
+def _tier_for_quota(plan) -> str:
+    """Translate a subscription plan into the quota tier name.
+
+    PRO, BUSINESS and ENTERPRISE plans map to ``"pro"``; every other value
+    maps to ``"free"``.
+    """
+    paid_plans = (PlanType.PRO, PlanType.BUSINESS, PlanType.ENTERPRISE)
+    return "pro" if plan in paid_plans else "free"
+
+
+def _next_midnight_utc() -> datetime:
+    """Return the first UTC midnight after the current instant, as an aware datetime."""
+    from datetime import timedelta
+
+    # Step one day ahead, then truncate to the start of that day.
+    same_time_tomorrow = datetime.now(timezone.utc) + timedelta(days=1)
+    return same_time_tomorrow.replace(hour=0, minute=0, second=0, microsecond=0)
+
+
+def _seconds_until_midnight_utc() -> int:
+    """Return the whole seconds left before the next UTC midnight, never negative."""
+    current = datetime.now(timezone.utc)
+    remaining = int((_next_midnight_utc() - current).total_seconds())
+    return remaining if remaining > 0 else 0
+
+
+async def validate_file_content(content: bytes, extension: str) -> None:
+    """Check that ``content`` starts with the expected 4-byte signature.
+
+    Args:
+        content: Leading bytes of the file (at least four are needed).
+        extension: File suffix; accepted for interface symmetry, not inspected here.
+
+    Raises:
+        TranslateEndpointError: ``CORRUPTED_FILE`` when fewer than four bytes
+            are supplied or the first four differ from ``OFFICE_MAGIC_BYTES``.
+    """
+    if len(content) >= 4:
+        if content[:4] == OFFICE_MAGIC_BYTES:
+            return
+        raise TranslateEndpointError(
+            code=TranslateEndpointError.CORRUPTED_FILE,
+            message="Le fichier n'est pas un document Office valide ou est corrompu.",
+            details={
+                "accepted_formats": list(ACCEPTED_EXTENSIONS),
+                "hint": "Les fichiers .xlsx, .docx, .pptx doivent etre des archives ZIP valides.",
+            },
+        )
+
+    raise TranslateEndpointError(
+        code=TranslateEndpointError.CORRUPTED_FILE,
+        message="Le fichier est trop petit pour etre un document Office valide.",
+        details={"reason": "File is too small"},
+    )
+
+
+def _parse_content_disposition(content_disp: str) -> Optional[str]:
+    """Extract the filename from a Content-Disposition header value.
+
+    Both the plain ``filename=`` parameter and the extended
+    ``filename*=charset'lang'value`` parameter are understood. When both are
+    present the extended form wins regardless of their order, and its value is
+    percent-decoded using the charset it declares.
+
+    Args:
+        content_disp: Raw header value, e.g. ``attachment; filename="a.docx"``.
+
+    Returns:
+        The filename, or ``None`` when the header carries no usable one. The
+        value is returned as sent; callers must not treat it as a safe path.
+    """
+    import re
+    from urllib.parse import unquote
+
+    plain_name = None
+    for part in content_disp.split(";"):
+        part = part.strip()
+        lowered = part.lower()
+        if lowered.startswith("filename*="):
+            match = re.match(r"filename\*=([^']+)'([^']*)'(.+)", part, re.IGNORECASE)
+            if match:
+                charset = match.group(1).strip()
+                try:
+                    decoded = unquote(match.group(3), encoding=charset, errors="replace")
+                except LookupError:
+                    # Unknown charset label: fall back to the UTF-8 default.
+                    decoded = unquote(match.group(3))
+                if decoded:
+                    return decoded
+        elif lowered.startswith("filename=") and plain_name is None:
+            # Remember the plain form but keep scanning for a later filename*.
+            candidate = part.split("=", 1)[1].strip().strip('"').strip("'")
+            if candidate:
+                plain_name = candidate
+    return plain_name
+
+
+def _is_ssrf_risk(hostname: str) -> bool:
+    """Return True if ``hostname`` may reach a non-public address (SSRF prevention).
+
+    Every address the name resolves to, IPv4 and IPv6, is inspected; a single
+    loopback, private, link-local, reserved, multicast or unspecified address
+    is enough to flag the host. IPv4-mapped IPv6 addresses are judged by the
+    IPv4 address they embed. Resolution failures and unparsable addresses are
+    treated as risky so that non-resolvable names cannot be used as a bypass.
+
+    Args:
+        hostname: Host name or IP literal taken from the URL.
+
+    Returns:
+        True when the host must be refused, False when all resolved addresses
+        look publicly routable.
+    """
+    try:
+        resolved = socket.getaddrinfo(hostname, None, proto=socket.IPPROTO_TCP)
+        if not resolved:
+            return True
+        for entry in resolved:
+            # sockaddr[0] is the textual address; drop any IPv6 zone suffix ("%eth0").
+            addr = ipaddress.ip_address(entry[4][0].split("%", 1)[0])
+            embedded_v4 = getattr(addr, "ipv4_mapped", None)
+            if embedded_v4 is not None:
+                addr = embedded_v4
+            if (
+                addr.is_loopback
+                or addr.is_private
+                or addr.is_link_local
+                or addr.is_reserved
+                or addr.is_multicast
+                or addr.is_unspecified
+            ):
+                return True
+        return False
+    except Exception:
+        return True
+
+
+async def download_from_url(url: str, timeout: int = 30) -> tuple[Path, str]:
+    """Stream a remote document into ``config.UPLOAD_DIR``.
+
+    The body is written chunk by chunk so the whole file is never held in
+    memory. The target host is checked with ``_is_ssrf_risk`` before the first
+    request and again for every redirect hop. The filename announced by the
+    server (or taken from the URL path) is reduced to its final path component
+    before it is used on disk.
+
+    Args:
+        url: HTTP or HTTPS URL of the document.
+        timeout: Timeout in seconds handed to the httpx client.
+
+    Returns:
+        ``(temp_path, filename)``: where the file was stored and the sanitized
+        filename it was announced under.
+
+    Raises:
+        TranslateEndpointError: ``URL_UNREACHABLE`` (bad scheme, blocked host,
+            non-200 status, timeout), ``FILE_TOO_LARGE``, ``INVALID_FORMAT``,
+            ``CORRUPTED_FILE`` or ``URL_DOWNLOAD_FAILED``. Any partially
+            written file is removed before the error propagates.
+    """
+    temp_path = None
+
+    parsed_url = urlparse(url)
+    if parsed_url.scheme not in ("http", "https"):
+        raise TranslateEndpointError(
+            code=TranslateEndpointError.URL_UNREACHABLE,
+            message="Seules les URLs HTTP/HTTPS sont acceptees.",
+            details={"scheme": parsed_url.scheme or "none"},
+        )
+
+    hostname = parsed_url.hostname or ""
+    if not hostname or _is_ssrf_risk(hostname):
+        raise TranslateEndpointError(
+            code=TranslateEndpointError.URL_UNREACHABLE,
+            message="L'URL pointe vers une adresse interdite (adresse privee ou interne).",
+            details={"reason": "ssrf_blocked"},
+        )
+
+    async def _reject_unsafe_hop(request: httpx.Request) -> None:
+        # httpx calls request hooks before each request it sends, redirect hops
+        # included, so a public URL cannot bounce the download to an internal host.
+        # NOTE(review): the name is resolved again when the socket is opened, so
+        # DNS rebinding is not fully closed by this check.
+        hop_host = request.url.host
+        if not hop_host or await asyncio.to_thread(_is_ssrf_risk, hop_host):
+            raise TranslateEndpointError(
+                code=TranslateEndpointError.URL_UNREACHABLE,
+                message="L'URL pointe vers une adresse interdite (adresse privee ou interne).",
+                details={"reason": "ssrf_blocked"},
+            )
+
+    max_size_bytes = MAX_FILE_SIZE_MB * 1024 * 1024
+
+    try:
+        async with httpx.AsyncClient(
+            timeout=timeout,
+            follow_redirects=True,
+            max_redirects=10,
+            event_hooks={"request": [_reject_unsafe_hop]},
+        ) as client:
+            async with client.stream("GET", url) as response:
+                if response.status_code != 200:
+                    raise TranslateEndpointError(
+                        code=TranslateEndpointError.URL_UNREACHABLE,
+                        message=f"URL inaccessible (HTTP {response.status_code})",
+                        details={"status_code": response.status_code, "url": url[:100]},
+                    )
+
+                # Fast rejection when the server announces an oversized body.
+                content_length = response.headers.get("content-length")
+                if content_length:
+                    try:
+                        file_size = int(content_length)
+                    except ValueError:
+                        # Malformed header: rely on the streamed byte count below.
+                        file_size = None
+                    if file_size is not None and file_size > max_size_bytes:
+                        raise TranslateEndpointError(
+                            code=TranslateEndpointError.FILE_TOO_LARGE,
+                            message=f"Le fichier est trop volumineux ({round(file_size / (1024 * 1024), 2)} Mo, max {MAX_FILE_SIZE_MB} Mo).",
+                            details={
+                                "size_mb": round(file_size / (1024 * 1024), 2),
+                                "max_mb": MAX_FILE_SIZE_MB,
+                            },
+                        )
+
+                filename = None
+                content_disp = response.headers.get("content-disposition", "")
+                if content_disp:
+                    filename = _parse_content_disposition(content_disp)
+
+                if not filename:
+                    filename = unquote(Path(parsed_url.path).name) or "downloaded_file"
+
+                # The name comes from the remote server or from percent-decoded
+                # URL text, so it may contain directory separators. Keep only
+                # the last component so the file cannot land outside UPLOAD_DIR.
+                filename = Path(filename.replace("\\", "/")).name
+
+                extension = Path(filename).suffix.lower()
+                if extension not in ACCEPTED_EXTENSIONS:
+                    raise TranslateEndpointError(
+                        code=TranslateEndpointError.INVALID_FORMAT,
+                        details={
+                            "detected_extension": extension or "none",
+                            "accepted_formats": list(ACCEPTED_EXTENSIONS),
+                        },
+                    )
+
+                unique_id = str(uuid.uuid4())[:8]
+                safe_filename = f"{unique_id}_{filename}"
+                temp_path = config.UPLOAD_DIR / safe_filename
+
+                temp_path.parent.mkdir(parents=True, exist_ok=True)
+
+                downloaded_bytes = 0
+
+                async with aiofiles.open(temp_path, "wb") as f:
+                    async for chunk in response.aiter_bytes(chunk_size=65536):
+                        downloaded_bytes += len(chunk)
+
+                        # Content-Length may be absent or wrong, so the limit is
+                        # enforced on the bytes actually received. Leaving the
+                        # "async with" closes the file; the handler below
+                        # removes the partial download.
+                        if downloaded_bytes > max_size_bytes:
+                            raise TranslateEndpointError(
+                                code=TranslateEndpointError.FILE_TOO_LARGE,
+                                details={
+                                    "size_mb": round(
+                                        downloaded_bytes / (1024 * 1024), 2
+                                    ),
+                                    "max_mb": MAX_FILE_SIZE_MB,
+                                },
+                            )
+
+                        await f.write(chunk)
+
+                # Re-read the first bytes from disk to verify the file signature.
+                async with aiofiles.open(temp_path, "rb") as f:
+                    header = await f.read(4)
+                    await validate_file_content(header, extension)
+
+                return temp_path, filename
+
+    except httpx.TimeoutException:
+        if temp_path and temp_path.exists():
+            temp_path.unlink()
+        raise TranslateEndpointError(
+            code=TranslateEndpointError.URL_UNREACHABLE,
+            message="Timeout lors du telechargement.",
+            details={"timeout_seconds": timeout},
+        )
+    except httpx.RequestError as e:
+        if temp_path and temp_path.exists():
+            temp_path.unlink()
+        raise TranslateEndpointError(
+            code=TranslateEndpointError.URL_DOWNLOAD_FAILED,
+            message=f"Erreur de telechargement: {str(e)}",
+            details={"error": str(e)},
+        )
+    except TranslateEndpointError:
+        if temp_path and temp_path.exists():
+            temp_path.unlink()
+        raise
+    except Exception as e:
+        if temp_path and temp_path.exists():
+            temp_path.unlink()
+        raise TranslateEndpointError(
+            code=TranslateEndpointError.URL_DOWNLOAD_FAILED,
+            message=f"Erreur inattendue lors du telechargement: {str(e)}",
+            details={"error": str(e), "error_type": type(e).__name__},
+        )
+
+
+
+# In-process job registry: job id -> mutable job record (status, progress, file info, ...).
+_translation_jobs: dict[str, dict] = {}
+# Age in seconds after which a completed/failed job is dropped by _cleanup_old_jobs.
+_JOB_TTL_SECONDS = 3600
+# time.time() value recorded at the last cleanup pass (0.0 = never ran).
+_last_cleanup_ts: float = 0.0
+_CLEANUP_INTERVAL_SECONDS = 300 # run cleanup every 5 minutes at most
+
+
+def _cleanup_old_jobs() -> None:
+    """Drop finished jobs whose age exceeds ``_JOB_TTL_SECONDS``.
+
+    Only jobs with status "completed" or "failed" that carry a
+    ``completed_at`` / ``failed_at`` timestamp are considered. The pass is
+    skipped when the previous one ran less than ``_CLEANUP_INTERVAL_SECONDS``
+    ago, so the registry is not scanned on every request.
+    """
+    global _last_cleanup_ts
+    now = time.time()
+    if now - _last_cleanup_ts < _CLEANUP_INTERVAL_SECONDS:
+        return
+    _last_cleanup_ts = now
+
+    # Collect first, delete afterwards: the dict must not change size while iterated.
+    stale_ids = []
+    for candidate_id, record in _translation_jobs.items():
+        if record.get("status") not in ("completed", "failed"):
+            continue
+        finished_at = record.get("completed_at") or record.get("failed_at")
+        if finished_at and _job_age_seconds(finished_at) > _JOB_TTL_SECONDS:
+            stale_ids.append(candidate_id)
+
+    for stale_id in stale_ids:
+        del _translation_jobs[stale_id]
+        logger.debug(f"Cleaned up expired job: {stale_id}")
+
+
+def _job_age_seconds(timestamp_str: str) -> float:
+    """Return the number of seconds elapsed since an ISO 8601 timestamp.
+
+    A trailing "Z" is accepted as UTC. Any value that cannot be parsed yields
+    0.0, which callers treat as "not old".
+    """
+    try:
+        normalized = timestamp_str.replace("Z", "+00:00")
+        parsed = datetime.fromisoformat(normalized)
+        return time.time() - parsed.timestamp()
+    except Exception:
+        return 0.0
+
+
+# Strong references to in-flight background jobs. The event loop only keeps
+# weak references to tasks, so a task nobody else holds can be garbage
+# collected before it finishes.
+_background_translation_tasks: set = set()
+
+
+@router_v1.post(
+    "/translate",
+    response_model=TranslateResponse,
+    responses={
+        202: {"description": "Translation job accepted", "model": TranslateResponse},
+        400: {"description": "Invalid request", "model": ErrorResponse},
+        401: {"description": "Unauthorized", "model": ErrorResponse},
+        403: {"description": "Pro feature required", "model": ErrorResponse},
+        413: {"description": "File too large", "model": ErrorResponse},
+        429: {"description": "Quota exceeded", "model": ErrorResponse},
+    },
+    status_code=202,
+)
+async def translate_document_v1(
+    request: Request,
+    file: Optional[UploadFile] = File(
+        None, description="Document file (.xlsx, .docx, .pptx)"
+    ),
+    file_url: Optional[str] = Form(None, description="URL to download file (Pro only)"),
+    source_lang: str = Form(default="auto", description="Source language code"),
+    target_lang: str = Form(..., description="Target language code"),
+    mode: Literal["classic", "llm"] = Form(
+        default="classic", description="Translation mode"
+    ),
+    provider: Optional[str] = Form(default=None, description="Provider override"),
+    webhook_url: Optional[str] = Form(None, description="Webhook URL for notification"),
+    glossary_id: Optional[str] = Form(None, description="Glossary ID (Pro only)"),
+    custom_prompt: Optional[str] = Form(None, description="Custom prompt (Pro only)"),
+    prompt_id: Optional[str] = Form(None, description="Prompt ID from saved prompts (Pro only)"),
+    current_user: Optional[Any] = Depends(get_authenticated_user),
+):
+    """
+    Submit a document for translation.
+
+    **Authentication:**
+    - JWT Bearer token in Authorization header (web users)
+    - X-API-Key header (automation users)
+
+    **File Input:**
+    - `file`: Upload file directly (multipart/form-data)
+    - `file_url`: URL to download file from (Pro feature)
+
+    **Parameters:**
+    - `source_lang`: Source language code (default: auto-detect)
+    - `target_lang`: Target language code (required)
+    - `mode`: Translation mode - "classic" or "llm" (default: classic)
+    - `provider`: Provider override (google, deepl, ollama, openai, openrouter)
+    - `webhook_url`: URL to receive POST notification when complete
+    - `glossary_id`: Glossary ID for LLM translation (Pro only)
+    - `custom_prompt`: Custom system prompt (Pro only)
+    - `prompt_id`: Saved prompt ID to use (Pro only). Takes priority over custom_prompt.
+
+    **Webhook Notification:**
+    If `webhook_url` is provided, a POST request will be sent when translation completes.
+
+    **Webhook Payload (Success):**
+    ```json
+    {
+      "event_id": "evt_abc123def456xyz",
+      "translation_id": "tr_abc123def456",
+      "status": "completed",
+      "timestamp": "2024-01-15T10:30:00Z",
+      "file_name": "report.xlsx",
+      "source_lang": "en",
+      "target_lang": "fr",
+      "error_message": null
+    }
+    ```
+
+    **Webhook Payload (Failure):**
+    ```json
+    {
+      "event_id": "evt_abc123def456xyz",
+      "translation_id": "tr_abc123def456",
+      "status": "failed",
+      "timestamp": "2024-01-15T10:30:00Z",
+      "file_name": "report.xlsx",
+      "source_lang": "en",
+      "target_lang": "fr",
+      "error_message": "Provider unavailable: connection timeout"
+    }
+    ```
+
+    **Webhook Fields:**
+    - `event_id`: Unique identifier for webhook deduplication (format: evt_xxxxxxxxxxxxxxxx)
+    - `translation_id`: The translation job ID
+    - `status`: "completed" or "failed"
+    - `timestamp`: ISO 8601 UTC timestamp
+    - `file_name`: Original file name
+    - `source_lang`: Source language code
+    - `target_lang`: Target language code
+    - `error_message`: Error description (null if successful)
+
+    **Webhook Behavior:**
+    - Timeout: 10 seconds
+    - Fire & Forget: Translation succeeds even if webhook fails
+    - Retries: None (implement retry logic on your server if needed)
+
+    **Returns:**
+    - HTTP 202 with job ID and status "processing"
+    """
+    request_id = getattr(request.state, "request_id", str(uuid.uuid4())[:8])
+
+    try:
+        if not file and not file_url:
+            raise TranslateEndpointError(
+                code=TranslateEndpointError.MISSING_FILE,
+                details={"hint": "Provide either 'file' or 'file_url' parameter"},
+            )
+
+        # Unauthenticated callers are handled as the free tier with no user id.
+        tier = "free"
+        user_id = None
+        if current_user:
+            tier = _tier_for_quota(current_user.plan)
+            user_id = current_user.id
+
+        if file_url:
+            if tier == "free":
+                raise TranslateEndpointError(
+                    code=TranslateEndpointError.PRO_FEATURE_REQUIRED,
+                    message="L'ingestion par URL est reservee aux utilisateurs Pro.",
+                    details={"feature": "file_url", "tier": tier},
+                )
+
+        # Story 3.12: Include prompt_id in Pro feature check
+        if (glossary_id or custom_prompt or prompt_id) and tier == "free":
+            raise TranslateEndpointError(
+                code=TranslateEndpointError.PRO_FEATURE_REQUIRED,
+                message="Les glossaires et prompts personnalises sont reserves aux utilisateurs Pro.",
+                details={"feature": "glossary_id, custom_prompt, or prompt_id", "tier": tier},
+            )
+
+        # Story 3.10: Validate glossary access before creating the job
+        if glossary_id and user_id:
+            try:
+                validate_glossary_access(glossary_id, user_id)
+            except GlossaryNotFoundError as e:
+                raise TranslateEndpointError(
+                    code="GLOSSARY_NOT_FOUND",
+                    message=str(e),
+                    details={"glossary_id": glossary_id}
+                )
+
+        # Story 3.12: Validate prompt access before creating the job
+        if prompt_id and user_id:
+            try:
+                validate_prompt_access(prompt_id, user_id)
+            except PromptNotFoundError as e:
+                raise TranslateEndpointError(
+                    code="PROMPT_NOT_FOUND",
+                    message=str(e),
+                    details={**e.details, "prompt_id": prompt_id} if e.details else {"prompt_id": prompt_id}
+                )
+
+        if webhook_url:
+            is_valid, error_msg, error_details = webhook_validator.validate(webhook_url)
+            if not is_valid:
+                raise TranslateEndpointError(
+                    code="INVALID_WEBHOOK_URL",
+                    message=error_msg,
+                    details=error_details,
+                )
+
+        # The quota is checked here but only incremented by the background job on success.
+        if current_user:
+            quota = await tier_quota_service.check_quota(user_id, tier)
+            if not quota.allowed:
+                retry_after = _seconds_until_midnight_utc()
+                raise HTTPException(
+                    status_code=429,
+                    detail={
+                        "error": "QUOTA_EXCEEDED",
+                        "message": f"Limite quotidienne atteinte ({quota.current_usage}/{quota.limit} fichiers). Reessayez apres minuit UTC.",
+                        "details": {
+                            "current_usage": quota.current_usage,
+                            "limit": quota.limit,
+                            "tier": tier,
+                            "reset_at": quota.reset_at_utc.isoformat(),
+                        },
+                    },
+                    headers={"Retry-After": str(retry_after)},
+                )
+            rate_limit_remaining = quota.remaining
+        else:
+            rate_limit_remaining = -1
+
+        try:
+            LanguageValidator.validate(target_lang)
+        except ValidationError:
+            raise TranslateEndpointError(
+                code="INVALID_FORMAT",
+                message=f"Code langue cible invalide: {target_lang}",
+                details={"field": "target_lang"},
+            )
+
+        if source_lang and source_lang != "auto":
+            try:
+                LanguageValidator.validate(source_lang)
+            except ValidationError:
+                raise TranslateEndpointError(
+                    code="INVALID_FORMAT",
+                    message=f"Code langue source invalide: {source_lang}",
+                    details={"field": "source_lang"},
+                )
+
+        input_path = None
+        original_filename = None
+        file_extension = None
+        file_size = 0
+        file_hash = None
+
+        # A direct upload takes precedence over file_url when both are supplied.
+        if file:
+            validation_result = await file_validator.validate_async(file)
+            if not validation_result.is_valid:
+                error_msg = "; ".join(validation_result.errors)
+
+                # Use structured error codes from validator
+                if validation_result.error_code == "file_too_large":
+                    raise TranslateEndpointError(
+                        code=TranslateEndpointError.FILE_TOO_LARGE,
+                        message=error_msg,
+                        details={
+                            "errors": validation_result.errors,
+                            "max_size_mb": MAX_FILE_SIZE_MB,
+                        },
+                    )
+                elif validation_result.error_code == "invalid_file_content":
+                    raise TranslateEndpointError(
+                        code=TranslateEndpointError.CORRUPTED_FILE,
+                        message=error_msg,
+                        details={"errors": validation_result.errors},
+                    )
+                else:
+                    raise TranslateEndpointError(
+                        code=TranslateEndpointError.INVALID_FORMAT,
+                        message=error_msg,
+                        details={"errors": validation_result.errors},
+                    )
+
+            original_filename = file.filename
+            file_extension = validation_result.data.get("extension")
+            file_size = validation_result.data.get("size_bytes", 0)
+
+            input_filename = file_handler_util.generate_unique_filename(
+                file.filename, "input"
+            )
+            input_path = config.UPLOAD_DIR / input_filename
+            await file_handler_util.save_upload_file(file, input_path)
+
+            file_hash = file_handler_util.calculate_sha256(input_path)
+            if file_hash is None:
+                file_handler_util.cleanup_file(input_path)
+                raise TranslateEndpointError(
+                    code=TranslateEndpointError.CORRUPTED_FILE,
+                    message="Impossible de calculer le hash du fichier. Fichier potentiellement corrompu.",
+                    details={"error": "sha256_calculation_failed"},
+                )
+
+        elif file_url:
+            input_path, original_filename = await download_from_url(file_url)
+            file_extension = Path(original_filename).suffix.lower()
+            file_size = input_path.stat().st_size
+            file_hash = file_handler_util.calculate_sha256(input_path)
+            if file_hash is None:
+                file_handler_util.cleanup_file(input_path)
+                raise TranslateEndpointError(
+                    code=TranslateEndpointError.CORRUPTED_FILE,
+                    message="Impossible de calculer le hash du fichier telecharge.",
+                    details={"error": "sha256_calculation_failed"},
+                )
+
+        job_id = f"tr_{uuid.uuid4().hex[:12]}"
+
+        # Track file metadata in Redis with TTL
+        await storage_tracker.track_file(
+            job_id=job_id,
+            metadata={
+                "original_filename": original_filename,
+                "file_size": file_size,
+                "file_hash": file_hash,
+                "input_path": str(input_path),
+                "user_id": str(user_id) if user_id else None,
+                "timestamp": datetime.now(timezone.utc).isoformat(),
+            },
+        )
+
+        _cleanup_old_jobs()
+
+        # The record must exist before the task starts: the job returns
+        # immediately when its id is missing from the registry.
+        _translation_jobs[job_id] = {
+            "id": job_id,
+            "status": "queued",
+            "progress_percent": 0,
+            "current_step": "Initializing",
+            "total_items": 0,
+            "processed_items": 0,
+            "error_message": None,
+            "file_name": original_filename,
+            "source_lang": source_lang,
+            "target_lang": target_lang,
+            "created_at": datetime.now(timezone.utc).isoformat(),
+            "user_id": user_id,
+            "input_path": str(input_path),
+            "file_extension": file_extension,
+            "provider": provider or mode,
+            "webhook_url": webhook_url,
+            "custom_prompt": custom_prompt,
+            "glossary_id": glossary_id,
+            "prompt_id": prompt_id,  # Story 3.12: Store prompt_id
+        }
+
+        provider_to_use = provider or ("openrouter" if mode == "llm" else "google")
+
+        job_task = asyncio.create_task(
+            _run_translation_job(
+                job_id=job_id,
+                input_path=input_path,
+                file_extension=file_extension,
+                target_lang=target_lang,
+                source_lang=source_lang,
+                provider=provider_to_use,
+                user_id=user_id,
+                custom_prompt=custom_prompt,
+                glossary_id=glossary_id,
+                prompt_id=prompt_id,  # Story 3.12: Pass prompt_id
+                webhook_url=webhook_url,
+            )
+        )
+        # Hold the task until it finishes so it cannot be garbage collected
+        # mid-run; the done callback releases the reference again.
+        _background_translation_tasks.add(job_task)
+        job_task.add_done_callback(_background_translation_tasks.discard)
+
+        logger.info(
+            f"[{request_id}] Created translation job {job_id} for {original_filename}"
+        )
+
+        return JSONResponse(
+            status_code=202,
+            content={
+                "data": {
+                    "id": job_id,
+                    "status": "processing",
+                    "file_name": original_filename,
+                    "source_lang": source_lang,
+                    "target_lang": target_lang,
+                },
+                "meta": {
+                    "rate_limit_remaining": rate_limit_remaining,
+                    "estimated_time_seconds": 15,
+                },
+            },
+        )
+
+    except TranslateEndpointError as e:
+        # Map structured error codes to HTTP statuses; everything else is a 400.
+        status_code = 400
+        if e.code == TranslateEndpointError.FILE_TOO_LARGE:
+            status_code = 413
+        elif e.code == TranslateEndpointError.UNAUTHORIZED:
+            status_code = 401
+        elif e.code == TranslateEndpointError.PRO_FEATURE_REQUIRED:
+            status_code = 403
+
+        return JSONResponse(
+            status_code=status_code,
+            content=e.to_dict(),
+        )
+    except HTTPException:
+        raise
+    except Exception as e:
+        # NOTE(review): unexpected server-side failures are reported as 400 here;
+        # kept as-is because clients may depend on it.
+        logger.error(f"[{request_id}] Unexpected error: {e}", exc_info=True)
+        return JSONResponse(
+            status_code=400,
+            content={
+                "error": "PROCESSING_ERROR",
+                "message": "Erreur lors du traitement de la requete.",
+                "details": {"error_type": type(e).__name__},
+            },
+        )
+
+
+async def _run_translation_job(
+    job_id: str,
+    input_path: Path,
+    file_extension: str,
+    target_lang: str,
+    source_lang: str,
+    provider: str,
+    user_id: Optional[str],
+    custom_prompt: Optional[str],
+    glossary_id: Optional[str],
+    prompt_id: Optional[str] = None,  # Story 3.12: Add prompt_id parameter
+    webhook_url: Optional[str] = None,
+) -> None:
+    """
+    Run translation job in background with progress tracking.
+
+    The job record in ``_translation_jobs`` is updated through a
+    ``ProgressTracker``. Failures are recorded on the job and logged; nothing
+    is raised to the caller. When ``webhook_url`` is set, a notification is
+    posted once the job has ended, whatever the outcome.
+
+    Args:
+        job_id: Unique job identifier
+        input_path: Path to input file
+        file_extension: File extension (.xlsx, .docx, .pptx)
+        target_lang: Target language code
+        source_lang: Source language code
+        provider: Translation provider name
+        user_id: Optional user ID for quota tracking
+        custom_prompt: Optional custom prompt text (Pro only)
+        glossary_id: Optional glossary ID for LLM translation (Pro only)
+        prompt_id: Optional saved prompt ID - takes priority over custom_prompt (Pro only, Story 3.12)
+        webhook_url: Optional webhook URL for completion notification
+    """
+    job = _translation_jobs.get(job_id)
+    if not job:
+        return
+
+    tracker = ProgressTracker(job_id, _translation_jobs)
+
+    try:
+        job["status"] = "processing"
+        tracker.update(10, "Validating file")
+
+        output_filename = file_handler_util.generate_unique_filename(
+            input_path.name.replace("input_", "translated_"), "translated"
+        )
+        output_path = config.OUTPUT_DIR / output_filename
+
+        from translators import excel_translator, word_translator, pptx_translator
+        from services.translation_service import (
+            OpenRouterTranslationProvider,
+            OllamaTranslationProvider,
+        )
+        from routes.admin_routes import load_settings as _load_admin_settings
+
+        _admin_cfg = _load_admin_settings()
+
+        # Helper: prefer value from admin settings JSON, fall back to env var
+        def _cfg(admin_val: str | None, env_var: str, default: str = "") -> str:
+            return (admin_val or "").strip() or os.getenv(env_var, default)
+
+        api_key = _cfg(_admin_cfg.openrouter.api_key, "OPENROUTER_API_KEY")
+        model = _cfg(_admin_cfg.openrouter.model, "OPENROUTER_MODEL", "deepseek/deepseek-v3.2")
+
+        # Story 3.10: Retrieve and format glossary terms for LLM prompt
+        glossary_terms = None
+        if glossary_id and user_id:
+            try:
+                glossary_terms = get_glossary_terms(glossary_id, user_id)
+                logger.info(f"Job {job_id}: Loaded {len(glossary_terms)} glossary terms")
+            except GlossaryNotFoundError as e:
+                tracker.set_error(str(e))
+                logger.error(f"Job {job_id}: Glossary error - {e}")
+                return
+
+        # Story 3.12: Retrieve prompt content if prompt_id provided
+        # Priority: prompt_id > custom_prompt
+        effective_prompt = None
+        if prompt_id and user_id:
+            try:
+                effective_prompt = get_prompt_content(prompt_id, user_id)
+                logger.info(f"Job {job_id}: Loaded prompt content from {prompt_id}")
+            except PromptNotFoundError as e:
+                tracker.set_error(str(e))
+                logger.error(f"Job {job_id}: Prompt error - {e}")
+                return
+        elif custom_prompt:
+            # Use custom_prompt if no prompt_id
+            effective_prompt = custom_prompt
+
+        # Build the full prompt combining effective prompt and glossary
+        full_prompt = build_full_prompt(effective_prompt, glossary_terms)
+
+        # translation_provider stays None when the requested provider is unknown
+        # or its credentials are missing; the translators then receive None.
+        translation_provider = None
+        _p = provider.lower()
+
+        if _p in ("openrouter", "llm") and api_key:
+            translation_provider = OpenRouterTranslationProvider(
+                api_key, model, full_prompt
+            )
+        elif _p == "openrouter_premium":
+            premium_key = _cfg(_admin_cfg.openrouter_premium.api_key, "OPENROUTER_API_KEY")
+            premium_model = _cfg(_admin_cfg.openrouter_premium.model, "OPENROUTER_PREMIUM_MODEL", "anthropic/claude-3.5-haiku")
+            if not premium_key:
+                premium_key = api_key  # fall back to main openrouter key
+            if premium_key:
+                translation_provider = OpenRouterTranslationProvider(
+                    premium_key, premium_model, full_prompt
+                )
+        elif _p == "openai":
+            from services.translation_service import OpenAITranslationProvider
+            openai_key = _cfg(_admin_cfg.openai.api_key, "OPENAI_API_KEY")
+            openai_model = _cfg(_admin_cfg.openai.model, "OPENAI_MODEL", "gpt-4o-mini")
+            if openai_key:
+                translation_provider = OpenAITranslationProvider(
+                    api_key=openai_key,
+                    model=openai_model,
+                    system_prompt=full_prompt,
+                )
+        elif _p == "deepl":
+            deepl_key = _cfg(_admin_cfg.deepl.api_key, "DEEPL_API_KEY")
+            if deepl_key:
+                from services.translation_service import DeepLTranslationProvider
+                translation_provider = DeepLTranslationProvider(deepl_key, full_prompt)
+        elif _p == "zai":
+            from services.translation_service import OpenAITranslationProvider as _OAI
+            zai_key = _cfg(_admin_cfg.zai.api_key, "ZAI_API_KEY")
+            zai_model = _cfg(_admin_cfg.zai.model, "ZAI_MODEL", "grok-2-1212")
+            zai_url = _cfg(_admin_cfg.zai.base_url, "ZAI_BASE_URL", "https://api.x.ai/v1")
+            if zai_key:
+                translation_provider = _OAI(
+                    api_key=zai_key,
+                    model=zai_model,
+                    base_url=zai_url,
+                    system_prompt=full_prompt,
+                )
+        elif _p == "ollama":
+            ollama_url = _cfg(_admin_cfg.ollama.base_url, "OLLAMA_BASE_URL", "http://localhost:11434")
+            ollama_model = _cfg(_admin_cfg.ollama.model, "OLLAMA_MODEL", "llama3")
+            # NOTE(review): the model name is passed twice; confirm against the
+            # OllamaTranslationProvider signature.
+            translation_provider = OllamaTranslationProvider(
+                ollama_url,
+                ollama_model,
+                ollama_model,
+                full_prompt,
+            )
+
+        tracker.update(20, "Preparing translation")
+
+        step_labels = {
+            ".pptx": "Translating slide",
+            ".xlsx": "Translating sheet",
+            ".docx": "Processing paragraph",
+        }
+        item_name = step_labels.get(file_extension, "Translating")
+
+        def _first_present(info: dict, keys: tuple, default: int):
+            # Value of the first key that exists in info, whatever that value is.
+            for key in keys:
+                if key in info:
+                    return info[key]
+            return default
+
+        def progress_callback(progress_info: dict) -> None:
+            """Callback for translator progress updates with standardized key handling."""
+            current = _first_present(
+                progress_info, ("current", "slide", "sheet", "paragraph", "element"), 1
+            )
+            total = _first_present(
+                progress_info,
+                ("total", "total_slides", "total_sheets", "total_paragraphs"),
+                1,
+            )
+
+            # max_percent=95: the translator reaches current==total when its last
+            # chunk finishes, but the file is not yet written. set_completed()
+            # pushes to 100% once the file is saved.
+            tracker.update_item(current, total, item_name, max_percent=95)
+
+        translators_by_extension = {
+            ".xlsx": excel_translator,
+            ".docx": word_translator,
+            ".pptx": pptx_translator,
+        }
+        translator = translators_by_extension.get(file_extension)
+        if translator is None:
+            raise ValueError(f"Unsupported file type: {file_extension}")
+
+        # Run synchronous translators in a thread pool to avoid blocking the event loop.
+        # Without this, status polling requests from the frontend would time out during
+        # translation, causing the "Connection lost" error and frozen progress bar.
+        # Always call set_provider (even with None) to reset any previously-set
+        # provider on the singleton translator instances between jobs.
+        # NOTE(review): the translators are shared singletons, so two jobs for
+        # the same file type running concurrently can overwrite each other's
+        # provider while a translation is in flight.
+        translator.set_provider(translation_provider)
+        await asyncio.to_thread(
+            translator.translate_file,
+            input_path,
+            output_path,
+            target_lang,
+            source_lang,
+            progress_callback=progress_callback,
+        )
+
+        if user_id:
+            await tier_quota_service.increment_on_success(user_id)
+
+        tracker.set_completed(str(output_path))
+        logger.info(f"Job {job_id}: Completed successfully")
+
+    except Exception as e:
+        tracker.set_error(str(e))
+        logger.error(f"Job {job_id}: Failed - {e}", exc_info=True)
+
+    finally:
+        # Runs on success, on failure and after the early returns above.
+        if webhook_url:
+            # Generate unique event_id for webhook deduplication. Bound before
+            # the try so every handler below can reference it.
+            event_id = f"evt_{uuid.uuid4().hex[:16]}"
+            try:
+                async with httpx.AsyncClient(timeout=10) as client:
+                    response = await client.post(
+                        webhook_url,
+                        json={
+                            "event_id": event_id,
+                            "translation_id": job_id,
+                            "status": job["status"],
+                            "timestamp": datetime.now(timezone.utc).isoformat(),
+                            "file_name": job.get("file_name"),
+                            "source_lang": job.get("source_lang"),
+                            "target_lang": job.get("target_lang"),
+                            "error_message": job.get("error_message"),
+                        },
+                    )
+
+                # Log successful webhook delivery
+                if response.is_success:
+                    logger.info(
+                        f"Job {job_id}: Webhook notification sent successfully to {webhook_url} "
+                        f"(status={response.status_code}, event_id={event_id})"
+                    )
+                else:
+                    # Log non-2xx response with body for debugging
+                    try:
+                        response_body = await response.aread()
+                        body_preview = response_body[:500].decode('utf-8', errors='replace')
+                    except Exception:
+                        body_preview = ""
+                    logger.warning(
+                        f"Job {job_id}: Webhook returned non-success status "
+                        f"(status={response.status_code}, url={webhook_url}, event_id={event_id}, "
+                        f"response_body={body_preview})"
+                    )
+
+            except httpx.TimeoutException:
+                logger.warning(
+                    f"Job {job_id}: Webhook notification timed out after 10s (url={webhook_url}, event_id={event_id})"
+                )
+            except httpx.RequestError as e:
+                logger.warning(
+                    f"Job {job_id}: Webhook notification failed - {type(e).__name__}: {e} "
+                    f"(url={webhook_url}, event_id={event_id})"
+                )
+            except Exception as e:
+                logger.warning(
+                    f"Job {job_id}: Unexpected webhook error - {type(e).__name__}: {e} (event_id={event_id})"
+                )
+
+
+@router_v1.get(
+    "/translations/{job_id}",
+    response_model=TranslationStatusResponse,
+    responses={
+        200: {"description": "Translation status", "model": TranslationStatusResponse},
+        403: {"description": "Job belongs to another user", "model": ErrorResponse},
+        404: {"description": "Job not found", "model": ErrorResponse},
+    },
+)
+async def get_translation_status(
+    job_id: str,
+    current_user: Optional[Any] = Depends(get_authenticated_user),
+):
+    """
+    Get translation job status with real-time progress.
+
+    Returns current status and progress of a translation job.
+
+    **Status Values:**
+    - `queued`: Job is waiting to be processed
+    - `processing`: Job is actively being translated
+    - `completed`: Translation finished successfully
+    - `failed`: Translation encountered an error
+
+    **Progress Fields:**
+    - `progress_percent`: 0-100 indicating completion percentage
+    - `current_step`: Human-readable description of current operation
+    - `error_message`: Present only when status is "failed"
+
+    **Error Codes:**
+    - `NOT_FOUND`: No job is registered under this identifier
+    - `ACCESS_DENIED`: The job belongs to a different authenticated user
+
+    **Example Response (Processing):**
+    ```json
+    {
+        "data": {
+            "id": "tr_abc123",
+            "status": "processing",
+            "progress_percent": 45,
+            "current_step": "Translating slide 5/10",
+            "file_name": "presentation.pptx",
+            "source_lang": "en",
+            "target_lang": "fr",
+            "created_at": "2024-01-15T10:30:00Z"
+        },
+        "meta": {}
+    }
+    ```
+    """
+    job = _translation_jobs.get(job_id)
+
+    if not job:
+        return JSONResponse(
+            status_code=404,
+            content={
+                "error": "NOT_FOUND",
+                "message": "Job de traduction non trouve.",
+                "details": {"job_id": job_id},
+            },
+        )
+
+    # Same ownership rule as the download endpoint: an authenticated caller may
+    # only inspect jobs recorded under their own user id. Anonymous callers and
+    # jobs without a recorded owner are not restricted by this check.
+    job_user_id = job.get("user_id")
+    if current_user and job_user_id and str(job_user_id) != str(current_user.id):
+        return JSONResponse(
+            status_code=403,
+            content={
+                "error": "ACCESS_DENIED",
+                "message": "Vous n'avez pas acces a ce job de traduction.",
+                "details": {"job_id": job_id},
+            },
+        )
+
+    status = job["status"]
+    progress_percent = job.get("progress_percent", 0)
+
+    response_data = {
+        "id": job["id"],
+        "status": status,
+        "progress_percent": progress_percent,
+        "current_step": job.get("current_step", "Unknown"),
+        "file_name": job.get("file_name"),
+        "source_lang": job.get("source_lang"),
+        "target_lang": job.get("target_lang"),
+        "created_at": job.get("created_at"),
+    }
+
+    # Linear extrapolation of the remaining time from elapsed time and progress.
+    # The estimate is optional: any failure leaves it as None instead of failing
+    # the status request, but the cause is logged rather than silently dropped.
+    estimated_remaining = None
+    if status == "processing" and progress_percent > 0:
+        try:
+            created_at_str = job.get("created_at")
+            if created_at_str:
+                # fromisoformat() on older Python versions rejects a trailing "Z".
+                created_at = datetime.fromisoformat(
+                    created_at_str.replace("Z", "+00:00")
+                )
+                elapsed_seconds = (
+                    datetime.now(timezone.utc) - created_at
+                ).total_seconds()
+                total_estimated = elapsed_seconds / (progress_percent / 100)
+                # Never report less than one second while still processing.
+                estimated_remaining = max(1, int(total_estimated - elapsed_seconds))
+        except Exception as exc:
+            logger.debug(
+                f"Job {job_id}: Could not estimate remaining time - "
+                f"{type(exc).__name__}: {exc}"
+            )
+
+    if status == "completed":
+        response_data["completed_at"] = job.get("completed_at")
+    elif status == "failed":
+        response_data["failed_at"] = job.get("failed_at")
+        response_data["error_message"] = job.get("error_message")
+
+    return {
+        "data": response_data,
+        "meta": {"estimated_remaining_seconds": estimated_remaining},
+    }
+
+
+@router_v1.get("/translate/health")
+async def translate_health():
+    """Health check for translation endpoint."""
+    # Static payload: no dependency is probed, the route only confirms that the
+    # router is mounted and answering.
+    payload = {"status": "healthy", "endpoint": "/api/v1/translate"}
+    return payload
+
+
+# Maps a lowercase Office file extension (leading dot included) to the MIME type
+# sent as Content-Type when the translated file is downloaded.
+MIME_TYPES = {
+ ".xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
+ ".docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
+ ".pptx": "application/vnd.openxmlformats-officedocument.presentationml.presentation",
+}
+
+
+def _cleanup_files(input_path: Optional[str], output_path: Optional[str]) -> None:
+    """Best-effort removal of a job's translated output and uploaded input.
+
+    The output file is handled first, then the input file. Each removal is
+    attempted independently: a failure is logged as a warning and never
+    propagates to the caller. Empty/``None`` paths and files that no longer
+    exist are skipped without logging.
+
+    Args:
+        input_path: Path of the uploaded source file, or ``None``.
+        output_path: Path of the translated file, or ``None``.
+    """
+    for label, raw_path in (("output", output_path), ("input", input_path)):
+        try:
+            if not raw_path:
+                continue
+            target = Path(raw_path)
+            if target.exists():
+                target.unlink()
+                logger.info(f"Deleted {label} file: {raw_path}")
+        except Exception as exc:
+            logger.warning(f"Failed to delete {label} file {raw_path}: {exc}")
+
+
+@router_v1.get(
+    "/download/{job_id}",
+    responses={
+        200: {
+            "description": "Translated file download",
+            "content": {"application/octet-stream": {}},
+        },
+        400: {"description": "Malformed job identifier", "model": ErrorResponse},
+        403: {"description": "Job belongs to another user", "model": ErrorResponse},
+        404: {"description": "File not found or not ready", "model": ErrorResponse},
+    },
+)
+async def download_translated_file(
+    job_id: str,
+    current_user: Optional[Any] = Depends(get_authenticated_user),
+):
+    """
+    Download a translated file.
+
+    Returns the translated file as a binary download with proper Content-Type
+    and Content-Disposition headers. The file is automatically deleted after
+    the download completes.
+
+    **Status Requirements:**
+    - Job must exist and have status "completed"
+    - Job must have an output_path field
+
+    **Error Codes:**
+    - `INVALID_JOB_ID`: Identifier does not match the expected job id format
+    - `ACCESS_DENIED`: The job belongs to a different authenticated user
+    - `FILE_EXPIRED`: Job not found, expired, or no output file
+    - `NOT_READY`: Job exists but translation is not complete
+
+    **Response Headers:**
+    - `Content-Type`: Appropriate MIME type for the file format
+    - `Content-Disposition`: attachment with filename containing "_translated" suffix
+
+    **Example:**
+    ```
+    GET /api/v1/download/tr_abc123def456
+    → Returns file with Content-Disposition: attachment; filename="report_translated.xlsx"
+    ```
+    """
+    if not JOB_ID_PATTERN.match(job_id):
+        return JSONResponse(
+            status_code=400,
+            content={
+                "error": "INVALID_JOB_ID",
+                "message": "Format d'identifiant de travail invalide.",
+                "details": {"job_id": job_id, "expected_format": "tr_xxxxxxxxxxxx"},
+            },
+        )
+
+    job = _translation_jobs.get(job_id)
+
+    if not job:
+        return JSONResponse(
+            status_code=404,
+            content={
+                "error": "FILE_EXPIRED",
+                "message": "Le fichier traduit n'est plus disponible ou a expire.",
+                "details": {"job_id": job_id, "status": "not_found"},
+            },
+        )
+
+    # Ownership is enforced only when both the caller and the job carry a user
+    # id; anonymous callers and owner-less jobs skip this check.
+    job_user_id = job.get("user_id")
+    if current_user and job_user_id and str(job_user_id) != str(current_user.id):
+        return JSONResponse(
+            status_code=403,
+            content={
+                "error": "ACCESS_DENIED",
+                "message": "Vous n'avez pas acces a ce fichier.",
+                "details": {"job_id": job_id},
+            },
+        )
+
+    if job.get("status") != "completed":
+        return JSONResponse(
+            status_code=404,
+            content={
+                "error": "NOT_READY",
+                "message": "La traduction est encore en cours.",
+                "details": {
+                    "job_id": job_id,
+                    "status": job.get("status"),
+                    "progress_percent": job.get("progress_percent", 0),
+                },
+            },
+        )
+
+    output_path_str = job.get("output_path")
+    if not output_path_str:
+        return JSONResponse(
+            status_code=404,
+            content={
+                "error": "FILE_EXPIRED",
+                "message": "Le fichier traduit n'est plus disponible ou a expire.",
+                "details": {"job_id": job_id, "status": "no_output_path"},
+            },
+        )
+
+    output_path = Path(output_path_str)
+    if not output_path.exists():
+        return JSONResponse(
+            status_code=404,
+            content={
+                "error": "FILE_EXPIRED",
+                "message": "Le fichier traduit n'est plus disponible ou a expire.",
+                "details": {"job_id": job_id, "status": "file_deleted"},
+            },
+        )
+
+    # Build the download name. No default is passed to get("file_name"): a
+    # truthy default would make the extension fallback below unreachable and
+    # yield an extension-less "document_translated" served as octet-stream.
+    original_filename = job.get("file_name")
+    stored_extension = job.get("file_extension")
+    if original_filename:
+        name_without_ext = Path(original_filename).stem
+        # Prefer the uploaded name's own suffix; fall back to the extension
+        # recorded on the job when the name has none.
+        extension = Path(original_filename).suffix.lower() or (stored_extension or "")
+    else:
+        name_without_ext = "document"
+        extension = stored_extension or ".xlsx"
+    download_filename = f"{name_without_ext}_translated{extension}"
+
+    mime_type = MIME_TYPES.get(extension, "application/octet-stream")
+
+    input_path_str = job.get("input_path")
+
+    logger.info(f"Download requested for job {job_id}: {download_filename}")
+
+    # The background task runs after the response has been sent and removes
+    # both the translated output and the uploaded input.
+    return FileResponse(
+        path=str(output_path),
+        media_type=mime_type,
+        filename=download_filename,
+        background=BackgroundTask(_cleanup_files, input_path_str, output_path_str),
+    )
diff --git a/schemas/__init__.py b/schemas/__init__.py
new file mode 100644
index 0000000..60286ad
--- /dev/null
+++ b/schemas/__init__.py
@@ -0,0 +1,98 @@
+"""
+Pydantic models for API documentation and validation
+Story 3.6: Documentation OpenAPI (Swagger + ReDoc)
+"""
+
+from .translation import (
+ TranslateResponseData,
+ TranslateResponseMeta,
+ TranslateResponse,
+ TranslationStatusData,
+ TranslationStatusMeta,
+ TranslationStatusResponse,
+ LanguageResponse,
+)
+from .auth import (
+ RegisterRequest,
+ LoginRequest,
+ TokenResponse,
+ LogoutResponse,
+ RefreshRequest,
+)
+from .api_keys import (
+ APIKeyCreateRequest,
+ APIKeyResponse,
+ APIKeyListResponse,
+ APIKeyRevokeResponse,
+)
+from .admin import (
+ AdminLoginRequest,
+ AdminLoginResponse,
+ AdminDashboardResponse,
+ AdminUserResponse,
+ AdminUserUpdateRequest,
+ AdminStatsResponse,
+ AdminRevokeApiKeyRequest,
+)
+from .errors import ErrorResponse, ErrorCode
+from .common import (
+ SuccessResponse,
+ HealthCheckResponse,
+ ReadyCheckResponse,
+)
+from .glossary_schemas import (
+ GlossaryTermCreate,
+ GlossaryTermResponse,
+ GlossaryCreate,
+ GlossaryUpdate,
+ GlossaryResponse,
+ GlossaryListItem,
+ GlossaryListResponse,
+ GlossaryDetailResponse,
+)
+
+# Public surface of the schemas package: exactly the names imported above,
+# grouped by the submodule they come from.
+__all__ = [
+ # Translation
+ "TranslateResponseData",
+ "TranslateResponseMeta",
+ "TranslateResponse",
+ "TranslationStatusData",
+ "TranslationStatusMeta",
+ "TranslationStatusResponse",
+ "LanguageResponse",
+ # Auth
+ "RegisterRequest",
+ "LoginRequest",
+ "TokenResponse",
+ "LogoutResponse",
+ "RefreshRequest",
+ # API Keys
+ "APIKeyCreateRequest",
+ "APIKeyResponse",
+ "APIKeyListResponse",
+ "APIKeyRevokeResponse",
+ # Admin
+ "AdminLoginRequest",
+ "AdminLoginResponse",
+ "AdminDashboardResponse",
+ "AdminUserResponse",
+ "AdminUserUpdateRequest",
+ "AdminStatsResponse",
+ "AdminRevokeApiKeyRequest",
+ # Errors
+ "ErrorResponse",
+ "ErrorCode",
+ # Common
+ "SuccessResponse",
+ "HealthCheckResponse",
+ "ReadyCheckResponse",
+ # Glossaries
+ "GlossaryTermCreate",
+ "GlossaryTermResponse",
+ "GlossaryCreate",
+ "GlossaryUpdate",
+ "GlossaryResponse",
+ "GlossaryListItem",
+ "GlossaryListResponse",
+ "GlossaryDetailResponse",
+]
diff --git a/schemas/admin.py b/schemas/admin.py
new file mode 100644
index 0000000..fcda5c1
--- /dev/null
+++ b/schemas/admin.py
@@ -0,0 +1,262 @@
+"""
+Pydantic models for admin endpoints
+Story 3.6: Documentation OpenAPI (Swagger + ReDoc)
+"""
+
+from pydantic import BaseModel, Field
+from typing import Optional, Literal, Dict, Any, List
+
+
+class AdminLoginRequest(BaseModel):
+    """Request body for admin login: a single required password field."""
+
+    # The field example lives in json_schema_extra; passing `example=` as an
+    # extra Field keyword is deprecated in Pydantic v2 and yields the same schema.
+    password: str = Field(
+        ...,
+        description="Mot de passe administrateur",
+        json_schema_extra={"example": "admin_secret_password"},
+    )
+
+    # Dict-based model config (Pydantic v2) replacing the deprecated inner
+    # `class Config`.
+    model_config = {
+        "json_schema_extra": {
+            "example": {
+                "password": "admin_secret_password"
+            }
+        }
+    }
+
+
+class AdminLoginData(BaseModel):
+    """Token payload for an admin login (token, token type, lifetime in seconds).
+
+    Only ``access_token`` is required; the other fields default to a bearer
+    token valid for 86400 seconds.
+    """
+
+    # Field examples live in json_schema_extra; passing `example=` as an extra
+    # Field keyword is deprecated in Pydantic v2 and yields the same schema.
+    access_token: str = Field(
+        ...,
+        description="Token d'accès admin (expire dans 24h)",
+        json_schema_extra={"example": "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9..."},
+    )
+    token_type: str = Field(
+        default="bearer",
+        description="Type de token",
+        json_schema_extra={"example": "bearer"},
+    )
+    expires_in: int = Field(
+        default=86400,
+        description="Durée de validité en secondes",
+        json_schema_extra={"example": 86400},
+    )
+
+
+class AdminLoginResponse(BaseModel):
+    """Flat response body for admin login: status, token details and a message.
+
+    Only ``access_token`` is required; every other field has a default.
+    """
+
+    status: str = Field(default="success", description="Statut de la connexion")
+    access_token: str = Field(..., description="Token d'accès admin")
+    token_type: str = Field(default="bearer", description="Type de token")
+    expires_in: int = Field(default=86400, description="Durée de validité en secondes")
+    message: str = Field(default="Login successful", description="Message de confirmation")
+
+    # Dict-based model config (Pydantic v2) replacing the deprecated inner
+    # `class Config`.
+    model_config = {
+        "json_schema_extra": {
+            "example": {
+                "status": "success",
+                "access_token": "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9...",
+                "token_type": "bearer",
+                "expires_in": 86400,
+                "message": "Login successful"
+            }
+        }
+    }
+
+
+class AdminUserUpdateRequest(BaseModel):
+    """Request body for changing a user's plan; ``plan`` must be one of the listed literals."""
+
+    # The field example lives in json_schema_extra; passing `example=` as an
+    # extra Field keyword is deprecated in Pydantic v2 and yields the same schema.
+    plan: Literal["free", "starter", "pro", "business", "enterprise"] = Field(
+        ...,
+        description="Nouveau plan d'abonnement",
+        json_schema_extra={"example": "pro"},
+    )
+
+    # Dict-based model config (Pydantic v2) replacing the deprecated inner
+    # `class Config`.
+    model_config = {
+        "json_schema_extra": {
+            "example": {
+                "plan": "pro"
+            }
+        }
+    }
+
+
+class AdminUserData(BaseModel):
+    """User record as exposed by admin responses; every field is required."""
+
+    id: str = Field(
+        ...,
+        description="Identifiant unique de l'utilisateur",
+    )
+    email: str = Field(
+        ...,
+        description="Adresse email",
+    )
+    name: str = Field(
+        ...,
+        description="Nom de l'utilisateur",
+    )
+    plan: str = Field(
+        ...,
+        description="Plan d'abonnement actuel",
+    )
+    subscription_status: str = Field(
+        ...,
+        description="Statut de l'abonnement",
+    )
+    docs_translated_this_month: int = Field(
+        ...,
+        description="Documents traduits ce mois",
+    )
+    pages_translated_this_month: int = Field(
+        ...,
+        description="Pages traduites ce mois",
+    )
+    extra_credits: int = Field(
+        ...,
+        description="Crédits supplémentaires",
+    )
+    created_at: str = Field(
+        ...,
+        description="Date de création du compte",
+    )
+    plan_limits: Dict[str, Any] = Field(
+        ...,
+        description="Limites du plan actuel",
+    )
+
+
+class AdminUserResponse(BaseModel):
+    """Envelope for admin user operations: the user under ``data`` plus a ``meta`` dict.
+
+    ``meta`` defaults to an empty dict.
+    """
+
+    data: AdminUserData
+    meta: dict = Field(default_factory=dict)
+
+    # Dict-based model config (Pydantic v2) replacing the deprecated inner
+    # `class Config`.
+    model_config = {
+        "json_schema_extra": {
+            "example": {
+                "data": {
+                    "id": "usr_abc123def456",
+                    "email": "utilisateur@exemple.com",
+                    "name": "Jean Dupont",
+                    "plan": "pro",
+                    "subscription_status": "active",
+                    "docs_translated_this_month": 15,
+                    "pages_translated_this_month": 42,
+                    "extra_credits": 0,
+                    "created_at": "2024-01-01T00:00:00Z",
+                    "plan_limits": {
+                        "docs_per_month": 100,
+                        "max_pages_per_doc": 100
+                    }
+                },
+                "meta": {}
+            }
+        }
+    }
+
+
+class AdminUsersListResponse(BaseModel):
+    """Response body for the admin user listing: a total count and the user records."""
+
+    total: int = Field(
+        ...,
+        description="Nombre total d'utilisateurs",
+    )
+    users: List[AdminUserData] = Field(
+        ...,
+        description="Liste des utilisateurs",
+    )
+
+
+class AdminDashboardResponse(BaseModel):
+    """Response body for the admin dashboard.
+
+    Carries a timestamp, an overall status string and five free-form
+    dictionaries (system, providers, cleanup, rate limits, config). All
+    fields are required.
+    """
+
+    timestamp: str = Field(..., description="Timestamp de la réponse")
+    status: str = Field(..., description="Statut global du système")
+    system: Dict[str, Any] = Field(..., description="Informations système")
+    providers: Dict[str, Any] = Field(..., description="Statut des providers")
+    cleanup: Dict[str, Any] = Field(..., description="Statut du cleanup")
+    rate_limits: Dict[str, Any] = Field(..., description="Statut des rate limits")
+    config: Dict[str, Any] = Field(..., description="Configuration actuelle")
+
+    # Dict-based model config (Pydantic v2) replacing the deprecated inner
+    # `class Config`.
+    model_config = {
+        "json_schema_extra": {
+            "example": {
+                "timestamp": "2024-01-15T10:30:00Z",
+                "status": "healthy",
+                "system": {
+                    "memory": {},
+                    "disk": {}
+                },
+                "providers": {
+                    "google": {
+                        "name": "google",
+                        "available": True,
+                        "last_check": "2024-01-15T10:29:00Z"
+                    }
+                },
+                "cleanup": {
+                    "files_cleaned": 12,
+                    "tracked_files_count": 5
+                },
+                "rate_limits": {
+                    "active_clients": 3
+                },
+                "config": {
+                    "max_file_size_mb": 50,
+                    "supported_extensions": [".xlsx", ".docx", ".pptx"],
+                    "translation_service": "google"
+                }
+            }
+        }
+    }
+
+
+class AdminStatsResponse(BaseModel):
+    """Response body for admin statistics.
+
+    Four required free-form dictionaries: users, translations, cache and config.
+    """
+
+    users: Dict[str, Any] = Field(..., description="Statistiques utilisateurs")
+    translations: Dict[str, Any] = Field(..., description="Statistiques de traduction")
+    cache: Dict[str, Any] = Field(..., description="Statistiques du cache")
+    config: Dict[str, Any] = Field(..., description="Configuration actuelle")
+
+    # Dict-based model config (Pydantic v2) replacing the deprecated inner
+    # `class Config`.
+    model_config = {
+        "json_schema_extra": {
+            "example": {
+                "users": {
+                    "total": 150,
+                    "active_this_month": 45,
+                    "by_plan": {
+                        "free": 100,
+                        "pro": 40,
+                        "business": 10
+                    }
+                },
+                "translations": {
+                    "docs_this_month": 350,
+                    "pages_this_month": 1250
+                },
+                "cache": {
+                    "hits": 1500,
+                    "misses": 500,
+                    "size": 42
+                },
+                "config": {
+                    "translation_service": "google",
+                    "max_file_size_mb": 50,
+                    "supported_extensions": [".xlsx", ".docx", ".pptx"]
+                }
+            }
+        }
+    }
+
+
+class AdminRevokeApiKeyRequest(BaseModel):
+    """Request body for admin API key revocation; the reason is optional."""
+
+    # The field example lives in json_schema_extra; passing `example=` as an
+    # extra Field keyword is deprecated in Pydantic v2 and yields the same schema.
+    reason: Optional[str] = Field(
+        None,
+        description="Raison de la révocation (optionnel)",
+        json_schema_extra={"example": "Violation des conditions d'utilisation"},
+    )
+
+    # Dict-based model config (Pydantic v2) replacing the deprecated inner
+    # `class Config`.
+    model_config = {
+        "json_schema_extra": {
+            "example": {
+                "reason": "Violation des conditions d'utilisation"
+            }
+        }
+    }
+
+
+class AdminRevokeApiKeyData(BaseModel):
+    """Payload describing an API key revoked by an admin.
+
+    All fields are required except ``reason``, which defaults to ``None``.
+    """
+
+    id: str = Field(
+        ...,
+        description="Identifiant de la clé révoquée",
+    )
+    revoked: bool = Field(
+        ...,
+        description="Confirmation de révocation",
+    )
+    revoked_at: str = Field(
+        ...,
+        description="Date de révocation (ISO 8601)",
+    )
+    owner_user_id: str = Field(
+        ...,
+        description="ID de l'utilisateur propriétaire",
+    )
+    reason: Optional[str] = Field(
+        None,
+        description="Raison de la révocation",
+    )
+
+
+class AdminRevokeApiKeyResponse(BaseModel):
+    """Envelope for admin API key revocation: revocation details under ``data`` plus ``meta``.
+
+    ``meta`` defaults to an empty dict.
+    """
+
+    data: AdminRevokeApiKeyData
+    meta: dict = Field(default_factory=dict)
+
+    # Dict-based model config (Pydantic v2) replacing the deprecated inner
+    # `class Config`.
+    model_config = {
+        "json_schema_extra": {
+            "example": {
+                "data": {
+                    "id": "550e8400-e29b-41d4-a716-446655440000",
+                    "revoked": True,
+                    "revoked_at": "2024-01-15T16:00:00Z",
+                    "owner_user_id": "usr_abc123def456",
+                    "reason": "Violation des conditions d'utilisation"
+                },
+                "meta": {}
+            }
+        }
+    }
\ No newline at end of file
diff --git a/schemas/api_keys.py b/schemas/api_keys.py
new file mode 100644
index 0000000..6ad4f93
--- /dev/null
+++ b/schemas/api_keys.py
@@ -0,0 +1,177 @@
+"""
+Pydantic models for API key endpoints
+Story 3.6: Documentation OpenAPI (Swagger + ReDoc)
+"""
+
+from pydantic import BaseModel, Field
+from typing import Optional
+
+
+class APIKeyCreateRequest(BaseModel):
+    """Request body for creating an API key.
+
+    ``name`` is optional, limited to 100 characters, and defaults to
+    "Default API Key".
+    """
+
+    # The field example lives in json_schema_extra; passing `example=` as an
+    # extra Field keyword is deprecated in Pydantic v2 and yields the same schema.
+    name: Optional[str] = Field(
+        default="Default API Key",
+        max_length=100,
+        description="Nom descriptif pour la clé API",
+        json_schema_extra={"example": "Production API Key"},
+    )
+
+    # Dict-based model config (Pydantic v2) replacing the deprecated inner
+    # `class Config`.
+    model_config = {
+        "json_schema_extra": {
+            "example": {
+                "name": "Production API Key"
+            }
+        }
+    }
+
+
+class APIKeyData(BaseModel):
+    """API key details including the full secret ``key``; every field is required.
+
+    Per the field description, the full key is meant to be shown only once,
+    at creation time.
+    """
+
+    # Field examples live in json_schema_extra; passing `example=` as an extra
+    # Field keyword is deprecated in Pydantic v2 and yields the same schema.
+    id: str = Field(
+        ...,
+        description="Identifiant unique de la clé API",
+        json_schema_extra={"example": "550e8400-e29b-41d4-a716-446655440000"},
+    )
+    key: str = Field(
+        ...,
+        description="Clé API complète (affichée UNE SEULE FOIS à la création)",
+        json_schema_extra={"example": "sk_live_abc123def456ghi789..."},
+    )
+    name: str = Field(
+        ...,
+        description="Nom de la clé API",
+        json_schema_extra={"example": "Production API Key"},
+    )
+    key_prefix: str = Field(
+        ...,
+        description="Préfixe de la clé (pour identification)",
+        json_schema_extra={"example": "sk_live_"},
+    )
+    created_at: str = Field(
+        ...,
+        description="Date de création (ISO 8601)",
+        json_schema_extra={"example": "2024-01-15T10:30:00Z"},
+    )
+
+
+class APIKeyResponse(BaseModel):
+    """Envelope for API key creation: the new key under ``data`` plus ``meta``.
+
+    ``meta`` defaults to an empty dict.
+    """
+
+    data: APIKeyData
+    meta: dict = Field(default_factory=dict)
+
+    # Dict-based model config (Pydantic v2) replacing the deprecated inner
+    # `class Config`.
+    model_config = {
+        "json_schema_extra": {
+            "example": {
+                "data": {
+                    "id": "550e8400-e29b-41d4-a716-446655440000",
+                    "key": "sk_live_abc123def456ghi789jkl012mno345pqr678...",
+                    "name": "Production API Key",
+                    "key_prefix": "sk_live_",
+                    "created_at": "2024-01-15T10:30:00Z"
+                },
+                "meta": {}
+            }
+        }
+    }
+
+
+class APIKeyListItem(BaseModel):
+    """One API key as shown in listings; the secret key itself is not a field.
+
+    All fields are required except ``last_used_at``, which defaults to ``None``.
+    """
+
+    id: str = Field(..., description="Identifiant unique de la clé API")
+    name: str = Field(..., description="Nom de la clé API")
+    key_prefix: str = Field(..., description="Préfixe de la clé (pour identification)")
+    is_active: bool = Field(..., description="Si la clé est active")
+    last_used_at: Optional[str] = Field(None, description="Dernière utilisation (ISO 8601)")
+    usage_count: int = Field(..., description="Nombre total d'utilisations")
+    created_at: str = Field(..., description="Date de création (ISO 8601)")
+
+    # Dict-based model config (Pydantic v2) replacing the deprecated inner
+    # `class Config`.
+    model_config = {
+        "json_schema_extra": {
+            "example": {
+                "id": "550e8400-e29b-41d4-a716-446655440000",
+                "name": "Production API Key",
+                "key_prefix": "sk_live_",
+                "is_active": True,
+                "last_used_at": "2024-01-15T14:30:00Z",
+                "usage_count": 42,
+                "created_at": "2024-01-15T10:30:00Z"
+            }
+        }
+    }
+
+
+class APIKeyListMeta(BaseModel):
+    """Metadata for the API key listing: the required total number of keys."""
+
+    total: int = Field(..., description="Nombre total de clés API")
+
+    # Dict-based model config (Pydantic v2) replacing the deprecated inner
+    # `class Config`.
+    model_config = {
+        "json_schema_extra": {
+            "example": {
+                "total": 2
+            }
+        }
+    }
+
+
+class APIKeyListResponse(BaseModel):
+    """Envelope for the API key listing: list items under ``data``, count under ``meta``.
+
+    Both fields are required.
+    """
+
+    data: list[APIKeyListItem]
+    meta: APIKeyListMeta
+
+    # Dict-based model config (Pydantic v2) replacing the deprecated inner
+    # `class Config`.
+    model_config = {
+        "json_schema_extra": {
+            "example": {
+                "data": [
+                    {
+                        "id": "550e8400-e29b-41d4-a716-446655440000",
+                        "name": "Production API Key",
+                        "key_prefix": "sk_live_",
+                        "is_active": True,
+                        "last_used_at": "2024-01-15T14:30:00Z",
+                        "usage_count": 42,
+                        "created_at": "2024-01-15T10:30:00Z"
+                    },
+                    {
+                        "id": "660e8400-e29b-41d4-a716-446655440001",
+                        "name": "Development API Key",
+                        "key_prefix": "sk_live_",
+                        "is_active": True,
+                        "last_used_at": None,
+                        "usage_count": 0,
+                        "created_at": "2024-01-16T09:00:00Z"
+                    }
+                ],
+                "meta": {
+                    "total": 2
+                }
+            }
+        }
+    }
+
+
+class APIKeyRevokeData(BaseModel):
+    """Payload describing a revoked API key; every field is required."""
+
+    id: str = Field(
+        ...,
+        description="Identifiant de la clé révoquée",
+    )
+    revoked: bool = Field(
+        ...,
+        description="Confirmation de révocation",
+    )
+    revoked_at: str = Field(
+        ...,
+        description="Date de révocation (ISO 8601)",
+    )
+
+
+class APIKeyRevokeResponse(BaseModel):
+    """Envelope for API key revocation: revocation details under ``data`` plus ``meta``.
+
+    ``meta`` defaults to an empty dict.
+    """
+
+    data: APIKeyRevokeData
+    meta: dict = Field(default_factory=dict)
+
+    # Dict-based model config (Pydantic v2) replacing the deprecated inner
+    # `class Config`.
+    model_config = {
+        "json_schema_extra": {
+            "example": {
+                "data": {
+                    "id": "550e8400-e29b-41d4-a716-446655440000",
+                    "revoked": True,
+                    "revoked_at": "2024-01-15T16:00:00Z"
+                },
+                "meta": {}
+            }
+        }
+    }
\ No newline at end of file
diff --git a/schemas/auth.py b/schemas/auth.py
new file mode 100644
index 0000000..66632b7
--- /dev/null
+++ b/schemas/auth.py
@@ -0,0 +1,163 @@
+"""
+Pydantic models for authentication endpoints
+Story 3.6: Documentation OpenAPI (Swagger + ReDoc)
+"""
+
+from pydantic import BaseModel, Field, EmailStr
+from typing import Optional
+
+
+class RegisterRequest(BaseModel):
+    """Request body for user registration.
+
+    ``email`` is validated as an email address, ``password`` must be at least
+    8 characters long, and ``name`` is optional.
+    """
+
+    # Field examples live in json_schema_extra; passing `example=` as an extra
+    # Field keyword is deprecated in Pydantic v2 and yields the same schema.
+    email: EmailStr = Field(
+        ...,
+        description="Adresse email de l'utilisateur",
+        json_schema_extra={"example": "utilisateur@exemple.com"},
+    )
+    password: str = Field(
+        ...,
+        description="Mot de passe (min 8 caractères)",
+        min_length=8,
+        json_schema_extra={"example": "MotDePasse123!"},
+    )
+    name: Optional[str] = Field(
+        None,
+        description="Nom complet de l'utilisateur (optionnel)",
+        json_schema_extra={"example": "Jean Dupont"},
+    )
+
+    # Dict-based model config (Pydantic v2) replacing the deprecated inner
+    # `class Config`.
+    model_config = {
+        "json_schema_extra": {
+            "example": {
+                "email": "utilisateur@exemple.com",
+                "password": "MotDePasse123!",
+                "name": "Jean Dupont"
+            }
+        }
+    }
+
+
+class LoginRequest(BaseModel):
+    """Request body for user login: a validated email address and a password."""
+
+    # Field examples live in json_schema_extra; passing `example=` as an extra
+    # Field keyword is deprecated in Pydantic v2 and yields the same schema.
+    email: EmailStr = Field(
+        ...,
+        description="Adresse email de l'utilisateur",
+        json_schema_extra={"example": "utilisateur@exemple.com"},
+    )
+    password: str = Field(
+        ...,
+        description="Mot de passe",
+        json_schema_extra={"example": "MotDePasse123!"},
+    )
+
+    # Dict-based model config (Pydantic v2) replacing the deprecated inner
+    # `class Config`.
+    model_config = {
+        "json_schema_extra": {
+            "example": {
+                "email": "utilisateur@exemple.com",
+                "password": "MotDePasse123!"
+            }
+        }
+    }
+
+
+class TokenData(BaseModel):
+    """Access/refresh token pair plus the token type (defaults to "bearer")."""
+
+    # Field examples live in json_schema_extra; passing `example=` as an extra
+    # Field keyword is deprecated in Pydantic v2 and yields the same schema.
+    access_token: str = Field(
+        ...,
+        description="Token d'accès JWT (expire dans 15 minutes)",
+        json_schema_extra={"example": "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9..."},
+    )
+    refresh_token: str = Field(
+        ...,
+        description="Token de rafraîchissement JWT (expire dans 7 jours)",
+        json_schema_extra={"example": "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9..."},
+    )
+    token_type: str = Field(
+        default="bearer",
+        description="Type de token",
+        json_schema_extra={"example": "bearer"},
+    )
+
+
+class TokenResponse(BaseModel):
+    """Envelope for authentication tokens: the token pair under ``data`` plus ``meta``.
+
+    ``meta`` defaults to an empty dict.
+    """
+
+    data: TokenData
+    meta: dict = Field(default_factory=dict)
+
+    # Dict-based model config (Pydantic v2) replacing the deprecated inner
+    # `class Config`.
+    model_config = {
+        "json_schema_extra": {
+            "example": {
+                "data": {
+                    "access_token": "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9...",
+                    "refresh_token": "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9...",
+                    "token_type": "bearer"
+                },
+                "meta": {}
+            }
+        }
+    }
+
+
+class LogoutResponse(BaseModel):
+    """Envelope for logout.
+
+    Both fields are optional: ``data`` defaults to a fresh dict holding the
+    confirmation message and ``meta`` to an empty dict.
+    """
+
+    data: dict = Field(
+        default_factory=lambda: {"message": "Déconnexion réussie"},
+        description="Message de confirmation"
+    )
+    meta: dict = Field(default_factory=dict)
+
+    # Dict-based model config (Pydantic v2) replacing the deprecated inner
+    # `class Config`.
+    model_config = {
+        "json_schema_extra": {
+            "example": {
+                "data": {
+                    "message": "Déconnexion réussie"
+                },
+                "meta": {}
+            }
+        }
+    }
+
+
+class RefreshRequest(BaseModel):
+    """Request body for token refresh: the required refresh token."""
+
+    # The field example lives in json_schema_extra; passing `example=` as an
+    # extra Field keyword is deprecated in Pydantic v2 and yields the same schema.
+    refresh_token: str = Field(
+        ...,
+        description="Token de rafraîchissement",
+        json_schema_extra={"example": "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9..."},
+    )
+
+    # Dict-based model config (Pydantic v2) replacing the deprecated inner
+    # `class Config`.
+    model_config = {
+        "json_schema_extra": {
+            "example": {
+                "refresh_token": "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9..."
+            }
+        }
+    }
+
+
+class UserData(BaseModel):
+    """Minimal user record (id, email, subscription tier); every field is required."""
+
+    id: str = Field(
+        ...,
+        description="Identifiant unique de l'utilisateur",
+    )
+    email: str = Field(
+        ...,
+        description="Adresse email",
+    )
+    tier: str = Field(
+        ...,
+        description="Niveau d'abonnement (free, pro, etc.)",
+    )
+
+
+class RegisterResponse(BaseModel):
+    """Envelope for registration: the created user under ``data`` plus ``meta``.
+
+    ``meta`` defaults to an empty dict.
+    """
+
+    data: UserData
+    meta: dict = Field(default_factory=dict)
+
+    # Dict-based model config (Pydantic v2) replacing the deprecated inner
+    # `class Config`.
+    model_config = {
+        "json_schema_extra": {
+            "example": {
+                "data": {
+                    "id": "usr_abc123def456",
+                    "email": "utilisateur@exemple.com",
+                    "tier": "free"
+                },
+                "meta": {}
+            }
+        }
+    }
\ No newline at end of file
diff --git a/schemas/common.py b/schemas/common.py
new file mode 100644
index 0000000..6885c02
--- /dev/null
+++ b/schemas/common.py
@@ -0,0 +1,179 @@
+"""
+Common Pydantic models for API documentation
+Story 3.6: Documentation OpenAPI (Swagger + ReDoc)
+"""
+
+from pydantic import BaseModel, Field
+from typing import Optional, Dict, Any, List
+
+
+class SuccessResponse(BaseModel):
+    """Generic success envelope: a required ``data`` dict and an optional ``meta`` dict.
+
+    ``meta`` defaults to an empty dict.
+    """
+
+    data: Dict[str, Any] = Field(..., description="Données de réponse")
+    meta: Dict[str, Any] = Field(default_factory=dict, description="Métadonnées")
+
+    # Dict-based model config (Pydantic v2) replacing the deprecated inner
+    # `class Config`.
+    model_config = {
+        "json_schema_extra": {
+            "example": {
+                "data": {
+                    "message": "Opération réussie"
+                },
+                "meta": {}
+            }
+        }
+    }
+
+
+class HealthCheckResponse(BaseModel):
+    """Response body for the health check endpoint.
+
+    An overall status string, the configured translation service name, and
+    one free-form dictionary per monitored component. All fields are required.
+    """
+
+    status: str = Field(..., description="Statut de santé (healthy/unhealthy)")
+    translation_service: str = Field(..., description="Service de traduction configuré")
+    database: Dict[str, Any] = Field(..., description="Statut de la base de données")
+    redis: Dict[str, Any] = Field(..., description="Statut de Redis")
+    memory: Dict[str, Any] = Field(..., description="Informations mémoire")
+    disk: Dict[str, Any] = Field(..., description="Informations disque")
+    cleanup_service: Dict[str, Any] = Field(..., description="Statut du service de cleanup")
+    rate_limits: Dict[str, Any] = Field(..., description="Configuration des rate limits")
+    translation_cache: Dict[str, Any] = Field(..., description="Statut du cache de traduction")
+
+    # Dict-based model config (Pydantic v2) replacing the deprecated inner
+    # `class Config`.
+    model_config = {
+        "json_schema_extra": {
+            "example": {
+                "status": "healthy",
+                "translation_service": "google",
+                "database": {
+                    "status": "healthy"
+                },
+                "redis": {
+                    "status": "not_configured"
+                },
+                "memory": {},
+                "disk": {},
+                "cleanup_service": {
+                    "running": True
+                },
+                "rate_limits": {
+                    "requests_per_minute": 30,
+                    "translations_per_minute": 10
+                },
+                "translation_cache": {
+                    "hits": 1500,
+                    "misses": 500,
+                    "size": 42
+                }
+            }
+        }
+    }
+
+
+class ReadyCheckResponse(BaseModel):
+    """Response body for the readiness check endpoint.
+
+    ``ready`` is required; ``issues`` is optional and defaults to ``None``.
+    """
+
+    ready: bool = Field(..., description="Si le service est prêt à recevoir du trafic")
+    issues: Optional[List[str]] = Field(
+        None,
+        description="Liste des problèmes si non prêt"
+    )
+
+    # Dict-based model config (Pydantic v2) replacing the deprecated inner
+    # `class Config`.
+    # At schema level, JSON Schema `examples` is a list of plain example
+    # instances. The OpenAPI Example Object wrapper (`summary` / `value`) is
+    # only valid on media types and parameters, so here it would be shown
+    # verbatim as if it were the payload.
+    model_config = {
+        "json_schema_extra": {
+            "examples": [
+                # Service ready
+                {
+                    "ready": True
+                },
+                # Service not ready
+                {
+                    "ready": False,
+                    "issues": ["database_unavailable", "redis_unavailable"]
+                }
+            ]
+        }
+    }
+
+
+class RootResponse(BaseModel):
+    """Response body for the root endpoint.
+
+    API name, version and status, documentation and API base URLs, and the
+    supported file formats. All fields are required.
+    """
+
+    name: str = Field(..., description="Nom de l'API")
+    version: str = Field(..., description="Version de l'API")
+    status: str = Field(..., description="Statut opérationnel")
+    docs: str = Field(..., description="URL de la documentation Swagger")
+    redoc: str = Field(..., description="URL de la documentation ReDoc")
+    api_base: str = Field(..., description="Base URL des endpoints API")
+    supported_formats: List[str] = Field(..., description="Formats de fichier supportés")
+
+    # Dict-based model config (Pydantic v2) replacing the deprecated inner
+    # `class Config`.
+    model_config = {
+        "json_schema_extra": {
+            "example": {
+                "name": "Office Translator API",
+                "version": "1.0.0",
+                "status": "operational",
+                "docs": "/docs",
+                "redoc": "/redoc",
+                "api_base": "/api/v1",
+                "supported_formats": [".xlsx", ".docx", ".pptx"]
+            }
+        }
+    }
+
+
+class RateLimitStatusResponse(BaseModel):
+    """Response body for rate limit status.
+
+    The client IP, the configured limits (integer values) and a free-form
+    current-usage dictionary. All fields are required.
+    """
+
+    client_ip: str = Field(..., description="Adresse IP du client")
+    limits: Dict[str, int] = Field(..., description="Limites configurées")
+    current_usage: Dict[str, Any] = Field(..., description="Utilisation actuelle")
+
+    # Dict-based model config (Pydantic v2) replacing the deprecated inner
+    # `class Config`.
+    model_config = {
+        "json_schema_extra": {
+            "example": {
+                "client_ip": "192.168.1.1",
+                "limits": {
+                    "requests_per_minute": 30,
+                    "requests_per_hour": 200,
+                    "translations_per_minute": 10,
+                    "translations_per_hour": 50
+                },
+                "current_usage": {
+                    "requests_this_minute": 5,
+                    "requests_this_hour": 42,
+                    "translations_this_minute": 1,
+                    "translations_this_hour": 8
+                }
+            }
+        }
+    }
+
+
+class MetricsResponse(BaseModel):
+    """Response body for the metrics endpoint.
+
+    Four required free-form dictionaries: system, cleanup, rate limits and config.
+    """
+
+    system: Dict[str, Any] = Field(..., description="Métriques système")
+    cleanup: Dict[str, Any] = Field(..., description="Métriques de cleanup")
+    rate_limits: Dict[str, Any] = Field(..., description="Métriques de rate limiting")
+    config: Dict[str, Any] = Field(..., description="Configuration actuelle")
+
+    # Dict-based model config (Pydantic v2) replacing the deprecated inner
+    # `class Config`.
+    model_config = {
+        "json_schema_extra": {
+            "example": {
+                "system": {
+                    "memory": {},
+                    "disk": {},
+                    "status": "healthy"
+                },
+                "cleanup": {
+                    "files_cleaned": 12,
+                    "last_cleanup": "2024-01-15T10:00:00Z"
+                },
+                "rate_limits": {
+                    "active_clients": 3
+                },
+                "config": {
+                    "max_file_size_mb": 50,
+                    "supported_extensions": [".xlsx", ".docx", ".pptx"],
+                    "translation_service": "google"
+                }
+            }
+        }
+    }
\ No newline at end of file
diff --git a/schemas/errors.py b/schemas/errors.py
new file mode 100644
index 0000000..c902766
--- /dev/null
+++ b/schemas/errors.py
@@ -0,0 +1,279 @@
+"""
+Error response models for API documentation
+Story 3.6: Documentation OpenAPI (Swagger + ReDoc)
+"""
+
+from enum import Enum
+from pydantic import BaseModel, Field
+from typing import Optional, Dict, Any
+
+
+class ErrorCode(str, Enum):
+ """All error codes used in the API.
+
+ String-valued enumeration: every member's value is identical to its name.
+ Members are grouped below into client errors, provider errors and system
+ errors.
+ """
+
+ # Client errors (4xx)
+ INVALID_FORMAT = "INVALID_FORMAT"
+ CORRUPTED_FILE = "CORRUPTED_FILE"
+ FILE_TOO_LARGE = "FILE_TOO_LARGE"
+ URL_DOWNLOAD_FAILED = "URL_DOWNLOAD_FAILED"
+ URL_UNREACHABLE = "URL_UNREACHABLE"
+ QUOTA_EXCEEDED = "QUOTA_EXCEEDED"
+ UNAUTHORIZED = "UNAUTHORIZED"
+ FORBIDDEN = "FORBIDDEN"
+ INVALID_CREDENTIALS = "INVALID_CREDENTIALS"
+ USER_NOT_FOUND = "USER_NOT_FOUND"
+ EMAIL_EXISTS = "EMAIL_EXISTS"
+ INVALID_EMAIL = "INVALID_EMAIL"
+ TOKEN_EXPIRED = "TOKEN_EXPIRED"
+ TOKEN_MISSING = "TOKEN_MISSING"
+ TOKEN_INVALID = "TOKEN_INVALID"
+ MISSING_API_KEY = "MISSING_API_KEY"
+ INVALID_API_KEY = "INVALID_API_KEY"
+ API_KEY_REVOKED = "API_KEY_REVOKED"
+ API_KEY_NOT_FOUND = "API_KEY_NOT_FOUND"
+ API_KEY_LIMIT_REACHED = "API_KEY_LIMIT_REACHED"
+ PRO_FEATURE_REQUIRED = "PRO_FEATURE_REQUIRED"
+ GLOSSARY_NOT_FOUND = "GLOSSARY_NOT_FOUND"
+ PROMPT_NOT_FOUND = "PROMPT_NOT_FOUND"
+ INVALID_WEBHOOK_URL = "INVALID_WEBHOOK_URL"
+ FILE_EXPIRED = "FILE_EXPIRED"
+ NOT_READY = "NOT_READY"
+ NOT_FOUND = "NOT_FOUND"
+ INVALID_REQUEST = "INVALID_REQUEST"
+ INVALID_JOB_ID = "INVALID_JOB_ID"
+ ACCESS_DENIED = "ACCESS_DENIED"
+
+ # Provider errors (5xx but not 500)
+ PROVIDER_UNAVAILABLE = "PROVIDER_UNAVAILABLE"
+ PROVIDER_RATE_LIMITED = "PROVIDER_RATE_LIMITED"
+ ALL_PROVIDERS_FAILED = "ALL_PROVIDERS_FAILED"
+ WEBHOOK_FAILED = "WEBHOOK_FAILED"
+
+ # System errors (5xx)
+ INTERNAL_ERROR = "INTERNAL_ERROR"
+ AUTH_HASHING_UNAVAILABLE = "AUTH_HASHING_UNAVAILABLE"
+
+
+class ErrorResponse(BaseModel):
+    """Standard error response format.
+
+    Attributes:
+        error: Standardized error code (an ``ErrorCode`` member).
+        message: Human-readable error message.
+        details: Optional mapping with additional information about the error.
+    """
+
+    # Per-field examples go through ``json_schema_extra``: passing arbitrary
+    # extra keyword arguments (``example=...``) to ``Field`` is deprecated in
+    # Pydantic v2. The emitted schema still carries the same ``example`` key.
+    error: ErrorCode = Field(
+        ...,
+        description="Code d'erreur standardisé",
+        json_schema_extra={"example": "INVALID_FORMAT"},
+    )
+    message: str = Field(
+        ...,
+        description="Message d'erreur lisible en français",
+        json_schema_extra={
+            "example": "Format de fichier non supporté. Formats acceptés: .xlsx, .docx, .pptx"
+        },
+    )
+    details: Optional[Dict[str, Any]] = Field(
+        None,
+        description="Détails supplémentaires sur l'erreur"
+    )
+
+    # Pydantic v2 configuration; replaces the deprecated nested ``class Config``.
+    model_config = {
+        "json_schema_extra": {
+            "example": {
+                "error": "INVALID_FORMAT",
+                "message": "Format PDF non supporté. Formats acceptés: .xlsx, .docx, .pptx",
+                "details": {
+                    "accepted_formats": [".xlsx", ".docx", ".pptx"],
+                    "detected_format": ".pdf"
+                }
+            }
+        }
+    }
+
+
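+# Pre-defined error examples for OpenAPI documentation.
+# Each entry has a "summary" and a "value" (an ErrorResponse-shaped payload).
+# Keys are example names, not always error codes: the WEBHOOK_* entries all
+# carry the INVALID_WEBHOOK_URL code.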
+ERROR_EXAMPLES = {
+ "INVALID_FORMAT": {
+ "summary": "Format de fichier non supporté",
+ "value": {
+ "error": "INVALID_FORMAT",
+ "message": "Format PDF non supporté. Formats acceptés: .xlsx, .docx, .pptx",
+ "details": {
+ "accepted_formats": [".xlsx", ".docx", ".pptx"],
+ "detected_format": ".pdf"
+ }
+ }
+ },
+ "CORRUPTED_FILE": {
+ "summary": "Fichier corrompu",
+ "value": {
+ "error": "CORRUPTED_FILE",
+ "message": "Le fichier n'est pas un document Office valide ou est corrompu.",
+ "details": {
+ "reason": "Invalid magic bytes"
+ }
+ }
+ },
+ "FILE_TOO_LARGE": {
+ "summary": "Fichier trop volumineux",
+ "value": {
+ "error": "FILE_TOO_LARGE",
+ "message": "Le fichier dépasse la limite de 50 MB.",
+ "details": {
+ "max_size_mb": 50,
+ "actual_size_mb": 65.3
+ }
+ }
+ },
+ "QUOTA_EXCEEDED": {
+ "summary": "Limite quotidienne atteinte",
+ "value": {
+ "error": "QUOTA_EXCEEDED",
+ "message": "Limite quotidienne de 5 traductions atteinte. Réessayez après minuit UTC.",
+ "details": {
+ "current_usage": 5,
+ "limit": 5,
+ "tier": "free",
+ "reset_at": "2024-01-16T00:00:00Z"
+ }
+ }
+ },
+ "UNAUTHORIZED": {
+ "summary": "Authentification requise",
+ "value": {
+ "error": "UNAUTHORIZED",
+ "message": "Authentification requise.",
+ "details": None
+ }
+ },
+ "FORBIDDEN": {
+ "summary": "Accès interdit",
+ "value": {
+ "error": "FORBIDDEN",
+ "message": "Vous n'avez pas accès à cette ressource.",
+ "details": None
+ }
+ },
+ "INVALID_CREDENTIALS": {
+ "summary": "Identifiants invalides",
+ "value": {
+ "error": "INVALID_CREDENTIALS",
+ "message": "Email ou mot de passe incorrect.",
+ "details": None
+ }
+ },
+ "EMAIL_EXISTS": {
+ "summary": "Email déjà utilisé",
+ "value": {
+ "error": "EMAIL_EXISTS",
+ "message": "Un compte existe déjà avec cette adresse email.",
+ "details": None
+ }
+ },
+ "TOKEN_EXPIRED": {
+ "summary": "Token expiré",
+ "value": {
+ "error": "TOKEN_EXPIRED",
+ "message": "Token invalide ou expiré.",
+ "details": None
+ }
+ },
+ "PRO_FEATURE_REQUIRED": {
+ "summary": "Fonctionnalité Pro requise",
+ "value": {
+ "error": "PRO_FEATURE_REQUIRED",
+ "message": "Cette fonctionnalité nécessite un abonnement Pro.",
+ "details": {
+ "feature": "llm_translation",
+ "current_tier": "free",
+ "required_tier": "pro"
+ }
+ }
+ },
+ "API_KEY_NOT_FOUND": {
+ "summary": "Clé API non trouvée",
+ "value": {
+ "error": "API_KEY_NOT_FOUND",
+ "message": "Clé API non trouvée, n'appartient pas à l'utilisateur ou déjà révoquée.",
+ "details": None
+ }
+ },
+ "FILE_EXPIRED": {
+ "summary": "Fichier expiré",
+ "value": {
+ "error": "FILE_EXPIRED",
+ "message": "Le fichier traduit n'est plus disponible ou a expiré.",
+ "details": {
+ "job_id": "tr_abc123",
+ "status": "not_found"
+ }
+ }
+ },
+ "NOT_READY": {
+ "summary": "Traduction en cours",
+ "value": {
+ "error": "NOT_READY",
+ "message": "La traduction est encore en cours.",
+ "details": {
+ "job_id": "tr_abc123",
+ "status": "processing",
+ "progress_percent": 45
+ }
+ }
+ },
+ "NOT_FOUND": {
+ "summary": "Ressource non trouvée",
+ "value": {
+ "error": "NOT_FOUND",
+ "message": "Ressource non trouvée.",
+ "details": None
+ }
+ },
+ "INTERNAL_ERROR": {
+ "summary": "Erreur interne",
+ "value": {
+ "error": "INTERNAL_ERROR",
+ "message": "Une erreur interne est survenue. Veuillez réessayer.",
+ "details": None
+ }
+ },
+ "INVALID_WEBHOOK_URL": {
+ "summary": "URL webhook invalide",
+ "value": {
+ "error": "INVALID_WEBHOOK_URL",
+ "message": "L'URL du webhook doit être une URL HTTP/HTTPS valide.",
+ "details": {
+ "field": "webhook_url",
+ "allowed_schemes": ["http", "https"],
+ "hint": "L'URL doit commencer par http:// ou https://"
+ }
+ }
+ },
+ "WEBHOOK_LOCALHOST_BLOCKED": {
+ "summary": "Localhost non autorisé",
+ "value": {
+ "error": "INVALID_WEBHOOK_URL",
+ "message": "Les URLs localhost ne sont pas autorisées.",
+ "details": {
+ "field": "webhook_url",
+ "reason": "localhost_blocked"
+ }
+ }
+ },
+ "WEBHOOK_PRIVATE_IP_BLOCKED": {
+ "summary": "IP privée non autorisée",
+ "value": {
+ "error": "INVALID_WEBHOOK_URL",
+ "message": "Les adresses IP privées ne sont pas autorisées.",
+ "details": {
+ "field": "webhook_url",
+ "reason": "private_ip_blocked"
+ }
+ }
+ },
+ "WEBHOOK_CREDENTIALS_IN_URL": {
+ "summary": "Credentials dans l'URL",
+ "value": {
+ "error": "INVALID_WEBHOOK_URL",
+ "message": "L'URL ne doit pas contenir d'identifiants (credentials).",
+ "details": {
+ "field": "webhook_url",
+ "reason": "credentials_in_url"
+ }
+ }
+ }
+}
diff --git a/schemas/glossary_schemas.py b/schemas/glossary_schemas.py
new file mode 100644
index 0000000..eeb579a
--- /dev/null
+++ b/schemas/glossary_schemas.py
@@ -0,0 +1,100 @@
+"""
+Pydantic schemas for glossary endpoints.
+Story 3.9: Glossaires - Endpoint CRUD
+"""
+
+from datetime import datetime
+from uuid import UUID
+from typing import Optional
+
+from pydantic import BaseModel, Field, field_validator
+
+
+class GlossaryTermCreate(BaseModel):
+    """Schema for creating a single term.
+
+    Attributes:
+        source: Source term, 1 to 500 characters once trimmed.
+        target: Target translation, 1 to 500 characters once trimmed.
+    """
+
+    source: str = Field(..., min_length=1, max_length=500, description="Terme source")
+    target: str = Field(
+        ..., min_length=1, max_length=500, description="Traduction cible"
+    )
+
+    @field_validator("source", "target", mode="before")
+    @classmethod
+    def strip_whitespace(cls, v: object) -> object:
+        """Trim surrounding whitespace before the length constraints run.
+
+        Running in "before" mode means ``min_length``/``max_length`` are
+        checked against the trimmed value, so a whitespace-only string is
+        rejected instead of being stored as an empty term. Non-string input is
+        passed through untouched so the normal type validation reports it.
+        """
+        return v.strip() if isinstance(v, str) else v
+
+
+class GlossaryTermResponse(BaseModel):
+ """Schema for a glossary term as returned in responses.
+
+ Carries the term identifier, the source/target pair and an optional
+ creation timestamp.
+ """
+
+ id: str
+ source: str
+ target: str
+ created_at: Optional[datetime] = None
+
+ # from_attributes allows validating from objects exposing these names as
+ # attributes (not only from dicts).
+ model_config = {"from_attributes": True}
+
+
+class GlossaryCreate(BaseModel):
+    """Schema for creating a glossary.
+
+    Attributes:
+        name: Glossary name, 1 to 255 characters once trimmed.
+        terms: Terms to create with the glossary; defaults to an empty list.
+    """
+
+    name: str = Field(..., min_length=1, max_length=255, description="Nom du glossaire")
+    terms: list[GlossaryTermCreate] = Field(
+        default_factory=list, description="Liste des termes"
+    )
+
+    @field_validator("name", mode="before")
+    @classmethod
+    def strip_name(cls, v: object) -> object:
+        """Trim surrounding whitespace before the length constraints run.
+
+        In "before" mode ``min_length`` is checked against the trimmed value,
+        so a whitespace-only name is rejected rather than stored as ``""``.
+        Non-string input is passed through for normal type validation.
+        """
+        return v.strip() if isinstance(v, str) else v
+
+
+class GlossaryUpdate(BaseModel):
+    """Schema for updating a glossary (all fields optional).
+
+    Attributes:
+        name: New name, 1 to 255 characters once trimmed, or ``None`` to leave
+            it unset.
+        terms: New list of terms, or ``None`` to leave it unset.
+    """
+
+    name: Optional[str] = Field(None, min_length=1, max_length=255)
+    terms: Optional[list[GlossaryTermCreate]] = Field(None)
+
+    @field_validator("name", mode="before")
+    @classmethod
+    def strip_name(cls, v: object) -> object:
+        """Trim surrounding whitespace before the length constraints run.
+
+        ``None`` (and any non-string input) is passed through unchanged. A
+        whitespace-only name is trimmed to ``""`` and then rejected by
+        ``min_length`` instead of being accepted as an empty name.
+        """
+        return v.strip() if isinstance(v, str) else v
+
+
+class GlossaryResponse(BaseModel):
+ """Schema for glossary in response (with full terms).
+
+ Holds the glossary identifier and name, the complete list of terms, and
+ optional creation/update timestamps.
+ """
+
+ # NOTE(review): id is typed as str; if the backing object exposes a UUID,
+ # confirm it is converted before validation.
+ id: str
+ name: str
+ terms: list[GlossaryTermResponse] = []
+ created_at: Optional[datetime] = None
+ updated_at: Optional[datetime] = None
+
+ # from_attributes allows validating from objects exposing these names as
+ # attributes (not only from dicts).
+ model_config = {"from_attributes": True}
+
+
+class GlossaryListItem(BaseModel):
+ """Schema for glossary in list (without full terms).
+
+ Exposes only a term count instead of the terms themselves.
+ """
+
+ id: str
+ name: str
+ # Defaults to 0 when the source does not provide a count.
+ terms_count: int = Field(
+ default=0, description="Nombre de termes dans le glossaire"
+ )
+ created_at: Optional[datetime] = None
+
+ # from_attributes allows validating from objects exposing these names as
+ # attributes (not only from dicts).
+ model_config = {"from_attributes": True}
+
+
+class GlossaryListResponse(BaseModel):
+ """Schema for glossaries list response.
+
+ Envelope with the list items under ``data`` and a free-form ``meta`` dict.
+ """
+
+ data: list[GlossaryListItem] = []
+ # Free-form metadata; defaults to an empty dict.
+ meta: dict = Field(default_factory=dict)
+
+
+class GlossaryDetailResponse(BaseModel):
+ """Schema for single glossary response.
+
+ Envelope with the glossary under ``data`` (required) and a free-form
+ ``meta`` dict.
+ """
+
+ data: GlossaryResponse
+ # Free-form metadata; defaults to an empty dict.
+ meta: dict = Field(default_factory=dict)
diff --git a/schemas/prompt_schemas.py b/schemas/prompt_schemas.py
new file mode 100644
index 0000000..6505da0
--- /dev/null
+++ b/schemas/prompt_schemas.py
@@ -0,0 +1,79 @@
+"""
+Pydantic schemas for custom prompt endpoints.
+Story 3.11: Custom Prompts - Endpoint CRUD
+"""
+
+from datetime import datetime
+from typing import Optional
+
+from pydantic import BaseModel, Field, field_validator
+
+
+class PromptCreate(BaseModel):
+    """Schema for creating a prompt.
+
+    Attributes:
+        name: Prompt name, 1 to 255 characters once trimmed.
+        content: Prompt content, 1 to 10000 characters once trimmed.
+    """
+
+    name: str = Field(..., min_length=1, max_length=255, description="Nom du prompt")
+    content: str = Field(
+        ..., min_length=1, max_length=10000, description="Contenu du prompt"
+    )
+
+    @field_validator("name", "content", mode="before")
+    @classmethod
+    def strip_whitespace(cls, v: object) -> object:
+        """Trim surrounding whitespace before the length constraints run.
+
+        In "before" mode ``min_length``/``max_length`` are checked against the
+        trimmed value, so whitespace-only input is rejected instead of being
+        stored as an empty string. Non-string input is passed through for
+        normal type validation.
+        """
+        return v.strip() if isinstance(v, str) else v
+
+
+class PromptUpdate(BaseModel):
+    """Schema for updating a prompt (at least one field required).
+
+    The "at least one field" rule is not enforced by validation; callers are
+    expected to check ``has_updates()``.
+
+    Attributes:
+        name: New name, 1 to 255 characters once trimmed, or ``None``.
+        content: New content, 1 to 10000 characters once trimmed, or ``None``.
+    """
+
+    name: Optional[str] = Field(None, min_length=1, max_length=255)
+    content: Optional[str] = Field(None, min_length=1, max_length=10000)
+
+    @field_validator("name", "content", mode="before")
+    @classmethod
+    def strip_whitespace(cls, v: object) -> object:
+        """Trim surrounding whitespace before the length constraints run.
+
+        ``None`` (and any non-string input) is passed through unchanged. A
+        whitespace-only value is trimmed to ``""`` and rejected by
+        ``min_length``; previously it was accepted as ``""`` and counted as an
+        update by ``has_updates()``.
+        """
+        return v.strip() if isinstance(v, str) else v
+
+    def has_updates(self) -> bool:
+        """Check if at least one field is provided for update."""
+        return self.name is not None or self.content is not None
+
+
+class PromptResponse(BaseModel):
+ """Schema for prompt in response.
+
+ Holds the prompt identifier, name and full content, plus optional
+ creation/update timestamps.
+ """
+
+ id: str
+ name: str
+ content: str
+ created_at: Optional[datetime] = None
+ updated_at: Optional[datetime] = None
+
+ # from_attributes allows validating from objects exposing these names as
+ # attributes (not only from dicts).
+ model_config = {"from_attributes": True}
+
+
+class PromptListItem(BaseModel):
+ """Schema for prompt in list (lighter version).
+
+ Carries a preview of the content instead of the full text.
+ """
+
+ id: str
+ name: str
+ # Required field; the truncation itself is not done here.
+ # NOTE(review): presumably computed by the caller — confirm.
+ content_preview: str = Field(
+ ..., description="First 100 chars of content for list view"
+ )
+ created_at: Optional[datetime] = None
+ updated_at: Optional[datetime] = None
+
+ # from_attributes allows validating from objects exposing these names as
+ # attributes (not only from dicts).
+ model_config = {"from_attributes": True}
+
+
+class PromptListResponse(BaseModel):
+ """Schema for prompts list response.
+
+ Envelope with the list items under ``data`` and a free-form ``meta`` dict.
+ """
+
+ data: list[PromptListItem] = []
+ # Free-form metadata; defaults to an empty dict.
+ meta: dict = Field(default_factory=dict)
+
+
+class PromptDetailResponse(BaseModel):
+ """Schema for single prompt response.
+
+ Envelope with the prompt under ``data`` (required) and a free-form
+ ``meta`` dict.
+ """
+
+ data: PromptResponse
+ # Free-form metadata; defaults to an empty dict.
+ meta: dict = Field(default_factory=dict)
diff --git a/schemas/translation.py b/schemas/translation.py
new file mode 100644
index 0000000..f1094bf
--- /dev/null
+++ b/schemas/translation.py
@@ -0,0 +1,236 @@
+"""
+Pydantic models for translation endpoints
+Story 3.6: Documentation OpenAPI (Swagger + ReDoc)
+"""
+
+from pydantic import BaseModel, Field
+from typing import Optional, Literal, List, Dict, Any
+from datetime import datetime
+
+
+class TranslateResponseData(BaseModel):
+    """Response data for a translation request.
+
+    Attributes:
+        id: Unique identifier of the translation job.
+        status: Job status; only ``"processing"`` is allowed here.
+        file_name: Original file name, if known.
+        source_lang: Source language code, if known.
+        target_lang: Target language code.
+    """
+
+    # Per-field examples go through ``json_schema_extra``: passing arbitrary
+    # extra keyword arguments (``example=...``) to ``Field`` is deprecated in
+    # Pydantic v2. The emitted schema still carries the same ``example`` key.
+    id: str = Field(
+        ...,
+        description="Identifiant unique du job de traduction",
+        json_schema_extra={"example": "tr_abc123def456"},
+    )
+    status: Literal["processing"] = Field(
+        default="processing",
+        description="Statut du job (toujours 'processing' à la création)"
+    )
+    file_name: Optional[str] = Field(
+        None,
+        description="Nom du fichier original",
+        json_schema_extra={"example": "rapport_financier.xlsx"},
+    )
+    source_lang: Optional[str] = Field(
+        None,
+        description="Code langue source (ISO 639-1)",
+        json_schema_extra={"example": "en"},
+    )
+    target_lang: str = Field(
+        ...,
+        description="Code langue cible (ISO 639-1)",
+        json_schema_extra={"example": "fr"},
+    )
+
+    # Pydantic v2 configuration; replaces the deprecated nested ``class Config``.
+    model_config = {
+        "json_schema_extra": {
+            "example": {
+                "id": "tr_abc123def456",
+                "status": "processing",
+                "file_name": "rapport_financier.xlsx",
+                "source_lang": "en",
+                "target_lang": "fr"
+            }
+        }
+    }
+
+
+class TranslateResponseMeta(BaseModel):
+    """Metadata for a translation response.
+
+    Attributes:
+        rate_limit_remaining: Number of translations remaining today.
+        estimated_time_seconds: Estimated translation time in seconds, if any.
+    """
+
+    # ``json_schema_extra`` replaces the deprecated extra ``example=`` keyword
+    # on ``Field``; the emitted schema keeps the same ``example`` key.
+    rate_limit_remaining: int = Field(
+        ...,
+        description="Nombre de traductions restantes aujourd'hui",
+        json_schema_extra={"example": 4},
+    )
+    estimated_time_seconds: Optional[int] = Field(
+        None,
+        description="Temps estimé pour la traduction en secondes",
+        json_schema_extra={"example": 15},
+    )
+
+
+class TranslateResponse(BaseModel):
+    """Full response for a translation request.
+
+    Attributes:
+        data: The created job (``TranslateResponseData``).
+        meta: Accompanying metadata (``TranslateResponseMeta``).
+    """
+
+    data: TranslateResponseData
+    meta: TranslateResponseMeta
+
+    # Pydantic v2 configuration; replaces the deprecated nested ``class Config``.
+    model_config = {
+        "json_schema_extra": {
+            "example": {
+                "data": {
+                    "id": "tr_abc123def456",
+                    "status": "processing",
+                    "file_name": "rapport_financier.xlsx",
+                    "source_lang": "en",
+                    "target_lang": "fr"
+                },
+                "meta": {
+                    "rate_limit_remaining": 4,
+                    "estimated_time_seconds": 15
+                }
+            }
+        }
+    }
+
+
+class TranslationStatusData(BaseModel):
+    """Response data for the translation status endpoint.
+
+    Attributes:
+        id: Unique identifier of the translation job.
+        status: One of ``queued``, ``processing``, ``completed``, ``failed``.
+        progress_percent: Progress between 0 and 100 (inclusive); default 0.
+        current_step: Description of the current operation.
+        file_name: Original file name, if known.
+        source_lang: Source language code, if known.
+        target_lang: Target language code, if known.
+        created_at: Creation date as a string, if known.
+        completed_at: Completion date as a string, if any.
+        failed_at: Failure date as a string, if any.
+        error_message: Error message, if any.
+    """
+
+    # Per-field examples go through ``json_schema_extra``: passing arbitrary
+    # extra keyword arguments (``example=...``) to ``Field`` is deprecated in
+    # Pydantic v2. The emitted schema still carries the same ``example`` key.
+    id: str = Field(
+        ...,
+        description="Identifiant unique du job de traduction",
+        json_schema_extra={"example": "tr_abc123def456"},
+    )
+    status: Literal["queued", "processing", "completed", "failed"] = Field(
+        ...,
+        description="Statut actuel du job"
+    )
+    progress_percent: int = Field(
+        default=0,
+        ge=0,
+        le=100,
+        description="Pourcentage de progression (0-100)",
+        json_schema_extra={"example": 65},
+    )
+    current_step: str = Field(
+        default="Initializing",
+        description="Description de l'opération en cours",
+        json_schema_extra={"example": "Traduction de la diapositive 3/5"},
+    )
+    file_name: Optional[str] = Field(
+        None,
+        description="Nom du fichier original",
+        json_schema_extra={"example": "presentation.pptx"},
+    )
+    source_lang: Optional[str] = Field(
+        None,
+        description="Code langue source",
+        json_schema_extra={"example": "en"},
+    )
+    target_lang: Optional[str] = Field(
+        None,
+        description="Code langue cible",
+        json_schema_extra={"example": "fr"},
+    )
+    created_at: Optional[str] = Field(
+        None,
+        description="Date de création (ISO 8601)",
+        json_schema_extra={"example": "2024-01-15T10:30:00Z"},
+    )
+    completed_at: Optional[str] = Field(
+        None,
+        description="Date de complétion (ISO 8601)",
+        json_schema_extra={"example": "2024-01-15T10:35:00Z"},
+    )
+    # The two ``None`` examples below are kept so the schema output does not change.
+    failed_at: Optional[str] = Field(
+        None,
+        description="Date d'échec (ISO 8601)",
+        json_schema_extra={"example": None},
+    )
+    error_message: Optional[str] = Field(
+        None,
+        description="Message d'erreur si status='failed'",
+        json_schema_extra={"example": None},
+    )
+
+    # Pydantic v2 configuration; replaces the deprecated nested ``class Config``.
+    model_config = {
+        "json_schema_extra": {
+            "example": {
+                "id": "tr_abc123def456",
+                "status": "processing",
+                "progress_percent": 65,
+                "current_step": "Traduction de la diapositive 3/5",
+                "file_name": "presentation.pptx",
+                "source_lang": "en",
+                "target_lang": "fr",
+                "created_at": "2024-01-15T10:30:00Z",
+                "completed_at": None,
+                "failed_at": None,
+                "error_message": None
+            }
+        }
+    }
+
+
+class TranslationStatusMeta(BaseModel):
+    """Metadata for a translation status response.
+
+    Attributes:
+        estimated_remaining_seconds: Estimated remaining time in seconds, or
+            ``None`` when no estimate is available.
+    """
+
+    # ``json_schema_extra`` replaces the deprecated extra ``example=`` keyword
+    # on ``Field``; the emitted schema keeps the same ``example`` key.
+    estimated_remaining_seconds: Optional[int] = Field(
+        default=None,
+        description="Temps restant estimé en secondes",
+        json_schema_extra={"example": 30},
+    )
+
+
+class TranslationStatusResponse(BaseModel):
+    """Full response for the translation status endpoint.
+
+    Attributes:
+        data: Job status payload (``TranslationStatusData``).
+        meta: Optional metadata (``TranslationStatusMeta``); defaults to ``None``.
+    """
+
+    data: TranslationStatusData
+    meta: Optional[TranslationStatusMeta] = None
+
+    # Pydantic v2 configuration; replaces the deprecated nested ``class Config``.
+    model_config = {
+        "json_schema_extra": {
+            "example": {
+                "data": {
+                    "id": "tr_abc123def456",
+                    "status": "processing",
+                    "progress_percent": 65,
+                    "current_step": "Traduction de la diapositive 3/5",
+                    "file_name": "presentation.pptx",
+                    "source_lang": "en",
+                    "target_lang": "fr",
+                    "created_at": "2024-01-15T10:30:00Z"
+                },
+                "meta": {
+                    "estimated_remaining_seconds": 30
+                }
+            }
+        }
+    }
+
+
+class LanguageItem(BaseModel):
+    """Single language entry.
+
+    Attributes:
+        code: Language code.
+        name: Language name.
+    """
+
+    # ``json_schema_extra`` replaces the deprecated extra ``example=`` keyword
+    # on ``Field``; the emitted schema keeps the same ``example`` key.
+    code: str = Field(
+        ...,
+        description="Code langue ISO 639-1",
+        json_schema_extra={"example": "fr"},
+    )
+    name: str = Field(
+        ...,
+        description="Nom de la langue en français",
+        json_schema_extra={"example": "Français"},
+    )
+
+
+class LanguageResponse(BaseModel):
+    """Response model for supported languages.
+
+    Attributes:
+        supported_languages: Mapping of language code to language name.
+        note: Optional note about language availability.
+    """
+
+    # ``json_schema_extra`` replaces the deprecated extra ``example=`` keyword
+    # on ``Field``; the emitted schema keeps the same ``example`` key.
+    supported_languages: Dict[str, str] = Field(
+        ...,
+        description="Dictionnaire des langues supportées (code -> nom)",
+        json_schema_extra={
+            "example": {
+                "fr": "French",
+                "de": "German",
+                "es": "Spanish",
+                "it": "Italian"
+            }
+        },
+    )
+    note: Optional[str] = Field(
+        None,
+        description="Note sur la disponibilité des langues",
+        json_schema_extra={
+            "example": "Les langues supportées peuvent varier selon le service de traduction configuré"
+        },
+    )
\ No newline at end of file
diff --git a/services/auth_service.py b/services/auth_service.py
index 26c5fa0..69a10ae 100644
--- a/services/auth_service.py
+++ b/services/auth_service.py
@@ -5,10 +5,13 @@ This service provides user authentication with automatic backend selection:
- If DATABASE_URL is configured: Uses PostgreSQL database
- Otherwise: Falls back to JSON file storage (development mode)
"""
+
import os
import secrets
import hashlib
-from datetime import datetime, timedelta
+import uuid
+import time
+from datetime import datetime, timedelta, timezone
from typing import Optional, Dict, Any
import json
from pathlib import Path
@@ -19,6 +22,7 @@ logger = logging.getLogger(__name__)
# Try to import optional dependencies
try:
import jwt
+
JWT_AVAILABLE = True
except ImportError:
JWT_AVAILABLE = False
@@ -26,6 +30,7 @@ except ImportError:
try:
from passlib.context import CryptContext
+
pwd_context = CryptContext(schemes=["bcrypt"], deprecated="auto")
PASSLIB_AVAILABLE = True
except ImportError:
@@ -41,6 +46,7 @@ if USE_DATABASE:
from database.repositories import UserRepository
from database.connection import get_sync_session, init_db as _init_db
from database import models as db_models
+
DATABASE_AVAILABLE = True
logger.info("Database backend enabled for authentication")
except ImportError as e:
@@ -49,21 +55,89 @@ if USE_DATABASE:
logger.warning(f"Database modules not available: {e}. Using JSON storage.")
else:
DATABASE_AVAILABLE = False
- logger.info("Using JSON file storage for authentication (DATABASE_URL not configured)")
+ logger.info(
+ "Using JSON file storage for authentication (DATABASE_URL not configured)"
+ )
from models.subscription import User, UserCreate, PlanType, SubscriptionStatus, PLANS
# Configuration
-SECRET_KEY = os.getenv("JWT_SECRET", os.getenv("JWT_SECRET_KEY", secrets.token_urlsafe(32)))
+_jwt_secret = os.getenv("JWT_SECRET", os.getenv("JWT_SECRET_KEY"))
+if not _jwt_secret:
+ _jwt_secret = secrets.token_urlsafe(32)
+ logger.critical(
+ "SECURITY: JWT_SECRET_KEY is not configured! Using an ephemeral random key. "
+ "ALL JWT TOKENS WILL BE INVALIDATED ON EVERY RESTART. "
+ "Set JWT_SECRET_KEY in your .env file immediately."
+ )
+SECRET_KEY = _jwt_secret
+
ALGORITHM = "HS256"
-ACCESS_TOKEN_EXPIRE_HOURS = 24
-REFRESH_TOKEN_EXPIRE_DAYS = 30
+ACCESS_TOKEN_EXPIRE_MINUTES = 15
+REFRESH_TOKEN_EXPIRE_DAYS = 7
# Simple file-based storage (used when database is not configured)
USERS_FILE = Path("data/users.json")
USERS_FILE.parent.mkdir(exist_ok=True)
+# Token blocklist: jti → expiry timestamp (Unix).
+# Uses Redis when available (persistent across restarts), falls back to in-memory.
+_revoked_jtis: dict[str, float] = {}
+_redis_blocklist_client = None
+
+
+def _get_blocklist_redis():
+    """Return the Redis client used for the token blocklist, or None.
+
+    The outcome of the first attempt is cached in the module-level
+    ``_redis_blocklist_client``: a client object on success, or ``False`` as a
+    sentinel meaning "already tried, unavailable" (no ``REDIS_URL`` set, or
+    the import/connect/ping failed). Later calls reuse the cached outcome.
+    """
+    global _redis_blocklist_client
+
+    cached = _redis_blocklist_client
+    if cached is False:
+        # A previous attempt already concluded Redis is unavailable.
+        return None
+    if cached is not None:
+        return cached
+
+    url = os.getenv("REDIS_URL", "")
+    if url:
+        try:
+            import redis as redis_lib
+
+            connection = redis_lib.from_url(url, decode_responses=True)
+            connection.ping()
+            _redis_blocklist_client = connection
+            logger.info("Token blocklist using Redis (persistent across restarts)")
+            return connection
+        except Exception as e:
+            logger.warning(f"Redis unavailable for token blocklist, using in-memory: {e}")
+
+    # No URL configured, or the attempt above failed: remember that.
+    _redis_blocklist_client = False
+    return None
+
+
+def revoke_token_jti(jti: str, expires_at: float) -> None:
+    """Record a JTI as revoked until its expiry time.
+
+    Args:
+        jti: Token identifier to revoke.
+        expires_at: Unix timestamp at which the token expires.
+
+    The entry is written to Redis with a TTL of at least one second when a
+    client is available; if there is no client, or the write raises, the JTI
+    is stored in the in-memory ``_revoked_jtis`` mapping instead.
+    """
+    seconds_left = int(expires_at - time.time())
+    ttl = seconds_left if seconds_left > 1 else 1
+
+    client = _get_blocklist_redis()
+    if client:
+        try:
+            client.setex(f"revoked_jti:{jti}", ttl, "1")
+        except Exception as e:
+            logger.warning(f"Redis revoke failed, falling back to memory: {e}")
+        else:
+            return
+
+    _revoked_jtis[jti] = expires_at
+
+
+def is_token_revoked(jti: str) -> bool:
+    """Return True if the JTI is revoked.
+
+    Args:
+        jti: Token identifier to look up; an empty value is never revoked.
+
+    Returns:
+        True when the JTI is present in Redis or in the in-memory blocklist.
+
+    Redis is consulted first when a client is available. The in-memory
+    mapping is checked as well whenever Redis does not report the JTI,
+    because ``revoke_token_jti`` stores revocations in memory when a Redis
+    write fails; trusting a Redis "not present" answer alone would make such
+    tokens valid again. Expired in-memory entries are purged lazily here.
+    """
+    if not jti:
+        return False
+    redis = _get_blocklist_redis()
+    if redis:
+        try:
+            if redis.exists(f"revoked_jti:{jti}") == 1:
+                return True
+        except Exception as e:
+            logger.warning(f"Redis revoke check failed, falling back to memory: {e}")
+    # Lazy garbage collection of in-memory entries whose expiry has passed.
+    now = time.time()
+    expired = [k for k, v in _revoked_jtis.items() if v < now]
+    for k in expired:
+        _revoked_jtis.pop(k, None)
+    return jti in _revoked_jtis
+
def hash_password(password: str) -> str:
"""Hash a password using bcrypt or fallback to SHA256"""
@@ -91,34 +165,61 @@ def verify_password(plain_password: str, hashed_password: str) -> bool:
return False
-def create_access_token(user_id: str, expires_delta: Optional[timedelta] = None) -> str:
- """Create a JWT access token"""
+def create_access_token(
+ user_id: str, tier: str = "free", expires_delta: Optional[timedelta] = None
+) -> str:
+ """Create a JWT access token with tier claim for quick access"""
if not JWT_AVAILABLE:
- # Fallback to simple token
token_data = {
"user_id": user_id,
- "exp": (datetime.utcnow() + (expires_delta or timedelta(hours=ACCESS_TOKEN_EXPIRE_HOURS))).isoformat()
+ "tier": tier,
+ "exp": (
+ datetime.now(timezone.utc)
+ + (expires_delta or timedelta(minutes=ACCESS_TOKEN_EXPIRE_MINUTES))
+ ).isoformat(),
}
import base64
+
return base64.urlsafe_b64encode(json.dumps(token_data).encode()).decode()
-
- expire = datetime.utcnow() + (expires_delta or timedelta(hours=ACCESS_TOKEN_EXPIRE_HOURS))
- to_encode = {"sub": user_id, "exp": expire, "type": "access"}
+
+ expire = datetime.now(timezone.utc) + (
+ expires_delta or timedelta(minutes=ACCESS_TOKEN_EXPIRE_MINUTES)
+ )
+ to_encode = {
+ "sub": user_id,
+ "tier": tier,
+ "exp": expire,
+ "type": "access",
+ "jti": str(uuid.uuid4()),
+ }
return jwt.encode(to_encode, SECRET_KEY, algorithm=ALGORITHM)
-def create_refresh_token(user_id: str) -> str:
- """Create a JWT refresh token"""
+def create_refresh_token(
+ user_id: str, expires_delta: Optional[timedelta] = None
+) -> str:
+ """Create a JWT refresh token (7 days by default)"""
if not JWT_AVAILABLE:
token_data = {
"user_id": user_id,
- "exp": (datetime.utcnow() + timedelta(days=REFRESH_TOKEN_EXPIRE_DAYS)).isoformat()
+ "exp": (
+ datetime.now(timezone.utc)
+ + (expires_delta or timedelta(days=REFRESH_TOKEN_EXPIRE_DAYS))
+ ).isoformat(),
}
import base64
+
return base64.urlsafe_b64encode(json.dumps(token_data).encode()).decode()
-
- expire = datetime.utcnow() + timedelta(days=REFRESH_TOKEN_EXPIRE_DAYS)
- to_encode = {"sub": user_id, "exp": expire, "type": "refresh"}
+
+ expire = datetime.now(timezone.utc) + (
+ expires_delta or timedelta(days=REFRESH_TOKEN_EXPIRE_DAYS)
+ )
+ to_encode = {
+ "sub": user_id,
+ "exp": expire,
+ "type": "refresh",
+ "jti": str(uuid.uuid4()),
+ }
return jwt.encode(to_encode, SECRET_KEY, algorithm=ALGORITHM)
@@ -127,20 +228,24 @@ def verify_token(token: str) -> Optional[Dict[str, Any]]:
if not JWT_AVAILABLE:
try:
import base64
+
data = json.loads(base64.urlsafe_b64decode(token.encode()).decode())
exp = datetime.fromisoformat(data["exp"])
- if exp < datetime.utcnow():
+ if exp < datetime.now(timezone.utc):
return None
return {"sub": data["user_id"]}
- except:
+ except Exception:
return None
-
+
try:
payload = jwt.decode(token, SECRET_KEY, algorithms=[ALGORITHM])
+ jti = payload.get("jti")
+ if jti and is_token_revoked(jti):
+ return None
return payload
except jwt.ExpiredSignatureError:
return None
- except jwt.JWTError:
+ except jwt.PyJWTError:
return None
@@ -148,16 +253,17 @@ def load_users() -> Dict[str, Dict]:
"""Load users from file storage (JSON backend only)"""
if USERS_FILE.exists():
try:
- with open(USERS_FILE, 'r') as f:
+ with open(USERS_FILE, "r") as f:
return json.load(f)
- except:
+ except Exception as e:
+ logger.error(f"Failed to load users file: {e}")
return {}
return {}
def save_users(users: Dict[str, Dict]):
"""Save users to file storage (JSON backend only)"""
- with open(USERS_FILE, 'w') as f:
+ with open(USERS_FILE, "w") as f:
json.dump(users, f, indent=2, default=str)
@@ -170,18 +276,21 @@ def _db_user_to_model(db_user) -> User:
password_hash=db_user.password_hash,
avatar_url=db_user.avatar_url,
plan=PlanType(db_user.plan) if db_user.plan else PlanType.FREE,
- subscription_status=SubscriptionStatus(db_user.subscription_status) if db_user.subscription_status else SubscriptionStatus.ACTIVE,
+ subscription_status=SubscriptionStatus(db_user.subscription_status)
+ if db_user.subscription_status
+ else SubscriptionStatus.ACTIVE,
stripe_customer_id=db_user.stripe_customer_id,
stripe_subscription_id=db_user.stripe_subscription_id,
docs_translated_this_month=db_user.docs_translated_this_month or 0,
pages_translated_this_month=db_user.pages_translated_this_month or 0,
api_calls_this_month=db_user.api_calls_this_month or 0,
+ daily_translation_count=getattr(db_user, "daily_translation_count", 0) or 0,
extra_credits=db_user.extra_credits or 0,
- usage_reset_date=db_user.usage_reset_date or datetime.utcnow(),
- default_source_lang=db_user.default_source_lang or "en",
- default_target_lang=db_user.default_target_lang or "es",
- default_provider=db_user.default_provider or "google",
- created_at=db_user.created_at or datetime.utcnow(),
+ usage_reset_date=db_user.usage_reset_date or datetime.now(timezone.utc),
+ default_source_lang=getattr(db_user, "default_source_lang", None) or "en",
+ default_target_lang=getattr(db_user, "default_target_lang", None) or "es",
+ default_provider=getattr(db_user, "default_provider", None) or "google",
+ created_at=db_user.created_at or datetime.now(timezone.utc),
updated_at=db_user.updated_at,
)
@@ -189,6 +298,9 @@ def _db_user_to_model(db_user) -> User:
def get_user_by_email(email: str) -> Optional[User]:
"""Get a user by email"""
if USE_DATABASE and DATABASE_AVAILABLE:
+ from database.connection import get_sync_session
+ from database.repositories import UserRepository
+
with get_sync_session() as session:
repo = UserRepository(session)
db_user = repo.get_by_email(email)
@@ -206,6 +318,9 @@ def get_user_by_email(email: str) -> Optional[User]:
def get_user_by_id(user_id: str) -> Optional[User]:
"""Get a user by ID"""
if USE_DATABASE and DATABASE_AVAILABLE:
+ from database.connection import get_sync_session
+ from database.repositories import UserRepository
+
with get_sync_session() as session:
repo = UserRepository(session)
db_user = repo.get_by_id(user_id)
@@ -224,26 +339,26 @@ def create_user(user_create: UserCreate) -> User:
# Check if email exists
if get_user_by_email(user_create.email):
raise ValueError("Email already registered")
-
+
if USE_DATABASE and DATABASE_AVAILABLE:
+ from database.connection import get_sync_session
+ from database.repositories import UserRepository
+
with get_sync_session() as session:
repo = UserRepository(session)
db_user = repo.create(
email=user_create.email,
name=user_create.name,
- password_hash=hash_password(user_create.password),
- plan=PlanType.FREE.value,
- subscription_status=SubscriptionStatus.ACTIVE.value
+ hashed_password=hash_password(user_create.password),
+ tier="free",
)
- session.commit()
- session.refresh(db_user)
return _db_user_to_model(db_user)
else:
users = load_users()
-
+
# Generate user ID
user_id = secrets.token_urlsafe(16)
-
+
# Create user
user = User(
id=user_id,
@@ -253,11 +368,11 @@ def create_user(user_create: UserCreate) -> User:
plan=PlanType.FREE,
subscription_status=SubscriptionStatus.ACTIVE,
)
-
+
# Save to storage
users[user_id] = user.model_dump()
save_users(users)
-
+
return user
@@ -274,46 +389,55 @@ def authenticate_user(email: str, password: str) -> Optional[User]:
def update_user(user_id: str, updates: Dict[str, Any]) -> Optional[User]:
"""Update a user's data"""
if USE_DATABASE and DATABASE_AVAILABLE:
+ from database.connection import get_sync_session
+ from database.repositories import UserRepository
+
with get_sync_session() as session:
repo = UserRepository(session)
- db_user = repo.update(user_id, updates)
+ db_user = repo.update(user_id, **updates)
if db_user:
- session.commit()
- session.refresh(db_user)
return _db_user_to_model(db_user)
return None
else:
users = load_users()
if user_id not in users:
return None
-
+
users[user_id].update(updates)
- users[user_id]["updated_at"] = datetime.utcnow().isoformat()
+ users[user_id]["updated_at"] = datetime.now(timezone.utc).isoformat()
save_users(users)
-
+
return User(**users[user_id])
def check_usage_limits(user: User) -> Dict[str, Any]:
"""Check if user has exceeded their plan limits"""
plan = PLANS[user.plan]
-
+
# Reset usage if it's a new month
- now = datetime.utcnow()
- if user.usage_reset_date.month != now.month or user.usage_reset_date.year != now.year:
- update_user(user.id, {
- "docs_translated_this_month": 0,
- "pages_translated_this_month": 0,
- "api_calls_this_month": 0,
- "usage_reset_date": now.isoformat() if not USE_DATABASE else now
- })
+ now = datetime.now(timezone.utc)
+ if (
+ user.usage_reset_date.month != now.month
+ or user.usage_reset_date.year != now.year
+ ):
+ update_user(
+ user.id,
+ {
+ "docs_translated_this_month": 0,
+ "pages_translated_this_month": 0,
+ "api_calls_this_month": 0,
+ "usage_reset_date": now.isoformat() if not USE_DATABASE else now,
+ },
+ )
user.docs_translated_this_month = 0
user.pages_translated_this_month = 0
user.api_calls_this_month = 0
-
+
docs_limit = plan["docs_per_month"]
- docs_remaining = max(0, docs_limit - user.docs_translated_this_month) if docs_limit > 0 else -1
-
+ docs_remaining = (
+ max(0, docs_limit - user.docs_translated_this_month) if docs_limit > 0 else -1
+ )
+
return {
"can_translate": docs_remaining != 0 or user.extra_credits > 0,
"docs_used": user.docs_translated_this_month,
@@ -332,15 +456,15 @@ def record_usage(user_id: str, pages_count: int, use_credits: bool = False) -> b
user = get_user_by_id(user_id)
if not user:
return False
-
+
updates = {
"docs_translated_this_month": user.docs_translated_this_month + 1,
"pages_translated_this_month": user.pages_translated_this_month + pages_count,
}
-
+
if use_credits:
updates["extra_credits"] = max(0, user.extra_credits - pages_count)
-
+
result = update_user(user_id, updates)
return result is not None
@@ -350,11 +474,94 @@ def add_credits(user_id: str, credits: int) -> bool:
user = get_user_by_id(user_id)
if not user:
return False
-
+
result = update_user(user_id, {"extra_credits": user.extra_credits + credits})
return result is not None
+# Valid plan values for admin tier change (Story 1.7)
+VALID_PLAN_VALUES = {"free", "starter", "pro", "business", "enterprise"}
+
+
+def update_user_plan(user_id: str, plan: str) -> Optional[User]:
+    """
+    Change a user's plan and keep the derived ``tier`` field aligned with it.
+
+    The plan name is trimmed and lower-cased, then checked against
+    VALID_PLAN_VALUES. ``tier`` becomes "pro" for the pro, business and
+    enterprise plans and "free" for every other valid plan. No authorization
+    check is performed here; callers are expected to restrict this to admins.
+
+    Args:
+        user_id: Identifier of the user to update.
+        plan: Requested plan name (surrounding whitespace and case are ignored).
+
+    Returns:
+        The result of update_user for the change, or None when the plan name
+        is not one of VALID_PLAN_VALUES.
+    """
+    normalized = (plan or "").strip().lower()
+    if normalized not in VALID_PLAN_VALUES:
+        return None
+
+    plan_enum = PlanType(normalized)
+    paid_plans = (PlanType.PRO, PlanType.BUSINESS, PlanType.ENTERPRISE)
+    tier = "pro" if plan_enum in paid_plans else "free"
+
+    # The database backend receives the enum member; the fallback backend
+    # receives the plain lower-case string.
+    database_backend = USE_DATABASE and DATABASE_AVAILABLE
+    stored_plan = plan_enum if database_backend else normalized
+
+    return update_user(user_id, {"plan": stored_plan, "tier": tier})
+
+
+def get_user_by_api_key(api_key: str) -> Optional[User]:
+    """
+    Resolve an API key to the user that owns it.
+
+    The key is SHA-256 hashed and looked up by hash. On a successful lookup
+    the key record's ``last_used_at`` is stamped and ``usage_count`` is
+    incremented. API keys are only supported with the database backend.
+
+    Args:
+        api_key: Raw API key presented by the caller.
+
+    Returns:
+        The owning user (as returned by get_user_by_id), or None when the key
+        is empty, unknown, or the database backend is not in use.
+
+    Raises:
+        ValueError: With message "API_KEY_REVOKED" if the key exists but is
+            inactive, or "API_KEY_EXPIRED" if its expiry time has passed.
+    """
+    if not api_key:
+        return None
+
+    # Only database backend supports API keys
+    if not (USE_DATABASE and DATABASE_AVAILABLE):
+        return None
+
+    from database.connection import get_sync_session
+    from database.models import ApiKey
+    import hashlib
+
+    # Hash the provided key to compare with stored hash
+    key_hash = hashlib.sha256(api_key.encode()).hexdigest()
+
+    with get_sync_session() as session:
+        api_key_record = (
+            session.query(ApiKey).filter(ApiKey.key_hash == key_hash).first()
+        )
+
+        if not api_key_record:
+            return None
+
+        # Check if key is active (Story 3.2 - Revocation check)
+        if not api_key_record.is_active:
+            raise ValueError("API_KEY_REVOKED")
+
+        now = datetime.now(timezone.utc)
+
+        # Check expiration if set
+        expires_at = api_key_record.expires_at
+        if expires_at:
+            # A timezone-naive value cannot be compared with an aware "now"
+            # (TypeError). Naive timestamps in this codebase were produced
+            # with utcnow(), so interpret them as UTC.
+            if expires_at.tzinfo is None:
+                expires_at = expires_at.replace(tzinfo=timezone.utc)
+            if expires_at < now:
+                raise ValueError("API_KEY_EXPIRED")
+
+        # Update last_used_at and usage_count
+        api_key_record.last_used_at = now
+        api_key_record.usage_count = (api_key_record.usage_count or 0) + 1
+        session.commit()
+
+        # Get the user
+        user_id = api_key_record.user_id
+        return get_user_by_id(str(user_id))
+
+
def init_database():
"""Initialize the database (call on application startup)"""
if USE_DATABASE and DATABASE_AVAILABLE:
diff --git a/services/auth_service_db.py b/services/auth_service_db.py
index c678890..3aa7695 100644
--- a/services/auth_service_db.py
+++ b/services/auth_service_db.py
@@ -2,16 +2,18 @@
Database-backed authentication service
Replaces JSON file storage with SQLAlchemy
"""
+
import os
import secrets
import hashlib
-from datetime import datetime, timedelta
+from datetime import datetime, timedelta, timezone
from typing import Optional, Dict, Any
import logging
# Try to import optional dependencies
try:
import jwt
+
JWT_AVAILABLE = True
except ImportError:
JWT_AVAILABLE = False
@@ -20,6 +22,7 @@ except ImportError:
try:
from passlib.context import CryptContext
+
pwd_context = CryptContext(schemes=["bcrypt"], deprecated="auto")
PASSLIB_AVAILABLE = True
except ImportError:
@@ -65,24 +68,26 @@ def verify_password(plain_password: str, hashed_password: str) -> bool:
def create_access_token(user_id: str, expires_delta: Optional[timedelta] = None) -> str:
"""Create a JWT access token"""
- expire = datetime.utcnow() + (expires_delta or timedelta(hours=ACCESS_TOKEN_EXPIRE_HOURS))
-
+ expire = datetime.now(timezone.utc) + (
+ expires_delta or timedelta(hours=ACCESS_TOKEN_EXPIRE_HOURS)
+ )
+
if not JWT_AVAILABLE:
token_data = {"user_id": user_id, "exp": expire.isoformat(), "type": "access"}
return base64.urlsafe_b64encode(json.dumps(token_data).encode()).decode()
-
+
to_encode = {"sub": user_id, "exp": expire, "type": "access"}
return jwt.encode(to_encode, SECRET_KEY, algorithm=ALGORITHM)
def create_refresh_token(user_id: str) -> str:
"""Create a JWT refresh token"""
- expire = datetime.utcnow() + timedelta(days=REFRESH_TOKEN_EXPIRE_DAYS)
-
+ expire = datetime.now(timezone.utc) + timedelta(days=REFRESH_TOKEN_EXPIRE_DAYS)
+
if not JWT_AVAILABLE:
token_data = {"user_id": user_id, "exp": expire.isoformat(), "type": "refresh"}
return base64.urlsafe_b64encode(json.dumps(token_data).encode()).decode()
-
+
to_encode = {"sub": user_id, "exp": expire, "type": "refresh"}
return jwt.encode(to_encode, SECRET_KEY, algorithm=ALGORITHM)
@@ -93,12 +98,12 @@ def verify_token(token: str) -> Optional[Dict[str, Any]]:
try:
data = json.loads(base64.urlsafe_b64decode(token.encode()).decode())
exp = datetime.fromisoformat(data["exp"])
- if exp < datetime.utcnow():
+ if exp < datetime.now(timezone.utc):
return None
return {"sub": data["user_id"], "type": data.get("type", "access")}
except Exception:
return None
-
+
try:
payload = jwt.decode(token, SECRET_KEY, algorithms=[ALGORITHM])
return payload
@@ -112,18 +117,18 @@ def create_user(email: str, name: str, password: str) -> User:
"""Create a new user in the database"""
with get_db_session() as db:
repo = UserRepository(db)
-
+
# Check if email already exists
existing = repo.get_by_email(email)
if existing:
raise ValueError("Email already registered")
-
- password_hash = hash_password(password)
+
+ hashed = hash_password(password)
user = repo.create(
email=email,
name=name,
- password_hash=password_hash,
- plan=PlanType.FREE,
+ hashed_password=hashed,
+ tier="free",
)
return user
@@ -133,15 +138,15 @@ def authenticate_user(email: str, password: str) -> Optional[User]:
with get_db_session() as db:
repo = UserRepository(db)
user = repo.get_by_email(email)
-
+
if not user:
return None
-
+
if not verify_password(password, user.password_hash):
return None
-
+
# Update last login
- repo.update(user.id, last_login_at=datetime.utcnow())
+ repo.update(user.id, last_login_at=datetime.now(timezone.utc))
return user
@@ -181,11 +186,15 @@ def use_credits(user_id: str, credits: int) -> bool:
return repo.use_credits(user_id, credits)
-def increment_usage(user_id: str, docs: int = 0, pages: int = 0, api_calls: int = 0) -> bool:
+def increment_usage(
+ user_id: str, docs: int = 0, pages: int = 0, api_calls: int = 0
+) -> bool:
"""Increment user usage counters"""
with get_db_session() as db:
repo = UserRepository(db)
- result = repo.increment_usage(user_id, docs=docs, pages=pages, api_calls=api_calls)
+ result = repo.increment_usage(
+ user_id, docs=docs, pages=pages, api_calls=api_calls
+ )
return result is not None
@@ -194,12 +203,12 @@ def check_usage_limits(user_id: str) -> Dict[str, Any]:
with get_db_session() as db:
repo = UserRepository(db)
user = repo.get_by_id(user_id)
-
+
if not user:
return {"allowed": False, "reason": "User not found"}
-
+
plan_config = PLANS.get(user.plan, PLANS[PlanType.FREE])
-
+
# Check document limit
docs_limit = plan_config["docs_per_month"]
if docs_limit > 0 and user.docs_translated_this_month >= docs_limit:
@@ -211,10 +220,12 @@ def check_usage_limits(user_id: str) -> Dict[str, Any]:
"limit": docs_limit,
"used": user.docs_translated_this_month,
}
-
+
return {
"allowed": True,
- "docs_remaining": max(0, docs_limit - user.docs_translated_this_month) if docs_limit > 0 else -1,
+ "docs_remaining": max(0, docs_limit - user.docs_translated_this_month)
+ if docs_limit > 0
+ else -1,
"extra_credits": user.extra_credits,
}
@@ -224,22 +235,28 @@ def get_user_usage_stats(user_id: str) -> Dict[str, Any]:
with get_db_session() as db:
repo = UserRepository(db)
user = repo.get_by_id(user_id)
-
+
if not user:
return {}
-
+
plan_config = PLANS.get(user.plan, PLANS[PlanType.FREE])
-
+
return {
"docs_used": user.docs_translated_this_month,
"docs_limit": plan_config["docs_per_month"],
- "docs_remaining": max(0, plan_config["docs_per_month"] - user.docs_translated_this_month) if plan_config["docs_per_month"] > 0 else -1,
+ "docs_remaining": max(
+ 0, plan_config["docs_per_month"] - user.docs_translated_this_month
+ )
+ if plan_config["docs_per_month"] > 0
+ else -1,
"pages_used": user.pages_translated_this_month,
"extra_credits": user.extra_credits,
"max_pages_per_doc": plan_config["max_pages_per_doc"],
"max_file_size_mb": plan_config["max_file_size_mb"],
"allowed_providers": plan_config["providers"],
"api_access": plan_config.get("api_access", False),
- "api_calls_used": user.api_calls_this_month if plan_config.get("api_access") else 0,
+ "api_calls_used": user.api_calls_this_month
+ if plan_config.get("api_access")
+ else 0,
"api_calls_limit": plan_config.get("api_calls_per_month", 0),
}
diff --git a/services/glossary_service.py b/services/glossary_service.py
new file mode 100644
index 0000000..d8547cf
--- /dev/null
+++ b/services/glossary_service.py
@@ -0,0 +1,183 @@
+"""
+Glossary Service for Translation
+Story 3.10: Glossaires - Application lors Traduction LLM
+
+Provides functions to retrieve glossary terms and format them for LLM prompts.
+"""
+
+import logging
+from typing import List, Dict, Any, Optional
+
+from database.connection import get_sync_session
+from database.models import Glossary, GlossaryTerm
+from utils.exceptions import GlossaryNotFoundError
+
+logger = logging.getLogger(__name__)
+
+
+def get_glossary_terms(glossary_id: str, user_id: str) -> List[Dict[str, str]]:
+    """
+    Load the source/target pairs of one glossary, enforcing ownership.
+
+    Args:
+        glossary_id: UUID of the glossary
+        user_id: UUID of the user (must own the glossary)
+
+    Returns:
+        One ``{"source": ..., "target": ...}`` dictionary per stored term.
+
+    Raises:
+        GlossaryNotFoundError: If no glossary with this ID belongs to the user.
+            Any other exception raised while querying is logged and re-raised
+            as GlossaryNotFoundError as well.
+    """
+    try:
+        with get_sync_session() as session:
+            ownership_query = session.query(Glossary).filter(
+                Glossary.id == glossary_id, Glossary.user_id == user_id
+            )
+            if not ownership_query.first():
+                raise GlossaryNotFoundError(
+                    message="Glossaire introuvable ou vous n'avez pas accès à cette ressource.",
+                    details={"glossary_id": glossary_id},
+                )
+
+            # Ownership confirmed: collect every term attached to the glossary.
+            term_rows = (
+                session.query(GlossaryTerm)
+                .filter(GlossaryTerm.glossary_id == glossary_id)
+                .all()
+            )
+            pairs = []
+            for row in term_rows:
+                pairs.append({"source": row.source, "target": row.target})
+
+            logger.info(
+                f"Retrieved {len(pairs)} terms from glossary {glossary_id} for user {user_id}"
+            )
+            return pairs
+
+    except GlossaryNotFoundError:
+        # Already the right error type; let it through untouched.
+        raise
+    except Exception as e:
+        logger.error(f"Error retrieving glossary {glossary_id}: {e}")
+        raise GlossaryNotFoundError(
+            message="Erreur lors de la récupération du glossaire.",
+            details={"glossary_id": glossary_id, "error": str(e)},
+        )
+
+
+def validate_glossary_access(glossary_id: str, user_id: str) -> bool:
+    """
+    Confirm that a glossary exists and is owned by the given user.
+
+    Only the glossary row is queried, not its terms, which makes this
+    suitable as an early check before a translation job is started.
+
+    Args:
+        glossary_id: UUID of the glossary
+        user_id: UUID of the user (must own the glossary)
+
+    Returns:
+        True when the glossary exists and belongs to the user. The function
+        never returns False; failure is signalled by the exception below.
+
+    Raises:
+        GlossaryNotFoundError: If no glossary with this ID belongs to the user.
+            Any other exception raised while querying is logged and re-raised
+            as GlossaryNotFoundError as well.
+    """
+    try:
+        with get_sync_session() as session:
+            match = (
+                session.query(Glossary)
+                .filter(Glossary.id == glossary_id, Glossary.user_id == user_id)
+                .first()
+            )
+            if match:
+                return True
+
+            raise GlossaryNotFoundError(
+                message="Glossaire introuvable ou vous n'avez pas accès à cette ressource.",
+                details={"glossary_id": glossary_id},
+            )
+
+    except GlossaryNotFoundError:
+        raise
+    except Exception as e:
+        logger.error(f"Error validating glossary access {glossary_id}: {e}")
+        raise GlossaryNotFoundError(
+            message="Erreur lors de la validation du glossaire.",
+            details={"glossary_id": glossary_id, "error": str(e)},
+        )
+
+
+def format_glossary_for_prompt(terms: List[Dict[str, str]]) -> str:
+    """
+    Format glossary terms for injection into an LLM system prompt.
+
+    The output has a header line, one ``- 'source' → 'target'`` line per
+    usable term, and a closing instruction. Terms are listed longest source
+    first so that e.g. "machine learning" appears before "machine".
+
+    Args:
+        terms: List of dictionaries with 'source' and 'target' keys. Entries
+            whose source or target is missing, None, or blank after
+            stripping are skipped.
+
+    Returns:
+        Formatted string for the LLM prompt, or "" when there is no usable
+        term (so callers never inject a header with nothing under it).
+    """
+    if not terms:
+        return ""
+
+    # Normalise before sorting so the ordering reflects the text that is
+    # actually emitted and None/blank values cannot break len() or strip().
+    pairs = []
+    for term in terms:
+        source = (term.get("source") or "").strip()
+        target = (term.get("target") or "").strip()
+        if source and target:
+            pairs.append((source, target))
+
+    if not pairs:
+        return ""
+
+    # Longest source first; the sort is stable, so ties keep input order.
+    pairs.sort(key=lambda pair: len(pair[0]), reverse=True)
+
+    lines = [
+        "TERMINOLOGY GLOSSARY (use these exact translations):",
+        "",
+    ]
+
+    for source, target in pairs:
+        # Escape single quotes because each term is wrapped in single quotes
+        source_escaped = source.replace("'", "\\'")
+        target_escaped = target.replace("'", "\\'")
+        lines.append(f"- '{source_escaped}' → '{target_escaped}'")
+
+    lines.extend([
+        "",
+        "IMPORTANT: Always use these translations when the terms appear in the text.",
+    ])
+
+    return "\n".join(lines)
+
+
+def build_full_prompt(
+    custom_prompt: Optional[str],
+    glossary_terms: Optional[List[Dict[str, str]]]
+) -> str:
+    """
+    Assemble the final prompt from the user's custom prompt and the glossary.
+
+    Args:
+        custom_prompt: Optional custom system prompt from user
+        glossary_terms: Optional list of glossary terms
+
+    Returns:
+        The non-empty sections (custom prompt first, formatted glossary
+        second) separated by a blank line, or "" when neither is present.
+    """
+    sections = [custom_prompt] if custom_prompt else []
+
+    # The glossary is only formatted when terms were supplied, and only
+    # appended when the formatter produced text.
+    glossary_section = (
+        format_glossary_for_prompt(glossary_terms) if glossary_terms else ""
+    )
+    if glossary_section:
+        sections.append(glossary_section)
+
+    return "\n\n".join(sections)
\ No newline at end of file
diff --git a/services/progress_tracker.py b/services/progress_tracker.py
new file mode 100644
index 0000000..e35b452
--- /dev/null
+++ b/services/progress_tracker.py
@@ -0,0 +1,174 @@
+"""
+Progress Tracker Service (Story 2.11)
+
+Provides real-time progress tracking for translation jobs.
+Designed for O(1) updates and < 500ms latency (NFR3).
+"""
+
+from typing import Dict, Any, Optional, Callable
+import threading
+import time
+
+
+class ProgressTracker:
+    """
+    Track translation progress for one job inside a shared job-storage dict.
+
+    All writes go to ``storage[job_id]`` under a re-entrant lock. Progress
+    updates are throttled to at most one per ``_min_update_interval``
+    seconds, except for updates that reach 100% or report the final item,
+    which are always applied.
+
+    Usage:
+        storage = {}  # Reference to _translation_jobs dict
+        tracker = ProgressTracker("job_123", storage)
+        tracker.update(50, "Translating sheet 2/4")
+
+        # Or use item-based progress
+        tracker.update_item(3, 10, "Translating slide")
+    """
+
+    def __init__(self, job_id: str, storage: Dict[str, Any]):
+        """
+        Initialize progress tracker.
+
+        Args:
+            job_id: The translation job ID
+            storage: Reference to the job storage dict (e.g., _translation_jobs)
+        """
+        self.job_id = job_id
+        self.storage = storage
+        self._lock = threading.RLock()
+        self._last_update_time = 0
+        self._min_update_interval = 0.05  # 50ms minimum between updates (throttling)
+
+    def _apply_progress(
+        self,
+        percent: int,
+        step: str,
+        processed_items: Optional[int] = None,
+        total_items: Optional[int] = None,
+        force: bool = False,
+    ) -> None:
+        """Shared throttle-and-write path used by update() and update_item()."""
+        with self._lock:
+            current_time = time.time()
+            throttled = (
+                current_time - self._last_update_time < self._min_update_interval
+            )
+            # Completion-type updates (100% or forced) must never be dropped.
+            if throttled and percent < 100 and not force:
+                return
+
+            job = self.storage.get(self.job_id)
+            if job:
+                # Never decrease progress — only move forward.
+                new_percent = min(100, max(0, percent))
+                job["progress_percent"] = max(job.get("progress_percent", 0), new_percent)
+                job["current_step"] = step
+                # Without explicit counts, keep existing values (default 0).
+                job["processed_items"] = (
+                    job.get("processed_items", 0)
+                    if processed_items is None
+                    else processed_items
+                )
+                job["total_items"] = (
+                    job.get("total_items", 0) if total_items is None else total_items
+                )
+                self._last_update_time = current_time
+
+    def update(self, percent: int, step: str) -> None:
+        """
+        Update progress percentage and current step.
+
+        Runs under the tracker's lock and is throttled; an update with
+        percent >= 100 bypasses the throttle.
+
+        Args:
+            percent: Progress percentage (0-100), will be clamped
+            step: Human-readable description of current operation
+        """
+        self._apply_progress(percent, step)
+
+    def update_item(
+        self, current: int, total: int, item_name: str, max_percent: int = 100
+    ) -> None:
+        """
+        Update progress based on item count (e.g., slides, sheets).
+
+        Calculates percentage from current/total and formats the step message
+        as "<item_name> <current>/<total>". The update for the final item
+        (current >= total > 0) is never throttled, so processed_items reaches
+        total_items even when max_percent keeps the percentage below 100.
+
+        Args:
+            current: Current item number (1-based)
+            total: Total number of items
+            item_name: Name of item type (e.g., "Translating slide", "Processing sheet")
+            max_percent: Upper bound for the computed percentage (default 100).
+                         Use 95 to reserve the last 5% for file-save + set_completed().
+        """
+        percent = int((current / total) * 100) if total > 0 else 0
+        percent = min(percent, max_percent)
+        step = f"{item_name} {current}/{total}"
+        is_final_item = total > 0 and current >= total
+
+        self._apply_progress(
+            percent,
+            step,
+            processed_items=current,
+            total_items=total,
+            force=is_final_item,
+        )
+
+    def set_error(
+        self, error_message: str, step: str = "Error during translation"
+    ) -> None:
+        """
+        Mark job as failed with error message.
+
+        Also records a UTC ``failed_at`` timestamp on the job.
+
+        Args:
+            error_message: Description of the error
+            step: Current step description (default: "Error during translation")
+        """
+        with self._lock:
+            job = self.storage.get(self.job_id)
+            if job:
+                job["status"] = "failed"
+                job["error_message"] = error_message
+                job["current_step"] = step
+                job["failed_at"] = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
+
+    def set_completed(self, output_path: Optional[str] = None) -> None:
+        """
+        Mark job as completed.
+
+        Sets progress to 100 and records a UTC ``completed_at`` timestamp.
+
+        Args:
+            output_path: Optional path to the output file
+        """
+        with self._lock:
+            job = self.storage.get(self.job_id)
+            if job:
+                job["status"] = "completed"
+                job["progress_percent"] = 100
+                job["current_step"] = "Translation complete"
+                job["completed_at"] = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
+                if output_path:
+                    job["output_path"] = str(output_path)
+
+
+def create_progress_callback(
+    tracker: ProgressTracker, item_name: str, total_items: int
+) -> Callable[[Dict[str, Any]], None]:
+    """
+    Build a callback that forwards translator progress dicts to a tracker.
+
+    Args:
+        tracker: ProgressTracker instance
+        item_name: Name of item being processed (e.g., "Translating slide")
+        total_items: Total used when the progress dict carries no total key
+
+    Returns:
+        Callback function compatible with translator progress_callback parameter
+    """
+    # Different translators use different keys; the first key present wins.
+    current_keys = ("slide", "sheet", "paragraph", "element")
+    total_keys = ("total_slides", "total", "total_paragraphs")
+
+    def _first_present(info: Dict[str, Any], keys, fallback):
+        """Return the value of the first key found in info, else fallback."""
+        for key in keys:
+            if key in info:
+                return info[key]
+        return fallback
+
+    def callback(progress_info: Dict[str, Any]) -> None:
+        """Progress callback that updates the tracker."""
+        current = _first_present(progress_info, current_keys, 1)
+        total = _first_present(progress_info, total_keys, total_items)
+        tracker.update_item(current, total, item_name)
+
+    return callback
diff --git a/services/prompt_service.py b/services/prompt_service.py
new file mode 100644
index 0000000..3991ea0
--- /dev/null
+++ b/services/prompt_service.py
@@ -0,0 +1,127 @@
+"""
+Prompt Service for Translation
+Story 3.12: Custom Prompts - Application lors Traduction LLM
+
+Provides functions to retrieve prompt content and validate access.
+"""
+
+import uuid
+import logging
+from typing import Optional, Tuple
+
+from database.connection import get_sync_session
+from database.models import CustomPrompt
+from utils.exceptions import PromptNotFoundError
+
+logger = logging.getLogger(__name__)
+
+
+def _validate_uuid(id_str: str, id_name: str = "ID") -> None:
+    """
+    Validate that a value is a string holding a valid UUID.
+
+    Args:
+        id_str: String to validate
+        id_name: Name of the ID for error messages; its lower-cased form is
+            used as the key in the error details
+
+    Raises:
+        PromptNotFoundError: If the value cannot be parsed as a UUID. This
+            includes non-string input such as None, so callers only ever see
+            one exception type.
+    """
+    try:
+        uuid.UUID(id_str)
+    except (ValueError, AttributeError, TypeError):
+        # uuid.UUID raises ValueError for malformed text, AttributeError for
+        # objects without .replace (e.g. int) and TypeError for None/bytes.
+        raise PromptNotFoundError(
+            message=f"{id_name} invalide.",
+            details={id_name.lower(): id_str}
+        )
+
+
+def _get_prompt_record(prompt_id: str, user_id: str) -> Tuple[CustomPrompt, bool]:
+    """
+    Look up a custom prompt by ID, restricted to prompts owned by user_id.
+
+    Shared by get_prompt_content and validate_prompt_access. Both IDs are
+    checked for UUID syntax before the database is queried.
+
+    Args:
+        prompt_id: UUID of the prompt
+        user_id: UUID of the user (must own the prompt)
+
+    Returns:
+        Tuple of (CustomPrompt, was_logged). This helper does no access
+        logging, so was_logged is always False.
+
+    Raises:
+        PromptNotFoundError: If an ID is not a valid UUID or no matching
+            prompt belongs to the user. Any other exception raised while
+            querying is logged and re-raised as PromptNotFoundError as well.
+    """
+    # Validate UUIDs before querying database (prompt_id first, then user_id)
+    for value, label in ((prompt_id, "prompt_id"), (user_id, "user_id")):
+        _validate_uuid(value, label)
+
+    try:
+        with get_sync_session() as session:
+            ownership = (
+                CustomPrompt.id == prompt_id,
+                CustomPrompt.user_id == user_id,
+            )
+            record = session.query(CustomPrompt).filter(*ownership).first()
+
+            if not record:
+                raise PromptNotFoundError(
+                    message="Prompt introuvable ou vous n'avez pas accès à cette ressource.",
+                    details={"prompt_id": prompt_id},
+                )
+
+            # NOTE(review): the record is handed back after the session
+            # context exits; whether its attributes stay readable depends on
+            # how get_sync_session configures the session — confirm.
+            return record, False
+
+    except PromptNotFoundError:
+        raise
+    except Exception as e:
+        logger.error(f"Error fetching prompt {prompt_id}: {e}")
+        raise PromptNotFoundError(
+            message="Erreur lors de la récupération du prompt.",
+            details={"prompt_id": prompt_id, "error": str(e)},
+        )
+
+
+def get_prompt_content(prompt_id: str, user_id: str) -> str:
+    """
+    Return the text of a custom prompt owned by the given user.
+
+    The lookup is delegated to _get_prompt_record; a successful retrieval is
+    logged at INFO level with the prompt's name.
+
+    Args:
+        prompt_id: UUID of the prompt
+        user_id: UUID of the user (must own the prompt)
+
+    Returns:
+        The prompt content string
+
+    Raises:
+        PromptNotFoundError: If prompt doesn't exist or doesn't belong to user
+    """
+    record = _get_prompt_record(prompt_id, user_id)[0]
+
+    prompt_name = record.name
+    logger.info(f"Retrieved prompt '{prompt_name}' ({prompt_id}) for user {user_id}")
+
+    return record.content
+
+
+def validate_prompt_access(prompt_id: str, user_id: str) -> bool:
+    """
+    Confirm that a prompt exists and is owned by the given user.
+
+    Intended as a cheap pre-flight check before a translation job starts.
+    Nothing is logged on success, so a following get_prompt_content call
+    does not produce a duplicate log entry.
+
+    Args:
+        prompt_id: UUID of the prompt
+        user_id: UUID of the user (must own the prompt)
+
+    Returns:
+        True if prompt exists and belongs to user. The function never returns
+        False; failure is signalled by the exception below.
+
+    Raises:
+        PromptNotFoundError: If prompt doesn't exist or doesn't belong to user
+    """
+    # The lookup raises on failure; reaching the return means access is valid.
+    _record, _was_logged = _get_prompt_record(prompt_id, user_id)
+    return True
diff --git a/services/providers/README.md b/services/providers/README.md
new file mode 100644
index 0000000..fb2a210
--- /dev/null
+++ b/services/providers/README.md
@@ -0,0 +1,282 @@
+# Translation Providers
+
+This directory contains translation provider implementations for the office_translator service.
+
+## Available Providers
+
+### Google Translate (`google_provider.py`)
+
+Production-ready Google Translate provider with:
+- Robust error handling with specific error codes
+- Retry logic with exponential backoff
+- Health check with result caching (60s TTL)
+- Usage metrics logging
+
+**Configuration:**
+```bash
+GOOGLE_TRANSLATE_ENABLED=true
+GOOGLE_TRANSLATE_TIMEOUT=30
+GOOGLE_TRANSLATE_MAX_RETRIES=3
+GOOGLE_TRANSLATE_RETRY_DELAY=1
+```
+
+**API Usage:**
+- Free tier: 500,000 characters/month
+- 5,000 characters max per request
+- Cost: ~$20 per million characters (paid tier)
+
+**Error Codes:**
+| Code | Description |
+|------|-------------|
+| `GOOGLE_QUOTA_EXCEEDED` | API quota exceeded (429) |
+| `GOOGLE_INVALID_KEY` | Invalid API key (401/403) |
+| `GOOGLE_NETWORK_ERROR` | Network/timeout error (502) |
+| `GOOGLE_UNSUPPORTED_LANGUAGE` | Language not supported (400) |
+| `GOOGLE_TEXT_TOO_LONG` | Text exceeds 5000 chars (413) |
+
+### DeepL (`deepl_provider.py`)
+
+Production-ready DeepL provider with:
+- Automatic Free/Pro endpoint detection based on API key format
+- Robust error handling with specific error codes
+- Retry logic with exponential backoff
+- Health check with result caching (60s TTL)
+- Language code normalization for DeepL compatibility
+
+**Configuration:**
+```bash
+DEEPL_ENABLED=true
+DEEPL_API_KEY=your_deepl_api_key_here # Free keys end with :fx
+DEEPL_TIMEOUT=30
+DEEPL_MAX_RETRIES=3
+DEEPL_RETRY_DELAY=1
+```
+
+**Free vs Pro API Keys:**
+| Type | Key Format | Endpoint |
+|------|------------|----------|
+| Free | Ends with `:fx` | `https://api-free.deepl.com/v2/translate` |
+| Pro | Does NOT end with `:fx` | `https://api.deepl.com/v2/translate` |
+
+**API Usage:**
+- Free tier: 500,000 characters/month
+- Pro tier: ~€25 per million characters
+- 128KB max per request
+- Higher quality for European languages
+
+**Supported Languages:**
+BG, CS, DA, DE, EL, EN-GB, EN-US, ES, ET, FI, FR, HU, ID, IT, JA, KO, LT, LV, NB, NL, PL, PT-BR, PT-PT, RO, RU, SK, SL, SV, TR, UK, ZH
+
+**Language Notes:**
+- English has two variants: EN-GB, EN-US (defaults to EN-US)
+- Portuguese has two variants: PT-BR, PT-PT (defaults to PT-BR)
+- Language codes are case-sensitive (uppercase)
+- Source-language auto-detection is requested with `auto` (the same convention as the Google provider)
+
+**Error Codes:**
+| Code | HTTP | Description |
+|------|------|-------------|
+| `DEEPL_QUOTA_EXCEEDED` | 429 | Character quota exceeded |
+| `DEEPL_INVALID_KEY` | 401 | Invalid API key |
+| `DEEPL_NETWORK_ERROR` | 502 | Network/timeout error |
+| `DEEPL_UNSUPPORTED_LANGUAGE` | 400 | Language not supported |
+| `DEEPL_TEXT_TOO_LONG` | 413 | Text exceeds 128KB |
+
+### OpenAI (`openai_provider.py`)
+
+Cloud LLM translation provider with:
+- GPT-4/GPT-4o/GPT-4o-mini model support
+- Custom system prompt support for translation context
+- Robust error handling with specific error codes
+- Retry logic with exponential backoff
+- Fast timeout for cloud API (default 60s)
+- Health check with result caching (60s TTL)
+
+**Configuration:**
+```bash
+OPENAI_ENABLED=true
+OPENAI_API_KEY=sk-proj-xxxxxxxxxxxxxxxxxxxxxxxx
+OPENAI_MODEL=gpt-4o-mini
+OPENAI_TIMEOUT=60
+OPENAI_MAX_RETRIES=3
+OPENAI_RETRY_DELAY=1.0
+# OPENAI_BASE_URL=https://api.openai.com/v1 # Optional: for Azure OpenAI or proxies
+```
+
+**Prerequisites:**
+- OpenAI API key from https://platform.openai.com/api-keys
+- Valid billing method on your OpenAI account
+
+**Recommended Models for Translation:**
+| Model | Cost | Speed | Quality | Best For |
+|-------|------|-------|---------|----------|
+| `gpt-4o-mini` | $0.15/M tokens | Fast | Good | Default choice, cost-effective |
+| `gpt-4o` | $2.50/M tokens | Medium | Excellent | High-quality requirements |
+| `gpt-4` | $30/M tokens | Slower | Excellent | Critical translations |
+| `gpt-3.5-turbo` | $0.50/M tokens | Fastest | Good | Speed priority |
+
+**Custom System Prompt:**
+```python
+request = TranslationRequest(
+ text="Hello",
+ target_language="fr",
+ metadata={"custom_prompt": "Translate formally for business context"}
+)
+```
+
+**Rate Limiting:**
+- OpenAI has strict rate limits per tier
+- The provider automatically handles 429 errors with retry
+- Retry-After header is respected when available
+- Exponential backoff for transient errors
+
+**Error Codes:**
+| Code | HTTP | Description |
+|------|------|-------------|
+| `OPENAI_RATE_LIMITED` | 429 | Rate limit hit, retry suggested |
+| `OPENAI_INVALID_KEY` | 401 | Invalid API key |
+| `OPENAI_QUOTA_EXCEEDED` | 429 | Billing quota exceeded |
+| `OPENAI_TIMEOUT` | 502 | Request timeout |
+| `OPENAI_SERVICE_ERROR` | 502 | OpenAI server error |
+| `OPENAI_CONTEXT_TOO_LONG` | 413 | Text exceeds model limit |
+
+### Ollama (`ollama_provider.py`)
+
+Local LLM translation provider with:
+- Custom system prompt support for translation context
+- Automatic model availability checking
+- Robust error handling with specific error codes
+- Retry logic with exponential backoff
+- Longer timeout for LLM operations (default 120s)
+- Health check with result caching (60s TTL)
+
+**Configuration:**
+```bash
+OLLAMA_ENABLED=true
+OLLAMA_BASE_URL=http://localhost:11434
+OLLAMA_MODEL=llama3
+OLLAMA_VISION_MODEL=llava
+OLLAMA_TIMEOUT=120
+OLLAMA_MAX_RETRIES=2
+OLLAMA_RETRY_DELAY=2
+```
+
+**Prerequisites:**
+- Ollama must be installed and running: `ollama serve`
+- Model must be pulled before use: `ollama pull llama3`
+
+**Recommended Models for Translation:**
+| Model | Size | Best For |
+|-------|------|----------|
+| `llama3` | 8B | General translation, good balance |
+| `llama3:70b` | 70B | High-quality translation |
+| `mistral` | 7B | Fast translation |
+| `qwen2` | 7B | Strong non-English support |
+
+**Custom System Prompt:**
+```python
+request = TranslationRequest(
+ text="Hello",
+ target_language="fr",
+ metadata={"custom_prompt": "Translate formally for business context"}
+)
+```
+
+**Error Codes:**
+| Code | HTTP | Description |
+|------|------|-------------|
+| `OLLAMA_UNAVAILABLE` | 502 | Ollama service not reachable |
+| `OLLAMA_MODEL_NOT_FOUND` | 400 | Model not pulled |
+| `OLLAMA_TIMEOUT` | 502 | Request timeout |
+| `OLLAMA_GENERATION_ERROR` | 502 | LLM generation failed |
+| `OLLAMA_CONTEXT_TOO_LONG` | 413 | Text exceeds model limit |
+
+## Usage
+
+```python
+from services.providers.google_provider import GoogleTranslationProvider
+from services.providers.deepl_provider import DeepLTranslationProvider
+from services.providers.openai_provider import OpenAITranslationProvider
+from services.providers.ollama_provider import OllamaTranslationProvider
+from services.providers.schemas import TranslationRequest
+
+# Google provider
+google_provider = GoogleTranslationProvider()
+request = TranslationRequest(text="Hello", target_language="fr")
+response = google_provider.translate_text(request)
+
+# DeepL provider (requires API key)
+deepl_provider = DeepLTranslationProvider(api_key="your-key:fx")
+request = TranslationRequest(text="Hello", target_language="fr")
+response = deepl_provider.translate_text(request)
+
+# OpenAI provider (requires API key)
+openai_provider = OpenAITranslationProvider(
+ api_key="sk-...",
+ model="gpt-4o-mini"
+)
+request = TranslationRequest(text="Hello", target_language="fr")
+response = openai_provider.translate_text(request)
+
+# OpenAI with custom prompt
+request = TranslationRequest(
+ text="Hello",
+ target_language="fr",
+ metadata={"custom_prompt": "Translate formally for business context"}
+)
+response = openai_provider.translate_text(request)
+
+# Ollama provider (requires local Ollama running)
+ollama_provider = OllamaTranslationProvider(
+ base_url="http://localhost:11434",
+ model="llama3"
+)
+request = TranslationRequest(text="Hello", target_language="fr")
+response = ollama_provider.translate_text(request)
+
+# Ollama with custom prompt
+request = TranslationRequest(
+ text="Hello",
+ target_language="fr",
+ metadata={"custom_prompt": "Translate formally"}
+)
+response = ollama_provider.translate_text(request)
+
+if response.success:
+ print(response.translated_text)
+else:
+ print(f"Error: {response.error_code} - {response.error}")
+```
+
+## Registry Usage
+
+```python
+from services.providers import registry
+
+# List all providers
+print(registry.list_all())
+
+# Get first available from fallback chain
+provider = registry.get_first_available(["google", "deepl", "openai", "ollama"])
+
+# List the providers that are currently available
+print(registry.list_available())
+```
+
+## Health Check
+
+```python
+status = provider.health_check()
+print(f"Available: {status.available}")
+print(f"Latency: {status.latency_ms}ms")
+print(f"Last Check: {status.last_check}")
+```
+
+## Architecture
+
+All providers extend `TranslationProvider` base class and implement:
+- `translate_text(request: TranslationRequest) -> TranslationResponse`
+- `translate_batch(requests: List[TranslationRequest]) -> List[TranslationResponse]`
+- `is_available() -> bool`
+- `health_check() -> ProviderHealthStatus`
+- `get_name() -> str`
diff --git a/services/providers/__init__.py b/services/providers/__init__.py
new file mode 100644
index 0000000..23ed665
--- /dev/null
+++ b/services/providers/__init__.py
@@ -0,0 +1,81 @@
+"""
+Translation Providers Package.
+
+This package provides a pluggable architecture for translation providers
+with a registry for easy access and fallback support.
+
+Usage:
+ from services.providers import TranslationProvider, registry
+ from services.providers.schemas import TranslationRequest, TranslationResponse
+
+ # Get a provider (Google is auto-registered)
+ google_provider = registry.get("google")
+
+ # Translate text
+ request = TranslationRequest(text="Hello", target_language="fr")
+ response = google_provider.translate_text(request)
+
+ # Use fallback chain
+ provider = registry.get_first_available(["google", "deepl", "openai"])
+"""
+
+from .base import TranslationProvider
+from .schemas import (
+ TranslationRequest,
+ TranslationResponse,
+ BatchTranslationRequest,
+ BatchTranslationResponse,
+ ProviderHealthStatus,
+)
+from .registry import ProviderRegistry, registry, get_registry
+
+__all__ = [
+ "TranslationProvider",
+ "TranslationRequest",
+ "TranslationResponse",
+ "BatchTranslationRequest",
+ "BatchTranslationResponse",
+ "ProviderHealthStatus",
+ "ProviderRegistry",
+ "registry",
+ "get_registry",
+ "translate_with_fallback",
+ "translate_with_fallback_by_mode",
+ "AllProvidersFailedError",
+ "ALL_PROVIDERS_FAILED",
+]
+
+
+def _auto_register_providers() -> None:
+    """
+    Register every enabled provider when the package is imported.
+
+    Each provider module is imported only inside the branch that enables it,
+    so a disabled provider never has its module loaded. DeepL and OpenAI are
+    additionally skipped when their API key is empty.
+    """
+    from .config import ProvidersConfig
+
+    if ProvidersConfig.GOOGLE_ENABLED:
+        # Imported lazily, like the other providers below, so that turning
+        # Google off also avoids importing its module.
+        from .google_provider import register_google_provider
+
+        register_google_provider()
+
+    if ProvidersConfig.DEEPL_ENABLED and ProvidersConfig.DEEPL_API_KEY:
+        from .deepl_provider import register_deepl_provider
+
+        register_deepl_provider()
+
+    if ProvidersConfig.OLLAMA_ENABLED:
+        from .ollama_provider import register_ollama_provider
+
+        register_ollama_provider()
+
+    if ProvidersConfig.OPENAI_ENABLED and ProvidersConfig.OPENAI_API_KEY:
+        from .openai_provider import register_openai_provider
+
+        register_openai_provider()
+
+
+_auto_register_providers()
+
+# Import fallback functions for easy access
+from .fallback import (
+ translate_with_fallback,
+ translate_with_fallback_by_mode,
+ AllProvidersFailedError,
+ ALL_PROVIDERS_FAILED,
+)
diff --git a/services/providers/base.py b/services/providers/base.py
new file mode 100644
index 0000000..8710213
--- /dev/null
+++ b/services/providers/base.py
@@ -0,0 +1,104 @@
+"""
+Abstract base class for translation providers.
+Provides a common interface for all translation provider implementations.
+"""
+
+from abc import ABC, abstractmethod
+from typing import Optional, List
+import time
+
+from .schemas import (
+ TranslationRequest,
+ TranslationResponse,
+ BatchTranslationRequest,
+ BatchTranslationResponse,
+ ProviderHealthStatus,
+)
+
+
+class TranslationProvider(ABC):
+    """
+    Abstract base class for translation providers.
+
+    Concrete providers must implement ``translate_text``, ``get_name`` and
+    ``is_available``. ``translate_batch`` and ``health_check`` have default
+    implementations built on those methods and may be overridden.
+    """
+
+    @abstractmethod
+    def translate_text(self, request: TranslationRequest) -> TranslationResponse:
+        """
+        Translate a single text string.
+
+        Args:
+            request: TranslationRequest containing text, target_language, and source_language
+
+        Returns:
+            TranslationResponse with translated text and metadata
+        """
+        pass
+
+    @abstractmethod
+    def get_name(self) -> str:
+        """
+        Return the provider name for logging and registry.
+
+        Returns:
+            Provider name as a string (e.g., "google", "deepl", "openai")
+        """
+        pass
+
+    @abstractmethod
+    def is_available(self) -> bool:
+        """
+        Check if the provider is configured and reachable.
+
+        Returns:
+            True if the provider can perform translations, False otherwise
+        """
+        pass
+
+    def translate_batch(
+        self, requests: List[TranslationRequest]
+    ) -> List[TranslationResponse]:
+        """
+        Translate multiple texts by calling ``translate_text`` once per request.
+
+        Subclasses can override this for optimized batch processing.
+
+        Args:
+            requests: List of TranslationRequest objects
+
+        Returns:
+            List of TranslationResponse objects in the same order as requests
+        """
+        return [self.translate_text(req) for req in requests]
+
+    def health_check(self) -> ProviderHealthStatus:
+        """
+        Return health status details for the provider.
+
+        Times a single ``is_available()`` call. An exception raised by that
+        call is reported as ``available=False`` with the exception text in
+        ``error`` instead of being propagated.
+
+        Returns:
+            ProviderHealthStatus with availability and latency information
+        """
+        # perf_counter() is monotonic, so the measured latency cannot be
+        # skewed by wall-clock adjustments the way time.time() can.
+        started = time.perf_counter()
+        error: Optional[str] = None
+
+        try:
+            available = self.is_available()
+            if not available:
+                error = "Provider not available"
+        except Exception as e:
+            available = False
+            error = str(e)
+
+        latency_ms = (time.perf_counter() - started) * 1000
+        return ProviderHealthStatus(
+            name=self.get_name(),
+            available=available,
+            latency_ms=round(latency_ms, 2),
+            error=error,
+        )
diff --git a/services/providers/config.py b/services/providers/config.py
new file mode 100644
index 0000000..9823b63
--- /dev/null
+++ b/services/providers/config.py
@@ -0,0 +1,208 @@
+"""
+Provider Configuration - Environment-based settings for translation providers.
+
+Loads API keys, URLs, and enable/disable flags from environment variables.
+"""
+
+import os
+from typing import List, Optional
+from pydantic import BaseModel
+
+
+def _ensure_dotenv_loaded() -> None:
+    """
+    Load variables from a ``.env`` file by calling ``dotenv.load_dotenv()``.
+
+    The call is unconditional: this function keeps no record of whether a
+    previous load already happened.
+    """
+    from dotenv import load_dotenv
+
+    load_dotenv()
+
+
+_ensure_dotenv_loaded()
+
+
+class ProviderSettings(BaseModel):
+    """Settings for a single translation provider; every field has a default."""
+
+    # Whether the provider is switched on (off unless set explicitly).
+    enabled: bool = False
+    # API key, or None when no key is set / the provider takes none.
+    api_key: Optional[str] = None
+    # Endpoint base URL, or None when the provider has no configurable URL.
+    base_url: Optional[str] = None
+    # Model identifier, or None for providers without a model choice.
+    model: Optional[str] = None
+
+
+class ProvidersConfig:
+    """
+    Configuration for all translation providers.
+
+    Every attribute below is computed once, when this class body executes
+    (i.e. when the module is imported); changing an environment variable
+    afterwards does not update the value. Boolean flags are true only when the
+    variable equals "true" case-insensitively. Numeric settings go through
+    ``int()`` / ``float()``, which raise ``ValueError`` at import time if the
+    variable holds a non-numeric string.
+    """
+
+    # Google Translate (no API key required via deep_translator)
+    GOOGLE_ENABLED: bool = (
+        os.getenv("GOOGLE_TRANSLATE_ENABLED", "true").lower() == "true"
+    )
+    GOOGLE_TRANSLATE_TIMEOUT: int = int(os.getenv("GOOGLE_TRANSLATE_TIMEOUT", "30"))
+    GOOGLE_TRANSLATE_MAX_RETRIES: int = int(
+        os.getenv("GOOGLE_TRANSLATE_MAX_RETRIES", "3")
+    )
+    GOOGLE_TRANSLATE_RETRY_DELAY: float = float(
+        os.getenv("GOOGLE_TRANSLATE_RETRY_DELAY", "1.0")
+    )
+
+    # DeepL
+    DEEPL_ENABLED: bool = os.getenv("DEEPL_ENABLED", "false").lower() == "true"
+    DEEPL_API_KEY: str = os.getenv("DEEPL_API_KEY", "")
+    DEEPL_TIMEOUT: int = int(os.getenv("DEEPL_TIMEOUT", "30"))
+    DEEPL_MAX_RETRIES: int = int(os.getenv("DEEPL_MAX_RETRIES", "3"))
+    DEEPL_RETRY_DELAY: float = float(os.getenv("DEEPL_RETRY_DELAY", "1.0"))
+
+    # OpenAI
+    OPENAI_ENABLED: bool = os.getenv("OPENAI_ENABLED", "false").lower() == "true"
+    OPENAI_API_KEY: str = os.getenv("OPENAI_API_KEY", "")
+    OPENAI_MODEL: str = os.getenv("OPENAI_MODEL", "gpt-4o-mini")
+    OPENAI_TIMEOUT: int = int(os.getenv("OPENAI_TIMEOUT", "60"))
+    OPENAI_MAX_RETRIES: int = int(os.getenv("OPENAI_MAX_RETRIES", "3"))
+    OPENAI_RETRY_DELAY: float = float(os.getenv("OPENAI_RETRY_DELAY", "1.0"))
+    OPENAI_BASE_URL: str = os.getenv("OPENAI_BASE_URL", "https://api.openai.com/v1")
+    OPENAI_HEALTH_CHECK_TIMEOUT: int = int(
+        os.getenv("OPENAI_HEALTH_CHECK_TIMEOUT", "5")
+    )
+
+    # Ollama (local LLM) - default model is config-only, no hardcode in provider
+    _DEFAULT_OLLAMA_MODEL: str = "llama3"
+    OLLAMA_ENABLED: bool = os.getenv("OLLAMA_ENABLED", "false").lower() == "true"
+    OLLAMA_BASE_URL: str = os.getenv("OLLAMA_BASE_URL", "http://localhost:11434")
+    OLLAMA_MODEL: str = os.getenv("OLLAMA_MODEL", _DEFAULT_OLLAMA_MODEL)
+    OLLAMA_VISION_MODEL: str = os.getenv("OLLAMA_VISION_MODEL", "llava")
+    OLLAMA_TIMEOUT: int = int(os.getenv("OLLAMA_TIMEOUT", "120"))
+    OLLAMA_MAX_RETRIES: int = int(os.getenv("OLLAMA_MAX_RETRIES", "2"))
+    OLLAMA_RETRY_DELAY: float = float(os.getenv("OLLAMA_RETRY_DELAY", "2.0"))
+
+    # OpenRouter (multi-model API)
+    OPENROUTER_ENABLED: bool = (
+        os.getenv("OPENROUTER_ENABLED", "false").lower() == "true"
+    )
+    OPENROUTER_API_KEY: str = os.getenv("OPENROUTER_API_KEY", "")
+    OPENROUTER_MODEL: str = os.getenv("OPENROUTER_MODEL", "deepseek/deepseek-chat")
+
+    # Fallback chain configuration
+    # Each chain is a comma-separated list; entries are stripped of
+    # surrounding whitespace and empty entries are dropped.
+    # General fallback chain (backward compatibility)
+    FALLBACK_CHAIN: List[str] = [
+        name.strip()
+        for name in os.getenv(
+            "PROVIDER_FALLBACK_CHAIN", "google,deepl,openai,ollama,openrouter"
+        ).split(",")
+        if name.strip()
+    ]
+
+    # Mode-specific fallback chains
+    # Classic mode: Google Translate -> DeepL
+    FALLBACK_CHAIN_CLASSIC: List[str] = [
+        name.strip()
+        for name in os.getenv("FALLBACK_CHAIN_CLASSIC", "google,deepl").split(",")
+        if name.strip()
+    ]
+
+    # LLM mode: Ollama (local) -> OpenAI (cloud)
+    FALLBACK_CHAIN_LLM: List[str] = [
+        name.strip()
+        for name in os.getenv("FALLBACK_CHAIN_LLM", "ollama,openai").split(",")
+        if name.strip()
+    ]
+
+    @classmethod
+    def get_fallback_chain(cls, mode: str = "auto") -> List[str]:
+        """
+        Get the fallback chain for a specific mode.
+
+        Args:
+            mode: "classic" for Classic providers, "llm" for LLM providers
+                (both matched case-insensitively); "auto", ``None`` or any
+                other value selects the general fallback chain
+
+        Returns:
+            A new list of provider names in fallback order. Mutating it does
+            not affect the configuration held on the class.
+        """
+        # ``None`` used to fail on ``.lower()``; treat it like "auto".
+        normalized = (mode or "auto").lower()
+        if normalized == "classic":
+            chain = cls.FALLBACK_CHAIN_CLASSIC
+        elif normalized == "llm":
+            chain = cls.FALLBACK_CHAIN_LLM
+        else:
+            chain = cls.FALLBACK_CHAIN
+        # Hand out a copy: the class-level lists are shared, process-wide
+        # state, and a caller editing its result must not rewrite them.
+        return list(chain)
+
+    @classmethod
+    def get_provider_settings(cls, provider_name: str) -> ProviderSettings:
+        """
+        Build the ProviderSettings snapshot for one provider.
+
+        Args:
+            provider_name: Provider name, matched case-insensitively
+                ("google", "deepl", "openai", "ollama" or "openrouter")
+
+        Returns:
+            ProviderSettings filled from the class attributes; an all-default
+            ProviderSettings (disabled) when the name is not recognised.
+            Empty API keys / URLs are reported as None.
+        """
+        wanted = provider_name.lower()
+
+        if wanted == "google":
+            return ProviderSettings(
+                enabled=cls.GOOGLE_ENABLED, api_key=None, base_url=None, model=None
+            )
+
+        if wanted == "deepl":
+            return ProviderSettings(
+                enabled=cls.DEEPL_ENABLED,
+                api_key=cls.DEEPL_API_KEY or None,
+                base_url=None,
+                model=None,
+            )
+
+        if wanted == "openai":
+            return ProviderSettings(
+                enabled=cls.OPENAI_ENABLED,
+                api_key=cls.OPENAI_API_KEY or None,
+                base_url=cls.OPENAI_BASE_URL or None,
+                model=cls.OPENAI_MODEL,
+            )
+
+        if wanted == "ollama":
+            return ProviderSettings(
+                enabled=cls.OLLAMA_ENABLED,
+                api_key=None,
+                base_url=cls.OLLAMA_BASE_URL,
+                model=cls.OLLAMA_MODEL,
+            )
+
+        if wanted == "openrouter":
+            return ProviderSettings(
+                enabled=cls.OPENROUTER_ENABLED,
+                api_key=cls.OPENROUTER_API_KEY or None,
+                base_url="https://openrouter.ai/api/v1",
+                model=cls.OPENROUTER_MODEL,
+            )
+
+        return ProviderSettings()
+
+    @classmethod
+    def is_provider_configured(cls, provider_name: str) -> bool:
+        """
+        Tell whether a provider is enabled and has the settings it needs.
+
+        Args:
+            provider_name: Name of the provider (case-insensitive)
+
+        Returns:
+            False when the provider is disabled or unknown. For "deepl",
+            "openai" and "openrouter", True only if an API key is set.
+            True for every other enabled provider.
+        """
+        provider_settings = cls.get_provider_settings(provider_name)
+        if not provider_settings.enabled:
+            return False
+
+        # Only these providers need an API key to be usable.
+        needs_key = provider_name.lower() in {"deepl", "openai", "openrouter"}
+        if not needs_key:
+            return True
+        return bool(provider_settings.api_key)
+
+    @classmethod
+    def get_available_providers(cls) -> List[str]:
+        """
+        List the providers of the general fallback chain that are configured.
+
+        Returns:
+            Names from ``FALLBACK_CHAIN``, in chain order, for which
+            ``is_provider_configured`` is true. No reachability check is made.
+        """
+        configured: List[str] = []
+        for candidate in cls.FALLBACK_CHAIN:
+            if cls.is_provider_configured(candidate):
+                configured.append(candidate)
+        return configured
+
+
+providers_config = ProvidersConfig()
diff --git a/services/providers/deepl_provider.py b/services/providers/deepl_provider.py
new file mode 100644
index 0000000..fefe5ac
--- /dev/null
+++ b/services/providers/deepl_provider.py
@@ -0,0 +1,763 @@
+"""
+DeepL Provider - Production-ready implementation.
+
+Extends TranslationProvider base class with robust error handling,
+retry logic, and health monitoring.
+
+Features:
+- Automatic Free/Pro endpoint detection based on API key format
+- Specific error codes for all DeepL API errors
+- Retry logic with exponential backoff for transient errors
+- Timeout configuration
+- Health check with caching
+- Structlog-compatible logging (no document content in logs)
+"""
+
+import os
+import socket
+import threading
+import time
+from concurrent.futures import ThreadPoolExecutor, TimeoutError as FuturesTimeoutError
+from datetime import datetime, timezone
+from typing import Any, Dict, List, Optional
+
+try:
+ import structlog
+
+ _HAS_STRUCTLOG = True
+ logger = structlog.get_logger(__name__)
+except ImportError:
+ import logging
+
+ _HAS_STRUCTLOG = False
+ logger = logging.getLogger(__name__)
+
+
+def _log_event(_level: str, event: str, **kwargs):
+    """
+    Emit ``event`` through the module logger method named ``_level``.
+
+    With structlog the keyword arguments are passed through as structured
+    fields; with the standard library logger they are appended to the message
+    as space-separated ``key=value`` pairs.
+    """
+    emit = getattr(logger, _level)
+    if _HAS_STRUCTLOG:
+        emit(event, **kwargs)
+    else:
+        emit(f"{event} {' '.join(f'{k}={v}' for k, v in kwargs.items())}")
+
+
+def _log_info(event: str, **kwargs):
+    """Log info message compatible with both structlog and standard logging."""
+    _log_event("info", event, **kwargs)
+
+
+def _log_warning(event: str, **kwargs):
+    """Log warning message compatible with both structlog and standard logging."""
+    _log_event("warning", event, **kwargs)
+
+
+def _log_error(event: str, **kwargs):
+    """Log error message compatible with both structlog and standard logging."""
+    _log_event("error", event, **kwargs)
+
+
+from .base import TranslationProvider
+from .schemas import (
+ BatchTranslationRequest,
+ BatchTranslationResponse,
+ ProviderHealthStatus,
+ TranslationRequest,
+ TranslationResponse,
+)
+
+DEEPL_QUOTA_EXCEEDED = "DEEPL_QUOTA_EXCEEDED"
+DEEPL_INVALID_KEY = "DEEPL_INVALID_KEY"
+DEEPL_NETWORK_ERROR = "DEEPL_NETWORK_ERROR"
+DEEPL_UNSUPPORTED_LANGUAGE = "DEEPL_UNSUPPORTED_LANGUAGE"
+DEEPL_TEXT_TOO_LONG = "DEEPL_TEXT_TOO_LONG"
+
+_RETRYABLE_ERRORS = {DEEPL_NETWORK_ERROR, DEEPL_QUOTA_EXCEEDED}
+
+DEEPL_FREE_SUFFIX = ":fx"
+MAX_TEXT_LENGTH = 128 * 1024
+
+DEEPL_SUPPORTED_LANGUAGES = {
+ "BG",
+ "CS",
+ "DA",
+ "DE",
+ "EL",
+ "EN-GB",
+ "EN-US",
+ "ES",
+ "ET",
+ "FI",
+ "FR",
+ "HU",
+ "ID",
+ "IT",
+ "JA",
+ "KO",
+ "LT",
+ "LV",
+ "NB",
+ "NL",
+ "PL",
+ "PT-BR",
+ "PT-PT",
+ "RO",
+ "RU",
+ "SK",
+ "SL",
+ "SV",
+ "TR",
+ "UK",
+ "ZH",
+}
+
+
+class DeepLProviderError(Exception):
+    """Error raised by the DeepL provider, carrying a machine-readable code."""
+
+    def __init__(
+        self, code: str, message: str, details: Optional[Dict[str, Any]] = None
+    ):
+        # The exception's own text is the human-readable message.
+        super().__init__(message)
+        self.code = code
+        self.message = message
+        # A missing or empty mapping is normalised to a fresh empty dict.
+        self.details = details if details else {}
+
+    def to_dict(self) -> Dict[str, Any]:
+        """Return ``error``/``message`` plus ``details`` when it is non-empty."""
+        payload = {"error": self.code, "message": self.message}
+        if not self.details:
+            return payload
+        payload["details"] = self.details
+        return payload
+
+
+class DeepLTranslationProvider(TranslationProvider):
+    """
+    DeepL implementation using deep_translator library.
+
+    Features:
+    - Free/Pro key detection based on the API key suffix
+    - One translator instance per thread and language pair (threading.local)
+    - Optional caching through the cache object of services.translation_service
+    - Batch translation implemented as one translate_text call per request
+    - Error handling with specific error codes
+    - Retry logic with exponential backoff
+    - Configurable timeout
+    - Health check with result caching
+    """
+
+    def __init__(
+        self,
+        api_key: str,
+        use_cache: bool = True,
+        timeout: int = 30,
+        max_retries: int = 3,
+        retry_delay: float = 1.0,
+    ):
+        """
+        Initialize DeepL provider.
+
+        Args:
+            api_key: DeepL API key (Free keys end with :fx)
+            use_cache: Whether to use translation caching (default: True)
+            timeout: Request timeout in seconds (default: 30)
+            max_retries: Maximum retry attempts for transient errors (default: 3)
+            retry_delay: Initial retry delay in seconds (default: 1.0)
+
+        Raises:
+            ValueError: If ``api_key`` is empty.
+        """
+        if not api_key:
+            raise ValueError("DeepL API key is required")
+
+        self._api_key = api_key
+        self._api_type = self._detect_api_type(api_key)
+        # Per-thread storage for translator instances (see _get_translator).
+        self._local = threading.local()
+        self._use_cache = use_cache
+        self._provider_name = "deepl"
+        self._cache = None
+        self.timeout = timeout
+        self.max_retries = max_retries
+        self.retry_delay = retry_delay
+        # Results of is_available()/health_check(), reused for
+        # _health_cache_ttl seconds and guarded by _health_cache_lock.
+        self._health_cache: Dict[str, Any] = {}
+        self._health_cache_ttl = 60
+        self._health_cache_lock = threading.Lock()
+
+        if use_cache:
+            self._init_cache()
+
+    def _detect_api_type(self, api_key: str) -> str:
+        """
+        Classify an API key as belonging to the Free or the Pro tier.
+
+        A key ending with the ``DEEPL_FREE_SUFFIX`` marker (':fx') is a Free
+        key; anything else is treated as Pro.
+
+        Args:
+            api_key: DeepL API key
+
+        Returns:
+            "free" or "pro"
+        """
+        return "free" if api_key.endswith(DEEPL_FREE_SUFFIX) else "pro"
+
+    def _get_api_url(self) -> str:
+        """
+        Return the DeepL translate endpoint matching the detected key tier.
+
+        This value is informational: nothing in this module passes it to the
+        translator.
+
+        Returns:
+            API URL for Free or Pro tier
+        """
+        host = "api-free.deepl.com" if self._api_type == "free" else "api.deepl.com"
+        return f"https://{host}/v2/translate"
+
+    def _init_cache(self):
+        """
+        Bind ``self._cache`` to ``services.translation_service._translation_cache``.
+
+        The cache object is shared with the translation service module rather
+        than created here.
+        """
+        # Imported inside the method instead of at module top -- presumably
+        # to avoid a circular import with translation_service (TODO confirm).
+        from services.translation_service import _translation_cache
+
+        self._cache = _translation_cache
+
+    def _normalize_language_code(self, lang_code: str) -> str:
+        """
+        Normalize language code for DeepL.
+
+        DeepL uses uppercase language codes (e.g., "EN-US", "FR").
+
+        Resolution order:
+        1. empty input or "auto" (any case) -> "" (meaning auto-detect)
+        2. the upper-cased code itself, if listed in DEEPL_SUPPORTED_LANGUAGES
+        3. the part before the first "-": "EN" -> "EN-US", "PT" -> "PT-BR",
+           any other supported base language -> that base language
+        4. otherwise the upper-cased input, unchanged (callers detect the
+           unsupported code via _is_language_supported)
+
+        Args:
+            lang_code: Input language code (e.g., "en", "en-US", "EN-us")
+
+        Returns:
+            Normalized language code for DeepL
+        """
+        if not lang_code or lang_code.lower() == "auto":
+            return ""
+
+        lang_upper = lang_code.upper()
+        if lang_upper in DEEPL_SUPPORTED_LANGUAGES:
+            return lang_upper
+
+        base_lang = lang_upper.split("-")[0]
+
+        # Languages that DeepL only lists with a regional variant get a default one.
+        # NOTE(review): the same mapping is applied to source codes; DeepL may
+        # accept regional variants only as targets -- confirm for source use.
+        regional_defaults = {"EN": "EN-US", "PT": "PT-BR"}
+        if base_lang in regional_defaults:
+            return regional_defaults[base_lang]
+
+        # base_lang never contains "-", so this matches exactly the
+        # un-hyphenated entries of the supported table; no second hand-written
+        # copy of the language list is needed.
+        if base_lang in DEEPL_SUPPORTED_LANGUAGES:
+            return base_lang
+
+        return lang_upper
+
+    def _is_language_supported(self, lang_code: str) -> bool:
+        """
+        Tell whether DeepL supports a language code.
+
+        An empty code is accepted as supported; any other code is normalized
+        first and then looked up in DEEPL_SUPPORTED_LANGUAGES.
+
+        Args:
+            lang_code: Language code to check
+
+        Returns:
+            True if supported, False otherwise
+        """
+        if lang_code:
+            return self._normalize_language_code(lang_code) in DEEPL_SUPPORTED_LANGUAGES
+        return True
+
+    def _get_translator(self, source_language: str, target_language: str):
+        """
+        Get or create the translator for a language pair on the current thread.
+
+        Instances are cached in thread-local storage under the key
+        "<normalized source>_<normalized target>", so each thread builds at
+        most one translator per language pair.
+
+        Args:
+            source_language: Source code; empty or "auto" selects "auto"
+            target_language: Target language code
+
+        Returns:
+            The deep_translator DeepL translator for that pair
+        """
+        try:
+            # deep_translator exports the class as ``DeeplTranslator``.
+            from deep_translator import DeeplTranslator as translator_cls
+        except ImportError:
+            # Fall back to the spelling this module originally imported.
+            from deep_translator import DeepLTranslator as translator_cls
+
+        source_lang = self._normalize_language_code(source_language)
+        target_lang = self._normalize_language_code(target_language)
+
+        key = f"{source_lang}_{target_lang}"
+        if not hasattr(self._local, "translators"):
+            self._local.translators = {}
+        if key not in self._local.translators:
+            # NOTE(review): codes are passed upper-case; confirm that the
+            # installed deep_translator accepts them in that form.
+            self._local.translators[key] = translator_cls(
+                api_key=self._api_key,
+                source=source_lang if source_lang else "auto",
+                target=target_lang,
+                # Send Pro keys to the Pro endpoint; without this the library
+                # default endpoint is used regardless of the detected key type.
+                use_free_api=self._api_type == "free",
+            )
+        return self._local.translators[key]
+
+    def _make_api_request(
+        self, text: str, source_language: str, target_language: str
+    ) -> str:
+        """
+        Translate ``text`` once (no retry) and map any failure to an error code.
+
+        The size and target-language checks run before any network call. The
+        translation runs on a worker thread so that ``self.timeout`` bounds
+        how long the caller waits. Failures are classified mostly by
+        substring matching on the lower-cased exception text.
+
+        Args:
+            text: Text to translate (at most MAX_TEXT_LENGTH bytes in UTF-8)
+            source_language: Source language code, or "auto"
+            target_language: Target language code
+
+        Returns:
+            The translated text
+
+        Raises:
+            DeepLProviderError: For any API errors with specific codes
+        """
+        if len(text.encode("utf-8")) > MAX_TEXT_LENGTH:
+            raise DeepLProviderError(
+                code=DEEPL_TEXT_TOO_LONG,
+                message="Texte trop long (max 128KB par requête).",
+                details={"text_length": len(text), "max_length": MAX_TEXT_LENGTH},
+            )
+
+        if not self._is_language_supported(target_language):
+            raise DeepLProviderError(
+                code=DEEPL_UNSUPPORTED_LANGUAGE,
+                message=f"Langue '{target_language}' non supportée par DeepL.",
+                details={"unsupported_language": target_language},
+            )
+
+        try:
+            translator = self._get_translator(source_language, target_language)
+            executor = ThreadPoolExecutor(max_workers=1)
+            try:
+                future = executor.submit(translator.translate, text)
+                return future.result(timeout=self.timeout)
+            finally:
+                # Leaving a ``with`` block calls shutdown(wait=True), which
+                # blocks until the worker finishes and so defeats the timeout
+                # above. Do not wait: a timed-out call is abandoned and its
+                # thread ends when the underlying request returns.
+                executor.shutdown(wait=False)
+        except Exception as e:
+            error_str = str(e).lower()
+
+            if (
+                "quota" in error_str
+                or "limit" in error_str
+                or "429" in error_str
+                or "456" in error_str
+            ):
+                raise DeepLProviderError(
+                    code=DEEPL_QUOTA_EXCEEDED,
+                    message="Quota DeepL dépassé. Réessayez demain.",
+                    details={"provider": "deepl", "api_type": self._api_type},
+                ) from e
+            elif (
+                "auth" in error_str
+                or "key" in error_str
+                or "invalid" in error_str
+                or "401" in error_str
+                or "403" in error_str
+            ):
+                raise DeepLProviderError(
+                    code=DEEPL_INVALID_KEY,
+                    message="Clé API DeepL invalide. Contactez l'administrateur.",
+                    details={"provider": "deepl"},
+                ) from e
+            elif "language" in error_str or "not supported" in error_str:
+                raise DeepLProviderError(
+                    code=DEEPL_UNSUPPORTED_LANGUAGE,
+                    message=f"Langue '{target_language}' non supportée par DeepL.",
+                    details={"unsupported_language": target_language},
+                ) from e
+            elif (
+                isinstance(e, (socket.timeout, TimeoutError, FuturesTimeoutError))
+                or "timeout" in error_str
+            ):
+                raise DeepLProviderError(
+                    code=DEEPL_NETWORK_ERROR,
+                    message="Service DeepL indisponible. Réessayez.",
+                    details={"provider": "deepl", "error_type": "timeout"},
+                ) from e
+            else:
+                # Anything unrecognised is reported as a network error; only
+                # the first 100 characters of the original text are kept.
+                raise DeepLProviderError(
+                    code=DEEPL_NETWORK_ERROR,
+                    message="Service DeepL indisponible. Réessayez.",
+                    details={"provider": "deepl", "original_error": str(e)[:100]},
+                ) from e
+
+    def get_name(self) -> str:
+        """Return the provider name set in ``__init__`` ("deepl")."""
+        return self._provider_name
+
+    def is_available(self) -> bool:
+        """
+        Check if DeepL is available (API key configured and API reachable).
+
+        Performs a real one-character translation ("a", en -> fr) with a
+        5-second limit to verify the API is actually reachable. The outcome,
+        success or failure, is cached for ``_health_cache_ttl`` seconds (60).
+
+        Returns:
+            True if the probe translation succeeded, False on any exception
+        """
+        current_time = time.time()
+
+        with self._health_cache_lock:
+            if "is_available" in self._health_cache:
+                cached = self._health_cache["is_available"]
+                if current_time - cached["timestamp"] < self._health_cache_ttl:
+                    return cached["value"]
+
+        available = False
+        try:
+            translator = self._get_translator("en", "fr")
+            executor = ThreadPoolExecutor(max_workers=1)
+            try:
+                future = executor.submit(translator.translate, "a")
+                future.result(timeout=5)
+            finally:
+                # A ``with`` block would wait for the worker on exit and make
+                # the 5-second limit ineffective; return without waiting.
+                executor.shutdown(wait=False)
+            available = True
+        except Exception as e:
+            _log_warning(
+                "deepl_availability_check_failed",
+                error=str(e)[:100],
+            )
+
+        # The probe runs outside the lock so other threads are not blocked
+        # for the duration of the network call.
+        with self._health_cache_lock:
+            self._health_cache["is_available"] = {
+                "value": available,
+                "timestamp": current_time,
+            }
+
+        return available
+
+    def translate_text(self, request: TranslationRequest) -> TranslationResponse:
+        """
+        Translate a single text string using DeepL.
+
+        API Usage Notes:
+        - DeepL Free tier: 500,000 characters/month
+        - DeepL Pro: ~€25 per million characters
+        - 128KB max per request
+
+        Steps, in order: return blank input unchanged; return the text
+        unchanged when the normalized source equals the normalized target;
+        return a cached translation if one exists; otherwise call the API,
+        retrying retryable errors with exponential backoff.
+
+        This method does not raise for translation failures: they are reported
+        through ``error`` / ``error_code`` / ``error_details`` on the
+        response, with ``translated_text`` set to the original text.
+
+        Args:
+            request: TranslationRequest with text and language info
+
+        Returns:
+            TranslationResponse with translated text
+        """
+        text = request.text
+        target_language = request.target_language
+        source_language = request.source_language or "auto"
+
+        # Empty or whitespace-only input: nothing to translate.
+        if not text or not text.strip():
+            return TranslationResponse(
+                translated_text=text,
+                provider_name=self._provider_name,
+                from_cache=False,
+            )
+
+        norm_source = self._normalize_language_code(source_language)
+        norm_target = self._normalize_language_code(target_language)
+
+        # norm_source is "" for auto-detect, so the shortcut only applies
+        # when the source language was given explicitly.
+        if norm_source and norm_source == norm_target:
+            _log_info(
+                "deepl_translation_skip",
+                source_target_lang=target_language,
+                text_length=len(text),
+            )
+            return TranslationResponse(
+                translated_text=text,
+                provider_name=self._provider_name,
+                from_cache=False,
+                source_language=source_language,
+            )
+
+        # Cache lookups use the caller's language codes, not the normalized ones.
+        if self._use_cache and self._cache:
+            cached = self._cache.get(
+                text, target_language, source_language, self._provider_name
+            )
+            if cached is not None:
+                return TranslationResponse(
+                    translated_text=cached,
+                    provider_name=self._provider_name,
+                    from_cache=True,
+                )
+
+        last_error: Optional[DeepLProviderError] = None
+        retries = 0
+
+        # Up to max_retries + 1 attempts in total.
+        while retries <= self.max_retries:
+            try:
+                result = self._make_api_request(text, source_language, target_language)
+
+                if self._use_cache and self._cache:
+                    self._cache.set(
+                        text,
+                        target_language,
+                        source_language,
+                        self._provider_name,
+                        result,
+                    )
+
+                # Only lengths and language codes are logged, never the text.
+                _log_info(
+                    "deepl_translation_success",
+                    chars=len(text),
+                    source_lang=source_language,
+                    target_lang=target_language,
+                    api_type=self._api_type,
+                    retries=retries,
+                )
+
+                return TranslationResponse(
+                    translated_text=result,
+                    provider_name=self._provider_name,
+                    from_cache=False,
+                )
+
+            except DeepLProviderError as e:
+                last_error = e
+
+                # Errors outside _RETRYABLE_ERRORS end the loop immediately.
+                if e.code not in _RETRYABLE_ERRORS:
+                    break
+
+                retries += 1
+                if retries <= self.max_retries:
+                    # Backoff doubles each time: retry_delay, 2x, 4x, ...
+                    delay = self.retry_delay * (2 ** (retries - 1))
+                    _log_info(
+                        "deepl_translation_retry",
+                        attempt=retries,
+                        delay_s=round(delay, 2),
+                        error_code=e.code,
+                        text_length=len(text),
+                        source_lang=source_language,
+                        target_lang=target_language,
+                    )
+                    time.sleep(delay)
+
+            except Exception as e:
+                # NOTE(review): this branch also catches failures of the cache
+                # write or the success log above, which then trigger a new API
+                # call even though a translation was obtained -- confirm that
+                # this is intended.
+                last_error = DeepLProviderError(
+                    code=DEEPL_NETWORK_ERROR,
+                    message="Service DeepL indisponible. Réessayez.",
+                    details={"original_error": str(e)[:100]},
+                )
+                retries += 1
+                if retries <= self.max_retries:
+                    delay = self.retry_delay * (2 ** (retries - 1))
+                    time.sleep(delay)
+
+        if last_error:
+            _log_error(
+                "deepl_translation_failed",
+                error_code=last_error.code,
+                text_length=len(text),
+                source_lang=source_language,
+                target_lang=target_language,
+                retries=retries,
+            )
+            return TranslationResponse(
+                translated_text=text,
+                provider_name=self._provider_name,
+                from_cache=False,
+                error=last_error.message,
+                error_code=last_error.code,
+                error_details=last_error.details,
+            )
+
+        # Reached only when the loop body never ran (negative max_retries).
+        return TranslationResponse(
+            translated_text=text,
+            provider_name=self._provider_name,
+            from_cache=False,
+            error="Unknown error",
+            error_code=DEEPL_NETWORK_ERROR,
+        )
+
+    def translate_batch(
+        self, requests: List[TranslationRequest]
+    ) -> List[TranslationResponse]:
+        """
+        Translate multiple texts, one ``translate_text`` call per request.
+
+        The requests are processed sequentially; no combined API request is
+        made. Failures are reported per item on the returned responses.
+
+        Args:
+            requests: List of TranslationRequest objects
+
+        Returns:
+            List of TranslationResponse objects, in the same order as requests
+        """
+        if not requests:
+            return []
+
+        return [self.translate_text(req) for req in requests]
+
+    def health_check(self) -> ProviderHealthStatus:
+        """
+        Return health status details for the provider.
+
+        Times one ``is_available()`` call and wraps the outcome in a
+        ProviderHealthStatus. The status object is cached and returned as-is
+        for ``_health_cache_ttl`` seconds.
+
+        Returns:
+            ProviderHealthStatus with availability and latency information
+        """
+        current_time = time.time()
+
+        with self._health_cache_lock:
+            if "health_check" in self._health_cache:
+                cached = self._health_cache["health_check"]
+                if current_time - cached["timestamp"] < self._health_cache_ttl:
+                    return cached["value"]
+
+        # Latency is an interval, so measure it with the monotonic
+        # perf_counter(); wall-clock time is kept only for the cache timestamp.
+        started = time.perf_counter()
+        last_check_iso = datetime.now(timezone.utc).isoformat()
+
+        try:
+            available = self.is_available()
+            latency_ms = (time.perf_counter() - started) * 1000
+
+            status = ProviderHealthStatus(
+                name=self._provider_name,
+                available=available,
+                latency_ms=round(latency_ms, 2),
+                error=None if available else "Provider not available",
+                last_check=last_check_iso,
+            )
+        except Exception as e:
+            latency_ms = (time.perf_counter() - started) * 1000
+            status = ProviderHealthStatus(
+                name=self._provider_name,
+                available=False,
+                latency_ms=round(latency_ms, 2),
+                error=str(e)[:100],
+                last_check=last_check_iso,
+            )
+
+        with self._health_cache_lock:
+            self._health_cache["health_check"] = {
+                "value": status,
+                "timestamp": current_time,
+            }
+
+        return status
+
+
+def register_deepl_provider():
+    """
+    Add the shared DeepL provider to the global registry under "deepl".
+
+    Nothing is registered when ``get_deepl_provider()`` yields no provider.
+
+    Returns:
+        Whatever ``get_deepl_provider()`` returned (the provider, or None).
+    """
+    from .registry import registry
+
+    instance = get_deepl_provider()
+    if not instance:
+        return instance
+
+    registry.register("deepl", instance)
+    return instance
+
+
+_provider_instance = None
+_provider_instance_lock = threading.Lock()
+
+
+def get_deepl_provider() -> Optional[DeepLTranslationProvider]:
+    """
+    Get or create the module-wide DeepL provider instance.
+
+    The instance is built on first use from ``ProvidersConfig`` and then
+    reused. When no API key is configured, None is returned and nothing is
+    stored, so a later call checks the configuration again.
+
+    Returns:
+        The shared DeepLTranslationProvider, or None without an API key
+    """
+    global _provider_instance
+    # Unlocked fast path once the instance exists.
+    if _provider_instance is None:
+        with _provider_instance_lock:
+            # Re-check under the lock: another thread may have created the
+            # instance while this one was waiting.
+            if _provider_instance is None:
+                from .config import ProvidersConfig
+
+                if not ProvidersConfig.DEEPL_API_KEY:
+                    return None
+
+                _provider_instance = DeepLTranslationProvider(
+                    api_key=ProvidersConfig.DEEPL_API_KEY,
+                    use_cache=True,
+                    timeout=getattr(ProvidersConfig, "DEEPL_TIMEOUT", 30),
+                    max_retries=getattr(ProvidersConfig, "DEEPL_MAX_RETRIES", 3),
+                    retry_delay=getattr(ProvidersConfig, "DEEPL_RETRY_DELAY", 1.0),
+                )
+    return _provider_instance
+
+
+class LegacyDeepLAdapter:
+    """
+    Exposes the new DeepLTranslationProvider via the legacy interface used by
+    translation_service: .translate(text, target_lang, source_lang) -> str and
+    .translate_batch(texts, target_lang, source_lang) -> List[str].
+    Raises TranslationProviderError on failure so the API can return 4xx/502.
+
+    The provider is looked up once, in ``__init__``; an adapter created while
+    DeepL is not configured keeps raising DEEPL_NOT_CONFIGURED.
+    """
+
+    def __init__(self):
+        self._provider = get_deepl_provider()
+        self.provider_name = "deepl"
+
+    def _require_provider(self):
+        """Return the wrapped provider, or raise DEEPL_NOT_CONFIGURED if there is none."""
+        if not self._provider:
+            from utils.exceptions import TranslationProviderError
+
+            raise TranslationProviderError(
+                "DEEPL_NOT_CONFIGURED",
+                "DeepL provider not configured. Set DEEPL_API_KEY.",
+                None,
+            )
+        return self._provider
+
+    @staticmethod
+    def _raise_if_failed(response) -> None:
+        """Turn an error carried on a TranslationResponse into TranslationProviderError."""
+        if response.error:
+            from utils.exceptions import TranslationProviderError
+
+            raise TranslationProviderError(
+                response.error_code or "UNKNOWN",
+                response.error or "Translation failed",
+                response.error_details,
+            )
+
+    def translate(
+        self, text: str, target_language: str, source_language: str = "auto"
+    ) -> str:
+        """
+        Translate one text and return the translated string.
+
+        Raises:
+            TranslationProviderError: If DeepL is not configured or the
+                provider response carries an error.
+        """
+        provider = self._require_provider()
+        req = TranslationRequest(
+            text=text,
+            target_language=target_language,
+            source_language=source_language,
+        )
+        resp = provider.translate_text(req)
+        self._raise_if_failed(resp)
+        return resp.translated_text
+
+    def translate_batch(
+        self,
+        texts: List[str],
+        target_language: str,
+        source_language: str = "auto",
+        batch_size: int = 50,
+    ) -> List[str]:
+        """
+        Translate several texts and return the translations in input order.
+
+        ``batch_size`` is accepted for compatibility with the legacy
+        signature and is not used. All texts are translated before the
+        responses are inspected; the first response carrying an error then
+        aborts the call.
+
+        Raises:
+            TranslationProviderError: If DeepL is not configured or any
+                response carries an error.
+        """
+        provider = self._require_provider()
+        requests = [
+            TranslationRequest(
+                text=t,
+                target_language=target_language,
+                source_language=source_language,
+            )
+            for t in texts
+        ]
+        responses = provider.translate_batch(requests)
+        result = []
+        for r in responses:
+            self._raise_if_failed(r)
+            result.append(r.translated_text)
+        return result
+
+
+def get_legacy_deepl_adapter() -> LegacyDeepLAdapter:
+    """Return a new adapter so the legacy translation_service can use the new provider."""
+    return LegacyDeepLAdapter()
diff --git a/services/providers/fallback.py b/services/providers/fallback.py
new file mode 100644
index 0000000..ace2c14
--- /dev/null
+++ b/services/providers/fallback.py
@@ -0,0 +1,345 @@
+"""
+Fallback Translation Service - Provider fallback chain implementation.
+
+Provides automatic fallback between translation providers when one fails,
+ensuring translation remains available even if individual providers are down.
+
+Features:
+- Try providers in order until one succeeds
+- Return structured error when all providers fail
+- Log failed attempts and successful provider
+- Never expose HTTP 500 or document content
+"""
+
+from typing import List, Optional, Dict, Any
+import time
+
+try:
+ import structlog
+
+ logger = structlog.get_logger(__name__)
+ _HAS_STRUCTLOG = True
+except ImportError:
+ import logging
+
+ logger = logging.getLogger(__name__)
+ _HAS_STRUCTLOG = False
+
+
+def _log_info(event: str, **kwargs):
+    """Emit an info record; flattens kwargs to ``key=value`` text without structlog."""
+    if not _HAS_STRUCTLOG:
+        pairs = [f"{key}={value}" for key, value in kwargs.items()]
+        logger.info(f"{event} " + " ".join(pairs))
+        return
+    logger.info(event, **kwargs)
+
+
+def _log_warning(event: str, **kwargs):
+    """Emit a warning record; flattens kwargs to ``key=value`` text without structlog."""
+    if not _HAS_STRUCTLOG:
+        pairs = [f"{key}={value}" for key, value in kwargs.items()]
+        logger.warning(f"{event} " + " ".join(pairs))
+        return
+    logger.warning(event, **kwargs)
+
+
+def _log_error(event: str, **kwargs):
+    """Emit an error record; flattens kwargs to ``key=value`` text without structlog."""
+    if not _HAS_STRUCTLOG:
+        pairs = [f"{key}={value}" for key, value in kwargs.items()]
+        logger.error(f"{event} " + " ".join(pairs))
+        return
+    logger.error(event, **kwargs)
+
+
+from .registry import registry
+from .schemas import TranslationRequest, TranslationResponse
+
+# Error code for when all providers fail
+ALL_PROVIDERS_FAILED = "ALL_PROVIDERS_FAILED"
+
+
+class AllProvidersFailedError(Exception):
+    """
+    Raised when every provider in a fallback chain has failed.
+
+    Attributes:
+        code: Always ``ALL_PROVIDERS_FAILED``.
+        message: Human-readable message (also passed to ``Exception``).
+        providers_tried: Names of the providers visited, in order.
+        errors: One dict per failure with ``provider``, ``error_code`` and
+            ``message`` keys.
+    """
+
+    def __init__(
+        self,
+        message: str = "Tous les fournisseurs de traduction ont échoué.",
+        providers_tried: Optional[List[str]] = None,
+        errors: Optional[List[Dict[str, Any]]] = None,
+    ):
+        self.code = ALL_PROVIDERS_FAILED
+        self.message = message
+        self.providers_tried = providers_tried or []
+        self.errors = errors or []
+        super().__init__(message)
+
+    def to_dict(self) -> Dict[str, Any]:
+        """Convert the error to a dictionary suitable for API responses.
+
+        Only the last recorded error is included, and its message is
+        truncated to 200 characters.
+        """
+        details: Dict[str, Any] = {
+            "providers_tried": self.providers_tried,
+            "error_count": len(self.errors),
+        }
+        if self.errors:
+            last_error = self.errors[-1]
+            # The "message" key may be present but None (or not a string);
+            # normalise before slicing so serialisation never raises.
+            raw_message = last_error.get("message")
+            safe_message = "" if raw_message is None else str(raw_message)
+            details["last_error"] = {
+                "provider": last_error.get("provider"),
+                "error_code": last_error.get("error_code"),
+                "message": safe_message[:200],  # Truncate
+            }
+        return {
+            "error": self.code,
+            "message": self.message,
+            "details": details,
+        }
+
+
+def translate_with_fallback(
+    request: TranslationRequest,
+    provider_names: List[str],
+    skip_unavailable: bool = True,
+) -> TranslationResponse:
+    """
+    Translate text using a fallback chain of providers.
+
+    Providers are looked up in the registry and tried in the given order.
+    The first response whose ``error`` is ``None`` is returned. Unregistered
+    providers, unavailable providers (when ``skip_unavailable`` is true),
+    error responses and raised exceptions are each recorded as a failure and
+    the next provider is tried.
+
+    Args:
+        request: TranslationRequest with text and language info
+        provider_names: Ordered list of provider names to try
+        skip_unavailable: If True, skip providers whose ``is_available()``
+            returns a falsy value. If False, try them anyway.
+
+    Returns:
+        TranslationResponse from the first successful provider;
+        ``provider_name`` is filled in if the provider left it empty.
+
+    Raises:
+        AllProvidersFailedError: When ``provider_names`` is empty or every
+            provider in the chain fails.
+
+    Example:
+        >>> request = TranslationRequest(text="Hello", target_language="fr")
+        >>> response = translate_with_fallback(
+        ...     request, ["google", "deepl", "openai"]
+        ... )
+        >>> print(response.translated_text)  # "Bonjour"
+        >>> print(response.provider_name)    # "deepl" (first that succeeded)
+    """
+    if not provider_names:
+        raise AllProvidersFailedError(
+            message="Aucun fournisseur configuré dans la chaîne de fallback.",
+            providers_tried=[],
+        )
+
+    providers_tried: List[str] = []
+    errors: List[Dict[str, Any]] = []
+
+    def _record_failure(name: str, error_code: Optional[str], message: str) -> None:
+        # Keep both bookkeeping lists in lock-step: one entry per provider visited.
+        errors.append(
+            {
+                "provider": name,
+                "error_code": error_code,
+                "message": message,
+            }
+        )
+        providers_tried.append(name)
+
+    _log_info(
+        "fallback_translation_started",
+        providers=provider_names,
+        source_lang=request.source_language,
+        target_lang=request.target_language,
+        text_length=len(request.text),
+    )
+
+    for provider_name in provider_names:
+        provider = registry.get(provider_name)
+
+        if provider is None:
+            _log_warning(
+                "fallback_provider_not_registered",
+                provider=provider_name,
+            )
+            _record_failure(
+                provider_name,
+                "PROVIDER_NOT_REGISTERED",
+                f"Provider '{provider_name}' not registered",
+            )
+            continue
+
+        if skip_unavailable and not provider.is_available():
+            _log_info(
+                "fallback_provider_unavailable",
+                provider=provider_name,
+            )
+            _record_failure(
+                provider_name,
+                "PROVIDER_UNAVAILABLE",
+                f"Provider '{provider_name}' is not available",
+            )
+            continue
+
+        # perf_counter is monotonic, so the measured latency cannot go
+        # negative or jump when the system clock is adjusted mid-request.
+        start_time = time.perf_counter()
+        try:
+            response = provider.translate_text(request)
+            latency_ms = (time.perf_counter() - start_time) * 1000
+
+            if response.error is None:
+                _log_info(
+                    "fallback_translation_success",
+                    provider=provider_name,
+                    latency_ms=round(latency_ms, 2),
+                    attempts=len(providers_tried) + 1,
+                    text_length=len(request.text),
+                    source_lang=request.source_language,
+                    target_lang=request.target_language,
+                )
+
+                # Ensure provider_name is set
+                if not response.provider_name:
+                    response.provider_name = provider_name
+
+                return response
+
+            # Provider answered, but with an error payload.
+            _log_warning(
+                "fallback_provider_error",
+                provider=provider_name,
+                error_code=response.error_code,
+                error_message=response.error[:200],  # Truncate
+            )
+            _record_failure(provider_name, response.error_code, response.error)
+
+        except Exception as e:
+            # Provider raised; only the exception type is logged, the message
+            # is kept (truncated) in the error list.
+            latency_ms = (time.perf_counter() - start_time) * 1000
+
+            _log_error(
+                "fallback_provider_exception",
+                provider=provider_name,
+                error_type=type(e).__name__,
+                latency_ms=round(latency_ms, 2),
+            )
+            _record_failure(
+                provider_name,
+                "PROVIDER_EXCEPTION",
+                str(e)[:200],  # Truncate
+            )
+
+    # All providers failed
+    _log_error(
+        "fallback_all_providers_failed",
+        providers_tried=providers_tried,
+        error_count=len(errors),
+        text_length=len(request.text),
+        source_lang=request.source_language,
+        target_lang=request.target_language,
+    )
+
+    raise AllProvidersFailedError(
+        message="Tous les fournisseurs de traduction ont échoué. Veuillez réessayer plus tard.",
+        providers_tried=providers_tried,
+        errors=errors,
+    )
+
+
+def translate_with_fallback_by_mode(
+    request: TranslationRequest,
+    mode: str = "auto",
+) -> TranslationResponse:
+    """
+    Resolve the provider chain configured for ``mode`` and translate with it.
+
+    Args:
+        request: TranslationRequest with text and language info
+        mode: Chain selector passed to ``ProvidersConfig.get_fallback_chain``
+            ("classic", "llm" or "auto").
+
+    Returns:
+        TranslationResponse from ``translate_with_fallback``.
+
+    Raises:
+        AllProvidersFailedError: When no chain is configured for ``mode`` or
+            every provider in the chain fails.
+    """
+    from .config import ProvidersConfig
+
+    chain = ProvidersConfig.get_fallback_chain(mode)
+    if chain:
+        return translate_with_fallback(request, chain)
+
+    raise AllProvidersFailedError(
+        message=f"Aucune chaîne de fallback configurée pour le mode '{mode}'.",
+        providers_tried=[],
+    )
+
+
+class LegacyFallbackAdapter:
+    """
+    Legacy-interface wrapper around the fallback chain.
+
+    Offers ``.translate(text, target_lang, source_lang) -> str`` and
+    ``.translate_batch(texts, target_lang, source_lang) -> List[str]`` as
+    expected by translation_service. ``AllProvidersFailedError`` raised by the
+    chain propagates to the caller unchanged.
+    """
+
+    def __init__(self, mode: str = "classic"):
+        """
+        Args:
+            mode: Fallback chain selector, e.g. "classic" or "llm"
+                (stored lower-cased).
+        """
+        normalized = mode.lower()
+        self._mode = normalized
+        self.provider_name = f"fallback_{normalized}"
+        self._last_provider_used: Optional[str] = None
+
+    def _translate_one(
+        self, text: str, target_language: str, source_language: str
+    ) -> str:
+        """Run one text through the chain and remember which provider answered."""
+        outcome = translate_with_fallback_by_mode(
+            TranslationRequest(
+                text=text,
+                target_language=target_language,
+                source_language=source_language,
+            ),
+            self._mode,
+        )
+        # Keep the previous value when the response does not name a provider.
+        self._last_provider_used = outcome.provider_name or self._last_provider_used
+        return outcome.translated_text
+
+    def translate(
+        self, text: str, target_language: str, source_language: str = "auto"
+    ) -> str:
+        """Translate a single string via the fallback chain."""
+        return self._translate_one(text, target_language, source_language)
+
+    def translate_batch(
+        self,
+        texts: List[str],
+        target_language: str,
+        source_language: str = "auto",
+        batch_size: int = 50,
+    ) -> List[str]:
+        """Translate each string in order, one chain call per string.
+
+        ``batch_size`` is accepted for signature compatibility and not used.
+        """
+        return [
+            self._translate_one(item, target_language, source_language)
+            for item in texts
+        ]
diff --git a/services/providers/google_provider.py b/services/providers/google_provider.py
new file mode 100644
index 0000000..dd7ad16
--- /dev/null
+++ b/services/providers/google_provider.py
@@ -0,0 +1,534 @@
+"""
+Google Translate Provider - Production-ready implementation.
+
+Extends TranslationProvider base class with robust error handling,
+retry logic, and health monitoring.
+
+Features:
+- Specific error codes for all Google API errors
+- Retry logic with exponential backoff for transient errors
+- Timeout configuration
+- Health check with caching
+- Structlog-compatible logging (no document content in logs)
+"""
+
+import os
+import socket
+import threading
+import time
+from concurrent.futures import ThreadPoolExecutor, TimeoutError as FuturesTimeoutError
+from datetime import datetime, timezone
+from typing import Any, Dict, List, Optional
+
+try:
+    import structlog
+    logger = structlog.get_logger(__name__)
+except ImportError:
+    import logging
+
+    class _KeyValueLogger:
+        """Stdlib-logging shim accepting structlog-style ``event, **fields`` calls.
+
+        This module logs with ``logger.info("event", key=value)``. A plain
+        ``logging.Logger`` raises ``TypeError`` on unknown keyword arguments,
+        so without structlog the fields are rendered into the message as
+        ``event key=value ...`` instead.
+        """
+
+        def __init__(self, wrapped):
+            self._wrapped = wrapped
+
+        def _emit(self, level, event, fields):
+            rendered = f"{event} " + " ".join(f"{k}={v}" for k, v in fields.items())
+            self._wrapped.log(level, rendered)
+
+        def debug(self, event, **fields):
+            self._emit(logging.DEBUG, event, fields)
+
+        def info(self, event, **fields):
+            self._emit(logging.INFO, event, fields)
+
+        def warning(self, event, **fields):
+            self._emit(logging.WARNING, event, fields)
+
+        def error(self, event, **fields):
+            self._emit(logging.ERROR, event, fields)
+
+        def critical(self, event, **fields):
+            self._emit(logging.CRITICAL, event, fields)
+
+        def __getattr__(self, name):
+            # Anything else (setLevel, handlers, ...) goes to the real logger.
+            return getattr(self._wrapped, name)
+
+    logger = _KeyValueLogger(logging.getLogger(__name__))
+
+from .base import TranslationProvider
+from .schemas import (
+ BatchTranslationRequest,
+ BatchTranslationResponse,
+ ProviderHealthStatus,
+ TranslationRequest,
+ TranslationResponse,
+)
+
+# Error codes carried by GoogleProviderError.code and copied into
+# TranslationResponse.error_code when a translation fails.
+GOOGLE_QUOTA_EXCEEDED = "GOOGLE_QUOTA_EXCEEDED"
+GOOGLE_INVALID_KEY = "GOOGLE_INVALID_KEY"
+GOOGLE_NETWORK_ERROR = "GOOGLE_NETWORK_ERROR"
+GOOGLE_UNSUPPORTED_LANGUAGE = "GOOGLE_UNSUPPORTED_LANGUAGE"
+GOOGLE_TEXT_TOO_LONG = "GOOGLE_TEXT_TOO_LONG"
+
+# Codes that translate_text() retries with exponential backoff; any other
+# code ends the retry loop immediately.
+_RETRYABLE_ERRORS = {GOOGLE_NETWORK_ERROR, GOOGLE_QUOTA_EXCEEDED}
+
+
+class GoogleProviderError(Exception):
+    """Coded exception for Google Translate failures.
+
+    Carries a machine-readable ``code``, a user-facing ``message`` and an
+    optional ``details`` dict (empty dict when not supplied).
+    """
+
+    def __init__(
+        self, code: str, message: str, details: Optional[Dict[str, Any]] = None
+    ):
+        super().__init__(message)
+        self.code = code
+        self.message = message
+        self.details = details if details else {}
+
+    def to_dict(self) -> Dict[str, Any]:
+        """Return ``{"error", "message"}`` plus ``"details"`` when non-empty."""
+        payload: Dict[str, Any] = {"error": self.code, "message": self.message}
+        if not self.details:
+            return payload
+        payload["details"] = self.details
+        return payload
+
+
+class GoogleTranslationProvider(TranslationProvider):
+ """
+ Google Translate implementation using deep_translator library.
+
+ Features:
+ - Thread-safe translator instances per thread
+ - Caching support (uses global cache from translation_service)
+ - Batch translation with optimized processing
+ - Robust error handling with specific error codes
+ - Retry logic with exponential backoff
+ - Configurable timeout
+ - Health check with result caching
+ """
+
+    def __init__(
+        self,
+        use_cache: bool = True,
+        timeout: int = 30,
+        max_retries: int = 3,
+        retry_delay: float = 1.0,
+    ):
+        """
+        Set up the Google Translate provider.
+
+        Args:
+            use_cache: Whether to use translation caching (default: True)
+            timeout: Request timeout in seconds (default: 30)
+            max_retries: Maximum retry attempts for transient errors (default: 3)
+            retry_delay: Initial retry delay in seconds (default: 1.0)
+        """
+        self._provider_name = "google"
+        # Per-thread storage for translator instances (see _get_translator).
+        self._local = threading.local()
+
+        # Request tuning.
+        self.timeout = timeout
+        self.max_retries = max_retries
+        self.retry_delay = retry_delay
+
+        # Health results are cached for _health_cache_ttl seconds under a lock.
+        self._health_cache_ttl = 60
+        self._health_cache: Dict[str, Any] = {}
+        self._health_cache_lock = threading.Lock()
+
+        # Translation cache is attached only when requested.
+        self._use_cache = use_cache
+        self._cache = None
+        if use_cache:
+            self._init_cache()
+
+    def _init_cache(self):
+        """Attach the module-level cache object from ``services.translation_service``."""
+        # Imported here rather than at module top, as in the original code.
+        from services.translation_service import _translation_cache as shared_cache
+
+        self._cache = shared_cache
+
+    def _get_translator(self, source_language: str, target_language: str):
+        """Return the calling thread's translator for this language pair, creating it once."""
+        from deep_translator import GoogleTranslator
+
+        if not hasattr(self._local, "translators"):
+            self._local.translators = {}
+        per_thread = self._local.translators
+
+        pair_key = f"{source_language}_{target_language}"
+        try:
+            return per_thread[pair_key]
+        except KeyError:
+            per_thread[pair_key] = GoogleTranslator(
+                source=source_language, target=target_language
+            )
+            return per_thread[pair_key]
+
+    def _make_api_request(
+        self, text: str, source_language: str, target_language: str
+    ) -> str:
+        """
+        Call Google Translate for one text and map any failure to a coded error.
+
+        The call runs in a single-use worker thread so that ``self.timeout``
+        can be enforced (deep_translator has no timeout parameter). On timeout
+        the worker is abandoned rather than joined, so this method returns
+        promptly; the underlying call may still finish in the background.
+
+        Args:
+            text: Text to translate (at most 5000 characters).
+            source_language: Source language code, or "auto".
+            target_language: Target language code.
+
+        Returns:
+            The translated text returned by the translator.
+
+        Raises:
+            GoogleProviderError: For over-long input and for any translator
+                failure, with a specific ``code``. The original exception is
+                chained as ``__cause__``.
+        """
+        if len(text) > 5000:
+            raise GoogleProviderError(
+                code=GOOGLE_TEXT_TOO_LONG,
+                message="Texte trop long (max 5000 caractères par requête).",
+                details={"text_length": len(text), "max_length": 5000},
+            )
+
+        try:
+            translator = self._get_translator(source_language, target_language)
+            # Do NOT use ``with ThreadPoolExecutor(...)`` here: leaving the
+            # with-block calls shutdown(wait=True), which blocks until the
+            # worker finishes and so defeats the timeout below.
+            executor = ThreadPoolExecutor(max_workers=1)
+            try:
+                future = executor.submit(translator.translate, text)
+                return future.result(timeout=self.timeout)
+            finally:
+                executor.shutdown(wait=False)
+        except Exception as e:
+            # Classification is by substring of the lowered message; the order
+            # of these checks decides which code wins when several match.
+            error_str = str(e).lower()
+
+            if "quota" in error_str or "limit" in error_str or "429" in error_str:
+                raise GoogleProviderError(
+                    code=GOOGLE_QUOTA_EXCEEDED,
+                    message="Quota Google Translate dépassé. Réessayez demain.",
+                    details={"provider": "google"},
+                ) from e
+            elif "api" in error_str and (
+                "key" in error_str
+                or "invalid" in error_str
+                or "401" in error_str
+                or "403" in error_str
+            ):
+                raise GoogleProviderError(
+                    code=GOOGLE_INVALID_KEY,
+                    message="Clé API Google invalide. Contactez l'administrateur.",
+                    details={"provider": "google"},
+                ) from e
+            elif "language" in error_str or "not supported" in error_str:
+                raise GoogleProviderError(
+                    code=GOOGLE_UNSUPPORTED_LANGUAGE,
+                    message=f"Langue '{target_language}' non supportée par Google.",
+                    details={"unsupported_language": target_language},
+                ) from e
+            elif (
+                isinstance(e, (socket.timeout, TimeoutError, FuturesTimeoutError))
+                or "timeout" in error_str
+            ):
+                raise GoogleProviderError(
+                    code=GOOGLE_NETWORK_ERROR,
+                    message="Service Google Translate indisponible. Réessayez.",
+                    details={"provider": "google", "error_type": "timeout"},
+                ) from e
+            else:
+                raise GoogleProviderError(
+                    code=GOOGLE_NETWORK_ERROR,
+                    message="Service Google Translate indisponible. Réessayez.",
+                    details={"provider": "google", "original_error": str(e)[:100]},
+                ) from e
+
+    def get_name(self) -> str:
+        """Name of this provider as stored in ``_provider_name``."""
+        name = self._provider_name
+        return name
+
+    def is_available(self) -> bool:
+        """
+        Report whether a translator object can be obtained.
+
+        The answer is cached for ``_health_cache_ttl`` seconds. The check only
+        builds (or reuses) a translator for auto -> en; it does not send a
+        translation request.
+        """
+        now = time.time()
+
+        with self._health_cache_lock:
+            entry = self._health_cache.get("is_available")
+            if entry is not None and now - entry["timestamp"] < self._health_cache_ttl:
+                return entry["value"]
+
+        try:
+            available = self._get_translator("auto", "en") is not None
+        except Exception as exc:
+            logger.warning(
+                "google_availability_check_failed",
+                error=str(exc)[:100],
+            )
+            available = False
+
+        fresh_entry = {"value": available, "timestamp": now}
+        with self._health_cache_lock:
+            self._health_cache["is_available"] = fresh_entry
+
+        return available
+
+    def translate_text(self, request: TranslationRequest) -> TranslationResponse:
+        """
+        Translate a single text string using Google Translate.
+
+        Flow: return blank text unchanged; skip the API when an explicit
+        source language equals the target; consult the cache; otherwise call
+        the API, retrying retryable error codes with exponential backoff
+        (``retry_delay * 2 ** (attempt - 1)``), up to ``max_retries`` retries.
+
+        Failures never raise from the retry loop: they are reported through
+        the ``error`` / ``error_code`` / ``error_details`` fields of the
+        response, with ``translated_text`` set to the original text.
+
+        Args:
+            request: TranslationRequest with text and language info
+
+        Returns:
+            TranslationResponse with translated text, or with error fields set
+        """
+        text = request.text
+        target_language = request.target_language
+        source_language = request.source_language or "auto"
+
+        if not text or not text.strip():
+            return TranslationResponse(
+                translated_text=text,
+                provider_name=self._provider_name,
+                from_cache=False,
+            )
+
+        # Optimization: Skip if source and target are the same
+        if source_language != "auto" and source_language == target_language:
+            logger.info(
+                "google_translation_skip",
+                source_target_lang=target_language,
+                text_length=len(text),
+            )
+            return TranslationResponse(
+                translated_text=text,
+                provider_name=self._provider_name,
+                from_cache=False,
+                source_language=source_language,
+            )
+
+        if self._use_cache and self._cache:
+            cached = self._cache.get(
+                text, target_language, source_language, self._provider_name
+            )
+            if cached is not None:
+                return TranslationResponse(
+                    translated_text=cached,
+                    provider_name=self._provider_name,
+                    from_cache=True,
+                )
+
+        last_error: Optional[GoogleProviderError] = None
+        retries = 0
+
+        while retries <= self.max_retries:
+            try:
+                result = self._make_api_request(text, source_language, target_language)
+
+            except GoogleProviderError as e:
+                last_error = e
+
+                # Non-retryable codes end the loop at once.
+                if e.code not in _RETRYABLE_ERRORS:
+                    break
+
+                retries += 1
+                if retries <= self.max_retries:
+                    delay = self.retry_delay * (2 ** (retries - 1))
+                    logger.info(
+                        "google_translation_retry",
+                        attempt=retries,
+                        delay_s=round(delay, 2),
+                        error_code=e.code,
+                        text_length=len(text),
+                        source_lang=source_language,
+                        target_lang=target_language,
+                    )
+                    time.sleep(delay)
+                continue
+
+            except Exception as e:
+                # Defensive: anything unexpected is reported as a network error.
+                last_error = GoogleProviderError(
+                    code=GOOGLE_NETWORK_ERROR,
+                    message="Service Google Translate indisponible. Réessayez.",
+                    details={"original_error": str(e)[:100]},
+                )
+                retries += 1
+                if retries <= self.max_retries:
+                    delay = self.retry_delay * (2 ** (retries - 1))
+                    time.sleep(delay)
+                continue
+
+            # The translation itself succeeded. The cache write and usage log
+            # are best-effort: a failure there must not be reported as a
+            # provider error, trigger a re-translation, or discard the result.
+            try:
+                if self._use_cache and self._cache:
+                    self._cache.set(
+                        text,
+                        target_language,
+                        source_language,
+                        self._provider_name,
+                        result,
+                    )
+
+                # Log usage metrics (character count, API call)
+                logger.info(
+                    "google_translation_success",
+                    chars=len(text),
+                    source_lang=source_language,
+                    target_lang=target_language,
+                    retries=retries,
+                )
+            except Exception as bookkeeping_error:
+                # Use stdlib logging directly: the module logger may itself be
+                # what failed. Only the exception type is logged.
+                import logging as _stdlib_logging
+
+                _stdlib_logging.getLogger(__name__).warning(
+                    "google_translation_bookkeeping_failed error_type=%s",
+                    type(bookkeeping_error).__name__,
+                )
+
+            return TranslationResponse(
+                translated_text=result,
+                provider_name=self._provider_name,
+                from_cache=False,
+            )
+
+        if last_error:
+            logger.error(
+                "google_translation_failed",
+                error_code=last_error.code,
+                text_length=len(text),
+                source_lang=source_language,
+                target_lang=target_language,
+                retries=retries,
+            )
+            return TranslationResponse(
+                translated_text=text,
+                provider_name=self._provider_name,
+                from_cache=False,
+                error=last_error.message,
+                error_code=last_error.code,
+                error_details=last_error.details,
+            )
+
+        # Reached only when the loop body never ran (e.g. negative max_retries).
+        return TranslationResponse(
+            translated_text=text,
+            provider_name=self._provider_name,
+            from_cache=False,
+            error="Unknown error",
+            error_code=GOOGLE_NETWORK_ERROR,
+        )
+
+    def translate_batch(
+        self, requests: List[TranslationRequest]
+    ) -> List[TranslationResponse]:
+        """
+        Translate several requests by calling ``translate_text`` on each, in order.
+
+        Args:
+            requests: List of TranslationRequest objects
+
+        Returns:
+            List of TranslationResponse objects, one per request; an empty
+            list when ``requests`` is empty.
+        """
+        if not requests:
+            return []
+
+        responses = []
+        for item in requests:
+            responses.append(self.translate_text(item))
+        return responses
+
+    def health_check(self) -> ProviderHealthStatus:
+        """
+        Build a health status for the provider, cached for ``_health_cache_ttl`` seconds.
+
+        Availability comes from ``is_available()``; the reported latency is
+        the wall-clock time that call took.
+
+        Returns:
+            ProviderHealthStatus with availability and latency information
+        """
+        now = time.time()
+
+        with self._health_cache_lock:
+            entry = self._health_cache.get("health_check")
+            if entry is not None and now - entry["timestamp"] < self._health_cache_ttl:
+                return entry["value"]
+
+        started = time.time()
+        checked_at = datetime.now(timezone.utc).isoformat()
+
+        def _elapsed_ms() -> float:
+            return round((time.time() - started) * 1000, 2)
+
+        try:
+            is_up = self.is_available()
+            status = ProviderHealthStatus(
+                name=self._provider_name,
+                available=is_up,
+                latency_ms=_elapsed_ms(),
+                error=None if is_up else "Provider not available",
+                last_check=checked_at,
+            )
+        except Exception as exc:
+            status = ProviderHealthStatus(
+                name=self._provider_name,
+                available=False,
+                latency_ms=_elapsed_ms(),
+                error=str(exc)[:100],
+                last_check=checked_at,
+            )
+
+        # The cache timestamp is the time taken before the check started.
+        with self._health_cache_lock:
+            self._health_cache["health_check"] = {
+                "value": status,
+                "timestamp": now,
+            }
+
+        return status
+
+
+def register_google_provider():
+    """
+    Register the shared Google provider under the name "google".
+
+    Obtains the instance from ``get_google_provider()``, registers it in the
+    global registry and returns it.
+    """
+    from .registry import registry
+
+    instance = get_google_provider()
+    registry.register("google", instance)
+    return instance
+
+
+# Module-level singleton slot; filled on the first get_google_provider() call.
+_provider_instance = None
+
+
+def get_google_provider() -> GoogleTranslationProvider:
+    """Return the shared Google provider, building it from ProvidersConfig on first use."""
+    global _provider_instance
+    if _provider_instance is not None:
+        return _provider_instance
+
+    from .config import ProvidersConfig
+
+    settings = {
+        "use_cache": True,
+        "timeout": ProvidersConfig.GOOGLE_TRANSLATE_TIMEOUT,
+        "max_retries": ProvidersConfig.GOOGLE_TRANSLATE_MAX_RETRIES,
+        "retry_delay": ProvidersConfig.GOOGLE_TRANSLATE_RETRY_DELAY,
+    }
+    _provider_instance = GoogleTranslationProvider(**settings)
+    return _provider_instance
+
+
+class LegacyGoogleAdapter:
+    """
+    Legacy-interface wrapper around ``GoogleTranslationProvider``.
+
+    Offers ``.translate(text, target_lang, source_lang) -> str`` and
+    ``.translate_batch(texts, target_lang, source_lang) -> List[str]`` as
+    expected by translation_service. A response carrying an error is turned
+    into a raised ``TranslationProviderError``.
+    """
+
+    def __init__(self):
+        self.provider_name = "google"
+        self._provider = get_google_provider()
+
+    @staticmethod
+    def _unwrap(resp) -> str:
+        """Return the translated text, or raise if the response has an error."""
+        if not resp.error:
+            return resp.translated_text
+
+        from utils.exceptions import TranslationProviderError
+        raise TranslationProviderError(
+            resp.error_code or "UNKNOWN",
+            resp.error or "Translation failed",
+            resp.error_details,
+        )
+
+    def translate(
+        self, text: str, target_language: str, source_language: str = "auto"
+    ) -> str:
+        """Translate one string; raises ``TranslationProviderError`` on failure."""
+        outcome = self._provider.translate_text(
+            TranslationRequest(
+                text=text,
+                target_language=target_language,
+                source_language=source_language,
+            )
+        )
+        return self._unwrap(outcome)
+
+    def translate_batch(
+        self,
+        texts: List[str],
+        target_language: str,
+        source_language: str = "auto",
+        batch_size: int = 50,
+    ) -> List[str]:
+        """Translate strings in order; raises on the first failed response.
+
+        ``batch_size`` is accepted for signature compatibility and not used.
+        """
+        batch = []
+        for item in texts:
+            batch.append(
+                TranslationRequest(
+                    text=item,
+                    target_language=target_language,
+                    source_language=source_language,
+                )
+            )
+        # All responses are collected first, then checked in order.
+        outcomes = self._provider.translate_batch(batch)
+        return [self._unwrap(outcome) for outcome in outcomes]
+
+
+def get_legacy_google_adapter() -> LegacyGoogleAdapter:
+    """Build a new ``LegacyGoogleAdapter`` for use by the legacy translation_service."""
+    adapter = LegacyGoogleAdapter()
+    return adapter
diff --git a/services/providers/ollama_provider.py b/services/providers/ollama_provider.py
new file mode 100644
index 0000000..c16f769
--- /dev/null
+++ b/services/providers/ollama_provider.py
@@ -0,0 +1,605 @@
+"""
+Ollama Provider - Local LLM translation provider.
+
+Extends TranslationProvider base class with robust error handling,
+retry logic, and health monitoring for local Ollama instances.
+
+Features:
+- Local LLM translation via Ollama REST API
+- Custom system prompt support
+- Specific error codes for all Ollama API errors
+- Retry logic with exponential backoff for transient errors
+- Timeout configuration (longer for LLM)
+- Health check with caching
+- Structlog-compatible logging (no document content in logs)
+"""
+
+import socket
+import threading
+import time
+from datetime import datetime, timezone
+from typing import Any, Dict, List, Optional
+from urllib.parse import urljoin
+
+try:
+ import structlog
+
+ logger = structlog.get_logger(__name__)
+ _HAS_STRUCTLOG = True
+except ImportError:
+ import logging
+
+ logger = logging.getLogger(__name__)
+ _HAS_STRUCTLOG = False
+
+
+def _log_info(event: str, **kwargs):
+    """Info-level log call that works with structlog or the stdlib logger."""
+    if _HAS_STRUCTLOG:
+        logger.info(event, **kwargs)
+        return
+    fields = " ".join(f"{name}={val}" for name, val in kwargs.items())
+    logger.info(f"{event} " + fields)
+
+
+def _log_warning(event: str, **kwargs):
+    """Warning-level log call that works with structlog or the stdlib logger."""
+    if _HAS_STRUCTLOG:
+        logger.warning(event, **kwargs)
+        return
+    fields = " ".join(f"{name}={val}" for name, val in kwargs.items())
+    logger.warning(f"{event} " + fields)
+
+
+def _log_error(event: str, **kwargs):
+    """Error-level log call that works with structlog or the stdlib logger."""
+    if _HAS_STRUCTLOG:
+        logger.error(event, **kwargs)
+        return
+    fields = " ".join(f"{name}={val}" for name, val in kwargs.items())
+    logger.error(f"{event} " + fields)
+
+
+import requests
+from requests.exceptions import Timeout, ConnectionError as RequestsConnectionError
+
+from .base import TranslationProvider
+from .schemas import (
+ ProviderHealthStatus,
+ TranslationRequest,
+ TranslationResponse,
+)
+
+# Error codes carried by OllamaProviderError.code.
+OLLAMA_UNAVAILABLE = "OLLAMA_UNAVAILABLE"
+OLLAMA_MODEL_NOT_FOUND = "OLLAMA_MODEL_NOT_FOUND"
+OLLAMA_TIMEOUT = "OLLAMA_TIMEOUT"
+OLLAMA_GENERATION_ERROR = "OLLAMA_GENERATION_ERROR"
+OLLAMA_CONTEXT_TOO_LONG = "OLLAMA_CONTEXT_TOO_LONG"
+
+# Codes treated as transient -- presumably consulted by the retry logic
+# later in this module (not visible in this section; confirm).
+_RETRYABLE_ERRORS = {OLLAMA_UNAVAILABLE, OLLAMA_TIMEOUT}
+
+
+class OllamaProviderError(Exception):
+    """Coded exception for Ollama API failures.
+
+    Holds a machine-readable ``code``, a user-facing ``message`` and a
+    ``details`` dict that defaults to empty.
+    """
+
+    def __init__(
+        self, code: str, message: str, details: Optional[Dict[str, Any]] = None
+    ):
+        super().__init__(message)
+        self.code, self.message = code, message
+        self.details = details or {}
+
+    def to_dict(self) -> Dict[str, Any]:
+        """Serialise to ``{"error", "message"}``, adding ``"details"`` if non-empty."""
+        as_dict: Dict[str, Any] = {
+            "error": self.code,
+            "message": self.message,
+        }
+        if self.details:
+            as_dict.update(details=self.details)
+        return as_dict
+
+
+# Default system prompt. The {source_lang} and {target_lang} placeholders are
+# filled with str.format() by _build_system_prompt when no custom prompt is given.
+DEFAULT_TRANSLATION_PROMPT = """You are a professional translator. Translate the following text from {source_lang} to {target_lang}.
+
+Rules:
+- Translate ONLY the text, do not add explanations or notes
+- Preserve the original formatting, line breaks, and structure
+- Maintain the original tone and style
+- For technical terms, use the standard translation in the target language
+- If the text contains proper nouns or brand names, keep them unchanged unless there's a well-known translation"""
+
+
+def _build_system_prompt(
+    source_lang: str, target_lang: str, custom_prompt: Optional[str] = None
+) -> str:
+    """Return ``custom_prompt`` verbatim if given, else the formatted default prompt."""
+    if not custom_prompt:
+        return DEFAULT_TRANSLATION_PROMPT.format(
+            source_lang=source_lang, target_lang=target_lang
+        )
+    # A custom prompt is used as-is; no placeholder substitution is applied.
+    return custom_prompt
+
+
+# Base language code -> English name. Built once at import time instead of
+# on every call.
+_OLLAMA_LANGUAGE_NAMES = {
+    "en": "English",
+    "fr": "French",
+    "es": "Spanish",
+    "de": "German",
+    "it": "Italian",
+    "pt": "Portuguese",
+    "nl": "Dutch",
+    "ru": "Russian",
+    "zh": "Chinese",
+    "ja": "Japanese",
+    "ko": "Korean",
+    "ar": "Arabic",
+    "hi": "Hindi",
+    "tr": "Turkish",
+    "pl": "Polish",
+    "vi": "Vietnamese",
+    "th": "Thai",
+    "id": "Indonesian",
+    "ms": "Malay",
+    "uk": "Ukrainian",
+    "cs": "Czech",
+    "sv": "Swedish",
+    "da": "Danish",
+    "fi": "Finnish",
+    "no": "Norwegian",
+    "el": "Greek",
+    "he": "Hebrew",
+    "ro": "Romanian",
+    "hu": "Hungarian",
+    "bg": "Bulgarian",
+    "sk": "Slovak",
+    "hr": "Croatian",
+    "sl": "Slovenian",
+    "lt": "Lithuanian",
+    "lv": "Latvian",
+    "et": "Estonian",
+}
+
+
+def _get_language_name(code: str) -> str:
+    """Convert a language code to its English name for use in the LLM prompt.
+
+    Only the base language is considered: a region or script suffix is
+    dropped whether it is separated by ``-`` or ``_`` (``en-US``, ``pt_BR``),
+    and case and surrounding whitespace are ignored.
+
+    Args:
+        code: Language code such as "fr", "en-US" or "pt_BR".
+
+    Returns:
+        The English language name, or ``code`` unchanged when the base
+        language is not in the table (for example "auto").
+    """
+    base_code = code.strip().replace("_", "-").split("-")[0].lower()
+    return _OLLAMA_LANGUAGE_NAMES.get(base_code, code)
+
+
+class OllamaTranslationProvider(TranslationProvider):
+ """
+ Ollama LLM implementation for local translation.
+
+ Features:
+ - Uses Ollama REST API (/api/chat endpoint)
+ - Custom system prompt support for translation context
+ - Thread-safe HTTP client
+ - Robust error handling with specific error codes
+ - Retry logic with exponential backoff
+ - Configurable timeout (default 120s for LLM)
+ - Health check with result caching
+ """
+
+    def __init__(
+        self,
+        base_url: str = "http://localhost:11434",
+        model: Optional[str] = None,
+        timeout: int = 120,
+        max_retries: int = 2,
+        retry_delay: float = 2.0,
+    ):
+        """
+        Set up the Ollama provider.
+
+        Args:
+            base_url: Ollama API base URL (default: http://localhost:11434);
+                a trailing slash is stripped.
+            model: Model name (e.g. llama3, mistral). If None, uses
+                ``ProvidersConfig.OLLAMA_MODEL``.
+            timeout: Request timeout in seconds (default: 120 for LLM)
+            max_retries: Maximum retry attempts for transient errors (default: 2)
+            retry_delay: Initial retry delay in seconds (default: 2.0)
+        """
+        self._provider_name = "ollama"
+        self._base_url = base_url.rstrip("/")
+
+        if model is None:
+            from .config import ProvidersConfig
+
+            model = ProvidersConfig.OLLAMA_MODEL
+        self._model = model
+
+        # Request tuning.
+        self.timeout = timeout
+        self.max_retries = max_retries
+        self.retry_delay = retry_delay
+
+        # Health results: cached for 60 s, guarded by a lock.
+        self._health_cache_ttl = 60
+        self._health_cache: Dict[str, Any] = {}
+        self._health_cache_lock = threading.Lock()
+
+        # Pulled-model list: cached for 300 s (see _fetch_available_models).
+        self._models_cache_ttl = 300
+        self._models_cache_time: float = 0
+        self._available_models: Optional[List[str]] = None
+
+    def _fetch_available_models(self) -> List[str]:
+        """Fetch the names of available (pulled) models from Ollama's ``/api/tags``.
+
+        A successful result is cached for ``_models_cache_ttl`` seconds. If a
+        refresh fails (exception or non-200 status), the last successfully
+        fetched list is returned when one exists, so a transient failure does
+        not make every model look missing. Failures are logged, never raised.
+
+        Returns:
+            List of model names; empty only when no list has ever been fetched
+            successfully and the current fetch failed, or Ollama reports none.
+        """
+        current_time = time.time()
+
+        if (
+            self._available_models is not None
+            and current_time - self._models_cache_time < self._models_cache_ttl
+        ):
+            return self._available_models
+
+        try:
+            response = requests.get(f"{self._base_url}/api/tags", timeout=10)
+            if response.status_code == 200:
+                data = response.json()
+                # "models" may be missing or null in the payload.
+                models = [m.get("name", "") for m in (data.get("models") or [])]
+                self._available_models = models
+                self._models_cache_time = current_time
+                return models
+            _log_warning(
+                "ollama_models_fetch_failed", status_code=response.status_code
+            )
+        except Exception as e:
+            _log_warning("ollama_models_fetch_failed", error=str(e)[:100])
+
+        # Refresh failed: prefer the stale list over claiming nothing is pulled.
+        # The cache time is left untouched, so the next call retries the fetch.
+        if self._available_models is not None:
+            return self._available_models
+        return []
+
+    def _check_model_available(self, model: str) -> bool:
+        """Return True if any fetched model name starts with or contains ``model``."""
+        for pulled in self._fetch_available_models():
+            if pulled.startswith(model) or model in pulled:
+                return True
+        return False
+
+    def _make_api_request(self, text: str, system_prompt: str) -> str:
+        """
+        Send one non-streaming chat request to Ollama and return the reply text.
+
+        Args:
+            text: User content to send. Blank input is returned unchanged
+                without contacting the server.
+            system_prompt: System message sent ahead of the user content.
+
+        Returns:
+            The ``message.content`` field of the response, stripped.
+
+        Raises:
+            OllamaProviderError: For any API errors with specific codes
+        """
+        if not text or not text.strip():
+            return text
+
+        if len(text) > 128000:
+            raise OllamaProviderError(
+                code=OLLAMA_CONTEXT_TOO_LONG,
+                message="Texte trop long pour le modèle (max ~128K caractères).",
+                details={"text_length": len(text), "max_chars": 128000},
+            )
+
+        # _fetch_available_models() returns [] both when nothing is listed and
+        # when the listing request itself failed. Treating that empty list as
+        # "model not found" told users to run `ollama pull` while the server
+        # was simply unreachable. Only a non-empty listing can prove the model
+        # is missing; otherwise the chat request below reports the real cause
+        # (connection error, timeout, or a 404 for a model that is absent).
+        if self._fetch_available_models() and not self._check_model_available(
+            self._model
+        ):
+            raise OllamaProviderError(
+                code=OLLAMA_MODEL_NOT_FOUND,
+                message=f"Modèle '{self._model}' non trouvé. Exécutez: ollama pull {self._model}",
+                details={"model": self._model, "provider": "ollama"},
+            )
+
+        payload = {
+            "model": self._model,
+            "messages": [
+                {"role": "system", "content": system_prompt},
+                {"role": "user", "content": text},
+            ],
+            "stream": False,
+            "options": {"temperature": 0.3},
+        }
+
+        try:
+            response = requests.post(
+                f"{self._base_url}/api/chat",
+                json=payload,
+                timeout=self.timeout,
+            )
+
+            if response.status_code == 404:
+                raise OllamaProviderError(
+                    code=OLLAMA_MODEL_NOT_FOUND,
+                    message=f"Modèle '{self._model}' non trouvé. Exécutez: ollama pull {self._model}",
+                    details={"model": self._model, "status_code": 404},
+                )
+
+            if response.status_code != 200:
+                error_text = response.text[:200] if response.text else "Unknown error"
+                raise OllamaProviderError(
+                    code=OLLAMA_GENERATION_ERROR,
+                    message=f"Erreur de génération Ollama: {error_text}",
+                    details={"status_code": response.status_code, "model": self._model},
+                )
+
+            data = response.json()
+            message = data.get("message", {})
+            content = message.get("content", "")
+
+            if not content:
+                raise OllamaProviderError(
+                    code=OLLAMA_GENERATION_ERROR,
+                    message="Erreur de génération Ollama: réponse vide",
+                    details={"model": self._model, "response": str(data)[:200]},
+                )
+
+            return content.strip()
+
+        except Timeout as exc:
+            raise OllamaProviderError(
+                code=OLLAMA_TIMEOUT,
+                message="Délai d'attente Ollama dépassé. Réessayez avec un texte plus court.",
+                details={"provider": "ollama", "timeout_seconds": self.timeout},
+            ) from exc
+        except RequestsConnectionError as exc:
+            raise OllamaProviderError(
+                code=OLLAMA_UNAVAILABLE,
+                message="Service Ollama indisponible. Vérifiez que Ollama est en cours d'exécution.",
+                details={"provider": "ollama", "base_url": self._base_url},
+            ) from exc
+        except OllamaProviderError:
+            # Already carries a specific code; must not be re-wrapped below.
+            raise
+        except Exception as e:
+            # Last resort: classify by message text, since the exception type
+            # did not identify a connection problem.
+            error_str = str(e).lower()
+            if "connection" in error_str or "refused" in error_str:
+                raise OllamaProviderError(
+                    code=OLLAMA_UNAVAILABLE,
+                    message="Service Ollama indisponible. Vérifiez que Ollama est en cours d'exécution.",
+                    details={"provider": "ollama", "base_url": self._base_url},
+                ) from e
+            raise OllamaProviderError(
+                code=OLLAMA_GENERATION_ERROR,
+                message=f"Erreur de génération Ollama: {str(e)[:100]}",
+                details={"provider": "ollama", "original_error": str(e)[:100]},
+            ) from e
+
+    def get_name(self) -> str:
+        """Give the identifier stored in ``self._provider_name``."""
+        provider_name = self._provider_name
+        return provider_name
+
+    def is_available(self) -> bool:
+        """
+        Tell whether the Ollama server answers ``GET /api/tags`` with a 200.
+
+        The verdict is remembered for ``self._health_cache_ttl`` seconds; a
+        fresh probe is only made once the remembered one has expired.
+        """
+        now = time.time()
+
+        with self._health_cache_lock:
+            entry = self._health_cache.get("is_available")
+            if entry is not None and now - entry["timestamp"] < self._health_cache_ttl:
+                return entry["value"]
+
+        # The probe runs outside the lock so other callers are not blocked
+        # for the duration of the HTTP request.
+        try:
+            probe = requests.get(f"{self._base_url}/api/tags", timeout=5)
+            reachable = probe.status_code == 200
+        except Exception as exc:
+            _log_warning("ollama_availability_check_failed", error=str(exc)[:100])
+            reachable = False
+
+        fresh_entry = {"value": reachable, "timestamp": now}
+        with self._health_cache_lock:
+            self._health_cache["is_available"] = fresh_entry
+
+        return reachable
+
+    def translate_text(self, request: TranslationRequest) -> TranslationResponse:
+        """
+        Translate one piece of text through the Ollama chat endpoint.
+
+        A caller-supplied system prompt is read from
+        request.metadata["custom_prompt"] when metadata is present. Errors
+        whose code is in ``_RETRYABLE_ERRORS`` are retried up to
+        ``self.max_retries`` times, the pause doubling after every attempt
+        starting at ``self.retry_delay`` seconds. A failure is reported in the
+        returned response, with the input text echoed as ``translated_text``.
+
+        Args:
+            request: TranslationRequest with text and language info
+
+        Returns:
+            TranslationResponse with translated text
+        """
+        text = request.text
+        target_language = request.target_language
+        source_language = request.source_language or "auto"
+
+        # Blank input: answer immediately without calling the model.
+        if not (text and text.strip()):
+            return TranslationResponse(
+                translated_text=text,
+                provider_name=self._provider_name,
+                from_cache=False,
+            )
+
+        source_lang_name = _get_language_name(source_language)
+        target_lang_name = _get_language_name(target_language)
+        custom_prompt = (
+            request.metadata.get("custom_prompt") if request.metadata else None
+        )
+        system_prompt = _build_system_prompt(
+            source_lang_name, target_lang_name, custom_prompt
+        )
+
+        last_error: Optional[OllamaProviderError] = None
+        retries = 0
+
+        while retries <= self.max_retries:
+            try:
+                started_at = time.time()
+                translated = self._make_api_request(text, system_prompt)
+                elapsed_ms = round((time.time() - started_at) * 1000, 2)
+
+                _log_info(
+                    "ollama_translation_success",
+                    chars=len(text),
+                    source_lang=source_language,
+                    target_lang=target_language,
+                    model=self._model,
+                    latency_ms=elapsed_ms,
+                    retries=retries,
+                )
+
+                return TranslationResponse(
+                    translated_text=translated,
+                    provider_name=self._provider_name,
+                    from_cache=False,
+                    source_language=source_language,
+                )
+
+            except OllamaProviderError as provider_error:
+                last_error = provider_error
+
+                # Errors outside the retryable set end the loop at once.
+                if provider_error.code not in _RETRYABLE_ERRORS:
+                    break
+
+                retries += 1
+                if retries > self.max_retries:
+                    # Budget exhausted: the loop condition is now false.
+                    continue
+
+                backoff = self.retry_delay * 2 ** (retries - 1)
+                _log_info(
+                    "ollama_translation_retry",
+                    attempt=retries,
+                    delay_s=round(backoff, 2),
+                    error_code=provider_error.code,
+                    text_length=len(text),
+                    source_lang=source_language,
+                    target_lang=target_language,
+                )
+                time.sleep(backoff)
+
+            except Exception as unexpected:
+                # Anything else is wrapped as a generation error and retried
+                # on the same schedule, without a retry log entry.
+                summary = str(unexpected)[:100]
+                last_error = OllamaProviderError(
+                    code=OLLAMA_GENERATION_ERROR,
+                    message=f"Erreur de génération Ollama: {summary}",
+                    details={"original_error": summary},
+                )
+                retries += 1
+                if retries <= self.max_retries:
+                    time.sleep(self.retry_delay * 2 ** (retries - 1))
+
+        if not last_error:
+            # Reached only when the loop body never ran.
+            return TranslationResponse(
+                translated_text=text,
+                provider_name=self._provider_name,
+                from_cache=False,
+                error="Unknown error",
+                error_code=OLLAMA_GENERATION_ERROR,
+            )
+
+        _log_error(
+            "ollama_translation_failed",
+            error_code=last_error.code,
+            text_length=len(text),
+            source_lang=source_language,
+            target_lang=target_language,
+            retries=retries,
+        )
+        return TranslationResponse(
+            translated_text=text,
+            provider_name=self._provider_name,
+            from_cache=False,
+            error=last_error.message,
+            error_code=last_error.code,
+            error_details=last_error.details,
+        )
+
+    def translate_batch(
+        self, requests: List[TranslationRequest]
+    ) -> List[TranslationResponse]:
+        """
+        Translate several requests one after the other.
+
+        Args:
+            requests: List of TranslationRequest objects
+
+        Returns:
+            One TranslationResponse per request, in input order; an empty list
+            when ``requests`` is empty.
+        """
+        if not requests:
+            return []
+
+        responses = []
+        for single_request in requests:
+            responses.append(self.translate_text(single_request))
+        return responses
+
+    def health_check(self) -> ProviderHealthStatus:
+        """
+        Report availability, latency and model presence for the provider.
+
+        A status is reused for ``self._health_cache_ttl`` seconds. The model
+        listing is fetched once per check and the configured model is looked
+        up in that same listing, so ``available`` and ``model_available``
+        always describe the same server answer.
+
+        Returns:
+            ProviderHealthStatus with availability and latency information
+        """
+        current_time = time.time()
+
+        with self._health_cache_lock:
+            if "health_check" in self._health_cache:
+                cached = self._health_cache["health_check"]
+                if current_time - cached["timestamp"] < self._health_cache_ttl:
+                    return cached["value"]
+
+        start_time = time.time()
+        last_check_iso = datetime.now(timezone.utc).isoformat()
+
+        try:
+            models = self._fetch_available_models()
+            # Look the model up in the listing just fetched (same rule as
+            # _check_model_available). Calling that helper here issued a second
+            # HTTP request whenever the first one failed, because failures are
+            # not cached: the wait doubled and the two answers could disagree.
+            model_available = any(
+                name.startswith(self._model) or self._model in name
+                for name in models
+            )
+            available = len(models) > 0 and model_available
+            latency_ms = (time.time() - start_time) * 1000
+
+            error_msg = None
+            if not models:
+                # An empty listing is reported as the service being down.
+                error_msg = "Service Ollama indisponible. Vérifiez que Ollama est en cours d'exécution."
+            elif not model_available:
+                error_msg = f"Modèle '{self._model}' non trouvé. Exécutez: ollama pull {self._model}"
+
+            status = ProviderHealthStatus(
+                name=self._provider_name,
+                available=available,
+                latency_ms=round(latency_ms, 2),
+                error=error_msg,
+                last_check=last_check_iso,
+                model=self._model,
+                model_available=model_available,
+            )
+        except Exception as e:
+            latency_ms = (time.time() - start_time) * 1000
+            status = ProviderHealthStatus(
+                name=self._provider_name,
+                available=False,
+                latency_ms=round(latency_ms, 2),
+                error=str(e)[:100],
+                last_check=last_check_iso,
+                model=self._model,
+                model_available=None,
+            )
+
+        with self._health_cache_lock:
+            self._health_cache["health_check"] = {
+                "value": status,
+                "timestamp": current_time,
+            }
+
+        return status
+
+
+def register_ollama_provider():
+    """
+    Put the shared Ollama provider into the global registry as "ollama".
+
+    Intended to be called during module initialization so the provider can
+    be looked up through the registry.
+
+    Returns:
+        The provider instance that was registered.
+    """
+    # Imported inside the function, as the registry lives in a sibling module.
+    from .registry import registry
+
+    ollama_provider = get_ollama_provider()
+    registry.register("ollama", ollama_provider)
+    return ollama_provider
+
+
+_provider_instance: Optional[OllamaTranslationProvider] = None
+_provider_lock = threading.Lock()
+
+
+def get_ollama_provider() -> OllamaTranslationProvider:
+    """Return the shared Ollama provider, building it from ProvidersConfig on first use."""
+    global _provider_instance
+
+    # Fast path: the instance already exists, no locking needed.
+    if _provider_instance is not None:
+        return _provider_instance
+
+    with _provider_lock:
+        # Look again under the lock so only one caller constructs the provider.
+        if _provider_instance is None:
+            from .config import ProvidersConfig
+
+            settings = ProvidersConfig
+            _provider_instance = OllamaTranslationProvider(
+                base_url=settings.OLLAMA_BASE_URL,
+                model=settings.OLLAMA_MODEL,
+                timeout=settings.OLLAMA_TIMEOUT,
+                max_retries=settings.OLLAMA_MAX_RETRIES,
+                retry_delay=settings.OLLAMA_RETRY_DELAY,
+            )
+    return _provider_instance
diff --git a/services/providers/openai_provider.py b/services/providers/openai_provider.py
new file mode 100644
index 0000000..808ff0a
--- /dev/null
+++ b/services/providers/openai_provider.py
@@ -0,0 +1,670 @@
+"""
+OpenAI Provider - Cloud LLM translation provider.
+
+Extends TranslationProvider base class with robust error handling,
+retry logic, and health monitoring for OpenAI API.
+
+Features:
+- Cloud LLM translation via OpenAI Chat Completions API
+- Custom system prompt support
+- Specific error codes for all OpenAI API errors
+- Retry logic with exponential backoff for transient errors
+- Timeout configuration (faster than local Ollama)
+- Health check with caching
+- Structlog-compatible logging (no document content in logs)
+"""
+
+import threading
+import time
+from datetime import datetime, timezone
+from typing import Any, Dict, List, Optional
+
+try:
+    import structlog
+
+    logger = structlog.get_logger(__name__)
+    _HAS_STRUCTLOG = True
+except ImportError:
+    import logging
+
+    logger = logging.getLogger(__name__)
+    _HAS_STRUCTLOG = False
+
+
+def _log_event(_level: str, event: str, **kwargs):
+    """
+    Emit ``event`` at ``_level`` through whichever logger was set up above.
+
+    With structlog the fields are passed through as keyword arguments. With
+    the standard library logger they are rendered as ``key=value`` pairs
+    appended to the event name, separated by single spaces; an event without
+    fields is logged as the bare name (no trailing space).
+
+    Args:
+        _level: Name of the logger method to call ("info", "warning", "error").
+        event: Event name.
+        **kwargs: Fields attached to the event.
+    """
+    log_method = getattr(logger, _level)
+    if _HAS_STRUCTLOG:
+        log_method(event, **kwargs)
+    else:
+        log_method(" ".join([event, *(f"{k}={v}" for k, v in kwargs.items())]))
+
+
+def _log_info(event: str, **kwargs):
+    """Log info with structlog or standard logging compatibility."""
+    _log_event("info", event, **kwargs)
+
+
+def _log_warning(event: str, **kwargs):
+    """Log warning with structlog or standard logging compatibility."""
+    _log_event("warning", event, **kwargs)
+
+
+def _log_error(event: str, **kwargs):
+    """Log error with structlog or standard logging compatibility."""
+    _log_event("error", event, **kwargs)
+
+
+import requests
+from requests.exceptions import Timeout, ConnectionError as RequestsConnectionError
+
+from .base import TranslationProvider
+from .schemas import (
+ ProviderHealthStatus,
+ TranslationRequest,
+ TranslationResponse,
+)
+
+# Error codes
+OPENAI_RATE_LIMITED = "OPENAI_RATE_LIMITED"
+OPENAI_INVALID_KEY = "OPENAI_INVALID_KEY"
+OPENAI_QUOTA_EXCEEDED = "OPENAI_QUOTA_EXCEEDED"
+OPENAI_TIMEOUT = "OPENAI_TIMEOUT"
+OPENAI_SERVICE_ERROR = "OPENAI_SERVICE_ERROR"
+OPENAI_CONTEXT_TOO_LONG = "OPENAI_CONTEXT_TOO_LONG"
+
+_RETRYABLE_ERRORS = {OPENAI_RATE_LIMITED, OPENAI_TIMEOUT, OPENAI_SERVICE_ERROR}
+
+
+class OpenAIProviderError(Exception):
+    """Failure raised by the OpenAI provider, tagged with an error code."""
+
+    def __init__(
+        self, code: str, message: str, details: Optional[Dict[str, Any]] = None
+    ):
+        """Store the code, the message and the details (``{}`` when none given)."""
+        super().__init__(message)
+        self.code = code
+        self.message = message
+        self.details = details if details else {}
+
+    def to_dict(self) -> Dict[str, Any]:
+        """Render the error as a dict; ``details`` is included only when non-empty."""
+        payload = {"error": self.code, "message": self.message}
+        if self.details:
+            payload["details"] = self.details
+        return payload
+
+
+DEFAULT_TRANSLATION_PROMPT = """You are a professional translator. Translate the following text from {source_lang} to {target_lang}.
+
+Rules:
+- Translate ONLY the text, do not add explanations or notes
+- Preserve the original formatting, line breaks, and structure
+- Maintain the original tone and style
+- For technical terms, use the standard translation in the target language
+- If the text contains proper nouns or brand names, keep them unchanged unless there's a well-known translation"""
+
+
+def _build_system_prompt(
+    source_lang: str, target_lang: str, custom_prompt: Optional[str] = None
+) -> str:
+    """
+    Build the system prompt for a translation request.
+
+    Args:
+        source_lang: Source language as it should appear in the prompt.
+        target_lang: Target language as it should appear in the prompt.
+        custom_prompt: Caller-supplied prompt. When it has visible content it
+            is returned verbatim and the languages are not substituted into it.
+
+    Returns:
+        ``custom_prompt`` when usable, otherwise DEFAULT_TRANSLATION_PROMPT
+        filled in with both languages.
+    """
+    # A whitespace-only prompt is truthy and used to replace the default with
+    # an empty instruction, leaving the model without a translation task.
+    if isinstance(custom_prompt, str) and not custom_prompt.strip():
+        custom_prompt = None
+
+    if custom_prompt:
+        return custom_prompt
+    return DEFAULT_TRANSLATION_PROMPT.format(
+        source_lang=source_lang, target_lang=target_lang
+    )
+
+
+# Lookup table from base language code to English language name. Built once at
+# import time instead of on every call.
+_LANGUAGE_NAMES = {
+    "en": "English",
+    "fr": "French",
+    "es": "Spanish",
+    "de": "German",
+    "it": "Italian",
+    "pt": "Portuguese",
+    "nl": "Dutch",
+    "ru": "Russian",
+    "zh": "Chinese",
+    "ja": "Japanese",
+    "ko": "Korean",
+    "ar": "Arabic",
+    "hi": "Hindi",
+    "tr": "Turkish",
+    "pl": "Polish",
+    "vi": "Vietnamese",
+    "th": "Thai",
+    "id": "Indonesian",
+    "ms": "Malay",
+    "uk": "Ukrainian",
+    "cs": "Czech",
+    "sv": "Swedish",
+    "da": "Danish",
+    "fi": "Finnish",
+    "no": "Norwegian",
+    "el": "Greek",
+    "he": "Hebrew",
+    "ro": "Romanian",
+    "hu": "Hungarian",
+    "bg": "Bulgarian",
+    "sk": "Slovak",
+    "hr": "Croatian",
+    "sl": "Slovenian",
+    "lt": "Lithuanian",
+    "lv": "Latvian",
+    "et": "Estonian",
+}
+
+
+def _get_language_name(code: str) -> str:
+    """
+    Convert a language code to its English name for use in a prompt.
+
+    Only the base language is considered: the region part of ``en-US`` or
+    ``pt_BR`` is ignored, as are letter case and surrounding whitespace.
+
+    Args:
+        code: Language code such as ``fr``, ``en-US`` or ``pt_BR``.
+
+    Returns:
+        The language name, or ``code`` unchanged when the base code is not in
+        the table (this includes ``auto``).
+    """
+    # Accept "_" as a region separator too, so "pt_BR" resolves like "pt-BR".
+    base_code = code.strip().replace("_", "-").split("-")[0].lower()
+    return _LANGUAGE_NAMES.get(base_code, code)
+
+
+class OpenAITranslationProvider(TranslationProvider):
+ """
+ OpenAI LLM implementation for cloud translation.
+
+ Features:
+ - Uses OpenAI Chat Completions API
+ - Custom system prompt support for translation context
+ - Thread-safe HTTP client
+ - Robust error handling with specific error codes
+ - Retry logic with exponential backoff
+ - Configurable timeout (default 60s for cloud API)
+ - Health check with result caching
+ """
+
+    def __init__(
+        self,
+        api_key: str,
+        model: str = "gpt-4o-mini",
+        timeout: int = 60,
+        max_retries: int = 3,
+        retry_delay: float = 1.0,
+        base_url: str = "https://api.openai.com/v1",
+        health_check_timeout: int = 5,
+    ):
+        """
+        Initialize OpenAI provider.
+
+        Args:
+            api_key: OpenAI API key; surrounding whitespace is removed
+            model: Model name to use (default: gpt-4o-mini)
+            timeout: Request timeout in seconds (default: 60)
+            max_retries: Maximum retry attempts for transient errors (default: 3)
+            retry_delay: Initial retry delay in seconds (default: 1.0)
+            base_url: OpenAI API base URL (default: https://api.openai.com/v1)
+            health_check_timeout: Timeout for health check requests in seconds (default: 5)
+
+        Raises:
+            ValueError: If ``api_key`` is empty or only whitespace.
+        """
+        if not api_key or not api_key.strip():
+            raise ValueError("OpenAI API key cannot be empty")
+
+        # Store the trimmed key. It was validated trimmed but kept raw, so a
+        # key read with a trailing newline ended up inside the Authorization
+        # header, where line breaks are not allowed.
+        self._api_key = api_key.strip()
+        self._model = model
+        self._base_url = base_url.rstrip("/")
+        self._provider_name = "openai"
+        self._timeout = timeout
+        self._max_retries = max_retries
+        self._retry_delay = retry_delay
+        self._health_check_timeout = health_check_timeout
+        # Cached results of is_available() / health_check(), keyed by method
+        # name and guarded by the lock below.
+        self._health_cache: Dict[str, Any] = {}
+        self._health_cache_ttl = 60
+        self._health_cache_lock = threading.Lock()
+
+ def _make_api_request(self, text: str, system_prompt: str) -> tuple:
+ """
+ Send one chat-completion request to OpenAI and return the reply.
+
+ Blank input is returned unchanged, with an empty usage dict, without
+ contacting the API. Text longer than 16000 characters is rejected
+ before any request is made.
+
+ Args:
+ text: User content to send.
+ system_prompt: System message sent ahead of the user content.
+
+ Returns:
+ Tuple of (translated_content, usage_dict). usage_dict may be empty.
+
+ Raises:
+ OpenAIProviderError: For any API errors with specific codes
+ """
+ if not text or not text.strip():
+ return text, {}
+
+ # Check text length (rough estimate: 1 token ~= 4 chars)
+ if len(text) > 16000: # ~4000 tokens
+ raise OpenAIProviderError(
+ code=OPENAI_CONTEXT_TOO_LONG,
+ message="Texte trop long pour le modèle (max ~4000 tokens).",
+ details={"text_length": len(text), "max_tokens": 4000},
+ )
+
+ url = f"{self._base_url}/chat/completions"
+ headers = {
+ "Authorization": f"Bearer {self._api_key}",
+ "Content-Type": "application/json",
+ }
+ payload = {
+ "model": self._model,
+ "messages": [
+ {"role": "system", "content": system_prompt},
+ {"role": "user", "content": text},
+ ],
+ "temperature": 0.3,
+ "max_tokens": 4096,
+ }
+
+ try:
+ response = requests.post(
+ url,
+ headers=headers,
+ json=payload,
+ timeout=self._timeout,
+ )
+
+ # Handle specific HTTP status codes
+ # The checks below are ordered from most to least specific; the
+ # generic "not 200" branch at the end catches whatever is left.
+ if response.status_code == 401:
+ raise OpenAIProviderError(
+ code=OPENAI_INVALID_KEY,
+ message="Clé API OpenAI invalide. Vérifiez votre configuration.",
+ details={"status_code": 401},
+ )
+
+ if response.status_code == 429:
+ # The body may not be JSON; fall back to an empty error object.
+ try:
+ error_data = response.json().get("error", {}) or {}
+ except Exception:
+ error_data = {}
+ error_code = error_data.get("code", "")
+
+ # Check for rate limit vs quota exceeded
+ if error_code == "insufficient_quota":
+ raise OpenAIProviderError(
+ code=OPENAI_QUOTA_EXCEEDED,
+ message="Quota OpenAI épuisé. Vérifiez votre facturation.",
+ details={"status_code": 429, "error_code": error_code},
+ )
+ else:
+ # Rate limit
+ # "20" is used when the header is absent; a header value that is
+ # not all digits is shown as-is in the message but recorded as 20.
+ retry_after = response.headers.get("retry-after", "20")
+ raise OpenAIProviderError(
+ code=OPENAI_RATE_LIMITED,
+ message=f"Limite de requêtes OpenAI atteinte. Réessayez dans {retry_after}s.",
+ details={
+ "status_code": 429,
+ "retry_after_seconds": int(retry_after)
+ if retry_after.isdigit()
+ else 20,
+ },
+ )
+
+ if response.status_code == 400:
+ try:
+ error_data = response.json().get("error", {}) or {}
+ except Exception:
+ error_data = {}
+ error_code = error_data.get("code", "")
+
+ if error_code == "context_length_exceeded":
+ raise OpenAIProviderError(
+ code=OPENAI_CONTEXT_TOO_LONG,
+ message="Texte trop long pour le modèle (max ~4000 tokens).",
+ details={"status_code": 400, "error_code": error_code},
+ )
+ # Any other 400 falls through to the generic non-200 branch below.
+
+ if response.status_code >= 500:
+ raise OpenAIProviderError(
+ code=OPENAI_SERVICE_ERROR,
+ message="Service OpenAI temporairement indisponible.",
+ details={"status_code": response.status_code},
+ )
+
+ if response.status_code != 200:
+ error_text = response.text[:200] if response.text else "Unknown error"
+ raise OpenAIProviderError(
+ code=OPENAI_SERVICE_ERROR,
+ message=f"Erreur OpenAI: {error_text}",
+ details={"status_code": response.status_code},
+ )
+
+ data = response.json()
+ choices = data.get("choices", [])
+
+ if not choices:
+ raise OpenAIProviderError(
+ code=OPENAI_SERVICE_ERROR,
+ message="Erreur OpenAI: réponse vide",
+ details={"response": str(data)[:200]},
+ )
+
+ # Only the first choice is used.
+ content = choices[0].get("message", {}).get("content", "")
+
+ if not content:
+ raise OpenAIProviderError(
+ code=OPENAI_SERVICE_ERROR,
+ message="Erreur OpenAI: réponse vide",
+ details={"response": str(data)[:200]},
+ )
+
+ usage = data.get("usage", {})
+ return content.strip(), usage
+
+ except Timeout:
+ raise OpenAIProviderError(
+ code=OPENAI_TIMEOUT,
+ message="Délai d'attente OpenAI dépassé. Le service est lent.",
+ details={"timeout_seconds": self._timeout},
+ )
+ except RequestsConnectionError:
+ raise OpenAIProviderError(
+ code=OPENAI_SERVICE_ERROR,
+ message="Service OpenAI temporairement indisponible.",
+ details={"error": "Connection failed"},
+ )
+ except OpenAIProviderError:
+ # Errors raised above already carry their code; re-raise them
+ # before the catch-all below can re-wrap them.
+ raise
+ except Exception as e:
+ # Catch-all: classify by message text, then wrap everything else.
+ error_str = str(e).lower()
+ if "connection" in error_str or "refused" in error_str:
+ raise OpenAIProviderError(
+ code=OPENAI_SERVICE_ERROR,
+ message="Service OpenAI temporairement indisponible.",
+ details={"original_error": str(e)[:100]},
+ )
+ raise OpenAIProviderError(
+ code=OPENAI_SERVICE_ERROR,
+ message=f"Erreur OpenAI: {str(e)[:100]}",
+ details={"original_error": str(e)[:100]},
+ )
+
+    def get_name(self) -> str:
+        """Give the identifier stored in ``self._provider_name``."""
+        provider_name = self._provider_name
+        return provider_name
+
+    def is_available(self) -> bool:
+        """
+        Tell whether ``GET {base_url}/models`` answers with a 200.
+
+        The verdict is remembered for ``self._health_cache_ttl`` seconds; a
+        fresh probe is only made once the remembered one has expired.
+        """
+        now = time.time()
+
+        with self._health_cache_lock:
+            entry = self._health_cache.get("is_available")
+            if entry is not None and now - entry["timestamp"] < self._health_cache_ttl:
+                return entry["value"]
+
+        # The probe runs outside the lock so other callers are not blocked
+        # for the duration of the HTTP request.
+        try:
+            probe = requests.get(
+                f"{self._base_url}/models",
+                headers={"Authorization": f"Bearer {self._api_key}"},
+                timeout=self._health_check_timeout,
+            )
+            reachable = probe.status_code == 200
+        except Exception as exc:
+            _log_warning("openai_availability_check_failed", error=str(exc)[:100])
+            reachable = False
+
+        fresh_entry = {"value": reachable, "timestamp": now}
+        with self._health_cache_lock:
+            self._health_cache["is_available"] = fresh_entry
+
+        return reachable
+
+    def translate_text(self, request: TranslationRequest) -> TranslationResponse:
+        """
+        Translate a single text string using OpenAI LLM.
+
+        Supports custom system prompt via request.metadata["custom_prompt"].
+
+        Errors whose code is in ``_RETRYABLE_ERRORS`` are retried up to
+        ``self._max_retries`` times with a delay that doubles after each
+        attempt, except when the error carries a 4xx HTTP status other than
+        408, 409 or 429: such a request is wrong as sent and repeating it
+        cannot succeed. A failure is reported in the returned response, with
+        the input text echoed as ``translated_text``.
+
+        Args:
+            request: TranslationRequest with text and language info
+
+        Returns:
+            TranslationResponse with translated text
+        """
+        text = request.text
+        target_language = request.target_language
+        source_language = request.source_language or "auto"
+
+        if not text or not text.strip():
+            return TranslationResponse(
+                translated_text=text,
+                provider_name=self._provider_name,
+                from_cache=False,
+            )
+
+        source_lang_name = _get_language_name(source_language)
+        target_lang_name = _get_language_name(target_language)
+
+        custom_prompt = None
+        if request.metadata:
+            custom_prompt = request.metadata.get("custom_prompt")
+
+        system_prompt = _build_system_prompt(
+            source_lang_name, target_lang_name, custom_prompt
+        )
+
+        last_error: Optional[OpenAIProviderError] = None
+        retries = 0
+
+        while retries <= self._max_retries:
+            try:
+                start_time = time.time()
+                result, usage = self._make_api_request(text, system_prompt)
+                latency = time.time() - start_time
+
+                log_kw: Dict[str, Any] = {
+                    "chars": len(text),
+                    "source_lang": source_language,
+                    "target_lang": target_language,
+                    "model": self._model,
+                    "latency_ms": round(latency * 1000, 2),
+                    "retries": retries,
+                }
+                # Token usage is logged only when the API reported a number.
+                if usage and isinstance(usage.get("total_tokens"), (int, float)):
+                    log_kw["tokens_used"] = usage.get("total_tokens")
+                _log_info("openai_translation_success", **log_kw)
+
+                return TranslationResponse(
+                    translated_text=result,
+                    provider_name=self._provider_name,
+                    from_cache=False,
+                    source_language=source_language,
+                )
+
+            except OpenAIProviderError as e:
+                last_error = e
+
+                # _make_api_request reports every unexpected non-200 status as
+                # OPENAI_SERVICE_ERROR, which is retryable. A 4xx such as 400,
+                # 403 or 404 was therefore retried with backoff although the
+                # same request fails the same way. 408, 409 and 429 stay
+                # retryable.
+                status_code = e.details.get("status_code")
+                is_client_error = (
+                    isinstance(status_code, int)
+                    and 400 <= status_code < 500
+                    and status_code not in (408, 409, 429)
+                )
+                if e.code not in _RETRYABLE_ERRORS or is_client_error:
+                    break
+
+                retries += 1
+                if retries <= self._max_retries:
+                    delay = self._retry_delay * (2 ** (retries - 1))
+                    _log_info(
+                        "openai_translation_retry",
+                        attempt=retries,
+                        delay_s=round(delay, 2),
+                        error_code=e.code,
+                        text_length=len(text),
+                        source_lang=source_language,
+                        target_lang=target_language,
+                    )
+                    time.sleep(delay)
+
+            except Exception as e:
+                # Anything else is wrapped as a service error and retried on
+                # the same schedule.
+                last_error = OpenAIProviderError(
+                    code=OPENAI_SERVICE_ERROR,
+                    message=f"Erreur OpenAI: {str(e)[:100]}",
+                    details={"original_error": str(e)[:100]},
+                )
+                retries += 1
+                if retries <= self._max_retries:
+                    delay = self._retry_delay * (2 ** (retries - 1))
+                    time.sleep(delay)
+
+        if last_error:
+            _log_error(
+                "openai_translation_failed",
+                error_code=last_error.code,
+                text_length=len(text),
+                source_lang=source_language,
+                target_lang=target_language,
+                retries=retries,
+            )
+            return TranslationResponse(
+                translated_text=text,
+                provider_name=self._provider_name,
+                from_cache=False,
+                error=last_error.message,
+                error_code=last_error.code,
+                error_details=last_error.details,
+            )
+
+        # Reached only when the loop body never ran.
+        return TranslationResponse(
+            translated_text=text,
+            provider_name=self._provider_name,
+            from_cache=False,
+            error="Unknown error",
+            error_code=OPENAI_SERVICE_ERROR,
+        )
+
+    def translate_batch(
+        self, requests: List[TranslationRequest]
+    ) -> List[TranslationResponse]:
+        """
+        Translate several requests one after the other.
+
+        Args:
+            requests: List of TranslationRequest objects
+
+        Returns:
+            One TranslationResponse per request, in input order; an empty list
+            when ``requests`` is empty.
+        """
+        if not requests:
+            return []
+
+        responses = []
+        for single_request in requests:
+            responses.append(self.translate_text(single_request))
+        return responses
+
+    def health_check(self) -> ProviderHealthStatus:
+        """
+        Return health status details for the provider.
+
+        A status is reused for ``self._health_cache_ttl`` seconds.
+        ``model_available`` is True/False only when the model list could be
+        read; in every other case (non-200 answer, unreadable body, request
+        failure) it is None, meaning "unknown".
+
+        Returns:
+            ProviderHealthStatus with availability, latency, and model information
+        """
+        current_time = time.time()
+
+        with self._health_cache_lock:
+            if "health_check" in self._health_cache:
+                cached = self._health_cache["health_check"]
+                if current_time - cached["timestamp"] < self._health_cache_ttl:
+                    return cached["value"]
+
+        start_time = time.time()
+        last_check_iso = datetime.now(timezone.utc).isoformat()
+
+        try:
+            url = f"{self._base_url}/models"
+            headers = {"Authorization": f"Bearer {self._api_key}"}
+            response = requests.get(
+                url, headers=headers, timeout=self._health_check_timeout
+            )
+            latency_ms = (time.time() - start_time) * 1000
+            available = response.status_code == 200
+
+            error_msg = None
+            model_available = None
+            if available:
+                try:
+                    models_data = response.json().get("data", [])
+                    model_ids = [m.get("id", "") for m in models_data]
+                    # Exact id match, or the configured name contained in an id.
+                    model_available = self._model in model_ids or any(
+                        self._model in mid for mid in model_ids
+                    )
+                except Exception:
+                    model_available = None
+            else:
+                if response.status_code == 401:
+                    error_msg = "Invalid API key"
+                else:
+                    error_msg = f"OpenAI API returned {response.status_code}"
+
+            status = ProviderHealthStatus(
+                name=self._provider_name,
+                available=available,
+                latency_ms=round(latency_ms, 2),
+                error=error_msg,
+                last_check=last_check_iso,
+                model=self._model,
+                model_available=model_available,
+            )
+        except Exception as e:
+            latency_ms = (time.time() - start_time) * 1000
+            status = ProviderHealthStatus(
+                name=self._provider_name,
+                available=False,
+                latency_ms=round(latency_ms, 2),
+                error=str(e)[:100],
+                last_check=last_check_iso,
+                model=self._model,
+                # The request failed, so nothing is known about the model.
+                # This used to be False, unlike the other unknown cases above
+                # and unlike the Ollama provider's health check.
+                model_available=None,
+            )
+
+        with self._health_cache_lock:
+            self._health_cache["health_check"] = {
+                "value": status,
+                "timestamp": current_time,
+            }
+
+        return status
+
+
+def register_openai_provider():
+    """
+    Put the shared OpenAI provider into the global registry as "openai".
+
+    Intended to be called during module initialization so the provider can
+    be looked up through the registry.
+
+    Returns:
+        The provider instance that was registered.
+    """
+    # Imported inside the function, as the registry lives in a sibling module.
+    from .registry import registry
+
+    openai_provider = get_openai_provider()
+    registry.register("openai", openai_provider)
+    return openai_provider
+
+
+_provider_instance: Optional[OpenAITranslationProvider] = None
+_provider_lock = threading.Lock()
+
+
+def get_openai_provider() -> OpenAITranslationProvider:
+    """Return the shared OpenAI provider, building it from ProvidersConfig on first use."""
+    global _provider_instance
+
+    # Fast path: the instance already exists, no locking needed.
+    if _provider_instance is not None:
+        return _provider_instance
+
+    with _provider_lock:
+        # Look again under the lock so only one caller constructs the provider.
+        if _provider_instance is None:
+            from .config import ProvidersConfig
+
+            settings = ProvidersConfig
+            _provider_instance = OpenAITranslationProvider(
+                api_key=settings.OPENAI_API_KEY,
+                model=settings.OPENAI_MODEL,
+                timeout=settings.OPENAI_TIMEOUT,
+                max_retries=settings.OPENAI_MAX_RETRIES,
+                retry_delay=settings.OPENAI_RETRY_DELAY,
+                base_url=settings.OPENAI_BASE_URL,
+                health_check_timeout=settings.OPENAI_HEALTH_CHECK_TIMEOUT,
+            )
+    return _provider_instance
+
+
+def reset_openai_provider() -> None:
+    """Forget the cached OpenAI provider so the next lookup builds a new one."""
+    global _provider_instance
+    _provider_lock.acquire()
+    try:
+        _provider_instance = None
+    finally:
+        _provider_lock.release()
diff --git a/services/providers/registry.py b/services/providers/registry.py
new file mode 100644
index 0000000..0d8f57c
--- /dev/null
+++ b/services/providers/registry.py
@@ -0,0 +1,148 @@
+"""
+Provider Registry - Singleton pattern for managing translation providers.
+
+Provides a central registry for all translation providers with:
+- Registration and retrieval by name
+- Listing available providers
+- Fallback chain support
+"""
+
+from typing import Dict, List, Optional
+import threading
+
+from .base import TranslationProvider
+
+
+class ProviderRegistry:
+    """
+    Singleton registry for translation providers.
+
+    Every call to ``ProviderRegistry()`` returns the same object. The provider
+    table is guarded by a re-entrant lock; availability probes are made
+    outside that lock so a slow provider cannot block other registry calls.
+    """
+
+    _instance: Optional["ProviderRegistry"] = None
+    _lock: threading.Lock = threading.Lock()
+
+    def __new__(cls) -> "ProviderRegistry":
+        """Create or return the singleton instance."""
+        if cls._instance is None:
+            with cls._lock:
+                if cls._instance is None:
+                    # Finish building the object before publishing it. The
+                    # outer check runs without the lock, so assigning
+                    # cls._instance first let another thread receive an
+                    # instance that had no _providers attribute yet.
+                    instance = super().__new__(cls)
+                    instance._providers = {}
+                    instance._providers_lock = threading.RLock()
+                    cls._instance = instance
+        return cls._instance
+
+    def register(self, name: str, provider: TranslationProvider) -> None:
+        """
+        Register a translation provider, replacing any provider of that name.
+
+        Args:
+            name: Unique name for the provider (e.g., "google", "deepl")
+            provider: TranslationProvider instance
+        """
+        with self._providers_lock:
+            self._providers[name] = provider
+
+    def unregister(self, name: str) -> bool:
+        """
+        Unregister a translation provider.
+
+        Args:
+            name: Name of the provider to remove
+
+        Returns:
+            True if provider was removed, False if not found
+        """
+        with self._providers_lock:
+            if name in self._providers:
+                del self._providers[name]
+                return True
+            return False
+
+    def get(self, name: str) -> Optional[TranslationProvider]:
+        """
+        Get a registered provider by name.
+
+        Args:
+            name: Provider name
+
+        Returns:
+            TranslationProvider instance or None if not found
+        """
+        with self._providers_lock:
+            return self._providers.get(name)
+
+    def list_all(self) -> List[str]:
+        """
+        List all registered provider names.
+
+        Returns:
+            List of provider names
+        """
+        with self._providers_lock:
+            return list(self._providers.keys())
+
+    def list_available(self) -> List[str]:
+        """
+        List names of all available (reachable) providers.
+
+        Returns:
+            List of provider names that are currently available
+        """
+        # Copy the table under the lock, then probe without it. The probes
+        # used to run while the lock was held, so one slow is_available()
+        # stalled every other registry call; get_first_available already
+        # probes outside the lock.
+        with self._providers_lock:
+            candidates = list(self._providers.items())
+        return [name for name, provider in candidates if provider.is_available()]
+
+    def get_first_available(self, names: List[str]) -> Optional[TranslationProvider]:
+        """
+        Get the first available provider from a list of names (fallback chain).
+
+        Names are tried in order; unknown names and unavailable providers are
+        skipped.
+
+        Args:
+            names: List of provider names in priority order
+
+        Returns:
+            First available TranslationProvider or None if all are unavailable
+        """
+        for name in names:
+            provider = self.get(name)
+            if provider is not None and provider.is_available():
+                return provider
+        return None
+
+    def clear(self) -> None:
+        """Remove all registered providers."""
+        with self._providers_lock:
+            self._providers.clear()
+
+    def __len__(self) -> int:
+        """Return the number of registered providers."""
+        with self._providers_lock:
+            return len(self._providers)
+
+    def __contains__(self, name: str) -> bool:
+        """Check if a provider is registered."""
+        with self._providers_lock:
+            return name in self._providers
+
+
+def get_registry() -> ProviderRegistry:
+    """
+    Hand out the global provider registry.
+
+    Returns:
+        The object obtained from calling ``ProviderRegistry()``.
+    """
+    shared_registry = ProviderRegistry()
+    return shared_registry
+
+
+# Module-level registry object, importable as ``registry``
+registry = ProviderRegistry()
diff --git a/services/providers/schemas.py b/services/providers/schemas.py
new file mode 100644
index 0000000..71265fa
--- /dev/null
+++ b/services/providers/schemas.py
@@ -0,0 +1,120 @@
+"""
+Pydantic models for translation provider request/response schemas.
+"""
+
+import re
+from typing import Optional, List
+from pydantic import BaseModel, ConfigDict, Field, field_validator
+
+
+# Accepts a two-letter lowercase language code ("en"), optionally followed by a
+# hyphen and a two-letter uppercase region ("en-US"), or the literal "auto".
+# Anything else (e.g. a lowercase region such as "zh-cn", or a three-letter
+# code) does not match.
+LANGUAGE_CODE_PATTERN = re.compile(r"^[a-z]{2}(-[A-Z]{2})?$|^auto$")
+
+
+class TranslationRequest(BaseModel):
+    """Request model for translation operations."""
+
+    text: str = Field(..., description="Text to translate")
+    target_language: str = Field(
+        ..., description="Target language code (e.g., 'en', 'fr', 'es')"
+    )
+    source_language: str = Field(
+        default="auto", description="Source language code (default: auto-detect)"
+    )
+    metadata: Optional[dict] = Field(
+        default=None,
+        description="Optional metadata for provider-specific options (e.g., custom_prompt)",
+    )
+
+    @field_validator("target_language", "source_language")
+    @classmethod
+    def validate_language_code(cls, v: str) -> str:
+        """
+        Check a language field against LANGUAGE_CODE_PATTERN.
+
+        Raises:
+            ValueError: if the value is not 'xx', 'xx-XX' or the literal 'auto'
+        """
+        if not LANGUAGE_CODE_PATTERN.match(v):
+            raise ValueError(
+                f"Invalid language code '{v}'. Expected format: 'xx' or 'xx-XX' (e.g., 'en', 'fr', 'en-US')"
+            )
+        return v
+
+    @field_validator("target_language")
+    @classmethod
+    def reject_auto_target(cls, v: str) -> str:
+        """
+        Refuse 'auto' as a translation target.
+
+        The shared pattern allows 'auto' because it is the auto-detect sentinel
+        for source_language; a target must name a concrete language. Runs after
+        validate_language_code, since validators on a field run in definition order.
+
+        Raises:
+            ValueError: if the target language is 'auto'
+        """
+        if v == "auto":
+            raise ValueError(
+                "Invalid target language 'auto'. A concrete language code is required (e.g., 'en', 'fr', 'en-US')"
+            )
+        return v
+
+
+class TranslationResponse(BaseModel):
+    """Response model for translation operations."""
+
+    # --- payload ---
+    translated_text: str = Field(..., description="Translated text")
+    provider_name: str = Field(
+        ...,
+        description="Name of the provider that performed the translation",
+    )
+    from_cache: bool = Field(
+        default=False,
+        description="Whether the result came from cache",
+    )
+    source_language: Optional[str] = Field(
+        default=None,
+        description="Detected or specified source language",
+    )
+
+    # --- failure information (all None on success) ---
+    error: Optional[str] = Field(
+        default=None,
+        description="Error message if translation failed",
+    )
+    error_code: Optional[str] = Field(
+        default=None,
+        description="Error code for programmatic error handling",
+    )
+    error_details: Optional[dict] = Field(
+        default=None,
+        description="Additional error details",
+    )
+
+    @property
+    def success(self) -> bool:
+        """True when no error message has been recorded on this response."""
+        return self.error is None
+
+    def to_error_dict(self) -> dict:
+        """
+        Build the API-facing error payload.
+
+        Returns:
+            An empty dict when there is no error; otherwise a dict with
+            "error" (the error code, or "UNKNOWN_ERROR" when none is set),
+            "message", and "details" only when error_details is non-empty.
+        """
+        if self.success:
+            return {}
+        code = self.error_code if self.error_code else "UNKNOWN_ERROR"
+        payload = {"error": code, "message": self.error}
+        if self.error_details:
+            payload["details"] = self.error_details
+        return payload
+
+
+class BatchTranslationRequest(BaseModel):
+ """Request model for batch translation operations."""
+
+ # NOTE(review): unlike TranslationRequest, no field_validator is attached
+ # here, so target_language / source_language are not checked against
+ # LANGUAGE_CODE_PATTERN - confirm whether that difference is intentional.
+ texts: List[str] = Field(..., description="List of texts to translate")
+ target_language: str = Field(..., description="Target language code")
+ source_language: str = Field(
+ default="auto", description="Source language code (default: auto-detect)"
+ )
+
+
+class BatchTranslationResponse(BaseModel):
+ """Response model for batch translation operations."""
+
+ # presumably index-aligned with BatchTranslationRequest.texts - TODO confirm
+ translated_texts: List[str] = Field(..., description="List of translated texts")
+ provider_name: str = Field(
+ ..., description="Name of the provider that performed the translations"
+ )
+ # Defaults to 0 when the producer does not report cache usage.
+ from_cache_count: int = Field(default=0, description="Number of results from cache")
+
+
+class ProviderHealthStatus(BaseModel):
+ """Health status model for a translation provider."""
+
+ # Clears pydantic's protected namespaces; presumably needed because the
+ # `model` / `model_available` fields below would otherwise collide with
+ # the reserved "model_" prefix - confirm against the pydantic version in use.
+ model_config = ConfigDict(protected_namespaces=())
+
+ name: str = Field(..., description="Provider name")
+ available: bool = Field(..., description="Whether the provider is available")
+ # All remaining fields are optional and default to None when not reported.
+ latency_ms: Optional[float] = Field(
+ default=None, description="Response latency in milliseconds"
+ )
+ error: Optional[str] = Field(
+ default=None, description="Error message if unavailable"
+ )
+ last_check: Optional[str] = Field(
+ default=None, description="ISO timestamp of last health check"
+ )
+ model: Optional[str] = Field(
+ default=None, description="Model name (e.g. for LLM providers)"
+ )
+ model_available: Optional[bool] = Field(
+ default=None, description="Whether the configured model is available"
+ )
diff --git a/services/storage_tracker.py b/services/storage_tracker.py
new file mode 100644
index 0000000..ac4fedf
--- /dev/null
+++ b/services/storage_tracker.py
@@ -0,0 +1,149 @@
+import os
+import json
+import logging
+from datetime import datetime, timezone
+from typing import Optional, Any, Dict
+from config import config
+
+try:
+ import structlog
+
+ logger = structlog.get_logger(__name__)
+ _HAS_STRUCTLOG = True
+except ImportError:
+ logger = logging.getLogger(__name__)
+ _HAS_STRUCTLOG = False
+
+
+def _log_info(event: str, **kwargs):
+    """
+    Emit an info-level event through whichever logger is active.
+
+    With structlog the keyword arguments are passed through as structured
+    fields; with the standard logging fallback they are flattened into
+    ``key=value`` pairs appended to the event name.
+    """
+    if not _HAS_STRUCTLOG:
+        rendered_fields = " ".join(f"{key}={value}" for key, value in kwargs.items())
+        logger.info(f"{event} {rendered_fields}")
+        return
+    logger.info(event, **kwargs)
+
+
+def _log_error(event: str, **kwargs):
+    """
+    Emit an error-level event through whichever logger is active.
+
+    With structlog the keyword arguments are passed through as structured
+    fields; with the standard logging fallback they are flattened into
+    ``key=value`` pairs appended to the event name.
+    """
+    if not _HAS_STRUCTLOG:
+        rendered_fields = " ".join(f"{key}={value}" for key, value in kwargs.items())
+        logger.error(f"{event} {rendered_fields}")
+        return
+    logger.error(event, **kwargs)
+
+
+# Key pattern: translation:file:{job_id}
+KEY_PREFIX = "translation:file"
+
+
+def _get_default_ttl() -> int:
+    """
+    Return the default metadata TTL in seconds.
+
+    Reads ``config.FILE_TTL_MINUTES`` and converts it to whole seconds. The
+    value is coerced numerically first, so a string-typed setting such as
+    "30" yields 1800 rather than a string repeated 60 times.
+
+    Returns:
+        TTL in seconds; 3600 (60 minutes) when the setting is missing,
+        cannot be parsed as a number, or is not positive.
+    """
+    fallback_seconds = 3600  # 60 minutes default
+    try:
+        from config import config
+
+        ttl_seconds = int(float(config.FILE_TTL_MINUTES) * 60)
+    except Exception:
+        return fallback_seconds
+    # A zero or negative expiry is not a usable TTL; use the default instead.
+    return ttl_seconds if ttl_seconds > 0 else fallback_seconds
+
+
+# Module-wide client cache: None = not attempted yet, False = attempted and
+# unavailable, anything else = the client object to reuse.
+_async_redis = None
+
+
+def _get_async_redis():
+    """
+    Return the cached async Redis client, creating it on first use.
+
+    The URL is read from the REDIS_URL environment variable. A missing URL
+    or a failure while building the client is remembered, so later calls
+    return None without retrying.
+
+    Returns:
+        The client built by ``redis.asyncio.Redis.from_url``, or None.
+    """
+    global _async_redis
+
+    if _async_redis is False:
+        return None
+    if _async_redis is not None:
+        return _async_redis
+
+    # Try to get from environment first
+    redis_url = os.getenv("REDIS_URL", "").strip()
+    if redis_url:
+        try:
+            import redis.asyncio as redis
+
+            _async_redis = redis.Redis.from_url(redis_url, decode_responses=True)
+            _log_info("redis_connected", service="storage_tracker")
+            return _async_redis
+        except Exception as exc:
+            _log_error(
+                "redis_connection_failed", service="storage_tracker", error=str(exc)
+            )
+
+    # Either no URL was configured or client creation failed: remember that.
+    _async_redis = False
+    return None
+
+
+class StorageTracker:
+    """
+    Tracks file locations and metadata in Redis.
+    Pattern: translation:file:{job_id} -> JSON metadata
+    """
+
+    def __init__(self):
+        # Resolved lazily by _redis_client() so construction does no I/O.
+        self._redis = None
+
+    def _redis_client(self):
+        """Return the async Redis client, or None when Redis is not available."""
+        if self._redis is None:
+            self._redis = _get_async_redis()
+        return self._redis
+
+    async def track_file(
+        self, job_id: str, metadata: Dict[str, Any], ttl: Optional[int] = None
+    ) -> bool:
+        """
+        Store file metadata in Redis with TTL and log the upload.
+
+        The caller's ``metadata`` dict is not modified: a shallow copy is
+        stored, with a UTC ISO-8601 "timestamp" added when none was supplied.
+
+        Args:
+            job_id: Identifier used to build the key ``translation:file:{job_id}``
+            metadata: File metadata; must be JSON-serialisable to be stored
+            ttl: Expiry in seconds; defaults to the configured file TTL
+
+        Returns:
+            True when the record was written to Redis; False when Redis is
+            unavailable or the write failed (the upload is still logged).
+        """
+        if ttl is None:
+            ttl = _get_default_ttl()
+
+        # Shallow copy so adding the timestamp never leaks into the caller's dict.
+        record = dict(metadata)
+        if "timestamp" not in record:
+            record["timestamp"] = datetime.now(timezone.utc).isoformat()
+
+        # Log metadata (no content)
+        _log_info(
+            "file_uploaded",
+            job_id=job_id,
+            original_filename=record.get("original_filename"),
+            file_size=record.get("file_size"),
+            file_hash=record.get("file_hash"),
+            user_id=record.get("user_id"),
+            timestamp=record.get("timestamp"),
+        )
+
+        redis_client = self._redis_client()
+        if not redis_client:
+            _log_error(
+                "redis_not_available", job_id=job_id, hint="File tracked in logs only"
+            )
+            return False
+
+        try:
+            key = f"{KEY_PREFIX}:{job_id}"
+            await redis_client.set(key, json.dumps(record), ex=ttl)
+            _log_info("file_tracked_in_redis", job_id=job_id, ttl_seconds=ttl)
+            return True
+        except Exception as e:
+            # Best effort: serialisation or Redis errors are logged, not raised.
+            _log_error("redis_track_failed", job_id=job_id, error=str(e))
+            return False
+
+    async def get_file_metadata(self, job_id: str) -> Optional[Dict[str, Any]]:
+        """
+        Retrieve file metadata from Redis.
+
+        Returns:
+            The decoded metadata dict, or None when Redis is unavailable, the
+            key is missing or empty, or the lookup or JSON decoding fails.
+        """
+        redis_client = self._redis_client()
+        if not redis_client:
+            return None
+
+        try:
+            key = f"{KEY_PREFIX}:{job_id}"
+            data = await redis_client.get(key)
+            return json.loads(data) if data else None
+        except Exception as e:
+            _log_error("redis_get_failed", job_id=job_id, error=str(e))
+            return None
+
+
+# Singleton for app use
+storage_tracker = StorageTracker()
diff --git a/services/translation_service.py b/services/translation_service.py
index 3be5012..0413223 100644
--- a/services/translation_service.py
+++ b/services/translation_service.py
@@ -3,6 +3,7 @@ Translation Service Abstraction
Provides a unified interface for different translation providers
Optimized for high performance with parallel processing and caching
"""
+
from abc import ABC, abstractmethod
from typing import Optional, List, Dict, Tuple
import requests
@@ -20,15 +21,37 @@ from collections import OrderedDict
logger = logging.getLogger(__name__)
+# Map language codes to full names for LLM prompts (models understand "French" better than "fr")
+_LLM_LANG_NAMES = {
+    "en": "English", "es": "Spanish", "de": "German", "fr": "French", "ja": "Japanese",
+    "pt": "Portuguese", "ru": "Russian", "it": "Italian", "zh": "Chinese", "zh-CN": "Chinese (Simplified)",
+    "zh-TW": "Chinese (Traditional)", "pl": "Polish", "nl": "Dutch", "tr": "Turkish", "ko": "Korean",
+    "ar": "Arabic", "fa": "Persian", "vi": "Vietnamese", "id": "Indonesian", "uk": "Ukrainian",
+    "sv": "Swedish", "cs": "Czech", "el": "Greek", "he": "Hebrew", "hi": "Hindi", "ro": "Romanian",
+    "da": "Danish", "fi": "Finnish", "no": "Norwegian", "hu": "Hungarian", "th": "Thai",
+    "sk": "Slovak", "bg": "Bulgarian", "hr": "Croatian", "ca": "Catalan", "ms": "Malay",
+}
+
+
+def _lang_name(code: str) -> str:
+    """
+    Return the full language name to use in LLM prompts.
+
+    Lookup order: the code exactly as given, then its canonical
+    ``xx-XX`` form (so "zh-tw" and "zh_TW" resolve like "zh-TW"), then the
+    bare language part ("pt-BR" -> "Portuguese").
+
+    Returns:
+        The language name; the original code when nothing matches; an empty
+        string when the code is empty or "auto".
+    """
+    if not code or code == "auto":
+        return ""
+    if code in _LLM_LANG_NAMES:
+        return _LLM_LANG_NAMES[code]
+
+    # Canonicalise case and separator so regional variants keep their
+    # specific name instead of collapsing to the base language.
+    base, _, region = code.replace("_", "-").partition("-")
+    base = base.lower()
+    if region:
+        regional_name = _LLM_LANG_NAMES.get(f"{base}-{region.upper()}")
+        if regional_name is not None:
+            return regional_name
+    return _LLM_LANG_NAMES.get(base, code)
+
+
# Global thread pool for parallel translations
_executor = concurrent.futures.ThreadPoolExecutor(max_workers=8)
-def retry_with_backoff(max_retries: int = 3, base_delay: float = 1.0, max_delay: float = 30.0):
+def retry_with_backoff(
+ max_retries: int = 3, base_delay: float = 1.0, max_delay: float = 30.0
+):
"""
Decorator for retry logic with exponential backoff and jitter.
Used for API calls that may fail due to rate limiting or transient errors.
"""
+
def decorator(func):
@wraps(func)
def wrapper(*args, **kwargs):
@@ -40,32 +63,44 @@ def retry_with_backoff(max_retries: int = 3, base_delay: float = 1.0, max_delay:
last_exception = e
if attempt < max_retries - 1:
# Exponential backoff with jitter
- delay = min(base_delay * (2 ** attempt) + random.uniform(0, 1), max_delay)
- logger.warning(f"Retry {attempt + 1}/{max_retries} for {func.__name__} after {delay:.2f}s: {e}")
+ delay = min(
+ base_delay * (2**attempt) + random.uniform(0, 1), max_delay
+ )
+ logger.warning(
+ f"Retry {attempt + 1}/{max_retries} for {func.__name__} after {delay:.2f}s: {e}"
+ )
time.sleep(delay)
# All retries exhausted
- logger.error(f"All {max_retries} retries failed for {func.__name__}: {last_exception}")
+ logger.error(
+ f"All {max_retries} retries failed for {func.__name__}: {last_exception}"
+ )
raise last_exception
+
return wrapper
+
return decorator
class TranslationCache:
"""Thread-safe LRU cache for translations to avoid redundant API calls"""
-
+
def __init__(self, maxsize: int = 5000):
self.cache: OrderedDict = OrderedDict()
self.maxsize = maxsize
self.lock = threading.RLock()
self.hits = 0
self.misses = 0
-
- def _make_key(self, text: str, target_language: str, source_language: str, provider: str) -> str:
+
+ def _make_key(
+ self, text: str, target_language: str, source_language: str, provider: str
+ ) -> str:
"""Create a unique cache key"""
content = f"{provider}:{source_language}:{target_language}:{text}"
- return hashlib.md5(content.encode('utf-8')).hexdigest()
-
- def get(self, text: str, target_language: str, source_language: str, provider: str) -> Optional[str]:
+ return hashlib.md5(content.encode("utf-8")).hexdigest()
+
+ def get(
+ self, text: str, target_language: str, source_language: str, provider: str
+ ) -> Optional[str]:
"""Get a cached translation if available"""
key = self._make_key(text, target_language, source_language, provider)
with self.lock:
@@ -76,8 +111,15 @@ class TranslationCache:
return self.cache[key]
self.misses += 1
return None
-
- def set(self, text: str, target_language: str, source_language: str, provider: str, translation: str):
+
+ def set(
+ self,
+ text: str,
+ target_language: str,
+ source_language: str,
+ provider: str,
+ translation: str,
+ ):
"""Cache a translation result"""
key = self._make_key(text, target_language, source_language, provider)
with self.lock:
@@ -87,14 +129,14 @@ class TranslationCache:
# Remove oldest if exceeding maxsize
while len(self.cache) > self.maxsize:
self.cache.popitem(last=False)
-
+
def clear(self):
"""Clear the cache"""
with self.lock:
self.cache.clear()
self.hits = 0
self.misses = 0
-
+
def stats(self) -> Dict:
"""Get cache statistics"""
with self.lock:
@@ -105,7 +147,7 @@ class TranslationCache:
"maxsize": self.maxsize,
"hits": self.hits,
"misses": self.misses,
- "hit_rate": f"{hit_rate:.1f}%"
+ "hit_rate": f"{hit_rate:.1f}%",
}
@@ -115,27 +157,39 @@ _translation_cache = TranslationCache(maxsize=5000)
class TranslationProvider(ABC):
"""Abstract base class for translation providers"""
-
+
@abstractmethod
- def translate(self, text: str, target_language: str, source_language: str = 'auto') -> str:
+ def translate(
+ self, text: str, target_language: str, source_language: str = "auto"
+ ) -> str:
"""Translate text from source to target language"""
pass
-
- def translate_batch(self, texts: List[str], target_language: str, source_language: str = 'auto') -> List[str]:
+
+ def translate_batch(
+ self, texts: List[str], target_language: str, source_language: str = "auto"
+ ) -> List[str]:
"""Translate multiple texts at once - default implementation"""
- return [self.translate(text, target_language, source_language) for text in texts]
-
- def translate_batch_parallel(self, texts: List[str], target_language: str, source_language: str = 'auto', max_workers: int = 4) -> List[str]:
+ return [
+ self.translate(text, target_language, source_language) for text in texts
+ ]
+
+ def translate_batch_parallel(
+ self,
+ texts: List[str],
+ target_language: str,
+ source_language: str = "auto",
+ max_workers: int = 4,
+ ) -> List[str]:
"""Parallel batch translation using thread pool"""
if not texts:
return []
-
- results = [''] * len(texts)
+
+ results = [""] * len(texts)
non_empty = [(i, t) for i, t in enumerate(texts) if t and t.strip()]
-
+
if not non_empty:
- return [t if t else '' for t in texts]
-
+ return [t if t else "" for t in texts]
+
def translate_one(item: Tuple[int, str]) -> Tuple[int, str]:
idx, text = item
try:
@@ -143,78 +197,116 @@ class TranslationProvider(ABC):
except Exception as e:
print(f"Translation error at index {idx}: {e}")
return (idx, text)
-
+
with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
for idx, translated in executor.map(translate_one, non_empty):
results[idx] = translated
-
+
# Fill empty positions
for i, text in enumerate(texts):
if not text or not text.strip():
- results[i] = text if text else ''
-
+ results[i] = text if text else ""
+
return results
class GoogleTranslationProvider(TranslationProvider):
"""Google Translate implementation with batch support and caching"""
-
+
+    # deep_translator requires specific codes that differ from BCP-47 / ISO 639-1.
+    # Map common codes sent by the frontend to the codes deep_translator expects.
+    # Codes deep_translator already accepts (e.g. 'iw') must NOT appear as keys:
+    # an earlier "iw" -> "he" entry rewrote a valid code into one it rejects.
+    _LANG_MAP: dict[str, str] = {
+        "zh": "zh-CN",  # Chinese (Simplified) — deep_translator only accepts zh-CN
+        "zh-cn": "zh-CN",
+        "zh-tw": "zh-TW",  # Chinese (Traditional)
+        "he": "iw",  # Hebrew: deep_translator lists Hebrew under the legacy code 'iw'
+        "jv": "jw",  # Javanese
+        "nb": "no",  # Norwegian Bokmål
+    }
+
def __init__(self):
self._local = threading.local()
self.provider_name = "google"
-
- def _get_translator(self, source_language: str, target_language: str) -> GoogleTranslator:
+
+    def _normalize_lang(self, code: str) -> str:
+        """
+        Translate a caller-supplied language code into the one handed to GoogleTranslator.
+
+        An empty code or "auto" becomes "auto". Otherwise the code is looked
+        up in _LANG_MAP as given, then lower-cased; when neither is mapped
+        the code is returned unchanged.
+        """
+        if code and code != "auto":
+            table = self._LANG_MAP
+            if code in table:
+                return table[code]
+            return table.get(code.lower(), code)
+        return "auto"
+
+ def _get_translator(
+ self, source_language: str, target_language: str
+ ) -> GoogleTranslator:
"""Get or create a translator instance for the current thread"""
- key = f"{source_language}_{target_language}"
- if not hasattr(self._local, 'translators'):
+ src = self._normalize_lang(source_language)
+ tgt = self._normalize_lang(target_language)
+ key = f"{src}_{tgt}"
+ if not hasattr(self._local, "translators"):
self._local.translators = {}
if key not in self._local.translators:
- self._local.translators[key] = GoogleTranslator(source=source_language, target=target_language)
+ self._local.translators[key] = GoogleTranslator(
+ source=src, target=tgt
+ )
return self._local.translators[key]
-
+
@retry_with_backoff(max_retries=3, base_delay=1.0)
def _do_translate(self, translator: GoogleTranslator, text: str) -> str:
"""Perform translation with retry logic"""
return translator.translate(text)
-
- def translate(self, text: str, target_language: str, source_language: str = 'auto') -> str:
+
+ def translate(
+ self, text: str, target_language: str, source_language: str = "auto"
+ ) -> str:
if not text or not text.strip():
return text
-
+
# Check cache first
- cached = _translation_cache.get(text, target_language, source_language, self.provider_name)
+ cached = _translation_cache.get(
+ text, target_language, source_language, self.provider_name
+ )
if cached is not None:
return cached
-
+
try:
translator = self._get_translator(source_language, target_language)
result = self._do_translate(translator, text)
# Cache the result
- _translation_cache.set(text, target_language, source_language, self.provider_name, result)
+ _translation_cache.set(
+ text, target_language, source_language, self.provider_name, result
+ )
return result
except Exception as e:
logger.error(f"Translation error: {e}")
return text
-
- def translate_batch(self, texts: List[str], target_language: str, source_language: str = 'auto', batch_size: int = 50) -> List[str]:
+
+ def translate_batch(
+ self,
+ texts: List[str],
+ target_language: str,
+ source_language: str = "auto",
+ batch_size: int = 50,
+ ) -> List[str]:
"""
Translate multiple texts using batch processing for speed.
Uses caching to avoid redundant translations.
"""
if not texts:
return []
-
+
# Filter and track empty texts
- results = [''] * len(texts)
+ results = [""] * len(texts)
non_empty_indices = []
non_empty_texts = []
texts_to_translate = []
indices_to_translate = []
-
+
for i, text in enumerate(texts):
if text and text.strip():
# Check cache first
- cached = _translation_cache.get(text, target_language, source_language, self.provider_name)
+ cached = _translation_cache.get(
+ text, target_language, source_language, self.provider_name
+ )
if cached is not None:
results[i] = cached
else:
@@ -223,21 +315,23 @@ class GoogleTranslationProvider(TranslationProvider):
texts_to_translate.append(text)
indices_to_translate.append(i)
else:
- results[i] = text if text else ''
-
+ results[i] = text if text else ""
+
if not texts_to_translate:
return results
-
+
try:
- translator = GoogleTranslator(source=source_language, target=target_language)
-
+ translator = GoogleTranslator(
+ source=source_language, target=target_language
+ )
+
# Process in batches
translated_texts = []
for i in range(0, len(texts_to_translate), batch_size):
- batch = texts_to_translate[i:i + batch_size]
+ batch = texts_to_translate[i : i + batch_size]
try:
# Use translate_batch if available
- if hasattr(translator, 'translate_batch'):
+ if hasattr(translator, "translate_batch"):
batch_result = translator.translate_batch(batch)
else:
# Fallback: join with separator, translate, split
@@ -261,22 +355,35 @@ class GoogleTranslationProvider(TranslationProvider):
translated_texts.append(translator.translate(text))
except:
translated_texts.append(text)
-
+
# Map back to original positions and cache results
- for idx, (original, translated) in zip(indices_to_translate, zip(texts_to_translate, translated_texts)):
+ for idx, (original, translated) in zip(
+ indices_to_translate, zip(texts_to_translate, translated_texts)
+ ):
result = translated if translated else texts[idx]
results[idx] = result
# Cache successful translations
- _translation_cache.set(texts[idx], target_language, source_language, self.provider_name, result)
-
+ _translation_cache.set(
+ texts[idx],
+ target_language,
+ source_language,
+ self.provider_name,
+ result,
+ )
+
return results
-
+
except Exception as e:
print(f"Batch translation failed: {e}")
# Fallback to individual translations
for idx, text in zip(indices_to_translate, texts_to_translate):
try:
- results[idx] = GoogleTranslator(source=source_language, target=target_language).translate(text) or text
+ results[idx] = (
+ GoogleTranslator(
+ source=source_language, target=target_language
+ ).translate(text)
+ or text
+ )
except:
results[idx] = text
return results
@@ -284,56 +391,64 @@ class GoogleTranslationProvider(TranslationProvider):
class DeepLTranslationProvider(TranslationProvider):
"""DeepL Translate implementation with batch support"""
-
+
def __init__(self, api_key: str):
self.api_key = api_key
self._translator_cache = {}
-
- def _get_translator(self, source_language: str, target_language: str) -> DeeplTranslator:
+
+ def _get_translator(
+ self, source_language: str, target_language: str
+ ) -> DeeplTranslator:
key = f"{source_language}_{target_language}"
if key not in self._translator_cache:
- self._translator_cache[key] = DeeplTranslator(api_key=self.api_key, source=source_language, target=target_language)
+ self._translator_cache[key] = DeeplTranslator(
+ api_key=self.api_key, source=source_language, target=target_language
+ )
return self._translator_cache[key]
-
- def translate(self, text: str, target_language: str, source_language: str = 'auto') -> str:
+
+ def translate(
+ self, text: str, target_language: str, source_language: str = "auto"
+ ) -> str:
if not text or not text.strip():
return text
-
+
try:
translator = self._get_translator(source_language, target_language)
return translator.translate(text)
except Exception as e:
print(f"Translation error: {e}")
return text
-
- def translate_batch(self, texts: List[str], target_language: str, source_language: str = 'auto') -> List[str]:
+
+ def translate_batch(
+ self, texts: List[str], target_language: str, source_language: str = "auto"
+ ) -> List[str]:
"""Batch translate using DeepL"""
if not texts:
return []
-
- results = [''] * len(texts)
+
+ results = [""] * len(texts)
non_empty = [(i, t) for i, t in enumerate(texts) if t and t.strip()]
-
+
if not non_empty:
- return [t if t else '' for t in texts]
-
+ return [t if t else "" for t in texts]
+
try:
translator = self._get_translator(source_language, target_language)
non_empty_texts = [t for _, t in non_empty]
-
- if hasattr(translator, 'translate_batch'):
+
+ if hasattr(translator, "translate_batch"):
translated = translator.translate_batch(non_empty_texts)
else:
translated = [translator.translate(t) for t in non_empty_texts]
-
+
for (idx, _), trans in zip(non_empty, translated):
results[idx] = trans if trans else texts[idx]
-
+
# Fill empty positions
for i, text in enumerate(texts):
if not text or not text.strip():
- results[i] = text if text else ''
-
+ results[i] = text if text else ""
+
return results
except Exception as e:
print(f"DeepL batch error: {e}")
@@ -342,52 +457,62 @@ class DeepLTranslationProvider(TranslationProvider):
class LibreTranslationProvider(TranslationProvider):
"""LibreTranslate implementation with batch support"""
-
+
def __init__(self, custom_url: str = "https://libretranslate.com"):
self.custom_url = custom_url
self._translator_cache = {}
-
- def _get_translator(self, source_language: str, target_language: str) -> LibreTranslator:
+
+ def _get_translator(
+ self, source_language: str, target_language: str
+ ) -> LibreTranslator:
key = f"{source_language}_{target_language}"
if key not in self._translator_cache:
- self._translator_cache[key] = LibreTranslator(source=source_language, target=target_language, custom_url=self.custom_url)
+ self._translator_cache[key] = LibreTranslator(
+ source=source_language,
+ target=target_language,
+ custom_url=self.custom_url,
+ )
return self._translator_cache[key]
-
- def translate(self, text: str, target_language: str, source_language: str = 'auto') -> str:
+
+ def translate(
+ self, text: str, target_language: str, source_language: str = "auto"
+ ) -> str:
if not text or not text.strip():
return text
-
+
try:
translator = self._get_translator(source_language, target_language)
return translator.translate(text)
except Exception as e:
print(f"LibreTranslate error: {e}")
return text
-
- def translate_batch(self, texts: List[str], target_language: str, source_language: str = 'auto') -> List[str]:
+
+ def translate_batch(
+ self, texts: List[str], target_language: str, source_language: str = "auto"
+ ) -> List[str]:
"""Batch translate using LibreTranslate"""
if not texts:
return []
-
- results = [''] * len(texts)
+
+ results = [""] * len(texts)
non_empty = [(i, t) for i, t in enumerate(texts) if t and t.strip()]
-
+
if not non_empty:
- return [t if t else '' for t in texts]
-
+ return [t if t else "" for t in texts]
+
try:
translator = self._get_translator(source_language, target_language)
-
+
for idx, text in non_empty:
try:
results[idx] = translator.translate(text) or text
except:
results[idx] = text
-
+
for i, text in enumerate(texts):
if not text or not text.strip():
- results[i] = text if text else ''
-
+ results[i] = text if text else ""
+
return results
except Exception as e:
print(f"LibreTranslate batch error: {e}")
@@ -396,74 +521,70 @@ class LibreTranslationProvider(TranslationProvider):
class OllamaTranslationProvider(TranslationProvider):
"""Ollama LLM translation implementation"""
-
- def __init__(self, base_url: str = "http://localhost:11434", model: str = "llama3", vision_model: str = "llava", system_prompt: str = ""):
- self.base_url = base_url.rstrip('/')
+
+ def __init__(
+ self,
+ base_url: str = "http://localhost:11434",
+ model: str = "llama3",
+ vision_model: str = "llava",
+ system_prompt: str = "",
+ ):
+ self.base_url = base_url.rstrip("/")
self.model = model.strip() # Remove any leading/trailing whitespace
self.vision_model = vision_model.strip()
- self.custom_system_prompt = system_prompt # Custom context, glossary, instructions
-
- def translate(self, text: str, target_language: str, source_language: str = 'auto') -> str:
+ self.custom_system_prompt = (
+ system_prompt # Custom context, glossary, instructions
+ )
+
+ def translate(
+ self, text: str, target_language: str, source_language: str = "auto"
+ ) -> str:
if not text or not text.strip():
return text
-
+
# Skip very short text or numbers only
if len(text.strip()) < 2 or text.strip().isdigit():
return text
-
- try:
- # Build system prompt with custom context if provided
- base_prompt = f"""You are a professional translator. Your ONLY task is to translate text to {target_language}.
-CRITICAL RULES:
-1. Output ONLY the translated text - no explanations, no comments, no notes
-2. Preserve the exact formatting (line breaks, spacing, punctuation)
-3. Do NOT add any prefixes like "Here's the translation:" or "Translation:"
-4. Do NOT refuse to translate or ask clarifying questions
-5. If the text is already in {target_language}, return it unchanged
-6. Translate everything literally and accurately
-7. NEVER provide comments, opinions, or explanations - you are JUST a translator
-8. If you have any doubt about the translation, return the original text unchanged
-9. Do not interpret or analyze the content - simply translate word by word
-10. Your response must contain ONLY the translated text, nothing else"""
-
+ try:
+ target_name = _lang_name(target_language) or target_language
+ source_name = _lang_name(source_language) if source_language and source_language != "auto" else None
+
+ if source_name:
+ base_prompt = f"""You are a translator. Translate the following text FROM {source_name} TO {target_name}. Output ONLY the translated text. No explanations, no quotes. Preserve formatting. If already in {target_name}, return unchanged."""
+ else:
+ base_prompt = f"""You are a translator. Translate the following text TO {target_name}. Output ONLY the translated text. No explanations, no quotes. Preserve formatting. Detect source language if needed. If already in {target_name}, return unchanged."""
+
if self.custom_system_prompt:
system_content = f"""{base_prompt}
-ADDITIONAL CONTEXT AND INSTRUCTIONS:
+ADDITIONAL CONTEXT:
{self.custom_system_prompt}"""
else:
system_content = base_prompt
-
+
# Use /api/chat endpoint (more compatible with all models)
response = requests.post(
f"{self.base_url}/api/chat",
json={
"model": self.model,
"messages": [
- {
- "role": "system",
- "content": system_content
- },
- {
- "role": "user",
- "content": text
- }
+ {"role": "system", "content": system_content},
+ {"role": "user", "content": text},
],
"stream": False,
- "options": {
- "temperature": 0.3,
- "num_predict": 500
- }
+ "options": {"temperature": 0.3, "num_predict": 500},
},
- timeout=120 # 2 minutes timeout
+ timeout=120, # 2 minutes timeout
)
response.raise_for_status()
result = response.json()
translated = result.get("message", {}).get("content", "").strip()
return translated if translated else text
except requests.exceptions.ConnectionError:
- print(f"Ollama error: Cannot connect to {self.base_url}. Is Ollama running?")
+ print(
+ f"Ollama error: Cannot connect to {self.base_url}. Is Ollama running?"
+ )
return text
except requests.exceptions.Timeout:
print(f"Ollama error: Request timeout after 120s")
@@ -471,16 +592,71 @@ ADDITIONAL CONTEXT AND INSTRUCTIONS:
except Exception as e:
print(f"Ollama translation error: {e}")
return text
-
+
+    def translate_batch(
+        self,
+        texts: List[str],
+        target_language: str,
+        source_language: str = "auto",
+        max_workers: int = 4,
+    ) -> List[str]:
+        """
+        Translate a list of texts concurrently via ``self.translate``.
+
+        Entries that are empty, whitespace-only, shorter than two characters
+        once stripped, or purely numeric are passed through untouched (falsy
+        entries become ""). The rest are sent to ``self.translate`` on a
+        thread pool of ``max_workers`` workers. Output order matches input order.
+        """
+        import concurrent.futures
+
+        if not texts:
+            return []
+
+        # Start from the pass-through value for every slot, then overwrite the
+        # slots that actually need a model call.
+        results = [entry if entry else "" for entry in texts]
+        pending = [
+            position
+            for position, entry in enumerate(texts)
+            if entry and len(entry.strip()) >= 2 and not entry.strip().isdigit()
+        ]
+        if not pending:
+            return results
+
+        def translate_at(position: int) -> str:
+            return self.translate(texts[position], target_language, source_language)
+
+        with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as pool:
+            for position, translated in zip(pending, pool.map(translate_at, pending)):
+                results[position] = translated
+
+        return results
+
+ def list_models(self) -> List[dict]:
+ """List available models from Ollama server"""
+ # NOTE(review): a @staticmethod also named list_models(base_url=...) is
+ # defined later in this same class body. The later definition rebinds the
+ # name, so this instance method is shadowed - confirm which is intended.
+ try:
+ # GET {base_url}/api/tags with a 5-second timeout.
+ response = requests.get(f"{self.base_url}/api/tags", timeout=5)
+ if response.ok:
+ data = response.json()
+ # A payload without a "models" key yields an empty list.
+ return data.get("models", [])
+ return []
+ except Exception as e:
+ # Best effort: any failure is printed and reported as "no models".
+ print(f"Ollama list_models error: {e}")
+ return []
+
def translate_image(self, image_path: str, target_language: str) -> str:
"""Translate text within an image using Ollama vision model"""
import base64
-
+
try:
# Read and encode image
- with open(image_path, 'rb') as img_file:
- image_data = base64.b64encode(img_file.read()).decode('utf-8')
-
+ with open(image_path, "rb") as img_file:
+ image_data = base64.b64encode(img_file.read()).decode("utf-8")
+
# Use /api/chat for vision models too
response = requests.post(
f"{self.base_url}/api/chat",
@@ -490,12 +666,12 @@ ADDITIONAL CONTEXT AND INSTRUCTIONS:
{
"role": "user",
"content": f"Extract all text from this image and translate it to {target_language}. Return ONLY the translated text, preserving the structure and formatting.",
- "images": [image_data]
+ "images": [image_data],
}
],
- "stream": False
+ "stream": False,
},
- timeout=60
+ timeout=60,
)
response.raise_for_status()
result = response.json()
@@ -503,7 +679,7 @@ ADDITIONAL CONTEXT AND INSTRUCTIONS:
except Exception as e:
print(f"Ollama vision translation error: {e}")
return ""
-
+
@staticmethod
def list_models(base_url: str = "http://localhost:11434") -> List[str]:
"""List available Ollama models"""
@@ -526,138 +702,235 @@ class OpenRouterTranslationProvider(TranslationProvider):
- meta-llama/llama-3.1-8b-instruct: $0.06/M tokens - Good quality
- google/gemma-2-9b-it: $0.08/M tokens - Good for European languages
"""
-
- def __init__(self, api_key: str, model: str = "deepseek/deepseek-chat", system_prompt: str = ""):
+
+ def __init__(
+ self,
+ api_key: str,
+ model: str = "deepseek/deepseek-chat",
+ system_prompt: str = "",
+ ):
self.api_key = api_key
self.model = model
self.custom_system_prompt = system_prompt
self.base_url = "https://openrouter.ai/api/v1"
self.provider_name = "openrouter"
self._session = None
-
+
def _get_session(self):
"""Get or create a requests session for connection pooling"""
if self._session is None:
import requests
+
self._session = requests.Session()
- self._session.headers.update({
- "Authorization": f"Bearer {self.api_key}",
- "HTTP-Referer": "https://translate-app.local",
- "X-Title": "Document Translator",
- "Content-Type": "application/json"
- })
+ self._session.headers.update(
+ {
+ "Authorization": f"Bearer {self.api_key}",
+ "HTTP-Referer": "https://translate-app.local",
+ "X-Title": "Document Translator",
+ "Content-Type": "application/json",
+ }
+ )
return self._session
-
- def translate(self, text: str, target_language: str, source_language: str = 'auto') -> str:
+
+ def translate(
+ self, text: str, target_language: str, source_language: str = "auto"
+ ) -> str:
if not text or not text.strip():
return text
-
+
# Skip very short text or numbers only
if len(text.strip()) < 2 or text.strip().isdigit():
return text
-
+
# Check cache first
- cached = _translation_cache.get(text, target_language, source_language, self.provider_name)
+ cached = _translation_cache.get(
+ text, target_language, source_language, self.provider_name
+ )
if cached is not None:
return cached
-
- try:
- session = self._get_session()
-
- # Optimized prompt for translation
- system_prompt = f"""Translate to {target_language}. Output ONLY the translation, nothing else. Preserve formatting."""
-
- if self.custom_system_prompt:
- system_prompt = f"{system_prompt}\n\nContext: {self.custom_system_prompt}"
-
- response = session.post(
- f"{self.base_url}/chat/completions",
- json={
- "model": self.model,
- "messages": [
- {"role": "system", "content": system_prompt},
- {"role": "user", "content": text}
- ],
- "temperature": 0.2,
- "max_tokens": 1000
- },
- timeout=30
+
+ session = self._get_session()
+
+ target_name = _lang_name(target_language) or target_language
+ source_name = _lang_name(source_language) if source_language and source_language != "auto" else None
+
+ if source_name:
+ system_prompt = f"""You are a translator. Translate the following text FROM {source_name} TO {target_name}.
+
+RULES:
+- Output ONLY the translated text. No explanations, no quotes, no "Translation:" prefix.
+- Preserve formatting (line breaks, spacing).
+- If the text is already in {target_name}, return it unchanged.
+- Never add comments or notes."""
+ else:
+ system_prompt = f"""You are a translator. Translate the following text TO {target_name}.
+
+RULES:
+- Output ONLY the translated text. No explanations, no quotes, no "Translation:" prefix.
+- Preserve formatting (line breaks, spacing).
+- Detect the source language automatically.
+- If the text is already in {target_name}, return it unchanged.
+- Never add comments or notes."""
+
+ if self.custom_system_prompt:
+ system_prompt = (
+ f"{system_prompt}\n\nAdditional context: {self.custom_system_prompt}"
)
- response.raise_for_status()
- result = response.json()
-
- translated = result.get("choices", [{}])[0].get("message", {}).get("content", "").strip()
-
- if translated:
- # Cache the result
- _translation_cache.set(text, target_language, source_language, self.provider_name, translated)
- return translated
- return text
-
- except Exception as e:
- print(f"OpenRouter translation error: {e}")
- return text
-
- def translate_batch(self, texts: List[str], target_language: str, source_language: str = 'auto') -> List[str]:
+
+ payload = {
+ "model": self.model,
+ "messages": [
+ {"role": "system", "content": system_prompt},
+ {"role": "user", "content": text},
+ ],
+ "temperature": 0.2,
+ "max_tokens": 1000,
+ }
+
+ last_error = None
+ for attempt in range(3):
+ try:
+ response = session.post(
+ f"{self.base_url}/chat/completions",
+ json=payload,
+ timeout=30,
+ )
+ if response.status_code == 429:
+ wait = (attempt + 1) * 5
+ logger.warning(f"OpenRouter rate limit (429), retry in {wait}s (attempt {attempt + 1}/3)")
+ time.sleep(wait)
+ continue
+ response.raise_for_status()
+ result = response.json()
+ translated = (
+ result.get("choices", [{}])[0]
+ .get("message", {})
+ .get("content", "")
+ .strip()
+ )
+ if translated:
+ _translation_cache.set(
+ text, target_language, source_language, self.provider_name, translated
+ )
+ return translated
+ raise ValueError("OpenRouter returned empty translation")
+ except Exception as e:
+ last_error = e
+ if attempt < 2 and "429" in str(e):
+ time.sleep((attempt + 1) * 5)
+ continue
+ break
+
+ err_msg = str(last_error) if last_error else "Unknown error"
+ logger.error(f"OpenRouter translation failed: {err_msg}")
+ raise RuntimeError(
+ f"Traduction IA échouée: {err_msg}. "
+ "Si vous utilisez un modèle gratuit (ex: gemma:free), il est souvent limité. "
+ "Passez à deepseek/deepseek-v3.2 dans les paramètres admin."
+ )
+
+ def translate_batch(
+ self, texts: List[str], target_language: str, source_language: str = "auto"
+ ) -> List[str]:
"""
Batch translate using OpenRouter with parallel requests.
Uses caching to avoid redundant translations.
"""
if not texts:
return []
-
- results = [''] * len(texts)
+
+ results = [""] * len(texts)
texts_to_translate = []
indices_to_translate = []
-
+
# Check cache first
for i, text in enumerate(texts):
if not text or not text.strip():
- results[i] = text if text else ''
+ results[i] = text if text else ""
else:
- cached = _translation_cache.get(text, target_language, source_language, self.provider_name)
+ cached = _translation_cache.get(
+ text, target_language, source_language, self.provider_name
+ )
if cached is not None:
results[i] = cached
else:
texts_to_translate.append(text)
indices_to_translate.append(i)
-
+
if not texts_to_translate:
return results
-
+
# Translate in parallel batches
import concurrent.futures
-
+
def translate_one(text: str) -> str:
return self.translate(text, target_language, source_language)
-
+
# Use thread pool for parallel requests
with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
translated = list(executor.map(translate_one, texts_to_translate))
-
+
# Map back results
for idx, trans in zip(indices_to_translate, translated):
results[idx] = trans
-
+
return results
-
+
@staticmethod
def list_recommended_models() -> List[dict]:
"""List recommended models for translation with pricing"""
return [
- {"id": "deepseek/deepseek-chat", "name": "DeepSeek Chat", "price": "$0.14/M tokens", "quality": "Excellent", "speed": "Fast"},
- {"id": "mistralai/mistral-7b-instruct", "name": "Mistral 7B", "price": "$0.06/M tokens", "quality": "Good", "speed": "Very Fast"},
- {"id": "meta-llama/llama-3.1-8b-instruct", "name": "Llama 3.1 8B", "price": "$0.06/M tokens", "quality": "Good", "speed": "Fast"},
- {"id": "google/gemma-2-9b-it", "name": "Gemma 2 9B", "price": "$0.08/M tokens", "quality": "Good", "speed": "Fast"},
- {"id": "anthropic/claude-3-haiku", "name": "Claude 3 Haiku", "price": "$0.25/M tokens", "quality": "Excellent", "speed": "Fast"},
- {"id": "openai/gpt-4o-mini", "name": "GPT-4o Mini", "price": "$0.15/M tokens", "quality": "Excellent", "speed": "Fast"},
+ {
+ "id": "deepseek/deepseek-chat",
+ "name": "DeepSeek Chat",
+ "price": "$0.14/M tokens",
+ "quality": "Excellent",
+ "speed": "Fast",
+ },
+ {
+ "id": "mistralai/mistral-7b-instruct",
+ "name": "Mistral 7B",
+ "price": "$0.06/M tokens",
+ "quality": "Good",
+ "speed": "Very Fast",
+ },
+ {
+ "id": "meta-llama/llama-3.1-8b-instruct",
+ "name": "Llama 3.1 8B",
+ "price": "$0.06/M tokens",
+ "quality": "Good",
+ "speed": "Fast",
+ },
+ {
+ "id": "google/gemma-2-9b-it",
+ "name": "Gemma 2 9B",
+ "price": "$0.08/M tokens",
+ "quality": "Good",
+ "speed": "Fast",
+ },
+ {
+ "id": "anthropic/claude-3-haiku",
+ "name": "Claude 3 Haiku",
+ "price": "$0.25/M tokens",
+ "quality": "Excellent",
+ "speed": "Fast",
+ },
+ {
+ "id": "openai/gpt-4o-mini",
+ "name": "GPT-4o Mini",
+ "price": "$0.15/M tokens",
+ "quality": "Excellent",
+ "speed": "Fast",
+ },
]
class WebLLMTranslationProvider(TranslationProvider):
"""WebLLM browser-based translation (client-side processing)"""
-
- def translate(self, text: str, target_language: str, source_language: str = 'auto') -> str:
+
+ def translate(
+ self, text: str, target_language: str, source_language: str = "auto"
+ ) -> str:
# WebLLM translation happens client-side in the browser
# This is just a placeholder - actual translation is done by JavaScript
# For server-side, we'll just pass through for now
@@ -665,40 +938,46 @@ class WebLLMTranslationProvider(TranslationProvider):
class OpenAITranslationProvider(TranslationProvider):
- """OpenAI GPT translation implementation with vision support"""
-
- def __init__(self, api_key: str, model: str = "gpt-4o-mini", system_prompt: str = ""):
+ """OpenAI-compatible LLM translation provider (OpenAI, xAI/Grok, Azure, etc.)"""
+
+ def __init__(
+ self,
+ api_key: str,
+ model: str = "gpt-4o-mini",
+ system_prompt: str = "",
+ base_url: Optional[str] = None,
+ ):
self.api_key = api_key
self.model = model
self.custom_system_prompt = system_prompt
-
- def translate(self, text: str, target_language: str, source_language: str = 'auto') -> str:
+ self.base_url = base_url # None → uses default OpenAI endpoint
+
+ def translate(
+ self, text: str, target_language: str, source_language: str = "auto"
+ ) -> str:
if not text or not text.strip():
return text
-
+
# Skip very short text or numbers only
if len(text.strip()) < 2 or text.strip().isdigit():
return text
-
+
try:
import openai
- client = openai.OpenAI(api_key=self.api_key)
-
- # Build system prompt with custom context if provided
- base_prompt = f"""You are a professional translator. Your ONLY task is to translate text to {target_language}.
-CRITICAL RULES:
-1. Output ONLY the translated text - no explanations, no comments, no notes
-2. Preserve the exact formatting (line breaks, spacing, punctuation)
-3. Do NOT add any prefixes like "Here's the translation:" or "Translation:"
-4. Do NOT refuse to translate or ask clarifying questions
-5. If the text is already in {target_language}, return it unchanged
-6. Translate everything literally and accurately
-7. NEVER provide comments, opinions, or explanations - you are JUST a translator
-8. If you have any doubt about the translation, return the original text unchanged
-9. Do not interpret or analyze the content - simply translate word by word
-10. Your response must contain ONLY the translated text, nothing else"""
-
+ client_kwargs = {"api_key": self.api_key}
+ if self.base_url:
+ client_kwargs["base_url"] = self.base_url
+ client = openai.OpenAI(**client_kwargs)
+
+ target_name = _lang_name(target_language) or target_language
+ source_name = _lang_name(source_language) if source_language and source_language != "auto" else None
+
+ if source_name:
+ base_prompt = f"""You are a translator. Translate the following text FROM {source_name} TO {target_name}. Output ONLY the translated text. No explanations, no quotes. Preserve formatting. If already in {target_name}, return unchanged."""
+ else:
+ base_prompt = f"""You are a translator. Translate the following text TO {target_name}. Output ONLY the translated text. No explanations, no quotes. Preserve formatting. Detect source language if needed. If already in {target_name}, return unchanged."""
+
if self.custom_system_prompt:
system_content = f"""{base_prompt}
@@ -706,39 +985,44 @@ ADDITIONAL CONTEXT AND INSTRUCTIONS:
{self.custom_system_prompt}"""
else:
system_content = base_prompt
-
+
response = client.chat.completions.create(
model=self.model,
messages=[
{"role": "system", "content": system_content},
- {"role": "user", "content": text}
+ {"role": "user", "content": text},
],
temperature=0.3,
- max_tokens=500
+ max_tokens=500,
)
-
+
translated = response.choices[0].message.content.strip()
return translated if translated else text
except Exception as e:
print(f"OpenAI translation error: {e}")
return text
-
+
def translate_image(self, image_path: str, target_language: str) -> str:
"""Translate text within an image using OpenAI vision model"""
import base64
-
+
try:
import openai
+
client = openai.OpenAI(api_key=self.api_key)
-
+
# Read and encode image
- with open(image_path, 'rb') as img_file:
- image_data = base64.b64encode(img_file.read()).decode('utf-8')
-
+ with open(image_path, "rb") as img_file:
+ image_data = base64.b64encode(img_file.read()).decode("utf-8")
+
# Determine image type from extension
- ext = image_path.lower().split('.')[-1]
- media_type = f"image/{ext}" if ext in ['png', 'jpg', 'jpeg', 'gif', 'webp'] else "image/png"
-
+ ext = image_path.lower().split(".")[-1]
+ media_type = (
+ f"image/{ext}"
+ if ext in ["png", "jpg", "jpeg", "gif", "webp"]
+ else "image/png"
+ )
+
response = client.chat.completions.create(
model=self.model, # gpt-4o and gpt-4o-mini support vision
messages=[
@@ -747,20 +1031,20 @@ ADDITIONAL CONTEXT AND INSTRUCTIONS:
"content": [
{
"type": "text",
- "text": f"Extract all text from this image and translate it to {target_language}. Return ONLY the translated text, preserving the structure and formatting."
+ "text": f"Extract all text from this image and translate it to {target_language}. Return ONLY the translated text, preserving the structure and formatting.",
},
{
"type": "image_url",
"image_url": {
"url": f"data:{media_type};base64,{image_data}"
- }
- }
- ]
+ },
+ },
+ ],
}
],
- max_tokens=1000
+ max_tokens=1000,
)
-
+
return response.choices[0].message.content.strip()
except Exception as e:
print(f"OpenAI vision translation error: {e}")
@@ -769,7 +1053,7 @@ ADDITIONAL CONTEXT AND INSTRUCTIONS:
class TranslationService:
"""Main translation service that delegates to the configured provider"""
-
+
def __init__(self, provider: Optional[TranslationProvider] = None):
if provider:
self.provider = provider
@@ -777,73 +1061,82 @@ class TranslationService:
# Auto-select provider based on configuration
self.provider = self._get_default_provider()
self.translate_images = False # Flag to enable image translation
-
+
def _get_default_provider(self) -> TranslationProvider:
"""Get the default translation provider from configuration"""
# Always use Google Translate by default to avoid API key issues
# Provider will be overridden per request in the API endpoint
return GoogleTranslationProvider()
-
- def translate_text(self, text: str, target_language: str, source_language: str = 'auto') -> str:
+
+ def translate_text(
+ self, text: str, target_language: str, source_language: str = "auto"
+ ) -> str:
"""
Translate a single text string
-
+
Args:
text: Text to translate
target_language: Target language code (e.g., 'es', 'fr', 'de')
source_language: Source language code (default: 'auto' for auto-detection)
-
+
Returns:
Translated text
"""
if not text or not text.strip():
return text
-
+
return self.provider.translate(text, target_language, source_language)
-
+
def translate_image(self, image_path: str, target_language: str) -> str:
"""
Translate text in an image using vision model (Ollama or OpenAI)
-
+
Args:
image_path: Path to image file
target_language: Target language code
-
+
Returns:
Translated text from image
"""
if not self.translate_images:
return ""
-
+
# Ollama and OpenAI support image translation
if isinstance(self.provider, OllamaTranslationProvider):
return self.provider.translate_image(image_path, target_language)
elif isinstance(self.provider, OpenAITranslationProvider):
return self.provider.translate_image(image_path, target_language)
-
+
return ""
-
- def translate_batch(self, texts: list[str], target_language: str, source_language: str = 'auto') -> list[str]:
+
+ def translate_batch(
+ self, texts: list[str], target_language: str, source_language: str = "auto"
+ ) -> list[str]:
"""
Translate multiple text strings efficiently using batch processing.
-
+
Args:
texts: List of texts to translate
target_language: Target language code
source_language: Source language code (default: 'auto')
-
+
Returns:
List of translated texts
"""
if not texts:
return []
-
+
# Use provider's batch method if available
- if hasattr(self.provider, 'translate_batch'):
- return self.provider.translate_batch(texts, target_language, source_language)
-
+ if hasattr(self.provider, "translate_batch"):
+ return self.provider.translate_batch(
+ texts, target_language, source_language
+ )
+
# Fallback to individual translations
- return [self.translate_text(text, target_language, source_language) for text in texts]
+ return [
+ self.translate_text(text, target_language, source_language)
+ for text in texts
+ ]
# Global translation service instance
diff --git a/translators/__init__.py b/translators/__init__.py
index 662ddf1..680d745 100644
--- a/translators/__init__.py
+++ b/translators/__init__.py
@@ -1,10 +1,17 @@
"""Translators package initialization"""
-from .excel_translator import ExcelTranslator, excel_translator
-from .word_translator import WordTranslator, word_translator
-from .pptx_translator import PowerPointTranslator, pptx_translator
+
+from .excel_translator import ExcelTranslator, excel_translator, ExcelProcessorError
+from .word_translator import WordTranslator, word_translator, WordProcessorError
+from .pptx_translator import PowerPointTranslator, pptx_translator, PptxProcessorError
__all__ = [
- 'ExcelTranslator', 'excel_translator',
- 'WordTranslator', 'word_translator',
- 'PowerPointTranslator', 'pptx_translator'
+ "ExcelTranslator",
+ "excel_translator",
+ "ExcelProcessorError",
+ "WordTranslator",
+ "word_translator",
+ "WordProcessorError",
+ "PowerPointTranslator",
+ "pptx_translator",
+ "PptxProcessorError",
]
diff --git a/translators/excel_translator.py b/translators/excel_translator.py
index 97ef3c1..ecc8b37 100644
--- a/translators/excel_translator.py
+++ b/translators/excel_translator.py
@@ -2,159 +2,553 @@
Excel Translation Module
Translates Excel files while preserving all formatting, formulas, images, and layout
OPTIMIZED: Uses batch translation for 5-10x faster processing
+
+Updated to use new TranslationProvider interface with structured error handling.
"""
+
import re
import tempfile
import os
+import time
+import concurrent.futures
from pathlib import Path
-from typing import Dict, Set, List, Tuple
+from typing import Dict, Set, List, Tuple, Optional, Callable, Any
+
from openpyxl import load_workbook
from openpyxl.worksheet.worksheet import Worksheet
from openpyxl.cell.cell import Cell
from openpyxl.utils import get_column_letter
-from services.translation_service import translation_service
+
+from services.providers.base import TranslationProvider
+
+
+try:
+ import structlog
+
+ logger = structlog.get_logger(__name__)
+ _HAS_STRUCTLOG = True
+except ImportError:
+ import logging
+
+ logger = logging.getLogger(__name__)
+ _HAS_STRUCTLOG = False
+
+
+def _log_info(event: str, **kwargs):
+ """Log info with structlog or standard logging compatibility."""
+ if _HAS_STRUCTLOG:
+ logger.info(event, **kwargs)
+ else:
+ msg = f"{event} " + " ".join(f"{k}={v}" for k, v in kwargs.items())
+ logger.info(msg)
+
+
+def _log_error(event: str, **kwargs):
+ """Log error with structlog or standard logging compatibility."""
+ if _HAS_STRUCTLOG:
+ logger.error(event, **kwargs)
+ else:
+ msg = f"{event} " + " ".join(f"{k}={v}" for k, v in kwargs.items())
+ logger.error(msg)
+
+
+class ExcelProcessorError(Exception):
+ """Exception for Excel processing errors with structured error codes."""
+
+ INVALID_FORMAT = "INVALID_FORMAT"
+ EXCEL_CORRUPTED = "EXCEL_CORRUPTED"
+ EXCEL_READ_ERROR = "EXCEL_READ_ERROR"
+ EXCEL_WRITE_ERROR = "EXCEL_WRITE_ERROR"
+ EXCEL_TOO_LARGE = "EXCEL_TOO_LARGE"
+
+ ERROR_MESSAGES = {
+ INVALID_FORMAT: "Format de fichier non supporte. Utilisez .xlsx.",
+ EXCEL_CORRUPTED: "Le fichier Excel est corrompu ou illisible.",
+ EXCEL_READ_ERROR: "Erreur lors de la lecture du fichier Excel.",
+ EXCEL_WRITE_ERROR: "Erreur lors de la creation du fichier traduit.",
+ EXCEL_TOO_LARGE: "Le fichier est trop volumineux (max 50 Mo).",
+ }
+
+ def __init__(
+ self,
+ code: str,
+ message: Optional[str] = None,
+ details: Optional[Dict[str, Any]] = None,
+ ):
+ self.code = code
+ self.message = message or self.ERROR_MESSAGES.get(code, "Erreur inconnue")
+ self.details = details or {}
+ super().__init__(self.message)
+
+ def to_dict(self) -> Dict[str, Any]:
+ """Convert error to dictionary format for API responses."""
+ result = {"error": self.code, "message": self.message}
+ if self.details:
+ result["details"] = self.details
+ return result
class ExcelTranslator:
- """Handles translation of Excel files with strict formatting preservation"""
-
- def __init__(self):
- self.translation_service = translation_service
- self.formula_pattern = re.compile(r'=.*')
-
- def translate_file(self, input_path: Path, output_path: Path, target_language: str) -> Path:
+ """
+ Handles translation of Excel files with strict formatting preservation.
+
+ Uses the new TranslationProvider interface for improved error handling
+ and fallback chain support.
+ """
+
+ MAX_FILE_SIZE_MB = 50
+ XLSX_MAGIC_BYTES = b"PK" # .xlsx files are ZIP archives
+
+ def __init__(self, provider: Optional[TranslationProvider] = None):
+ """
+ Initialize ExcelTranslator.
+
+ Args:
+ provider: TranslationProvider instance for translations.
+ If None, translation falls back to the legacy translation_service.
+ """
+ self._provider = provider
+ self.formula_pattern = re.compile(r"=.*")
+ self._custom_prompt: Optional[str] = None
+
+ def set_provider(self, provider: TranslationProvider) -> None:
+ """Set the translation provider."""
+ self._provider = provider
+
+ def set_custom_prompt(self, prompt: Optional[str]) -> None:
+ """Set custom system prompt for LLM providers."""
+ self._custom_prompt = prompt
+
+ def translate_file(
+ self,
+ input_path: Path,
+ output_path: Path,
+ target_language: str,
+ source_language: str = "auto",
+ progress_callback: Optional[Callable[[Dict[str, Any]], None]] = None,
+ ) -> Path:
"""
Translate an Excel file while preserving all formatting and structure.
Uses batch translation for improved performance.
+
+ Args:
+ input_path: Path to input Excel file
+ output_path: Path for translated output file
+ target_language: Target language code (e.g., 'fr', 'en')
+ source_language: Source language code (default: auto-detect)
+ progress_callback: Optional callback for progress updates
+ Receives dict with: current, total, sheet, total_sheets, cells_translated
+
+ Returns:
+ Path to translated file
+
+ Raises:
+ ExcelProcessorError: If file is invalid, corrupted, or processing fails
"""
- workbook = load_workbook(input_path, data_only=False)
-
- # Collect all translatable text elements
- text_elements = [] # List of (text, setter_function)
- sheet_names_to_translate = []
-
- for sheet_name in workbook.sheetnames:
- worksheet = workbook[sheet_name]
- self._collect_from_worksheet(worksheet, text_elements)
- sheet_names_to_translate.append(sheet_name)
-
- # Add sheet names to translate
- sheet_name_setters = []
- for sheet_name in sheet_names_to_translate:
- text_elements.append((sheet_name, None)) # None setter - handled separately
- sheet_name_setters.append(sheet_name)
-
- # Batch translate all texts at once
- if text_elements:
- texts = [elem[0] for elem in text_elements]
- print(f"Batch translating {len(texts)} text segments...")
- translated_texts = self.translation_service.translate_batch(texts, target_language)
-
- # Apply translations to cells
- sheet_name_offset = len(text_elements) - len(sheet_name_setters)
- for i, ((original_text, setter), translated) in enumerate(zip(text_elements[:sheet_name_offset], translated_texts[:sheet_name_offset])):
- if translated is not None and setter is not None:
+ start_time = time.time()
+
+ input_path = Path(input_path)
+ output_path = Path(output_path)
+
+ self._validate_file(input_path)
+
+ try:
+ workbook = load_workbook(input_path, data_only=False)
+ except Exception as e:
+ raise ExcelProcessorError(
+ code=ExcelProcessorError.EXCEL_CORRUPTED,
+ details={"file_name": input_path.name, "error": str(e)},
+ )
+
+ try:
+ cells_translated = 0
+ total_sheets = len(workbook.sheetnames)
+
+ # Emit initial progress
+ if progress_callback:
+ progress_callback(
+ {
+ "current": 0,
+ "total": total_sheets,
+ "sheet": 0,
+ "total_sheets": total_sheets,
+ "cells_translated": 0,
+ }
+ )
+
+ text_elements: List[Tuple[str, Callable[[str], None]]] = []
+ sheet_names_to_translate = []
+
+ for sheet_idx, sheet_name in enumerate(workbook.sheetnames):
+ worksheet = workbook[sheet_name]
+ self._collect_from_worksheet(worksheet, text_elements)
+ sheet_names_to_translate.append(sheet_name)
+
+ # Emit progress after each sheet is collected (intended to keep update latency under 500 ms)
+ if progress_callback:
+ progress_callback(
+ {
+ "current": sheet_idx + 1,
+ "total": total_sheets,
+ "sheet": sheet_idx + 1,
+ "total_sheets": total_sheets,
+ "cells_translated": cells_translated,
+ }
+ )
+
+ for sheet_name in sheet_names_to_translate:
+ text_elements.append((sheet_name, None))
+
+ if text_elements:
+ texts = [elem[0] for elem in text_elements]
+ total_texts = len(texts)
+ sheet_name_offset = total_texts - len(sheet_names_to_translate)
+
+ _log_info(
+ "excel_batch_translation_start",
+ file_name=input_path.name,
+ text_count=total_texts,
+ target_lang=target_language,
+ )
+
+ # Translate all text elements in parallel chunks, reporting real-time
+ # progress after each chunk completes.
+ CHUNK_SIZE = 15
+ MAX_WORKERS = 6
+ chunks = [
+ (i, texts[i : i + CHUNK_SIZE])
+ for i in range(0, total_texts, CHUNK_SIZE)
+ ]
+ translated_texts: List[str] = [""] * total_texts
+ completed_items = [0]
+
+ def _translate_chunk(
+ chunk_idx: int, chunk: List[str]
+ ) -> Tuple[int, List[str]]:
+ return chunk_idx, self._batch_translate(
+ chunk, target_language, source_language
+ )
+
+ with concurrent.futures.ThreadPoolExecutor(max_workers=MAX_WORKERS) as pool:
+ future_map = {
+ pool.submit(_translate_chunk, idx, chunk): (idx, chunk)
+ for idx, chunk in chunks
+ }
+ for future in concurrent.futures.as_completed(future_map):
+ chunk_idx, translated_chunk = future.result()
+ for j, t in enumerate(translated_chunk):
+ translated_texts[chunk_idx + j] = t
+ completed_items[0] += len(translated_chunk)
+ if progress_callback:
+ done = min(completed_items[0], total_texts)
+ progress_callback(
+ {
+ "current": done,
+ "total": total_texts,
+ "sheet": done,
+ "total_sheets": total_texts,
+ "cells_translated": cells_translated,
+ }
+ )
+
+ # Apply cell translations
+ for i, ((original_text, setter), translated) in enumerate(
+ zip(
+ text_elements[:sheet_name_offset],
+ translated_texts[:sheet_name_offset],
+ )
+ ):
+ if translated is not None and setter is not None:
+ try:
+ setter(translated)
+ cells_translated += 1
+ except Exception as e:
+ _log_error(
+ "excel_setter_error",
+ error=str(e),
+ index=i,
+ )
+
+ # Apply sheet name translations
+ sheet_name_mapping = {}
+ for i, (sheet_name, translated) in enumerate(
+ zip(sheet_names_to_translate, translated_texts[sheet_name_offset:])
+ ):
+ if translated and translated != sheet_name:
+ new_name = self._sanitize_sheet_name(translated)
+ counter = 1
+ base_name = new_name[:28] if len(new_name) > 28 else new_name
+ while (
+ new_name in sheet_name_mapping.values()
+ or new_name in workbook.sheetnames
+ ):
+ new_name = f"{base_name}_{counter}"
+ counter += 1
+ sheet_name_mapping[sheet_name] = new_name
+
+ for original_name, new_name in sheet_name_mapping.items():
try:
- setter(translated)
- except Exception as e:
- print(f"Error applying translation: {e}")
-
- # Apply sheet name translations
- sheet_name_mapping = {}
- for i, (sheet_name, translated) in enumerate(zip(sheet_name_setters, translated_texts[sheet_name_offset:])):
- if translated and translated != sheet_name:
- new_name = translated[:31]
- counter = 1
- base_name = new_name[:28] if len(new_name) > 28 else new_name
- while new_name in sheet_name_mapping.values() or new_name in workbook.sheetnames:
- new_name = f"{base_name}_{counter}"
- counter += 1
- sheet_name_mapping[sheet_name] = new_name
-
- # Rename sheets
- for original_name, new_name in sheet_name_mapping.items():
- workbook[original_name].title = new_name
-
- # Translate images if enabled (separate process)
- if getattr(self.translation_service, 'translate_images', False):
- for sheet_name in workbook.sheetnames:
- self._translate_images(workbook[sheet_name], target_language)
-
- workbook.save(output_path)
- workbook.close()
-
- return output_path
-
- def _collect_from_worksheet(self, worksheet: Worksheet, text_elements: List[Tuple[str, callable]]):
- """Collect all translatable text from worksheet cells"""
+ workbook[original_name].title = new_name
+ except ValueError:
+ _log_error(
+ "excel_sheet_rename_failed",
+ original_name=original_name,
+ new_name=new_name,
+ )
+
+ try:
+ workbook.save(output_path)
+ except Exception as e:
+ raise ExcelProcessorError(
+ code=ExcelProcessorError.EXCEL_WRITE_ERROR,
+ details={"file_name": output_path.name, "error": str(e)},
+ )
+
+ workbook.close()
+
+ processing_time_ms = round((time.time() - start_time) * 1000, 2)
+
+ _log_info(
+ "excel_translation_success",
+ file_name=input_path.name,
+ sheets_processed=total_sheets,
+ cells_translated=cells_translated,
+ source_lang=source_language,
+ target_lang=target_language,
+ processing_time_ms=processing_time_ms,
+ )
+
+ return output_path
+
+ except ExcelProcessorError:
+ raise
+ except Exception as e:
+ raise ExcelProcessorError(
+ code=ExcelProcessorError.EXCEL_READ_ERROR,
+ details={"file_name": input_path.name, "error": str(e)},
+ )
+
+ def _validate_file(self, file_path: Path) -> None:
+ """Validate file format and size."""
+ if not file_path.exists():
+ raise ExcelProcessorError(
+ code=ExcelProcessorError.EXCEL_READ_ERROR,
+ message=f"Fichier introuvable: {file_path.name}",
+ details={"file_name": file_path.name},
+ )
+
+ if file_path.suffix.lower() != ".xlsx":
+ raise ExcelProcessorError(
+ code=ExcelProcessorError.INVALID_FORMAT,
+ details={
+ "file_name": file_path.name,
+ "extension": file_path.suffix,
+ "expected": ".xlsx",
+ },
+ )
+
+ with open(file_path, "rb") as f:
+ header = f.read(4)
+ if header[:2] != self.XLSX_MAGIC_BYTES:
+ raise ExcelProcessorError(
+ code=ExcelProcessorError.INVALID_FORMAT,
+ details={"file_name": file_path.name, "reason": "Invalid file header"},
+ )
+
+ file_size_mb = file_path.stat().st_size / (1024 * 1024)
+ if file_size_mb > self.MAX_FILE_SIZE_MB:
+ raise ExcelProcessorError(
+ code=ExcelProcessorError.EXCEL_TOO_LARGE,
+ details={
+ "file_name": file_path.name,
+ "size_mb": round(file_size_mb, 2),
+ "max_mb": self.MAX_FILE_SIZE_MB,
+ },
+ )
+
+ def _sanitize_sheet_name(self, name: str) -> str:
+ """
+ Sanitize a sheet name to be valid for Excel.
+
+ Excel forbids: : \\ / ? * [ ]
+ Max length: 31 characters
+ """
+ invalid_chars = ":\\/?*[]"
+ sanitized = "".join(c if c not in invalid_chars else "_" for c in name)
+ return sanitized[:31]
+
+ def _batch_translate(
+ self, texts: List[str], target_language: str, source_language: str = "auto"
+ ) -> List[str]:
+ """
+ Batch translate with the configured provider, or the legacy translation_service when none is set.
+
+ Args:
+ texts: List of texts to translate
+ target_language: Target language code
+ source_language: Source language code
+
+ Returns:
+ List of translated texts (same order as input)
+ """
+ if not texts:
+ return []
+
+ if self._provider is not None:
+ return self._translate_with_provider(
+ texts, target_language, source_language
+ )
+
+ return self._translate_with_legacy(texts, target_language, source_language)
+
+ def _translate_with_provider(
+ self, texts: List[str], target_language: str, source_language: str
+ ) -> List[str]:
+ """Translate using the TranslationProvider.translate_batch() interface."""
+ translated = self._provider.translate_batch(texts, target_language, source_language)
+ return [
+ t if (t and t.strip()) else orig
+ for t, orig in zip(translated, texts)
+ ]
+
+ def _translate_with_legacy(
+ self, texts: List[str], target_language: str, source_language: str
+ ) -> List[str]:
+ """Fallback to legacy translation_service for backward compatibility."""
+ from services.translation_service import translation_service
+
+ _log_info(
+ "excel_using_legacy_service",
+ text_count=len(texts),
+ target_lang=target_language,
+ )
+
+ return translation_service.translate_batch(
+ texts, target_language, source_language
+ )
+
+ def _collect_from_worksheet(
+ self,
+ worksheet: Worksheet,
+ text_elements: List[Tuple[str, Callable[[str], None]]],
+ ) -> None:
+ """Collect all translatable text from worksheet cells."""
for row in worksheet.iter_rows():
for cell in row:
if cell.value is not None:
self._collect_from_cell(cell, text_elements)
-
- def _collect_from_cell(self, cell: Cell, text_elements: List[Tuple[str, callable]]):
- """Collect text from a cell"""
+
+ def _collect_from_cell(
+ self, cell: Cell, text_elements: List[Tuple[str, Callable[[str], None]]]
+ ) -> None:
+ """Collect text from a cell."""
original_value = cell.value
-
+
if original_value is None:
return
-
- # Handle formulas - collect text inside quotes
- if isinstance(original_value, str) and original_value.startswith('='):
- string_pattern = re.compile(r'"([^"]*)"')
+
+ if isinstance(original_value, str) and original_value.startswith("="):
+ # Collect double-quoted and single-quoted literals from the formula (the setter only rewrites the double-quoted form).
+ # NOTE: only backslash escapes are matched; Excel's doubled quotes ("") are split into separate literals.
+ string_pattern = re.compile(r'"((?:[^"\\]|\\.)*)"')
+ single_quote_pattern = re.compile(r"'((?:[^'\\]|\\.)*)'")
+
strings = string_pattern.findall(original_value)
+ strings.extend(single_quote_pattern.findall(original_value))
+
for s in strings:
if s.strip():
+
def make_formula_setter(c, orig_formula, orig_string):
def setter(translated):
- c.value = orig_formula.replace(f'"{orig_string}"', f'"{translated}"')
+ # Escape quotes in translated text to preserve formula validity
+ escaped_translated = translated.replace('"', '""')
+ c.value = orig_formula.replace(
+ f'"{orig_string}"', f'"{escaped_translated}"'
+ )
+
return setter
- text_elements.append((s, make_formula_setter(cell, original_value, s)))
- # Handle regular text
+
+ text_elements.append(
+ (s, make_formula_setter(cell, original_value, s))
+ )
+
elif isinstance(original_value, str) and original_value.strip():
+
def make_setter(c):
def setter(text):
c.value = text
+
return setter
+
text_elements.append((original_value, make_setter(cell)))
-
- def _translate_images(self, worksheet: Worksheet, target_language: str):
- """Translate text in images using vision model"""
- from services.translation_service import OllamaTranslationProvider
-
- if not isinstance(self.translation_service.provider, OllamaTranslationProvider):
- return
-
+
+ def _translate_images(self, worksheet: Worksheet, target_language: str) -> None:
+ """
+ Translate text in images using vision model.
+
+ NOTE: This method is currently NOT CALLED in translate_file() as image translation
+ is not part of the current story scope (Story 2.7). It is intentionally preserved
+ for future implementation when vision model support is prioritized.
+
+ TODO: Call this method during translate_file() when implementing image translation feature.
+ """
try:
- images = getattr(worksheet, '_images', [])
-
+ images = getattr(worksheet, "_images", [])
+
for idx, image in enumerate(images):
try:
image_data = image._data()
- ext = image.format or 'png'
-
- with tempfile.NamedTemporaryFile(suffix=f'.{ext}', delete=False) as tmp:
+ ext = image.format or "png"
+
+ with tempfile.NamedTemporaryFile(
+ suffix=f".{ext}", delete=False
+ ) as tmp:
tmp.write(image_data)
tmp_path = tmp.name
-
- translated_text = self.translation_service.provider.translate_image(tmp_path, target_language)
+
+ translated_text = self._translate_image_with_legacy(
+ tmp_path, target_language
+ )
os.unlink(tmp_path)
-
+
if translated_text and translated_text.strip():
anchor = image.anchor
- if hasattr(anchor, '_from'):
+ if hasattr(anchor, "_from"):
cell_ref = f"{get_column_letter(anchor._from.col + 1)}{anchor._from.row + 1}"
cell = worksheet[cell_ref]
from openpyxl.comments import Comment
- cell.comment = Comment(f"Image translation: {translated_text}", "Translator")
- print(f"Added Excel image translation at {cell_ref}")
-
+
+ cell.comment = Comment(
+ f"Image translation: {translated_text}", "Translator"
+ )
+ _log_info(
+ "excel_image_translation_added",
+ cell_ref=cell_ref,
+ )
+
except Exception as e:
- print(f"Error translating Excel image {idx}: {e}")
-
+ _log_error(
+ "excel_image_translation_error",
+ image_index=idx,
+ error=str(e),
+ )
+
except Exception as e:
- print(f"Error processing Excel images: {e}")
+ _log_error(
+ "excel_image_processing_error",
+ error=str(e),
+ )
+
+    def _translate_image_with_legacy(
+        self, image_path: str, target_language: str
+    ) -> str:
+        """
+        Translate the text found in an image through the legacy service.
+
+        Args:
+            image_path: Path of the image file to translate.
+            target_language: Target language code.
+
+        Returns:
+            Whatever the legacy service's ``translate_image`` returns, or an
+            empty string when the service does not expose that method.
+        """
+        from services.translation_service import translation_service as legacy
+
+        # Guard clause: image translation is optional on the legacy service.
+        if not hasattr(legacy, "translate_image"):
+            return ""
+        return legacy.translate_image(image_path, target_language)
-# Global translator instance
excel_translator = ExcelTranslator()
diff --git a/translators/pptx_translator.py b/translators/pptx_translator.py
index 02a543f..bf6184a 100644
--- a/translators/pptx_translator.py
+++ b/translators/pptx_translator.py
@@ -2,150 +2,481 @@
PowerPoint Translation Module
Translates PowerPoint files while preserving all layouts, animations, and media
OPTIMIZED: Uses batch translation for 5-10x faster processing
+
+Updated to use new TranslationProvider interface with structured error handling.
"""
+
+import time
+import concurrent.futures
from pathlib import Path
+from typing import Dict, List, Tuple, Optional, Callable, Any
+
+from lxml import etree
from pptx import Presentation
from pptx.shapes.base import BaseShape
from pptx.shapes.group import GroupShape
-from pptx.util import Inches, Pt
from pptx.enum.shapes import MSO_SHAPE_TYPE
-from services.translation_service import translation_service
-from typing import List, Tuple
-import tempfile
-import os
+
+from services.providers.base import TranslationProvider
+
+# DrawingML namespace used by pptx XML
+_NS_A = "http://schemas.openxmlformats.org/drawingml/2006/main"
+
+# Languages written right-to-left
+# (lower-case codes: translate_file() lower-cases target_language before the lookup)
+RTL_LANGUAGES: frozenset = frozenset(
+ {"ar", "he", "fa", "ur", "ku", "ps", "ug", "sd", "yi", "dv", "ckb"}
+)
+
+
+# Prefer structlog when it is installed and fall back to the standard logging
+# module otherwise. _HAS_STRUCTLOG tells _log_info/_log_error which call style
+# the selected logger expects.
+try:
+ import structlog
+
+ logger = structlog.get_logger(__name__)
+ _HAS_STRUCTLOG = True
+except ImportError:
+ import logging
+
+ logger = logging.getLogger(__name__)
+ _HAS_STRUCTLOG = False
+
+
+def _log_info(event: str, **kwargs):
+    """
+    Emit an info-level event through whichever logger was selected at import.
+
+    With structlog the fields are forwarded as structured key/values; with the
+    standard library logger they are flattened into ``event k=v k=v``.
+    """
+    if not _HAS_STRUCTLOG:
+        fields = " ".join(f"{key}={value}" for key, value in kwargs.items())
+        logger.info(f"{event} " + fields)
+        return
+    logger.info(event, **kwargs)
+
+
+def _log_error(event: str, **kwargs):
+    """
+    Emit an error-level event through whichever logger was selected at import.
+
+    With structlog the fields are forwarded as structured key/values; with the
+    standard library logger they are flattened into ``event k=v k=v``.
+    """
+    if not _HAS_STRUCTLOG:
+        fields = " ".join(f"{key}={value}" for key, value in kwargs.items())
+        logger.error(f"{event} " + fields)
+        return
+    logger.error(event, **kwargs)
+
+
+def _set_pptx_paragraph_rtl(paragraph) -> None:
+    """
+    Enable RTL mode on a PowerPoint paragraph.
+
+    Sets rtl="1" on the paragraph's <a:pPr> element and right-aligns the
+    paragraph, unless it already carries an explicit non-left alignment
+    (centered, justified, right, ...), which is preserved.
+
+    Args:
+        paragraph: python-pptx paragraph; its XML is modified in place.
+    """
+    # get_or_add_pPr() returns the existing <a:pPr> or creates it as the first
+    # child of <a:p>, so no hand-built lxml element is needed.
+    pPr = paragraph._p.get_or_add_pPr()
+    pPr.set("rtl", "1")
+
+    # Only start-aligned text is mirrored. Forcing "r" on every paragraph
+    # would flatten centered titles and justified body text.
+    if pPr.get("algn") in (None, "l"):
+        pPr.set("algn", "r")
+
+
+def _apply_rtl_to_presentation(presentation: Presentation) -> None:
+    """
+    Switch every top-level shape of every slide to RTL.
+
+    Nested content (groups, tables) is handled by _apply_rtl_to_shape. Only
+    ``slide.shapes`` is walked; notes slides are not visited here.
+    """
+    top_level_shapes = (
+        shape for slide in presentation.slides for shape in slide.shapes
+    )
+    for shape in top_level_shapes:
+        _apply_rtl_to_shape(shape)
+
+
+def _apply_rtl_to_shape(shape) -> None:
+    """
+    Recursively apply RTL to a shape (handles text frames, tables and groups).
+
+    Args:
+        shape: python-pptx shape; members of a group shape are visited
+            recursively.
+    """
+    if shape.has_text_frame:
+        for paragraph in shape.text_frame.paragraphs:
+            _set_pptx_paragraph_rtl(paragraph)
+
+    # shape_type raises NotImplementedError for shapes python-pptx cannot
+    # classify. Such a shape is neither a table nor a group, so it must not
+    # abort processing of the whole presentation.
+    try:
+        shape_type = shape.shape_type
+    except NotImplementedError:
+        shape_type = None
+
+    if shape_type == MSO_SHAPE_TYPE.TABLE:
+        for row in shape.table.rows:
+            for cell in row.cells:
+                for paragraph in cell.text_frame.paragraphs:
+                    _set_pptx_paragraph_rtl(paragraph)
+    elif shape_type == MSO_SHAPE_TYPE.GROUP:
+        for sub_shape in shape.shapes:
+            _apply_rtl_to_shape(sub_shape)
+
+
+class PptxProcessorError(Exception):
+    """
+    PowerPoint processing failure carrying a machine-readable error code.
+
+    Attributes:
+        code: Error code, normally one of the class-level constants.
+        message: Human-readable text; defaults to the ERROR_MESSAGES entry for
+            ``code``, or "Erreur inconnue" when the code is not listed.
+        details: Extra context for the error (empty dict when none is given).
+    """
+
+    INVALID_FORMAT = "INVALID_FORMAT"
+    PPTX_CORRUPTED = "PPTX_CORRUPTED"
+    PPTX_READ_ERROR = "PPTX_READ_ERROR"
+    PPTX_WRITE_ERROR = "PPTX_WRITE_ERROR"
+    PPTX_TOO_LARGE = "PPTX_TOO_LARGE"
+
+    # Default user-facing message for each error code.
+    ERROR_MESSAGES = {
+        INVALID_FORMAT: "Format de fichier non supporte. Utilisez .pptx.",
+        PPTX_CORRUPTED: "Le fichier PowerPoint est corrompu ou illisible.",
+        PPTX_READ_ERROR: "Erreur lors de la lecture du fichier PowerPoint.",
+        PPTX_WRITE_ERROR: "Erreur lors de la creation du fichier traduit.",
+        PPTX_TOO_LARGE: "Le fichier est trop volumineux (max 50 Mo).",
+    }
+
+    def __init__(
+        self,
+        code: str,
+        message: Optional[str] = None,
+        details: Optional[Dict[str, Any]] = None,
+    ):
+        # A missing or empty message falls back to the default for the code.
+        if not message:
+            message = self.ERROR_MESSAGES.get(code, "Erreur inconnue")
+        self.code = code
+        self.message = message
+        self.details = details if details else {}
+        super().__init__(self.message)
+
+    def to_dict(self) -> Dict[str, Any]:
+        """Return the error as a dict for API responses ("details" only when set)."""
+        payload: Dict[str, Any] = {"error": self.code, "message": self.message}
+        if not self.details:
+            return payload
+        payload["details"] = self.details
+        return payload
class PowerPointTranslator:
- """Handles translation of PowerPoint presentations with strict formatting preservation"""
-
- def __init__(self):
- self.translation_service = translation_service
-
- def translate_file(self, input_path: Path, output_path: Path, target_language: str) -> Path:
+ """
+ Handles translation of PowerPoint presentations with strict formatting preservation.
+
+ Uses the new TranslationProvider interface for improved error handling
+ and fallback chain support.
+ """
+
+ MAX_FILE_SIZE_MB = 50
+ PPTX_MAGIC_BYTES = b"PK" # .pptx files are ZIP archives
+
+    def __init__(self, provider: Optional[TranslationProvider] = None):
+        """
+        Create a translator, optionally bound to a translation provider.
+
+        Args:
+            provider: TranslationProvider used for batch translation. When it
+                is None, _batch_translate falls back to the legacy
+                translation_service.
+        """
+        self._custom_prompt: Optional[str] = None
+        self._provider = provider
+
+    def set_provider(self, provider: TranslationProvider) -> None:
+        """Replace the provider that _batch_translate will use from now on."""
+        self._provider = provider
+
+    def set_custom_prompt(self, prompt: Optional[str]) -> None:
+        """
+        Store a custom system prompt for LLM providers (None clears it).
+
+        NOTE(review): nothing in the visible translator code reads
+        _custom_prompt; confirm where it is consumed.
+        """
+        self._custom_prompt = prompt
+
+ def translate_file(
+ self,
+ input_path: Path,
+ output_path: Path,
+ target_language: str,
+ source_language: str = "auto",
+ progress_callback: Optional[Callable[[Dict[str, Any]], None]] = None,
+ ) -> Path:
"""
Translate a PowerPoint presentation while preserving all formatting.
Uses batch translation for improved performance.
+
+ Args:
+ input_path: Path to input PowerPoint file
+ output_path: Path for translated output file
+ target_language: Target language code (e.g., 'fr', 'en')
+ source_language: Source language code (default: auto-detect)
+ progress_callback: Optional callback for progress updates
+ Receives dict with: slide, total_slides, runs_translated
+
+ Returns:
+ Path to translated file
+
+ Raises:
+ PptxProcessorError: If file is invalid, corrupted, or processing fails
"""
- presentation = Presentation(input_path)
-
- # Collect all translatable text elements
- text_elements = [] # List of (text, setter_function)
- image_shapes = [] # Collect images for separate processing
-
- for slide_idx, slide in enumerate(presentation.slides):
- # Collect from notes
- if slide.has_notes_slide and slide.notes_slide.notes_text_frame:
- self._collect_from_text_frame(slide.notes_slide.notes_text_frame, text_elements)
-
- # Collect from shapes
- for shape in slide.shapes:
- self._collect_from_shape(shape, text_elements, slide, image_shapes)
-
- # Batch translate all texts at once
- if text_elements:
- texts = [elem[0] for elem in text_elements]
- print(f"Batch translating {len(texts)} text segments...")
- translated_texts = self.translation_service.translate_batch(texts, target_language)
-
- # Apply translations
- for (original_text, setter), translated in zip(text_elements, translated_texts):
- if translated is not None and setter is not None:
- try:
- setter(translated)
- except Exception as e:
- print(f"Error applying translation: {e}")
-
- # Translate images if enabled (separate process, can't batch)
- if getattr(self.translation_service, 'translate_images', False):
- for shape, slide in image_shapes:
- self._translate_image_shape(shape, target_language, slide)
-
- presentation.save(output_path)
-
- return output_path
-
- def _collect_from_shape(self, shape: BaseShape, text_elements: List[Tuple[str, callable]], slide=None, image_shapes=None):
- """Collect text from a shape and its children"""
- # Handle text-containing shapes
+ start_time = time.time()
+
+ input_path = Path(input_path)
+ output_path = Path(output_path)
+
+ self._validate_file(input_path)
+
+ try:
+ presentation = Presentation(input_path)
+ except Exception as e:
+ raise PptxProcessorError(
+ code=PptxProcessorError.PPTX_CORRUPTED,
+ details={"file_name": input_path.name, "error": str(e)},
+ )
+
+ try:
+ runs_translated = 0
+ total_slides = len(presentation.slides)
+
+ if progress_callback:
+ progress_callback(
+ {
+ "current": 0,
+ "total": total_slides,
+ "slide": 0,
+ "total_slides": total_slides,
+ "runs_translated": 0,
+ }
+ )
+
+ text_elements: List[Tuple[str, Callable[[str], None]]] = []
+
+ for slide_idx, slide in enumerate(presentation.slides):
+ if slide.has_notes_slide and slide.notes_slide.notes_text_frame:
+ self._collect_from_text_frame(
+ slide.notes_slide.notes_text_frame, text_elements
+ )
+
+ for shape in slide.shapes:
+ self._collect_from_shape(shape, text_elements)
+
+ if progress_callback:
+ progress_callback(
+ {
+ "current": slide_idx + 1,
+ "total": total_slides,
+ "slide": slide_idx + 1,
+ "total_slides": total_slides,
+ "runs_translated": runs_translated,
+ }
+ )
+
+ if text_elements:
+ texts = [elem[0] for elem in text_elements]
+ total_elements = len(texts)
+ _log_info(
+ "pptx_batch_translation_start",
+ file_name=input_path.name,
+ text_count=total_elements,
+ target_lang=target_language,
+ )
+
+ # Parallel chunk translation with real-time progress.
+ CHUNK_SIZE = 15
+ MAX_WORKERS = 6
+ chunks = [
+ (i, texts[i : i + CHUNK_SIZE])
+ for i in range(0, total_elements, CHUNK_SIZE)
+ ]
+ translated_texts: List[str] = [""] * total_elements
+ completed_items = [0]
+
+ def _translate_chunk(
+ chunk_idx: int, chunk: List[str]
+ ) -> Tuple[int, List[str]]:
+ return chunk_idx, self._batch_translate(
+ chunk, target_language, source_language
+ )
+
+ with concurrent.futures.ThreadPoolExecutor(max_workers=MAX_WORKERS) as pool:
+ future_map = {
+ pool.submit(_translate_chunk, idx, chunk): (idx, chunk)
+ for idx, chunk in chunks
+ }
+ for future in concurrent.futures.as_completed(future_map):
+ chunk_idx, translated_chunk = future.result()
+ for j, t in enumerate(translated_chunk):
+ translated_texts[chunk_idx + j] = t
+ completed_items[0] += len(translated_chunk)
+ if progress_callback:
+ done = min(completed_items[0], total_elements)
+ progress_callback(
+ {
+ "current": done,
+ "total": total_elements,
+ "slide": done,
+ "total_slides": total_elements,
+ "runs_translated": runs_translated,
+ }
+ )
+
+ # Apply translations
+ for i, ((original_text, setter), translated) in enumerate(
+ zip(text_elements, translated_texts)
+ ):
+ if translated is not None and setter is not None:
+ try:
+ setter(translated)
+ runs_translated += 1
+ except Exception as e:
+ _log_error(
+ "pptx_setter_error",
+ error=str(e),
+ index=i,
+ )
+
+ # Apply RTL layout when the target language is written right-to-left.
+ if target_language.lower() in RTL_LANGUAGES:
+ _apply_rtl_to_presentation(presentation)
+
+ try:
+ presentation.save(output_path)
+ except Exception as e:
+ raise PptxProcessorError(
+ code=PptxProcessorError.PPTX_WRITE_ERROR,
+ details={"file_name": output_path.name, "error": str(e)},
+ )
+
+ processing_time_ms = round((time.time() - start_time) * 1000, 2)
+
+ _log_info(
+ "pptx_translation_success",
+ file_name=input_path.name,
+ slides_count=total_slides,
+ runs_translated=runs_translated,
+ source_lang=source_language,
+ target_lang=target_language,
+ processing_time_ms=processing_time_ms,
+ )
+
+ return output_path
+
+ except PptxProcessorError:
+ raise
+ except Exception as e:
+ raise PptxProcessorError(
+ code=PptxProcessorError.PPTX_READ_ERROR,
+ details={"file_name": input_path.name, "error": str(e)},
+ )
+
+    def _validate_file(self, file_path: Path) -> None:
+        """
+        Check that ``file_path`` is a readable .pptx file of acceptable size.
+
+        Checks run in this order: the path is an existing regular file, the
+        extension is ``.pptx``, the file starts with the ZIP magic bytes, and
+        the size does not exceed MAX_FILE_SIZE_MB.
+
+        Args:
+            file_path: Path of the file to validate.
+
+        Raises:
+            PptxProcessorError: PPTX_READ_ERROR when the path is not an
+                existing regular file or cannot be read, INVALID_FORMAT for a
+                wrong extension or file header, PPTX_TOO_LARGE above the limit.
+        """
+        # is_file() also rejects directories, which exists() would let through
+        # only to fail later in open() with a raw OSError.
+        if not file_path.is_file():
+            raise PptxProcessorError(
+                code=PptxProcessorError.PPTX_READ_ERROR,
+                message=f"Fichier introuvable: {file_path.name}",
+                details={"file_name": file_path.name},
+            )
+
+        if file_path.suffix.lower() != ".pptx":
+            raise PptxProcessorError(
+                code=PptxProcessorError.INVALID_FORMAT,
+                details={
+                    "file_name": file_path.name,
+                    "extension": file_path.suffix,
+                    "expected": ".pptx",
+                },
+            )
+
+        try:
+            with open(file_path, "rb") as f:
+                header = f.read(len(self.PPTX_MAGIC_BYTES))
+            size_bytes = file_path.stat().st_size
+        except OSError as e:
+            # Permission problems, or the file vanishing mid-check, must
+            # surface as the structured error translate_file() documents.
+            raise PptxProcessorError(
+                code=PptxProcessorError.PPTX_READ_ERROR,
+                details={"file_name": file_path.name, "error": str(e)},
+            ) from e
+
+        if header != self.PPTX_MAGIC_BYTES:
+            raise PptxProcessorError(
+                code=PptxProcessorError.INVALID_FORMAT,
+                details={"file_name": file_path.name, "reason": "Invalid file header"},
+            )
+
+        file_size_mb = size_bytes / (1024 * 1024)
+        if file_size_mb > self.MAX_FILE_SIZE_MB:
+            raise PptxProcessorError(
+                code=PptxProcessorError.PPTX_TOO_LARGE,
+                details={
+                    "file_name": file_path.name,
+                    "size_mb": round(file_size_mb, 2),
+                    "max_mb": self.MAX_FILE_SIZE_MB,
+                },
+            )
+
+    def _batch_translate(
+        self, texts: List[str], target_language: str, source_language: str = "auto"
+    ) -> List[str]:
+        """
+        Translate a list of texts through the configured backend.
+
+        Args:
+            texts: List of texts to translate.
+            target_language: Target language code.
+            source_language: Source language code.
+
+        Returns:
+            The translated texts from the provider when one is set, otherwise
+            from the legacy service; an empty list for empty input.
+        """
+        if not texts:
+            return []
+
+        # Pick the backend once, then make a single call.
+        backend = (
+            self._translate_with_legacy
+            if self._provider is None
+            else self._translate_with_provider
+        )
+        return backend(texts, target_language, source_language)
+
+    def _translate_with_provider(
+        self, texts: List[str], target_language: str, source_language: str
+    ) -> List[str]:
+        """
+        Translate using the TranslationProvider.translate_batch() interface.
+
+        Args:
+            texts: Texts to translate.
+            target_language: Target language code.
+            source_language: Source language code.
+
+        Returns:
+            A list with exactly one entry per input text. An entry that the
+            provider left empty, whitespace-only, or did not return at all is
+            replaced by the original text.
+        """
+        translated = list(
+            self._provider.translate_batch(texts, target_language, source_language)
+            or []
+        )
+        # zip() alone would silently drop the tail of a short provider result.
+        # translate_file() pre-fills its result list with "", so the missing
+        # entries would overwrite those runs with empty text.
+        translated.extend([""] * (len(texts) - len(translated)))
+        return [
+            t if (t and t.strip()) else orig
+            for t, orig in zip(translated, texts)
+        ]
+
+    def _translate_with_legacy(
+        self, texts: List[str], target_language: str, source_language: str
+    ) -> List[str]:
+        """
+        Hand the batch to the legacy ``translation_service`` singleton.
+
+        Called by _batch_translate when no provider is configured.
+
+        Args:
+            texts: Texts to translate.
+            target_language: Target language code.
+            source_language: Source language code.
+
+        Returns:
+            The value returned by the legacy service's ``translate_batch``.
+        """
+        # Imported inside the method, so the legacy module is only loaded when
+        # this fallback path actually runs.
+        from services.translation_service import translation_service as legacy
+
+        log_fields = {"text_count": len(texts), "target_lang": target_language}
+        _log_info("pptx_using_legacy_service", **log_fields)
+
+        return legacy.translate_batch(texts, target_language, source_language)
+
+ def _collect_from_shape(
+ self, shape: BaseShape, text_elements: List[Tuple[str, Callable[[str], None]]]
+ ) -> None:
+ """Collect text from a shape and its children."""
if shape.has_text_frame:
self._collect_from_text_frame(shape.text_frame, text_elements)
-
- # Handle tables
+
if shape.shape_type == MSO_SHAPE_TYPE.TABLE:
for row in shape.table.rows:
for cell in row.cells:
self._collect_from_text_frame(cell.text_frame, text_elements)
-
- # Handle pictures/images
- if shape.shape_type == MSO_SHAPE_TYPE.PICTURE and image_shapes is not None:
- image_shapes.append((shape, slide))
-
- # Handle group shapes
+
if shape.shape_type == MSO_SHAPE_TYPE.GROUP:
for sub_shape in shape.shapes:
- self._collect_from_shape(sub_shape, text_elements, slide, image_shapes)
-
- # Handle smart art
- if hasattr(shape, 'shapes'):
+ self._collect_from_shape(sub_shape, text_elements)
+
+ if hasattr(shape, "shapes"):
try:
for sub_shape in shape.shapes:
- self._collect_from_shape(sub_shape, text_elements, slide, image_shapes)
- except:
+ self._collect_from_shape(sub_shape, text_elements)
+ except Exception:
pass
-
- def _collect_from_text_frame(self, text_frame, text_elements: List[Tuple[str, callable]]):
- """Collect text from a text frame"""
+
+ def _collect_from_text_frame(
+ self, text_frame, text_elements: List[Tuple[str, Callable[[str], None]]]
+ ) -> None:
+ """Collect text from a text frame, preserving leading/trailing whitespace."""
if not text_frame.text.strip():
return
-
+
for paragraph in text_frame.paragraphs:
if not paragraph.text.strip():
continue
-
+
for run in paragraph.runs:
if run.text and run.text.strip():
- def make_setter(r):
- def setter(text):
- r.text = text
+ original = run.text
+ leading = original[: len(original) - len(original.lstrip())]
+ trailing = original[len(original.rstrip()) :]
+ stripped = original.strip()
+
+ def make_setter(r, lead: str, trail: str):
+ def setter(text: str) -> None:
+ r.text = lead + text.strip() + trail
+
return setter
- text_elements.append((run.text, make_setter(run)))
-
- def _translate_image_shape(self, shape, target_language: str, slide):
- """Translate text in an image using vision model"""
- from services.translation_service import OllamaTranslationProvider
-
- if not isinstance(self.translation_service.provider, OllamaTranslationProvider):
- return
-
- try:
- image_blob = shape.image.blob
- ext = shape.image.ext
-
- with tempfile.NamedTemporaryFile(suffix=f'.{ext}', delete=False) as tmp:
- tmp.write(image_blob)
- tmp_path = tmp.name
-
- translated_text = self.translation_service.provider.translate_image(tmp_path, target_language)
- os.unlink(tmp_path)
-
- if translated_text and translated_text.strip():
- left = shape.left
- top = shape.top + shape.height + Inches(0.1)
- width = shape.width
- height = Inches(0.5)
-
- textbox = slide.shapes.add_textbox(left, top, width, height)
- tf = textbox.text_frame
- p = tf.paragraphs[0]
- p.text = f"[{translated_text}]"
- p.font.size = Pt(10)
- p.font.italic = True
-
- print(f"Added image translation: {translated_text[:50]}...")
-
- except Exception as e:
- print(f"Error translating image: {e}")
+
+ text_elements.append((stripped, make_setter(run, leading, trailing)))
-# Global translator instance
pptx_translator = PowerPointTranslator()
diff --git a/translators/word_translator.py b/translators/word_translator.py
index fbf35da..2c0fb5d 100644
--- a/translators/word_translator.py
+++ b/translators/word_translator.py
@@ -2,70 +2,456 @@
Word Document Translation Module
Translates Word files while preserving all formatting, styles, tables, and images
OPTIMIZED: Uses batch translation for 5-10x faster processing
+
+Updated to use new TranslationProvider interface with structured error handling.
"""
+
+import time
+import concurrent.futures
from pathlib import Path
+from typing import Dict, List, Tuple, Optional, Callable, Any
+
from docx import Document
from docx.text.paragraph import Paragraph
from docx.table import Table, _Cell
from docx.oxml.text.paragraph import CT_P
from docx.oxml.table import CT_Tbl
-from docx.section import Section
-from docx.shared import Inches, Pt
+from docx.oxml import OxmlElement
from docx.oxml.ns import qn
-from services.translation_service import translation_service
-from typing import List, Tuple, Any
-import tempfile
-import os
+from docx.section import Section
+
+from services.providers.base import TranslationProvider
+
+# Languages written right-to-left
+# (lower-case codes: translate_file() lower-cases target_language before the lookup)
+RTL_LANGUAGES: frozenset = frozenset(
+ {"ar", "he", "fa", "ur", "ku", "ps", "ug", "sd", "yi", "dv", "ckb"}
+)
+
+
+# Prefer structlog when it is installed and fall back to the standard logging
+# module otherwise. _HAS_STRUCTLOG tells _log_info/_log_error which call style
+# the selected logger expects.
+try:
+ import structlog
+
+ logger = structlog.get_logger(__name__)
+ _HAS_STRUCTLOG = True
+except ImportError:
+ import logging
+
+ logger = logging.getLogger(__name__)
+ _HAS_STRUCTLOG = False
+
+
+def _log_info(event: str, **kwargs):
+    """
+    Emit an info-level event through whichever logger was selected at import.
+
+    With structlog the fields are forwarded as structured key/values; with the
+    standard library logger they are flattened into ``event k=v k=v``.
+    """
+    if not _HAS_STRUCTLOG:
+        fields = " ".join(f"{key}={value}" for key, value in kwargs.items())
+        logger.info(f"{event} " + fields)
+        return
+    logger.info(event, **kwargs)
+
+
+def _log_error(event: str, **kwargs):
+    """
+    Emit an error-level event through whichever logger was selected at import.
+
+    With structlog the fields are forwarded as structured key/values; with the
+    standard library logger they are flattened into ``event k=v k=v``.
+    """
+    if not _HAS_STRUCTLOG:
+        fields = " ".join(f"{key}={value}" for key, value in kwargs.items())
+        logger.error(f"{event} " + fields)
+        return
+    logger.error(event, **kwargs)
+
+
+def _set_paragraph_rtl(paragraph: Paragraph) -> None:
+    """
+    Enable RTL mode on a paragraph and all its runs.
+
+    Sets:
+      - w:pPr/w:bidi → paragraph text direction = RTL
+      - w:rPr/w:rtl  → run-level RTL marker for each run
+
+    Both elements are inserted at their schema position rather than appended.
+    Paragraph alignment (w:jc) is deliberately left untouched, so centered or
+    justified paragraphs keep their formatting.
+    NOTE(review): Word reportedly interprets jc left/right as leading/trailing
+    edge in bidi paragraphs, so forcing "right" would render left-aligned;
+    confirm against the [MS-OI29500] notes on ST_Jc.
+
+    Args:
+        paragraph: python-docx paragraph; its XML is modified in place.
+    """
+    # Children that must come after w:bidi inside w:pPr (CT_PPr sequence).
+    # Out-of-order property elements make the document schema-invalid.
+    bidi_successors = (
+        "w:adjustRightInd",
+        "w:snapToGrid",
+        "w:spacing",
+        "w:ind",
+        "w:contextualSpacing",
+        "w:mirrorIndents",
+        "w:suppressOverlap",
+        "w:jc",
+        "w:textDirection",
+        "w:textAlignment",
+        "w:textboxTightWrap",
+        "w:outlineLvl",
+        "w:divId",
+        "w:cnfStyle",
+        "w:rPr",
+        "w:sectPr",
+        "w:pPrChange",
+    )
+    # Children that must come after w:rtl inside w:rPr (CT_RPr sequence).
+    rtl_successors = (
+        "w:cs",
+        "w:em",
+        "w:lang",
+        "w:eastAsianLayout",
+        "w:specVanish",
+        "w:oMath",
+        "w:rPrChange",
+    )
+
+    pPr = paragraph._p.get_or_add_pPr()
+    if pPr.find(qn("w:bidi")) is None:
+        pPr.insert_element_before(OxmlElement("w:bidi"), *bidi_successors)
+
+    for run in paragraph.runs:
+        rPr = run._r.get_or_add_rPr()
+        if rPr.find(qn("w:rtl")) is None:
+            rPr.insert_element_before(OxmlElement("w:rtl"), *rtl_successors)
+
+
+def _apply_rtl_to_document(document: Document) -> None:
+    """
+    Apply RTL direction to every paragraph and section in the document.
+
+    Covers body paragraphs, body tables (including tables nested in cells),
+    the page direction of each section, and every header/footer that has its
+    own definition.
+
+    Args:
+        document: python-docx document; its XML is modified in place.
+    """
+
+    def _rtl_container(container) -> None:
+        """Apply RTL to a container's paragraphs and, recursively, its tables."""
+        for para in container.paragraphs:
+            _set_paragraph_rtl(para)
+        for table in container.tables:
+            for row in table.rows:
+                for cell in row.cells:
+                    _rtl_container(cell)
+
+    # Body paragraphs, then body tables.
+    _rtl_container(document)
+
+    # Children that must come after w:bidi inside w:sectPr (CT_SectPr sequence).
+    sect_bidi_successors = (
+        "w:rtlGutter",
+        "w:docGrid",
+        "w:printerSettings",
+        "w:sectPrChange",
+    )
+
+    for section in document.sections:
+        # Set the section (page) direction to RTL so Word renders margins,
+        # columns and page numbering from right to left.
+        sectPr = section._sectPr
+        if sectPr.find(qn("w:bidi")) is None:
+            sectPr.insert_element_before(
+                OxmlElement("w:bidi"), *sect_bidi_successors
+            )
+
+        for hf in (section.header, section.footer):
+            # A linked header/footer has no content of its own: the inherited
+            # definition is handled with the section that owns it, and reading
+            # .paragraphs here would make python-docx create an empty one.
+            if hf.is_linked_to_previous:
+                continue
+            _rtl_container(hf)
+
+
+class WordProcessorError(Exception):
+    """
+    Word processing failure carrying a machine-readable error code.
+
+    Attributes:
+        code: Error code, normally one of the class-level constants.
+        message: Human-readable text; defaults to the ERROR_MESSAGES entry for
+            ``code``, or "Erreur inconnue" when the code is not listed.
+        details: Extra context for the error (empty dict when none is given).
+    """
+
+    INVALID_FORMAT = "INVALID_FORMAT"
+    DOCX_CORRUPTED = "DOCX_CORRUPTED"
+    DOCX_READ_ERROR = "DOCX_READ_ERROR"
+    DOCX_WRITE_ERROR = "DOCX_WRITE_ERROR"
+    DOCX_TOO_LARGE = "DOCX_TOO_LARGE"
+
+    # Default user-facing message for each error code.
+    ERROR_MESSAGES = {
+        INVALID_FORMAT: "Format de fichier non supporte. Utilisez .docx.",
+        DOCX_CORRUPTED: "Le document Word est corrompu ou illisible.",
+        DOCX_READ_ERROR: "Erreur lors de la lecture du document Word.",
+        DOCX_WRITE_ERROR: "Erreur lors de la creation du document traduit.",
+        DOCX_TOO_LARGE: "Le fichier est trop volumineux (max 50 Mo).",
+    }
+
+    def __init__(
+        self,
+        code: str,
+        message: Optional[str] = None,
+        details: Optional[Dict[str, Any]] = None,
+    ):
+        # A missing or empty message falls back to the default for the code.
+        if not message:
+            message = self.ERROR_MESSAGES.get(code, "Erreur inconnue")
+        self.code = code
+        self.message = message
+        self.details = details if details else {}
+        super().__init__(self.message)
+
+    def to_dict(self) -> Dict[str, Any]:
+        """Return the error as a dict for API responses ("details" only when set)."""
+        payload: Dict[str, Any] = {"error": self.code, "message": self.message}
+        if not self.details:
+            return payload
+        payload["details"] = self.details
+        return payload
class WordTranslator:
- """Handles translation of Word documents with strict formatting preservation"""
-
- def __init__(self):
- self.translation_service = translation_service
-
- def translate_file(self, input_path: Path, output_path: Path, target_language: str) -> Path:
+ """
+ Handles translation of Word documents with strict formatting preservation.
+
+ Uses the new TranslationProvider interface for improved error handling
+ and fallback chain support.
+ """
+
+ MAX_FILE_SIZE_MB = 50
+ DOCX_MAGIC_BYTES = b"PK" # .docx files are ZIP archives
+
+    def __init__(self, provider: Optional[TranslationProvider] = None):
+        """
+        Create a translator, optionally bound to a translation provider.
+
+        Args:
+            provider: TranslationProvider used for batch translation. When it
+                is None, _batch_translate falls back to the legacy
+                translation_service.
+        """
+        self._custom_prompt: Optional[str] = None
+        self._provider = provider
+
+    def set_provider(self, provider: TranslationProvider) -> None:
+        """Replace the provider that _batch_translate will use from now on."""
+        self._provider = provider
+
+    def set_custom_prompt(self, prompt: Optional[str]) -> None:
+        """
+        Store a custom system prompt for LLM providers (None clears it).
+
+        NOTE(review): nothing in the visible translator code reads
+        _custom_prompt; confirm where it is consumed.
+        """
+        self._custom_prompt = prompt
+
+ def translate_file(
+ self,
+ input_path: Path,
+ output_path: Path,
+ target_language: str,
+ source_language: str = "auto",
+ progress_callback: Optional[Callable[[Dict[str, Any]], None]] = None,
+ ) -> Path:
"""
Translate a Word document while preserving all formatting and structure.
Uses batch translation for improved performance.
+
+ Args:
+ input_path: Path to input Word file
+ output_path: Path for translated output file
+ target_language: Target language code (e.g., 'fr', 'en')
+ source_language: Source language code (default: auto-detect)
+ progress_callback: Optional callback for progress updates
+ Receives dict with: element, total_elements, runs_translated
+
+ Returns:
+ Path to translated file
+
+ Raises:
+ WordProcessorError: If file is invalid, corrupted, or processing fails
"""
- document = Document(input_path)
-
- # Collect all translatable text elements
- text_elements = []
-
- # Collect from document body
- self._collect_from_body(document, text_elements)
-
- # Collect from headers and footers
- for section in document.sections:
- self._collect_from_section(section, text_elements)
-
- # Batch translate all texts at once
- if text_elements:
- texts = [elem[0] for elem in text_elements]
- print(f"Batch translating {len(texts)} text segments...")
- translated_texts = self.translation_service.translate_batch(texts, target_language)
-
- # Apply translations
- for (original_text, setter), translated in zip(text_elements, translated_texts):
- if translated is not None and translated != original_text:
- try:
- setter(translated)
- except Exception as e:
- print(f"Error applying translation: {e}")
-
- # Translate images if enabled (separate process)
- if getattr(self.translation_service, 'translate_images', False):
- self._translate_images(document, target_language, input_path)
-
- # Save the translated document
- document.save(output_path)
-
- return output_path
-
- def _collect_from_body(self, document: Document, text_elements: List[Tuple[str, callable]]):
- """Collect all text elements from document body"""
+ start_time = time.time()
+
+ input_path = Path(input_path)
+ output_path = Path(output_path)
+
+ self._validate_file(input_path)
+
+ try:
+ document = Document(input_path)
+ except Exception as e:
+ raise WordProcessorError(
+ code=WordProcessorError.DOCX_CORRUPTED,
+ details={"file_name": input_path.name, "error": str(e)},
+ )
+
+ try:
+ runs_translated = 0
+
+ text_elements: List[Tuple[str, Callable[[str], None]]] = []
+
+ self._collect_from_body(document, text_elements)
+
+ total_sections = len(document.sections)
+ total_elements = 0
+ for section_idx, section in enumerate(document.sections):
+ self._collect_from_section(section, text_elements)
+ total_elements = len(text_elements)
+
+ if progress_callback:
+ progress_callback(
+ {
+ "current": section_idx + 1,
+ "total": total_sections,
+ "paragraph": section_idx + 1,
+ "total_paragraphs": total_sections,
+ "runs_translated": runs_translated,
+ "phase": "collecting",
+ }
+ )
+
+ if text_elements:
+ texts = [elem[0] for elem in text_elements]
+ total_elements = len(text_elements)
+ _log_info(
+ "word_batch_translation_start",
+ file_name=input_path.name,
+ text_count=len(texts),
+ target_lang=target_language,
+ )
+
+ # Split into chunks and translate them IN PARALLEL using a thread
+ # pool. Each worker handles one chunk independently, making
+ # full use of available CPU/network concurrency. Progress is
+ # reported as chunks complete (out-of-order completions are
+ # fine — the tracker only moves forward).
+ CHUNK_SIZE = 15
+ MAX_WORKERS = 6
+ chunks = [
+ (i, texts[i : i + CHUNK_SIZE])
+ for i in range(0, total_elements, CHUNK_SIZE)
+ ]
+ translated_texts: List[str] = [""] * total_elements
+ completed_items = [0] # mutable counter shared across threads
+
+ def _translate_chunk(
+ chunk_idx: int, chunk: List[str]
+ ) -> Tuple[int, List[str]]:
+ result = self._batch_translate(chunk, target_language, source_language)
+ return chunk_idx, result
+
+ with concurrent.futures.ThreadPoolExecutor(max_workers=MAX_WORKERS) as pool:
+ future_map = {
+ pool.submit(_translate_chunk, idx, chunk): (idx, chunk)
+ for idx, chunk in chunks
+ }
+ for future in concurrent.futures.as_completed(future_map):
+ chunk_idx, translated_chunk = future.result()
+ start = chunk_idx
+ for j, t in enumerate(translated_chunk):
+ translated_texts[start + j] = t
+ completed_items[0] += len(translated_chunk)
+ if progress_callback:
+ done = min(completed_items[0], total_elements)
+ progress_callback(
+ {
+ "current": done,
+ "total": total_elements,
+ "paragraph": done,
+ "total_paragraphs": total_elements,
+ "runs_translated": runs_translated,
+ "phase": "translating",
+ }
+ )
+
+ # Apply translations (fast — just text assignment)
+ for i, ((original_text, setter), translated) in enumerate(
+ zip(text_elements, translated_texts)
+ ):
+ if translated is not None and setter is not None:
+ try:
+ setter(translated)
+ runs_translated += 1
+ except Exception as e:
+ _log_error(
+ "word_setter_error",
+ error=str(e),
+ index=i,
+ )
+
+ # Apply RTL layout when the target language is written right-to-left.
+ if target_language.lower() in RTL_LANGUAGES:
+ _apply_rtl_to_document(document)
+
+ if progress_callback:
+ progress_callback(
+ {
+ "current": total_elements if text_elements else total_sections,
+ "total": total_elements if text_elements else total_sections,
+ "paragraph": total_sections,
+ "total_paragraphs": total_sections,
+ "runs_translated": runs_translated,
+ "phase": "complete",
+ }
+ )
+
+ try:
+ document.save(output_path)
+ except Exception as e:
+ raise WordProcessorError(
+ code=WordProcessorError.DOCX_WRITE_ERROR,
+ details={"file_name": output_path.name, "error": str(e)},
+ )
+
+ processing_time_ms = round((time.time() - start_time) * 1000, 2)
+
+ _log_info(
+ "word_translation_success",
+ file_name=input_path.name,
+ runs_translated=runs_translated,
+ source_lang=source_language,
+ target_lang=target_language,
+ processing_time_ms=processing_time_ms,
+ )
+
+ return output_path
+
+ except WordProcessorError:
+ raise
+ except Exception as e:
+ raise WordProcessorError(
+ code=WordProcessorError.DOCX_READ_ERROR,
+ details={"file_name": input_path.name, "error": str(e)},
+ )
+
+    def _validate_file(self, file_path: Path) -> None:
+        """
+        Check that ``file_path`` is a readable .docx file of acceptable size.
+
+        Checks run in this order: the path is an existing regular file, the
+        extension is ``.docx``, the file starts with the ZIP magic bytes, and
+        the size does not exceed MAX_FILE_SIZE_MB.
+
+        Args:
+            file_path: Path of the file to validate.
+
+        Raises:
+            WordProcessorError: DOCX_READ_ERROR when the path is not an
+                existing regular file or cannot be read, INVALID_FORMAT for a
+                wrong extension or file header, DOCX_TOO_LARGE above the limit.
+        """
+        # is_file() also rejects directories, which exists() would let through
+        # only to fail later in open() with a raw OSError.
+        if not file_path.is_file():
+            raise WordProcessorError(
+                code=WordProcessorError.DOCX_READ_ERROR,
+                message=f"Fichier introuvable: {file_path.name}",
+                details={"file_name": file_path.name},
+            )
+
+        if file_path.suffix.lower() != ".docx":
+            raise WordProcessorError(
+                code=WordProcessorError.INVALID_FORMAT,
+                details={
+                    "file_name": file_path.name,
+                    "extension": file_path.suffix,
+                    "expected": ".docx",
+                },
+            )
+
+        try:
+            with open(file_path, "rb") as f:
+                header = f.read(len(self.DOCX_MAGIC_BYTES))
+            size_bytes = file_path.stat().st_size
+        except OSError as e:
+            # Permission problems, or the file vanishing mid-check, must
+            # surface as the structured error translate_file() documents.
+            raise WordProcessorError(
+                code=WordProcessorError.DOCX_READ_ERROR,
+                details={"file_name": file_path.name, "error": str(e)},
+            ) from e
+
+        if header != self.DOCX_MAGIC_BYTES:
+            raise WordProcessorError(
+                code=WordProcessorError.INVALID_FORMAT,
+                details={"file_name": file_path.name, "reason": "Invalid file header"},
+            )
+
+        file_size_mb = size_bytes / (1024 * 1024)
+        if file_size_mb > self.MAX_FILE_SIZE_MB:
+            raise WordProcessorError(
+                code=WordProcessorError.DOCX_TOO_LARGE,
+                details={
+                    "file_name": file_path.name,
+                    "size_mb": round(file_size_mb, 2),
+                    "max_mb": self.MAX_FILE_SIZE_MB,
+                },
+            )
+
+    def _batch_translate(
+        self, texts: List[str], target_language: str, source_language: str = "auto"
+    ) -> List[str]:
+        """
+        Translate a list of texts through the configured backend.
+
+        Args:
+            texts: List of texts to translate.
+            target_language: Target language code.
+            source_language: Source language code.
+
+        Returns:
+            The translated texts from the provider when one is set, otherwise
+            from the legacy service; an empty list for empty input.
+        """
+        if not texts:
+            return []
+
+        # Pick the backend once, then make a single call.
+        backend = (
+            self._translate_with_legacy
+            if self._provider is None
+            else self._translate_with_provider
+        )
+        return backend(texts, target_language, source_language)
+
+    def _translate_with_provider(
+        self, texts: List[str], target_language: str, source_language: str
+    ) -> List[str]:
+        """
+        Translate using the TranslationProvider.translate_batch() interface.
+
+        Args:
+            texts: Texts to translate.
+            target_language: Target language code.
+            source_language: Source language code.
+
+        Returns:
+            A list with exactly one entry per input text. An entry that the
+            provider left empty, whitespace-only, or did not return at all is
+            replaced by the original text.
+        """
+        translated = list(
+            self._provider.translate_batch(texts, target_language, source_language)
+            or []
+        )
+        # zip() alone would silently drop the tail of a short provider result.
+        # translate_file() pre-fills its result list with "", so the missing
+        # entries would overwrite those runs with empty text.
+        translated.extend([""] * (len(texts) - len(translated)))
+        # Fallback: keep original text for any empty/failed result
+        return [
+            t if (t and t.strip()) else orig
+            for t, orig in zip(translated, texts)
+        ]
+
+    def _translate_with_legacy(
+        self, texts: List[str], target_language: str, source_language: str
+    ) -> List[str]:
+        """
+        Hand the batch to the legacy ``translation_service`` singleton.
+
+        Called by _batch_translate when no provider is configured.
+
+        Args:
+            texts: Texts to translate.
+            target_language: Target language code.
+            source_language: Source language code.
+
+        Returns:
+            The value returned by the legacy service's ``translate_batch``.
+        """
+        # Imported inside the method, so the legacy module is only loaded when
+        # this fallback path actually runs.
+        from services.translation_service import translation_service as legacy
+
+        log_fields = {"text_count": len(texts), "target_lang": target_language}
+        _log_info("word_using_legacy_service", **log_fields)
+
+        return legacy.translate_batch(texts, target_language, source_language)
+
+ def _collect_from_body(
+ self, document: Document, text_elements: List[Tuple[str, Callable[[str], None]]]
+ ) -> None:
+ """Collect all text elements from document body."""
for element in document.element.body:
if isinstance(element, CT_P):
paragraph = Paragraph(element, document)
@@ -73,84 +459,69 @@ class WordTranslator:
elif isinstance(element, CT_Tbl):
table = Table(element, document)
self._collect_from_table(table, text_elements)
-
- def _collect_from_paragraph(self, paragraph: Paragraph, text_elements: List[Tuple[str, callable]]):
- """Collect text from paragraph runs"""
+
+ def _collect_from_paragraph(
+ self,
+ paragraph: Paragraph,
+ text_elements: List[Tuple[str, Callable[[str], None]]],
+ ) -> None:
+ """Collect text from paragraph runs, preserving inter-run whitespace.
+
+ Each run is sent for translation WITHOUT its surrounding whitespace.
+ The whitespace is captured and reapplied after translation so that words
+ at formatting boundaries (e.g. bold/normal) do not get concatenated.
+
+ Args:
+ paragraph: Paragraph whose runs are scanned. Paragraphs whose text is
+ empty or whitespace-only are skipped entirely.
+ text_elements: Output list. For every run with non-blank text, one
+ (stripped_text, setter) tuple is appended; calling the setter
+ writes the given text back into that run.
+ """
if not paragraph.text.strip():
return
-
+
for run in paragraph.runs:
if run.text and run.text.strip():
- # Create a setter function for this run
- def make_setter(r):
- def setter(text):
- r.text = text
+ original = run.text
+ # Capture leading/trailing whitespace that must survive translation.
+ leading = original[: len(original) - len(original.lstrip())]
+ trailing = original[len(original.rstrip()) :]
+ stripped = original.strip()
+
+ # Factory function: the run and its whitespace are passed in as
+ # arguments so each setter keeps the values of its own iteration
+ # instead of sharing the loop variables.
+ def make_setter(r, lead: str, trail: str):
+ def setter(text: str) -> None:
+ # Strip any whitespace the translator may have added/removed
+ # and reapply the original boundary whitespace.
+ r.text = lead + text.strip() + trail
+
return setter
- text_elements.append((run.text, make_setter(run)))
-
- def _collect_from_table(self, table: Table, text_elements: List[Tuple[str, callable]]):
- """Collect text from table cells"""
+
+ text_elements.append((stripped, make_setter(run, leading, trailing)))
+
+ def _collect_from_table(
+ self, table: Table, text_elements: List[Tuple[str, Callable[[str], None]]]
+ ) -> None:
+ """Collect text from table cells.
+
+ Visits every cell of every row, collects each paragraph of the cell via
+ ``_collect_from_paragraph`` and recurses into tables nested in the cell.
+
+ Args:
+ table: Table to walk.
+ text_elements: Output list that the collected entries are appended to.
+ """
for row in table.rows:
for cell in row.cells:
for paragraph in cell.paragraphs:
self._collect_from_paragraph(paragraph, text_elements)
- # Handle nested tables
+ # Recurse into tables nested inside this cell.
for nested_table in cell.tables:
self._collect_from_table(nested_table, text_elements)
-
- def _collect_from_section(self, section: Section, text_elements: List[Tuple[str, callable]]):
- """Collect text from headers and footers"""
+
+ def _collect_from_section(
+ self, section: Section, text_elements: List[Tuple[str, Callable[[str], None]]]
+ ) -> None:
+ """Collect text from headers and footers.
+
+ Covers the default, first-page and even-page header and footer of the
+ section; paragraphs and tables of each are collected in turn.
+
+ Args:
+ section: Section whose headers and footers are scanned.
+ text_elements: Output list that the collected entries are appended to.
+ """
headers_footers = [
- section.header, section.footer,
- section.first_page_header, section.first_page_footer,
- section.even_page_header, section.even_page_footer
+ section.header,
+ section.footer,
+ section.first_page_header,
+ section.first_page_footer,
+ section.even_page_header,
+ section.even_page_footer,
]
-
+
for hf in headers_footers:
+ # Skip any header/footer object that evaluates as falsy.
if hf:
for paragraph in hf.paragraphs:
self._collect_from_paragraph(paragraph, text_elements)
for table in hf.tables:
self._collect_from_table(table, text_elements)
-
- def _translate_images(self, document: Document, target_language: str, input_path: Path):
- """Extract text from images and add translations as captions"""
- from services.translation_service import OllamaTranslationProvider
-
- if not isinstance(self.translation_service.provider, OllamaTranslationProvider):
- return
-
- try:
- import zipfile
- import base64
-
- with zipfile.ZipFile(input_path, 'r') as zip_ref:
- image_files = [f for f in zip_ref.namelist() if f.startswith('word/media/')]
-
- for idx, image_file in enumerate(image_files):
- try:
- image_data = zip_ref.read(image_file)
- ext = os.path.splitext(image_file)[1]
-
- with tempfile.NamedTemporaryFile(suffix=ext, delete=False) as tmp:
- tmp.write(image_data)
- tmp_path = tmp.name
-
- translated_text = self.translation_service.provider.translate_image(tmp_path, target_language)
- os.unlink(tmp_path)
-
- if translated_text and translated_text.strip():
- p = document.add_paragraph()
- p.add_run(f"[Image {idx + 1} translation: ").bold = True
- p.add_run(translated_text)
- p.add_run("]").bold = True
- print(f"Translated image {idx + 1}: {translated_text[:50]}...")
- except Exception as e:
- print(f"Error translating image {image_file}: {e}")
-
- except Exception as e:
- print(f"Error processing images: {e}")
-# Global translator instance
word_translator = WordTranslator()
diff --git a/utils/__init__.py b/utils/__init__.py
index a950000..567cd25 100644
--- a/utils/__init__.py
+++ b/utils/__init__.py
@@ -2,6 +2,7 @@
from .file_handler import FileHandler, file_handler
from .exceptions import (
TranslationError,
+ TranslationProviderError,
UnsupportedFileTypeError,
FileSizeLimitExceededError,
LanguageNotSupportedError,
@@ -12,6 +13,7 @@ from .exceptions import (
__all__ = [
'FileHandler', 'file_handler',
'TranslationError',
+ 'TranslationProviderError',
'UnsupportedFileTypeError',
'FileSizeLimitExceededError',
'LanguageNotSupportedError',
diff --git a/utils/exceptions.py b/utils/exceptions.py
index 237340e..6bad2f8 100644
--- a/utils/exceptions.py
+++ b/utils/exceptions.py
@@ -1,51 +1,141 @@
"""
Custom exceptions for the Document Translation API
"""
-from fastapi import HTTPException
+
+from typing import Any, Dict, Optional
class TranslationError(Exception):
- """Base exception for translation errors"""
- pass
+ """
+ Base exception for translation errors.
+ Includes an error code and optional details for structured JSON responses.
+
+ Attributes:
+ message: Human-readable error message (also passed to Exception).
+ code: Error code string; defaults to "TRANSLATION_ERROR".
+ details: Extra context as a dict; an empty dict when none is given.
+ """
+
+ def __init__(
+ self,
+ message: str,
+ code: str = "TRANSLATION_ERROR",
+ details: Optional[Dict[str, Any]] = None,
+ ):
+ self.message = message
+ self.code = code
+ # A missing (or empty) details argument is stored as a fresh empty dict.
+ self.details = details or {}
+ super().__init__(message)
class UnsupportedFileTypeError(TranslationError):
"""Raised when an unsupported file type is provided"""
- pass
+
+ # The error code is fixed to "INVALID_FORMAT"; callers can override only
+ # the message (default is a French user-facing text) and the details.
+ def __init__(
+ self,
+ message: str = "Format de fichier non supporté.",
+ details: Optional[Dict[str, Any]] = None,
+ ):
+ super().__init__(message, code="INVALID_FORMAT", details=details)
class FileSizeLimitExceededError(TranslationError):
"""Raised when a file exceeds the size limit"""
- pass
+
+ # The error code is fixed to "FILE_TOO_LARGE"; callers can override only
+ # the message (default is a French user-facing text) and the details.
+ def __init__(
+ self,
+ message: str = "Fichier trop volumineux.",
+ details: Optional[Dict[str, Any]] = None,
+ ):
+ super().__init__(message, code="FILE_TOO_LARGE", details=details)
class LanguageNotSupportedError(TranslationError):
"""Raised when a language code is not supported"""
- pass
+
+ # NOTE(review): this reuses the "INVALID_FORMAT" code that
+ # UnsupportedFileTypeError also reports, so the two errors cannot be told
+ # apart by code alone - confirm this is intended.
+ def __init__(
+ self,
+ message: str = "Langue non supportée.",
+ details: Optional[Dict[str, Any]] = None,
+ ):
+ super().__init__(message, code="INVALID_FORMAT", details=details)
class DocumentProcessingError(TranslationError):
"""Raised when there's an error processing the document"""
- pass
+
+ # The error code is fixed to "INTERNAL_ERROR"; callers can override only
+ # the message (default is a French user-facing text) and the details.
+ def __init__(
+ self,
+ message: str = "Erreur lors du traitement du document.",
+ details: Optional[Dict[str, Any]] = None,
+ ):
+ super().__init__(message, code="INTERNAL_ERROR", details=details)
-def handle_translation_error(error: Exception) -> HTTPException:
- """
- Convert translation errors to HTTP exceptions
+class TranslationProviderError(TranslationError):
+    """Translation failure reported by a provider under its own error code.
+
+    Unlike the other TranslationError subclasses, the code is supplied by the
+    caller (first positional argument) instead of being fixed by the class.
+    """
+
+    def __init__(
+        self,
+        error_code: str,
+        message: str,
+        details: Optional[Dict[str, Any]] = None,
+    ):
+        # Forward to the base class, mapping error_code onto its `code` slot.
+        super().__init__(message=message, code=error_code, details=details)
+
+
+class GlossaryNotFoundError(TranslationError):
+    """Glossary lookup failed: it does not exist or is not owned by the user.
+
+    Story 3.10: Glossaries - applied during LLM translation.
+    Always carries the code "GLOSSARY_NOT_FOUND".
+    """
+
+    def __init__(
+        self,
+        message: str = "Glossaire introuvable ou vous n'avez pas accès à cette ressource.",
+        details: Optional[Dict[str, Any]] = None,
+    ):
+        super().__init__(message=message, code="GLOSSARY_NOT_FOUND", details=details)
+
+
+class PromptNotFoundError(TranslationError):
+    """Prompt lookup failed: it does not exist or is not owned by the user.
+
+    Story 3.12: Custom Prompts - applied during LLM translation.
+    Always carries the code "PROMPT_NOT_FOUND".
+    """
+
+    def __init__(
+        self,
+        message: str = "Prompt introuvable ou vous n'avez pas accès à cette ressource.",
+        details: Optional[Dict[str, Any]] = None,
+    ):
+        super().__init__(message=message, code="PROMPT_NOT_FOUND", details=details)
+
+
+# Map error codes to HTTP status.
+# Provider codes (Story 2.2, 2.3, 2.6) come first. The second group covers the
+# codes raised by the TranslationError subclasses in this module: without an
+# entry here they would all be reported with handle_translation_error's
+# default of 400, losing the 413 (file too large) and 500 (processing failure)
+# statuses and reporting missing resources as a bad request.
+_PROVIDER_ERROR_HTTP_STATUS = {
+    # Translation provider errors
+    "GOOGLE_QUOTA_EXCEEDED": 429,
+    "GOOGLE_INVALID_KEY": 401,
+    "GOOGLE_NETWORK_ERROR": 502,
+    "GOOGLE_UNSUPPORTED_LANGUAGE": 400,
+    "GOOGLE_TEXT_TOO_LONG": 413,
+    "DEEPL_QUOTA_EXCEEDED": 429,
+    "DEEPL_INVALID_KEY": 401,
+    "DEEPL_NETWORK_ERROR": 502,
+    "DEEPL_UNSUPPORTED_LANGUAGE": 400,
+    "DEEPL_TEXT_TOO_LONG": 413,
+    "ALL_PROVIDERS_FAILED": 502,
+    # Application-level errors defined in this module
+    "INVALID_FORMAT": 400,
+    "FILE_TOO_LARGE": 413,
+    "GLOSSARY_NOT_FOUND": 404,
+    "PROMPT_NOT_FOUND": 404,
+    "INTERNAL_ERROR": 500,
+}
+
+
+def handle_translation_error(error: TranslationError) -> tuple[dict, int]:
+ """
+ Handle a translation error and return a tuple of (response_body, status_code).
+
+ The status code is looked up from the error's code in
+ _PROVIDER_ERROR_HTTP_STATUS; codes without an entry are reported as 400.
+ The body has the keys "error", "message" and "details".
+
Args:
- error: Exception that occurred
-
+ error: The TranslationError to handle
+
Returns:
- HTTPException with appropriate status code and message
+ Tuple of (error response dict, HTTP status code)
"""
- if isinstance(error, UnsupportedFileTypeError):
- return HTTPException(status_code=400, detail=str(error))
- elif isinstance(error, FileSizeLimitExceededError):
- return HTTPException(status_code=413, detail=str(error))
- elif isinstance(error, LanguageNotSupportedError):
- return HTTPException(status_code=400, detail=str(error))
- elif isinstance(error, DocumentProcessingError):
- return HTTPException(status_code=500, detail=str(error))
- else:
- return HTTPException(status_code=500, detail="An unexpected error occurred during translation")
+ # Unknown codes fall back to 400.
+ status_code = _PROVIDER_ERROR_HTTP_STATUS.get(error.code, 400)
+
+ response = {
+ "error": error.code,
+ "message": error.message,
+ # Falsy details (None or empty) are always emitted as an empty dict.
+ "details": error.details if error.details else {},
+ }
+
+ return response, status_code
diff --git a/utils/file_handler.py b/utils/file_handler.py
index 7dae916..a3e9ce8 100644
--- a/utils/file_handler.py
+++ b/utils/file_handler.py
@@ -1,8 +1,10 @@
"""
Utility functions for file handling and validation
"""
+
import os
import uuid
+import hashlib
from pathlib import Path
from typing import Optional
from fastapi import UploadFile, HTTPException
@@ -11,39 +13,66 @@ from config import config
class FileHandler:
"""Handles file operations for the translation API"""
-
+
+    @staticmethod
+    def calculate_sha256(file_path: Path) -> Optional[str]:
+        """
+        Compute the SHA256 digest of a file's contents.
+
+        The file is read in 4096-byte blocks so it is never loaded whole.
+        Any failure is logged at error level and reported as None.
+
+        Args:
+            file_path: Path to the file
+
+        Returns:
+            Hex digest string, or None if the file does not exist or an
+            error occurs while reading it
+        """
+        try:
+            if not file_path.exists():
+                return None
+            digest = hashlib.sha256()
+            with open(file_path, "rb") as stream:
+                while True:
+                    block = stream.read(4096)
+                    # An empty bytes object marks end of file.
+                    if block == b"":
+                        break
+                    digest.update(block)
+            return digest.hexdigest()
+        except Exception as e:
+            import logging
+
+            logging.getLogger(__name__).error(
+                f"SHA256 calculation failed for {file_path}: {e}"
+            )
+            return None
+
@staticmethod
def validate_file_extension(filename: str) -> str:
"""
Validate that the file extension is supported
-
+
Args:
filename: Name of the file
-
+
Returns:
File extension (lowercase, with dot)
-
+
Raises:
HTTPException: If file extension is not supported
"""
file_extension = Path(filename).suffix.lower()
-
+
+ # The suffix is lowercased and keeps its leading dot; a name without any
+ # suffix yields "". Presumably config.SUPPORTED_EXTENSIONS holds lowercase,
+ # dot-prefixed entries - TODO confirm against config.
if file_extension not in config.SUPPORTED_EXTENSIONS:
raise HTTPException(
status_code=400,
- detail=f"Unsupported file type. Supported types: {', '.join(config.SUPPORTED_EXTENSIONS)}"
+ detail=f"Unsupported file type. Supported types: {', '.join(config.SUPPORTED_EXTENSIONS)}",
)
-
+
return file_extension
-
+
@staticmethod
def validate_file_size(file: UploadFile) -> None:
"""
Validate that the file size is within limits
-
+
Args:
file: Uploaded file
-
+
Raises:
HTTPException: If file is too large
"""
@@ -51,90 +80,100 @@ class FileHandler:
file.file.seek(0, 2) # Move to end of file
file_size = file.file.tell() # Get position (file size)
file.file.seek(0) # Reset to beginning
-
+
if file_size > config.MAX_FILE_SIZE_BYTES:
raise HTTPException(
status_code=400,
- detail=f"File too large. Maximum size: {config.MAX_FILE_SIZE_MB}MB"
+ detail=f"File too large. Maximum size: {config.MAX_FILE_SIZE_MB}MB",
)
-
+
@staticmethod
- async def save_upload_file(file: UploadFile, destination: Path) -> Path:
+ async def save_upload_file(file: UploadFile, destination: Path, chunk_size: int = 65536) -> Path:
"""
- Save an uploaded file to disk
-
+ Save an uploaded file to disk using chunked streaming to avoid loading
+ the entire file into memory at once.
+
Args:
file: Uploaded file
destination: Path to save the file
-
+ chunk_size: Read/write chunk size in bytes (default 64KB)
+
Returns:
Path to the saved file
"""
+ # Create any missing parent directories of the destination first.
destination.parent.mkdir(parents=True, exist_ok=True)
-
+
with open(destination, "wb") as buffer:
- content = await file.read()
- buffer.write(content)
-
+ # Read until file.read() returns an empty chunk.
+ # NOTE(review): buffer.write() is a plain synchronous write inside a
+ # coroutine - confirm that blocking per chunk is acceptable here.
+ while True:
+ chunk = await file.read(chunk_size)
+ if not chunk:
+ break
+ buffer.write(chunk)
+
return destination
-
+
@staticmethod
def generate_unique_filename(original_filename: str, prefix: str = "") -> str:
"""
Generate a unique filename to avoid collisions
-
+
Args:
original_filename: Original filename
prefix: Optional prefix for the filename
-
+
Returns:
Unique filename
"""
file_path = Path(original_filename)
+ # Only the first 8 characters of a random UUID4 string form the unique part.
unique_id = str(uuid.uuid4())[:8]
-
+
+ # Result shape: [<prefix>_]<unique_id>_<stem><suffix>; an empty prefix is omitted.
if prefix:
return f"{prefix}_{unique_id}_{file_path.stem}{file_path.suffix}"
else:
return f"{unique_id}_{file_path.stem}{file_path.suffix}"
-
+
@staticmethod
def cleanup_file(file_path: Path) -> None:
"""
Delete a file if it exists
-
+
Args:
file_path: Path to the file to delete
"""
+ import logging
+
+ _logger = logging.getLogger(__name__)
try:
if file_path.exists():
file_path.unlink()
+ _logger.debug(f"Deleted file: {file_path}")
+ # Best-effort cleanup: a failure is logged as a warning and never re-raised.
except Exception as e:
- print(f"Error deleting file {file_path}: {e}")
-
+ _logger.warning(f"Error deleting file {file_path}: {e}")
+
@staticmethod
def get_file_info(file_path: Path) -> dict:
"""
Get information about a file
-
+
Args:
file_path: Path to the file
-
+
Returns:
Dictionary with file information
"""
+ # A missing path yields an empty dict rather than an error.
if not file_path.exists():
return {}
-
+
stat = file_path.stat()
-
+
return {
"filename": file_path.name,
"size_bytes": stat.st_size,
"size_mb": round(stat.st_size / (1024 * 1024), 2),
+ # Hashing reads the whole file; the value is None when hashing fails.
+ "sha256": FileHandler.calculate_sha256(file_path),
"extension": file_path.suffix,
"created": stat.st_ctime,
- "modified": stat.st_mtime
+ "modified": stat.st_mtime,
}
|