""" Document Translation API FastAPI application for translating complex documents while preserving formatting SaaS-ready with rate limiting, validation, and robust error handling Story 3.5: API Versioning - All endpoints under /api/v1/ prefix Story 3.6: Documentation OpenAPI (Swagger + ReDoc) """ # Story 6.6: Fail-fast on missing required env before any other imports (NFR10) import sys from config import config _missing = config.validate_required_env() if _missing: msg = ( "Missing required env: " + ", ".join(_missing) + ". Set them in .env or environment. See .env.example." ) print(msg, file=sys.stderr) sys.exit(1) from fastapi import ( FastAPI, Request, ) from fastapi.openapi.utils import get_openapi from starlette.exceptions import HTTPException as StarletteHTTPException from fastapi.responses import JSONResponse from fastapi.middleware.cors import CORSMiddleware from fastapi.staticfiles import StaticFiles from fastapi.exceptions import RequestValidationError from contextlib import asynccontextmanager from pathlib import Path from translators import ( excel_translator, word_translator, pptx_translator, ExcelProcessorError, WordProcessorError, PptxProcessorError, ) from utils import handle_translation_error, DocumentProcessingError from services.providers.fallback import AllProvidersFailedError from services.translation_service import _translation_cache from routes.api_v1_router import router as api_v1_router from middleware.rate_limiting import ( RateLimitMiddleware, RateLimitManager, RateLimitConfig, ) from middleware.security import ( SecurityHeadersMiddleware, RequestLoggingMiddleware, ) from middleware.error_handler import ErrorHandlingMiddleware, format_error_response from middleware.cleanup import ( MemoryMonitor, HealthChecker, create_cleanup_manager, ) from middleware.validation import ValidationError from utils.exceptions import ( TranslationError, UnsupportedFileTypeError, FileSizeLimitExceededError, LanguageNotSupportedError, DocumentProcessingError as UtilsDocumentProcessingError, ) from core.logging import configure_logging, get_logger # Configure structlog-based logging once at startup (single source of truth: config). _json_logs = ( config.LOG_FORMAT.lower() == "json" or config.ENV == "production" ) configure_logging( json_logs=_json_logs, log_level=config.LOG_LEVEL, ) logger = get_logger(__name__) rate_limit_config = RateLimitConfig( requests_per_minute=config.RATE_LIMIT_PER_MINUTE, requests_per_hour=config.RATE_LIMIT_PER_HOUR, translations_per_minute=config.TRANSLATIONS_PER_MINUTE, translations_per_hour=config.TRANSLATIONS_PER_HOUR, max_concurrent_translations=config.MAX_CONCURRENT_TRANSLATIONS, ) rate_limit_manager = RateLimitManager(rate_limit_config) cleanup_manager = create_cleanup_manager(config) memory_monitor = MemoryMonitor(max_memory_percent=config.MAX_MEMORY_PERCENT) health_checker = HealthChecker(cleanup_manager, memory_monitor) @asynccontextmanager async def lifespan(app: FastAPI): """Handle startup and shutdown events""" logger.info("Starting Document Translation API...") config.ensure_directories() try: from database.connection import init_db, check_db_connection await init_db() if await check_db_connection(): logger.info("Database connection verified") else: logger.warning("Database connection check failed") except Exception as e: logger.warning(f"Database initialization skipped: {e}") await cleanup_manager.start() logger.info("API ready to accept requests") yield logger.info("Shutting down...") await cleanup_manager.stop() logger.info("Cleanup completed") # OpenAPI Tags Metadata OPENAPI_TAGS = [ { "name": "Translation", "description": "Endpoints de traduction de documents. Upload, suivi et téléchargement des traductions.", }, { "name": "Authentication", "description": "Authentification utilisateur via JWT. Inscription, connexion, déconnexion et rafraîchissement des tokens.", }, { "name": "API Keys", "description": "Gestion des clés API pour l'automatisation (utilisateurs Pro uniquement). Génération, liste et révocation.", }, { "name": "Admin", "description": "Endpoints d'administration. Dashboard, gestion utilisateurs, statistiques et configuration.", }, { "name": "Health", "description": "Endpoints de santé pour monitoring et probes Kubernetes.", }, { "name": "Legacy", "description": "Endpoints utilitaires et de compatibilité. Langues, métriques, configuration Ollama.", }, ] def custom_openapi(): """Generate custom OpenAPI schema with comprehensive documentation.""" if app.openapi_schema: return app.openapi_schema openapi_schema = get_openapi( title="Office Translator API", version="1.0.0", description=""" API de traduction de documents Office avec préservation parfaite du format. ## 🔐 Authentification L'API supporte deux méthodes d'authentification: ### 1. JWT (Web Dashboard & Admin) Utilisé pour l'interface web et le dashboard admin. **Obtenir un token:** ``` POST /api/v1/auth/login { "email": "user@example.com", "password": "password123" } ``` **Utiliser le token:** ``` Authorization: Bearer eyJhbGciOiJIUzI1NiIs... ``` **Détails:** - Access token expire en 15 minutes - Refresh token expire en 7 jours - Utilisez `/api/v1/auth/refresh` pour renouveler l'access token ### 2. API Key (Automation) Utilisé pour l'automatisation et l'intégration (Pro users only). **Obtenir une clé:** ``` POST /api/v1/api-keys Authorization: Bearer ``` **Utiliser la clé:** ``` X-API-Key: sk_live_abc123def456... ``` **Détails:** - Clé statique, pas d'expiration - Peut être révoquée à tout moment - Uniquement pour utilisateurs Pro ## 📄 Endpoints Principaux ### Translation - `POST /api/v1/translate` - Traduire un document - `GET /api/v1/translations/{id}` - Vérifier le statut - `GET /api/v1/download/{id}` - Télécharger le fichier traduit - `GET /api/v1/languages` - Langues supportées ### Authentication - `POST /api/v1/auth/register` - Créer un compte - `POST /api/v1/auth/login` - Connexion - `POST /api/v1/auth/logout` - Déconnexion - `POST /api/v1/auth/refresh` - Renouveler le token ### API Keys (Pro) - `POST /api/v1/api-keys` - Générer une clé - `GET /api/v1/api-keys` - Lister les clés - `DELETE /api/v1/api-keys/{key_id}` - Révoquer une clé ### Admin - `POST /api/v1/admin/login` - Connexion admin - `GET /api/v1/admin/dashboard` - Dashboard admin - `GET /api/v1/admin/users` - Gestion utilisateurs - `PATCH /api/v1/admin/users/{user_id}` - Modifier tier utilisateur ## 📋 Format des Réponses ### Succès ```json { "data": { "id": "tr_abc123", "status": "processing", "file_name": "report.xlsx" }, "meta": { "rate_limit_remaining": 45 } } ``` ### Erreur ```json { "error": "INVALID_FORMAT", "message": "Format PDF non supporté. Formats acceptés: .xlsx, .docx, .pptx", "details": { "accepted_formats": [".xlsx", ".docx", ".pptx"] } } ``` ## ⚠️ Codes d'Erreur Courants | Code | HTTP | Description | |------|------|-------------| | `INVALID_FORMAT` | 400 | Format fichier non supporté | | `FILE_TOO_LARGE` | 413 | Fichier > 50 MB | | `QUOTA_EXCEEDED` | 429 | Limite quotidienne atteinte | | `UNAUTHORIZED` | 401 | Token/API key invalide | | `FORBIDDEN` | 403 | Pas les droits requis | | `PRO_FEATURE_REQUIRED` | 403 | Feature réservée Pro | | `PROVIDER_ERROR` | 502 | Erreur provider externe | ## 📊 Rate Limiting - **Free**: 5 fichiers par jour - **Pro**: Illimité (fair use policy) - Rate limit info dans `meta.rate_limit_remaining` - Header `Retry-After` si quota dépassé ## 📁 Formats Supportés - **Excel**: .xlsx - **Word**: .docx - **PowerPoint**: .pptx - Taille max: 50 MB ## 🌐 Langues Supportées Utilisez `GET /api/v1/languages` pour obtenir la liste complète. Codes ISO 639-1 (ex: en, fr, de, es, it, pt, ja, zh, ar, ru...) ## 🔔 Webhooks (Pro) Spécifiez `webhook_url` dans votre requête pour recevoir une notification POST quand la traduction termine. Payload envoyé: ```json { "translation_id": "tr_abc123", "status": "completed", "timestamp": "2024-01-15T10:35:00Z", "file_name": "report.xlsx", "error_message": null } ``` """, routes=app.routes, tags=OPENAPI_TAGS, ) # Configuration des security schemes openapi_schema["components"]["securitySchemes"] = { "JWT": { "type": "http", "scheme": "bearer", "bearerFormat": "JWT", "description": "JWT token obtenu via /api/v1/auth/login. Format: Bearer " }, "APIKey": { "type": "apiKey", "in": "header", "name": "X-API-Key", "description": "Clé API obtenue via /api/v1/api-keys (utilisateurs Pro uniquement). Format: sk_live_..." } } # Contact and license info openapi_schema["info"]["contact"] = { "name": "Office Translator Support", "email": "support@office-translator.com", } openapi_schema["info"]["license"] = { "name": "Proprietary", } app.openapi_schema = openapi_schema return app.openapi_schema app = FastAPI( title="Office Translator API", version="1.0.0", description="API de traduction de documents Office (Excel, Word, PowerPoint) avec préservation du format.", lifespan=lifespan, docs_url="/docs", redoc_url="/redoc", openapi_url="/openapi.json", contact={ "name": "Office Translator Support", "email": "support@office-translator.com", }, license_info={ "name": "Proprietary", }, ) # Apply custom OpenAPI schema app.openapi = custom_openapi app.add_middleware(ErrorHandlingMiddleware) app.add_middleware(RequestLoggingMiddleware, log_body=False) app.add_middleware( SecurityHeadersMiddleware, config={"enable_hsts": config.ENABLE_HSTS}, ) app.add_middleware(RateLimitMiddleware, rate_limit_manager=rate_limit_manager) # Local dev frontends often run on 3000, 3001, etc.; .env may list only one port and break the other. _CORS_EXTRA_DEV_ORIGINS = [ "http://localhost:3000", "http://localhost:3001", "http://127.0.0.1:3000", "http://127.0.0.1:3001", ] _cors_env = config.CORS_ORIGINS_RAW if _cors_env == "*" or not _cors_env: logger.warning( "CORS_ORIGINS not properly configured. Using permissive settings for development only!" ) allowed_origins = ["*"] else: allowed_origins = list(config.CORS_ORIGINS) if config.ENV != "production": for _o in _CORS_EXTRA_DEV_ORIGINS: if _o not in allowed_origins: allowed_origins.append(_o) logger.info( "CORS: non-production — localhost dev ports merged into allowed origins: %s", allowed_origins, ) else: logger.info("CORS configured for origins: %s", allowed_origins) app.add_middleware( CORSMiddleware, allow_origins=allowed_origins, allow_credentials=True if allowed_origins != ["*"] else False, allow_methods=["GET", "POST", "PUT", "PATCH", "DELETE", "OPTIONS"], allow_headers=["*"], expose_headers=[ "X-Request-ID", "X-Original-Filename", "X-File-Size-MB", "X-Target-Language", ], ) static_dir = Path(__file__).parent / "static" if static_dir.exists(): app.mount("/static", StaticFiles(directory=str(static_dir)), name="static") app.include_router(api_v1_router) @app.exception_handler(TranslationError) async def translation_error_handler(request: Request, exc: TranslationError): """Handle custom translation errors.""" request_id = getattr(request.state, "request_id", "unknown") status_code = 400 if isinstance(exc, FileSizeLimitExceededError): status_code = 413 elif isinstance(exc, UtilsDocumentProcessingError): status_code = 500 return format_error_response( status_code=status_code, message=exc.message, error_code=exc.code, details=exc.details, request_id=request_id, ) @app.exception_handler(StarletteHTTPException) async def http_exception_handler(request: Request, exc: StarletteHTTPException): """Handle standard FastAPI/Starlette HTTP exceptions (like 404, 405).""" request_id = getattr(request.state, "request_id", "unknown") headers = getattr(exc, "headers", None) detail = exc.detail if hasattr(exc, "detail") else "An error occurred" if isinstance(detail, dict): return format_error_response( status_code=exc.status_code, message=detail.get("message", "An error occurred"), error_code=detail.get("error"), request_id=request_id, headers=headers, ) return format_error_response( status_code=exc.status_code, message=str(detail), request_id=request_id, headers=headers, ) @app.exception_handler(RequestValidationError) async def validation_exception_handler(request: Request, exc: RequestValidationError): """Handle Pydantic validation errors (422) and convert them to structured 400.""" request_id = getattr(request.state, "request_id", "unknown") error_details = [] for err in exc.errors(): loc = ".".join(str(p) for p in err.get("loc", [])) msg = err.get("msg", "Validation error") error_details.append({"field": loc, "message": msg}) return format_error_response( status_code=400, message="Erreur de validation des donnees transmises.", error_code="INVALID_FORMAT", details={"validation_errors": error_details}, request_id=request_id, ) @app.exception_handler(ValidationError) async def custom_validation_error_handler(request: Request, exc: ValidationError): """Handle custom validation errors with user-friendly messages.""" request_id = getattr(request.state, "request_id", "unknown") return format_error_response( status_code=400, message=exc.message, error_code=exc.code, details=exc.details, request_id=request_id, ) @app.exception_handler(AllProvidersFailedError) async def all_providers_failed_handler(request: Request, exc: AllProvidersFailedError): """Return 502 with structured JSON when all providers in the chain fail.""" request_id = getattr(request.state, "request_id", "unknown") return format_error_response( status_code=502, message=exc.message, error_code=exc.code, details={ "providers_tried": exc.providers_tried, "error_count": len(exc.errors), }, request_id=request_id, ) @app.exception_handler(ExcelProcessorError) async def excel_processor_error_handler(request: Request, exc: ExcelProcessorError): """Handle Excel processing errors with structured JSON response.""" status_code = 400 if exc.code == ExcelProcessorError.EXCEL_WRITE_ERROR: status_code = 500 elif exc.code == ExcelProcessorError.EXCEL_TOO_LARGE: status_code = 413 request_id = getattr(request.state, "request_id", "unknown") return format_error_response( status_code=status_code, message=exc.message, error_code=exc.code, details=exc.details, request_id=request_id, ) @app.exception_handler(WordProcessorError) async def word_processor_error_handler(request: Request, exc: WordProcessorError): """Handle Word processing errors with structured JSON response.""" status_code = 400 if exc.code == WordProcessorError.DOCX_WRITE_ERROR: status_code = 500 elif exc.code == WordProcessorError.DOCX_TOO_LARGE: status_code = 413 request_id = getattr(request.state, "request_id", "unknown") return format_error_response( status_code=status_code, message=exc.message, error_code=exc.code, details=exc.details, request_id=request_id, ) @app.exception_handler(PptxProcessorError) async def pptx_processor_error_handler(request: Request, exc: PptxProcessorError): """Handle PowerPoint processing errors with structured JSON response.""" status_code = 400 if exc.code == PptxProcessorError.PPTX_WRITE_ERROR: status_code = 500 elif exc.code == PptxProcessorError.PPTX_TOO_LARGE: status_code = 413 request_id = getattr(request.state, "request_id", "unknown") return format_error_response( status_code=status_code, message=exc.message, error_code=exc.code, details=exc.details, request_id=request_id, ) @app.get("/") async def root(): """Root endpoint with API information""" return { "name": config.API_TITLE, "version": config.API_VERSION, "status": "operational", "docs": "/docs", "redoc": "/redoc", "api_base": "/api/v1", "supported_formats": list(config.SUPPORTED_EXTENSIONS), } @app.get("/health", tags=["Health"]) async def health_check(): """Health check endpoint with detailed system status (Kubernetes liveness probe)""" from core.redis import get_redis_url, ping_sync health_status = await health_checker.check_health() status_code = 200 if health_status.get("status") == "healthy" else 503 db_status = {"status": "not_configured"} try: from database.connection import check_db_connection if await check_db_connection(): db_status = {"status": "healthy"} else: db_status = {"status": "unhealthy"} except Exception as e: db_status = {"status": "error", "error": str(e)} redis_status = {"status": "not_configured"} if get_redis_url(): ok, err = ping_sync() redis_status = {"status": "healthy"} if ok else {"status": "unhealthy", "error": err or "ping failed"} return JSONResponse( status_code=status_code, content={ "status": health_status.get("status", "unknown"), "translation_service": config.TRANSLATION_SERVICE, "database": db_status, "redis": redis_status, "memory": health_status.get("memory", {}), "disk": health_status.get("disk", {}), "cleanup_service": health_status.get("cleanup_service", {}), "rate_limits": { "requests_per_minute": rate_limit_config.requests_per_minute, "translations_per_minute": rate_limit_config.translations_per_minute, }, "translation_cache": _translation_cache.stats(), }, ) @app.get("/ready", tags=["Health"]) async def readiness_check(): """Kubernetes readiness probe - check if app can serve traffic""" from core.redis import get_redis_url, ping_sync issues = [] try: from database.connection import check_db_connection, DATABASE_URL if DATABASE_URL: if not await check_db_connection(): issues.append("database_unavailable") except ImportError: pass except Exception as e: issues.append(f"database_error: {str(e)}") if get_redis_url(): ok, _ = ping_sync() if not ok: issues.append("redis_unavailable") if issues: return JSONResponse(status_code=503, content={"ready": False, "issues": issues}) return {"ready": True} if __name__ == "__main__": import uvicorn uvicorn.run("main:app", host="0.0.0.0", port=8000, reload=True)