Files
office_translator/main.py
sepehr 26dfa08730
Some checks failed
Deploy to Homelab / Deploy Wordly to 192.168.1.151 (push) Has been cancelled
Deploy to Homelab / Deploy Monitoring (if configured) (push) Has been cancelled
feat: add Prometheus metrics + fix CI/CD health check port
- Add prometheus-client dependency
- Create middleware/metrics.py with PrometheusMiddleware
- Expose /metrics endpoint in Prometheus text format
- Track http_requests_total, translation_total, translation_duration_seconds,
  file_size_bytes
- Instrument translate routes with record_translation() and record_file_size()
- Fix deploy.yml health check: localhost:8000 -> localhost:8001 (Portainer conflict)

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-15 14:33:10 +02:00

661 lines
20 KiB
Python

"""
Document Translation API
FastAPI application for translating complex documents while preserving formatting
SaaS-ready with rate limiting, validation, and robust error handling
Story 3.5: API Versioning - All endpoints under /api/v1/ prefix
Story 3.6: Documentation OpenAPI (Swagger + ReDoc)
"""
# Story 6.6: Fail-fast on missing required env before any other imports (NFR10)
import sys
from config import config
_missing = config.validate_required_env()
if _missing:
    # Report every missing variable in one message on stderr, then stop with a
    # non-zero exit code before the rest of the module is imported.
    print(
        "Missing required env: "
        + ", ".join(_missing)
        + ". Set them in .env or environment. See .env.example.",
        file=sys.stderr,
    )
    sys.exit(1)
from fastapi import (
FastAPI,
Request,
)
from fastapi.openapi.utils import get_openapi
from starlette.exceptions import HTTPException as StarletteHTTPException
from fastapi.responses import JSONResponse
from fastapi.middleware.cors import CORSMiddleware
from fastapi.staticfiles import StaticFiles
from fastapi.exceptions import RequestValidationError
from contextlib import asynccontextmanager
from pathlib import Path
from translators import (
excel_translator,
word_translator,
pptx_translator,
ExcelProcessorError,
WordProcessorError,
PptxProcessorError,
)
from utils import handle_translation_error, DocumentProcessingError
from services.providers.fallback import AllProvidersFailedError
from services.translation_service import _translation_cache
from routes.api_v1_router import router as api_v1_router
from middleware.rate_limiting import (
RateLimitMiddleware,
RateLimitManager,
RateLimitConfig,
)
from middleware.security import (
SecurityHeadersMiddleware,
RequestLoggingMiddleware,
)
from middleware.error_handler import ErrorHandlingMiddleware, format_error_response
from middleware.metrics import PrometheusMiddleware, get_metrics
from middleware.cleanup import (
MemoryMonitor,
HealthChecker,
create_cleanup_manager,
)
from middleware.validation import ValidationError
from utils.exceptions import (
TranslationError,
UnsupportedFileTypeError,
FileSizeLimitExceededError,
LanguageNotSupportedError,
DocumentProcessingError as UtilsDocumentProcessingError,
)
from core.logging import configure_logging, get_logger
# Structlog-based logging is set up a single time, at import, with config as
# the only source of settings. JSON output is selected when LOG_FORMAT is
# "json" (case-insensitive) or when ENV is "production".
_json_logs = config.LOG_FORMAT.lower() == "json" or config.ENV == "production"
configure_logging(json_logs=_json_logs, log_level=config.LOG_LEVEL)
logger = get_logger(__name__)
# Throttling limits, all read from config, and the manager built from them.
rate_limit_config = RateLimitConfig(
    requests_per_minute=config.RATE_LIMIT_PER_MINUTE,
    requests_per_hour=config.RATE_LIMIT_PER_HOUR,
    translations_per_minute=config.TRANSLATIONS_PER_MINUTE,
    translations_per_hour=config.TRANSLATIONS_PER_HOUR,
    max_concurrent_translations=config.MAX_CONCURRENT_TRANSLATIONS,
)
rate_limit_manager = RateLimitManager(rate_limit_config)

# Module-level service objects: the cleanup manager is started/stopped by the
# lifespan handler, and the health checker is queried by the /health endpoint.
cleanup_manager = create_cleanup_manager(config)
memory_monitor = MemoryMonitor(max_memory_percent=config.MAX_MEMORY_PERCENT)
health_checker = HealthChecker(cleanup_manager, memory_monitor)
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Run startup and shutdown logic around the application's serving phase.

    Startup ensures the configured directories exist, then tries to initialise
    and verify the database. Database setup is best effort: any exception is
    logged as a warning and startup continues. Finally the cleanup manager is
    started.

    Shutdown stops the cleanup manager. It runs in a ``finally`` clause so the
    cleanup manager is stopped even when an exception or cancellation is
    raised into the context manager at the ``yield``.

    Args:
        app: The FastAPI application this lifespan is attached to (not used
            by the body).
    """
    logger.info("Starting Document Translation API...")
    config.ensure_directories()

    try:
        # Imported lazily so that a failure to import the database layer is
        # handled like any other database initialisation failure.
        from database.connection import init_db, check_db_connection

        await init_db()
        if await check_db_connection():
            logger.info("Database connection verified")
        else:
            logger.warning("Database connection check failed")
    except Exception as e:
        # Deliberate best effort: the API still starts without a database.
        logger.warning(f"Database initialization skipped: {e}")

    await cleanup_manager.start()
    logger.info("API ready to accept requests")
    try:
        yield
    finally:
        logger.info("Shutting down...")
        await cleanup_manager.stop()
        logger.info("Cleanup completed")
# Tag metadata handed to the OpenAPI schema generator: one {"name",
# "description"} entry per tag, in display order.
OPENAPI_TAGS = [
    {"name": tag_name, "description": tag_description}
    for tag_name, tag_description in (
        (
            "Translation",
            "Endpoints de traduction de documents. Upload, suivi et téléchargement des traductions.",
        ),
        (
            "Authentication",
            "Authentification utilisateur via JWT. Inscription, connexion, déconnexion et rafraîchissement des tokens.",
        ),
        (
            "API Keys",
            "Gestion des clés API pour l'automatisation (utilisateurs Pro uniquement). Génération, liste et révocation.",
        ),
        (
            "Admin",
            "Endpoints d'administration. Dashboard, gestion utilisateurs, statistiques et configuration.",
        ),
        (
            "Health",
            "Endpoints de santé pour monitoring et probes Kubernetes.",
        ),
        (
            "Legacy",
            "Endpoints utilitaires et de compatibilité. Langues, métriques, configuration Ollama.",
        ),
    )
]
def custom_openapi():
    """Build, cache and return the customised OpenAPI schema for ``app``.

    On the first call the schema is generated from ``app.routes`` with a long
    Markdown description and the ``OPENAPI_TAGS`` metadata, then extended with
    the ``JWT`` and ``APIKey`` security schemes plus contact and license
    information. The result is stored on ``app.openapi_schema``; later calls
    return that cached object unchanged.

    Returns:
        The OpenAPI schema mapping stored on ``app.openapi_schema``.
    """
    if app.openapi_schema:
        return app.openapi_schema

    openapi_schema = get_openapi(
        title="Office Translator API",
        version="1.0.0",
        description="""
API de traduction de documents Office avec préservation parfaite du format.
## 🔐 Authentification
L'API supporte deux méthodes d'authentification:
### 1. JWT (Web Dashboard & Admin)
Utilisé pour l'interface web et le dashboard admin.
**Obtenir un token:**
```
POST /api/v1/auth/login
{
"email": "user@example.com",
"password": "password123"
}
```
**Utiliser le token:**
```
Authorization: Bearer eyJhbGciOiJIUzI1NiIs...
```
**Détails:**
- Access token expire en 15 minutes
- Refresh token expire en 7 jours
- Utilisez `/api/v1/auth/refresh` pour renouveler l'access token
### 2. API Key (Automation)
Utilisé pour l'automatisation et l'intégration (Pro users only).
**Obtenir une clé:**
```
POST /api/v1/api-keys
Authorization: Bearer <jwt_token>
```
**Utiliser la clé:**
```
X-API-Key: sk_live_abc123def456...
```
**Détails:**
- Clé statique, pas d'expiration
- Peut être révoquée à tout moment
- Uniquement pour utilisateurs Pro
## 📄 Endpoints Principaux
### Translation
- `POST /api/v1/translate` - Traduire un document
- `GET /api/v1/translations/{id}` - Vérifier le statut
- `GET /api/v1/download/{id}` - Télécharger le fichier traduit
- `GET /api/v1/languages` - Langues supportées
### Authentication
- `POST /api/v1/auth/register` - Créer un compte
- `POST /api/v1/auth/login` - Connexion
- `POST /api/v1/auth/logout` - Déconnexion
- `POST /api/v1/auth/refresh` - Renouveler le token
### API Keys (Pro)
- `POST /api/v1/api-keys` - Générer une clé
- `GET /api/v1/api-keys` - Lister les clés
- `DELETE /api/v1/api-keys/{key_id}` - Révoquer une clé
### Admin
- `POST /api/v1/admin/login` - Connexion admin
- `GET /api/v1/admin/dashboard` - Dashboard admin
- `GET /api/v1/admin/users` - Gestion utilisateurs
- `PATCH /api/v1/admin/users/{user_id}` - Modifier tier utilisateur
## 📋 Format des Réponses
### Succès
```json
{
"data": {
"id": "tr_abc123",
"status": "processing",
"file_name": "report.xlsx"
},
"meta": {
"rate_limit_remaining": 45
}
}
```
### Erreur
```json
{
"error": "INVALID_FORMAT",
"message": "Format PDF non supporté. Formats acceptés: .xlsx, .docx, .pptx",
"details": {
"accepted_formats": [".xlsx", ".docx", ".pptx"]
}
}
```
## ⚠️ Codes d'Erreur Courants
| Code | HTTP | Description |
|------|------|-------------|
| `INVALID_FORMAT` | 400 | Format fichier non supporté |
| `FILE_TOO_LARGE` | 413 | Fichier > 50 MB |
| `QUOTA_EXCEEDED` | 429 | Limite quotidienne atteinte |
| `UNAUTHORIZED` | 401 | Token/API key invalide |
| `FORBIDDEN` | 403 | Pas les droits requis |
| `PRO_FEATURE_REQUIRED` | 403 | Feature réservée Pro |
| `PROVIDER_ERROR` | 502 | Erreur provider externe |
## 📊 Rate Limiting
- **Free**: 5 fichiers par jour
- **Pro**: Illimité (fair use policy)
- Rate limit info dans `meta.rate_limit_remaining`
- Header `Retry-After` si quota dépassé
## 📁 Formats Supportés
- **Excel**: .xlsx
- **Word**: .docx
- **PowerPoint**: .pptx
- Taille max: 50 MB
## 🌐 Langues Supportées
Utilisez `GET /api/v1/languages` pour obtenir la liste complète.
Codes ISO 639-1 (ex: en, fr, de, es, it, pt, ja, zh, ar, ru...)
## 🔔 Webhooks (Pro)
Spécifiez `webhook_url` dans votre requête pour recevoir une notification POST quand la traduction termine.
Payload envoyé:
```json
{
"translation_id": "tr_abc123",
"status": "completed",
"timestamp": "2024-01-15T10:35:00Z",
"file_name": "report.xlsx",
"error_message": null
}
```
""",
        routes=app.routes,
        tags=OPENAPI_TAGS,
    )

    # Security scheme configuration. setdefault() guards against a generated
    # schema that has no "components" section yet, which would otherwise raise
    # KeyError here.
    openapi_schema.setdefault("components", {})["securitySchemes"] = {
        "JWT": {
            "type": "http",
            "scheme": "bearer",
            "bearerFormat": "JWT",
            "description": "JWT token obtenu via /api/v1/auth/login. Format: Bearer <token>"
        },
        "APIKey": {
            "type": "apiKey",
            "in": "header",
            "name": "X-API-Key",
            "description": "Clé API obtenue via /api/v1/api-keys (utilisateurs Pro uniquement). Format: sk_live_..."
        }
    }

    # Contact and license info
    openapi_schema["info"]["contact"] = {
        "name": "Office Translator Support",
        "email": "support@office-translator.com",
    }
    openapi_schema["info"]["license"] = {
        "name": "Proprietary",
    }

    # Cache the schema so it is generated only once per process.
    app.openapi_schema = openapi_schema
    return app.openapi_schema
# The ASGI application object. Startup/shutdown is delegated to `lifespan`;
# the interactive docs, ReDoc and raw schema URLs are pinned explicitly.
app = FastAPI(
    title="Office Translator API",
    version="1.0.0",
    description="API de traduction de documents Office (Excel, Word, PowerPoint) avec préservation du format.",
    lifespan=lifespan,
    docs_url="/docs",
    redoc_url="/redoc",
    openapi_url="/openapi.json",
    contact={
        "name": "Office Translator Support",
        "email": "support@office-translator.com",
    },
    license_info={"name": "Proprietary"},
)

# Replace the default schema generator with the customised one.
app.openapi = custom_openapi
# Middleware registration.
# NOTE(review): Starlette wraps previously added middleware with each newly
# added one, so the registration order below is significant — confirm before
# reordering.
app.add_middleware(ErrorHandlingMiddleware)
app.add_middleware(PrometheusMiddleware)
app.add_middleware(RequestLoggingMiddleware, log_body=False)
app.add_middleware(
    SecurityHeadersMiddleware,
    config={"enable_hsts": config.ENABLE_HSTS},
)
app.add_middleware(
    RateLimitMiddleware,
    rate_limit_manager=rate_limit_manager,
)
# Local development frontends commonly run on ports 3000 and 3001, while .env
# may list only one of them; outside production these origins are merged in.
_CORS_EXTRA_DEV_ORIGINS = [
    "http://localhost:3000",
    "http://localhost:3001",
    "http://127.0.0.1:3000",
    "http://127.0.0.1:3001",
]

_cors_env = config.CORS_ORIGINS_RAW
if not _cors_env or _cors_env == "*":
    # Unset or wildcard configuration: allow every origin (credentials are
    # switched off for this case further down).
    logger.warning(
        "CORS_ORIGINS not properly configured. Using permissive settings for development only!"
    )
    allowed_origins = ["*"]
else:
    allowed_origins = list(config.CORS_ORIGINS)
    if config.ENV == "production":
        logger.info("CORS configured for origins: %s", allowed_origins)
    else:
        # Append only the dev origins that are not already configured.
        allowed_origins += [
            origin
            for origin in _CORS_EXTRA_DEV_ORIGINS
            if origin not in allowed_origins
        ]
        logger.info(
            "CORS: non-production — localhost dev ports merged into allowed origins: %s",
            allowed_origins,
        )

app.add_middleware(
    CORSMiddleware,
    allow_origins=allowed_origins,
    # Credentials are allowed only with an explicit origin list, never with
    # the bare wildcard.
    allow_credentials=allowed_origins != ["*"],
    allow_methods=["GET", "POST", "PUT", "PATCH", "DELETE", "OPTIONS"],
    allow_headers=["*"],
    expose_headers=[
        "X-Request-ID",
        "X-Original-Filename",
        "X-File-Size-MB",
        "X-Target-Language",
    ],
)
# Serve static assets only when a "static" directory exists next to this file.
static_dir = Path(__file__).parent.joinpath("static")
if static_dir.exists():
    app.mount(
        "/static",
        StaticFiles(directory=str(static_dir)),
        name="static",
    )

# Register the API v1 router on the application.
app.include_router(api_v1_router)
@app.exception_handler(TranslationError)
async def translation_error_handler(request: Request, exc: TranslationError):
    """Turn a TranslationError into the API's structured error response.

    The HTTP status is 413 for FileSizeLimitExceededError, 500 for the
    DocumentProcessingError from utils.exceptions, and 400 for any other
    TranslationError. The exception's message, code and details are passed
    through, together with the request id when one is set on request.state.
    """
    if isinstance(exc, FileSizeLimitExceededError):
        http_status = 413
    elif isinstance(exc, UtilsDocumentProcessingError):
        http_status = 500
    else:
        http_status = 400

    return format_error_response(
        status_code=http_status,
        message=exc.message,
        error_code=exc.code,
        details=exc.details,
        request_id=getattr(request.state, "request_id", "unknown"),
    )
@app.exception_handler(StarletteHTTPException)
async def http_exception_handler(request: Request, exc: StarletteHTTPException):
    """Render Starlette/FastAPI HTTP exceptions (404, 405, ...) in the API's error format.

    When the exception detail is a dict, its "message" and "error" entries
    become the response message and error code. Any other detail is converted
    to a string and used as the message, with no error code passed. Headers
    attached to the exception are forwarded.
    """
    detail = getattr(exc, "detail", "An error occurred")

    # Arguments shared by both detail shapes.
    response_kwargs = {
        "status_code": exc.status_code,
        "request_id": getattr(request.state, "request_id", "unknown"),
        "headers": getattr(exc, "headers", None),
    }

    if isinstance(detail, dict):
        response_kwargs["message"] = detail.get("message", "An error occurred")
        response_kwargs["error_code"] = detail.get("error")
    else:
        response_kwargs["message"] = str(detail)

    return format_error_response(**response_kwargs)
@app.exception_handler(RequestValidationError)
async def validation_exception_handler(request: Request, exc: RequestValidationError):
    """Report request validation failures as a structured 400 response.

    Each validation error is reduced to a {"field", "message"} pair, where
    "field" is the error location joined with dots. The list is returned
    under details["validation_errors"] with error code INVALID_FORMAT.
    """
    validation_errors = [
        {
            "field": ".".join(map(str, error.get("loc", []))),
            "message": error.get("msg", "Validation error"),
        }
        for error in exc.errors()
    ]

    return format_error_response(
        status_code=400,
        message="Erreur de validation des donnees transmises.",
        error_code="INVALID_FORMAT",
        details={"validation_errors": validation_errors},
        request_id=getattr(request.state, "request_id", "unknown"),
    )
@app.exception_handler(ValidationError)
async def custom_validation_error_handler(request: Request, exc: ValidationError):
    """Return a 400 response built from the middleware ValidationError.

    The exception's own message, code and details are forwarded unchanged.
    """
    current_request_id = getattr(request.state, "request_id", "unknown")
    error_fields = {
        "message": exc.message,
        "error_code": exc.code,
        "details": exc.details,
    }
    return format_error_response(
        status_code=400,
        request_id=current_request_id,
        **error_fields,
    )
@app.exception_handler(AllProvidersFailedError)
async def all_providers_failed_handler(request: Request, exc: AllProvidersFailedError):
    """Answer with a structured 502 when every provider in the chain has failed.

    The details list the providers that were tried and how many errors were
    collected; the individual errors are not included in the response.
    """
    failure_summary = {
        "providers_tried": exc.providers_tried,
        "error_count": len(exc.errors),
    }
    return format_error_response(
        status_code=502,
        message=exc.message,
        error_code=exc.code,
        details=failure_summary,
        request_id=getattr(request.state, "request_id", "unknown"),
    )
@app.exception_handler(ExcelProcessorError)
async def excel_processor_error_handler(request: Request, exc: ExcelProcessorError):
    """Map an ExcelProcessorError to a structured JSON error response.

    EXCEL_WRITE_ERROR yields 500, EXCEL_TOO_LARGE yields 413, and every other
    error code yields 400.
    """
    http_status = (
        500 if exc.code == ExcelProcessorError.EXCEL_WRITE_ERROR
        else 413 if exc.code == ExcelProcessorError.EXCEL_TOO_LARGE
        else 400
    )
    return format_error_response(
        status_code=http_status,
        message=exc.message,
        error_code=exc.code,
        details=exc.details,
        request_id=getattr(request.state, "request_id", "unknown"),
    )
@app.exception_handler(WordProcessorError)
async def word_processor_error_handler(request: Request, exc: WordProcessorError):
    """Map a WordProcessorError to a structured JSON error response.

    DOCX_WRITE_ERROR yields 500, DOCX_TOO_LARGE yields 413, and every other
    error code yields 400.
    """
    http_status = (
        500 if exc.code == WordProcessorError.DOCX_WRITE_ERROR
        else 413 if exc.code == WordProcessorError.DOCX_TOO_LARGE
        else 400
    )
    return format_error_response(
        status_code=http_status,
        message=exc.message,
        error_code=exc.code,
        details=exc.details,
        request_id=getattr(request.state, "request_id", "unknown"),
    )
@app.exception_handler(PptxProcessorError)
async def pptx_processor_error_handler(request: Request, exc: PptxProcessorError):
    """Map a PptxProcessorError to a structured JSON error response.

    PPTX_WRITE_ERROR yields 500, PPTX_TOO_LARGE yields 413, and every other
    error code yields 400.
    """
    http_status = (
        500 if exc.code == PptxProcessorError.PPTX_WRITE_ERROR
        else 413 if exc.code == PptxProcessorError.PPTX_TOO_LARGE
        else 400
    )
    return format_error_response(
        status_code=http_status,
        message=exc.message,
        error_code=exc.code,
        details=exc.details,
        request_id=getattr(request.state, "request_id", "unknown"),
    )
@app.get("/")
async def root():
    """Root endpoint with API information"""
    # NOTE: FastAPI publishes a route's docstring as its OpenAPI description,
    # so the docstring above is kept verbatim.
    api_info = {
        "name": config.API_TITLE,
        "version": config.API_VERSION,
        "status": "operational",
        "docs": "/docs",
        "redoc": "/redoc",
        "api_base": "/api/v1",
    }
    # Converted to a list before being returned in the response body.
    api_info["supported_formats"] = list(config.SUPPORTED_EXTENSIONS)
    return api_info
@app.get("/metrics", tags=["Health"])
async def metrics_endpoint():
    """Prometheus metrics endpoint"""
    # The response is produced entirely by middleware.metrics.get_metrics();
    # this route only exposes it at /metrics.
    metrics_response = get_metrics()
    return metrics_response
@app.get("/health", tags=["Health"])
async def health_check():
    """Health check endpoint with detailed system status (Kubernetes liveness probe)"""
    # NOTE: FastAPI publishes a route's docstring as its OpenAPI description,
    # so the docstring above is kept verbatim.
    #
    # Response: JSON with overall status, translation service name, database,
    # redis, memory, disk, cleanup service, rate limits and translation cache
    # statistics. The HTTP status is 200 only when the health checker reports
    # "healthy", otherwise 503; database and redis results are reported in the
    # body but do not change the HTTP status.
    from starlette.concurrency import run_in_threadpool

    from core.redis import get_redis_url, ping_sync

    health_status = await health_checker.check_health()
    status_code = 200 if health_status.get("status") == "healthy" else 503

    # Database: stays "not_configured" only if nothing below overwrites it;
    # any exception (including a failed import) is reported as "error".
    db_status = {"status": "not_configured"}
    try:
        from database.connection import check_db_connection

        if await check_db_connection():
            db_status = {"status": "healthy"}
        else:
            db_status = {"status": "unhealthy"}
    except Exception as e:
        db_status = {"status": "error", "error": str(e)}

    # Redis: only probed when a redis URL is configured.
    redis_status = {"status": "not_configured"}
    if get_redis_url():
        # ping_sync is a synchronous call (presumably network I/O — confirm);
        # run it in the threadpool so a slow redis cannot stall the event loop.
        ok, err = await run_in_threadpool(ping_sync)
        if ok:
            redis_status = {"status": "healthy"}
        else:
            redis_status = {"status": "unhealthy", "error": err or "ping failed"}

    return JSONResponse(
        status_code=status_code,
        content={
            "status": health_status.get("status", "unknown"),
            "translation_service": config.TRANSLATION_SERVICE,
            "database": db_status,
            "redis": redis_status,
            "memory": health_status.get("memory", {}),
            "disk": health_status.get("disk", {}),
            "cleanup_service": health_status.get("cleanup_service", {}),
            "rate_limits": {
                "requests_per_minute": rate_limit_config.requests_per_minute,
                "translations_per_minute": rate_limit_config.translations_per_minute,
            },
            "translation_cache": _translation_cache.stats(),
        },
    )
@app.get("/ready", tags=["Health"])
async def readiness_check():
    """Kubernetes readiness probe - check if app can serve traffic"""
    # NOTE: FastAPI publishes a route's docstring as its OpenAPI description,
    # so the docstring above is kept verbatim.
    #
    # Response: {"ready": True} when no issue was found, otherwise a 503 with
    # {"ready": False, "issues": [...]} listing each failed dependency.
    from starlette.concurrency import run_in_threadpool

    from core.redis import get_redis_url, ping_sync

    issues = []

    # Database: checked only when DATABASE_URL is set. A missing database
    # module is not an issue; any other failure is reported.
    try:
        from database.connection import check_db_connection, DATABASE_URL

        if DATABASE_URL and not await check_db_connection():
            issues.append("database_unavailable")
    except ImportError:
        pass
    except Exception as e:
        issues.append(f"database_error: {e}")

    # Redis: checked only when a redis URL is configured.
    if get_redis_url():
        # ping_sync is a synchronous call (presumably network I/O — confirm);
        # run it in the threadpool so a slow redis cannot stall the event loop.
        ok, _ = await run_in_threadpool(ping_sync)
        if not ok:
            issues.append("redis_unavailable")

    if issues:
        return JSONResponse(status_code=503, content={"ready": False, "issues": issues})
    return {"ready": True}
if __name__ == "__main__":
    # Direct-execution entry point: `python main.py` serves the app on port
    # 8000 on all interfaces.
    import uvicorn

    # Auto-reload is a development feature; keep it off when ENV is
    # "production" so a direct launch there does not start the file watcher.
    uvicorn.run(
        "main:app",
        host="0.0.0.0",
        port=8000,
        reload=config.ENV != "production",
    )