# office_translator/routes/translate_routes.py

"""
API v1 Translate Endpoint (Story 2.10, 2.11, 2.12, 3.6)
POST /api/v1/translate - Submit document for translation
GET /api/v1/translations/{id} - Get translation status with real-time progress
GET /api/v1/download/{id} - Download translated file

Story 3.6: Complete OpenAPI documentation with examples and error codes
"""

import os
import re
import uuid
import time
import socket
import asyncio
import ipaddress
import logging
from datetime import datetime, timezone
from pathlib import Path
from typing import Optional, Any, Literal, Dict
from urllib.parse import urlparse, unquote

import aiofiles

# Accepted shape of a translation job ID: the literal prefix "tr_" followed by
# one or more ASCII letters, digits, underscores or hyphens.
# NOTE(review): not referenced in the visible part of this file — presumably
# used to validate IDs on the status/download routes; confirm.
JOB_ID_PATTERN = re.compile(r"^tr_[a-zA-Z0-9_\-]+$")

import httpx
from fastapi import (
    APIRouter,
    File,
    Form,
    Header,
    HTTPException,
    Request,
    UploadFile,
    Depends,
)
from fastapi.responses import JSONResponse, FileResponse
from starlette.background import BackgroundTask
from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer
from pydantic import BaseModel, Field, field_validator
from typing_extensions import Annotated

from config import config
from models.subscription import PlanType
from middleware.tier_quota import tier_quota_service
from middleware.validation import FileValidator, ValidationError, LanguageValidator, webhook_validator
from middleware.api_key_auth import get_authenticated_user, get_user_from_api_key
from utils import file_handler

# Import models from schemas (Story 3.6 - DRY principle)
from schemas.translation import (
    TranslateResponseData,
    TranslateResponseMeta,
    TranslateResponse,
    TranslationStatusData,
    TranslationStatusMeta,
    TranslationStatusResponse,
)
from schemas.errors import ErrorResponse
from utils.file_handler import FileHandler
from services.progress_tracker import ProgressTracker
from services.storage_tracker import storage_tracker
from services.glossary_service import get_glossary_terms, validate_glossary_access, build_full_prompt
from services.prompt_service import get_prompt_content, validate_prompt_access
from utils.exceptions import GlossaryNotFoundError, PromptNotFoundError

# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Router for every endpoint in this file; all paths are served under /api/v1.
router_v1 = APIRouter(prefix="/api/v1", tags=["Translation v1"])
# Bearer-token scheme. With auto_error=False FastAPI yields None instead of
# raising when the Authorization header is missing.
security = HTTPBearer(auto_error=False)


# Upper bound on accepted document size, in megabytes (uploads and URL downloads).
MAX_FILE_SIZE_MB = 50
# ZIP local-file-header signature; .xlsx/.docx/.pptx files are ZIP archives.
OFFICE_MAGIC_BYTES = b"PK\x03\x04"
# Lower-case file extensions (with leading dot) this endpoint accepts.
ACCEPTED_EXTENSIONS = {".xlsx", ".docx", ".pptx"}


class TranslateEndpointError(Exception):
    """Structured error raised while handling a translate request.

    Each instance carries a machine-readable ``code``, a human-readable
    ``message`` and an optional ``details`` mapping. ``to_dict`` renders
    them as a JSON-ready payload.
    """

    # Known error codes; each value is also the ``error`` field of the payload.
    INVALID_FORMAT = "INVALID_FORMAT"
    CORRUPTED_FILE = "CORRUPTED_FILE"
    FILE_TOO_LARGE = "FILE_TOO_LARGE"
    QUOTA_EXCEEDED = "QUOTA_EXCEEDED"
    URL_DOWNLOAD_FAILED = "URL_DOWNLOAD_FAILED"
    URL_UNREACHABLE = "URL_UNREACHABLE"
    UNAUTHORIZED = "UNAUTHORIZED"
    MISSING_FILE = "MISSING_FILE"
    PRO_FEATURE_REQUIRED = "PRO_FEATURE_REQUIRED"

    # Default message per code, used when the caller supplies no message.
    ERROR_MESSAGES = {
        INVALID_FORMAT: "Format de fichier non supporte. Formats acceptes : .xlsx, .docx, .pptx",
        CORRUPTED_FILE: "Le fichier semble corrompu ou n'est pas un document Office valide.",
        FILE_TOO_LARGE: f"Le fichier est trop volumineux (max {MAX_FILE_SIZE_MB} Mo).",
        QUOTA_EXCEEDED: "Limite quotidienne atteinte.",
        URL_DOWNLOAD_FAILED: "Impossible de telecharger le fichier depuis l'URL.",
        URL_UNREACHABLE: "URL inaccessible.",
        UNAUTHORIZED: "Authentification requise.",
        MISSING_FILE: "Fichier ou URL requis.",
        PRO_FEATURE_REQUIRED: "Cette fonctionnalite necessite un abonnement Pro.",
    }

    def __init__(
        self, code: str, message: Optional[str] = None, details: Optional[dict] = None
    ):
        """Store the error code, resolve the message and normalise details.

        Args:
            code: Error code; may be one of the class constants or any string.
            message: Explicit message. When empty or omitted, the default for
                ``code`` is used, or "Erreur inconnue" for an unknown code.
            details: Extra context for the client; a falsy value becomes ``{}``.
        """
        if not message:
            message = self.ERROR_MESSAGES.get(code, "Erreur inconnue")
        self.code = code
        self.message = message
        self.details = details if details else {}
        super().__init__(self.message)

    def to_dict(self) -> dict:
        """Return the error as a dict; ``details`` is included only when non-empty."""
        payload = {"error": self.code, "message": self.message}
        if self.details:
            payload["details"] = self.details
        return payload


# NOTE: Response models are now imported from schemas/ module (DRY principle)
# TranslateResponseData, TranslateResponseMeta, TranslateResponse,
# TranslationStatusData, TranslationStatusMeta, TranslationStatusResponse, ErrorResponse

# Shared upload validator configured with this module's size limit and
# accepted-extension set.
file_validator = FileValidator(
    max_size_mb=MAX_FILE_SIZE_MB, allowed_extensions=ACCEPTED_EXTENSIONS
)
# Shared FileHandler instance; this module uses it to generate unique file
# names, save uploads, compute SHA-256 hashes and clean up files.
file_handler_util = FileHandler()


def _tier_for_quota(plan) -> str:
    """Translate a subscription plan into the quota tier name.

    PRO, BUSINESS and ENTERPRISE plans map to ``"pro"``; any other value
    maps to ``"free"``.
    """
    paid_plans = (PlanType.PRO, PlanType.BUSINESS, PlanType.ENTERPRISE)
    return "pro" if plan in paid_plans else "free"


def _next_midnight_utc() -> datetime:
    """Return the upcoming 00:00:00 UTC as a timezone-aware datetime."""
    from datetime import timedelta

    # Step one day ahead, then truncate the time-of-day to midnight.
    one_day_ahead = datetime.now(timezone.utc) + timedelta(days=1)
    return one_day_ahead.replace(hour=0, minute=0, second=0, microsecond=0)


def _seconds_until_midnight_utc() -> int:
    """Return the whole seconds left before the next midnight UTC (never negative)."""
    current = datetime.now(timezone.utc)
    remaining = int((_next_midnight_utc() - current).total_seconds())
    return remaining if remaining > 0 else 0


async def validate_file_content(content: bytes, extension: str) -> None:
    """Check that ``content`` starts with the ZIP signature Office files use.

    Args:
        content: Leading bytes of the file; only the first four are inspected.
        extension: File extension of the document. Currently not consulted.

    Raises:
        TranslateEndpointError: ``CORRUPTED_FILE`` when fewer than four bytes
            are supplied or the first four differ from ``OFFICE_MAGIC_BYTES``.
    """
    too_short = len(content) < 4
    if too_short:
        raise TranslateEndpointError(
            code=TranslateEndpointError.CORRUPTED_FILE,
            message="Le fichier est trop petit pour etre un document Office valide.",
            details={"reason": "File is too small"},
        )

    if content[:4] == OFFICE_MAGIC_BYTES:
        return

    mismatch_details = {
        "accepted_formats": list(ACCEPTED_EXTENSIONS),
        "hint": "Les fichiers .xlsx, .docx, .pptx doivent etre des archives ZIP valides.",
    }
    raise TranslateEndpointError(
        code=TranslateEndpointError.CORRUPTED_FILE,
        message="Le fichier n'est pas un document Office valide ou est corrompu.",
        details=mismatch_details,
    )


def _split_header_params(header_value: str) -> list[str]:
    """Split a header value on ';' while leaving double-quoted text intact."""
    params: list[str] = []
    current: list[str] = []
    in_quotes = False
    for char in header_value:
        if char == '"':
            in_quotes = not in_quotes
        if char == ";" and not in_quotes:
            params.append("".join(current))
            current = []
        else:
            current.append(char)
    params.append("".join(current))
    return params


def _parse_content_disposition(content_disp: str) -> Optional[str]:
    """Extract the file name from a Content-Disposition header value.

    Both the plain ``filename=`` parameter and the extended
    ``filename*=charset'lang'percent-encoded`` form (RFC 5987) are
    understood. When both are present the extended form wins regardless of
    their order, as RFC 6266 section 4.3 recommends.

    Args:
        content_disp: Raw header value, e.g. ``attachment; filename="a.docx"``.

    Returns:
        The decoded file name, or ``None`` when the header carries none.
        The value is returned as sent by the server: it is NOT sanitised and
        may contain path separators.
    """
    plain_filename: Optional[str] = None

    for raw_param in _split_header_params(content_disp):
        name, separator, value = raw_param.partition("=")
        if not separator:
            # Disposition type ("attachment") or a malformed parameter.
            continue
        name = name.strip().lower()
        value = value.strip()

        if name == "filename*":
            ext_value = re.match(r"([^']+)'([^']*)'(.+)", value)
            if ext_value:
                charset, _language, encoded = ext_value.groups()
                try:
                    return unquote(encoded, encoding=charset, errors="replace")
                except LookupError:
                    # Unknown charset label: fall back to UTF-8 decoding.
                    return unquote(encoded)
        elif name == "filename" and plain_filename is None:
            candidate = value.strip('"').strip("'")
            if candidate:
                # Keep looking: a later filename* parameter takes precedence.
                plain_filename = candidate

    return plain_filename


def _is_ssrf_risk(hostname: str) -> bool:
    """Return True if ``hostname`` must not be contacted (SSRF prevention).

    The host is rejected when ANY address it maps to is loopback, private,
    link-local, reserved, multicast or unspecified. IP literals (IPv4 and
    IPv6) are evaluated directly without a DNS query; other names are
    resolved with ``socket.getaddrinfo`` so every returned record, for both
    address families, is inspected rather than only the first IPv4 one.

    The check fails closed: an empty host, a resolution failure or any
    unexpected error yields True so that non-resolvable names cannot be used
    as a bypass.

    Note: the lookup is blocking; async callers may want to run it in a thread.
    """

    def _is_forbidden(addr) -> bool:
        # Judge IPv4-mapped IPv6 addresses (::ffff:a.b.c.d) by the embedded
        # IPv4 address.
        mapped = getattr(addr, "ipv4_mapped", None)
        if mapped is not None:
            addr = mapped
        return (
            addr.is_loopback
            or addr.is_private
            or addr.is_link_local
            or addr.is_reserved
            or addr.is_multicast
            or addr.is_unspecified
        )

    if not hostname:
        return True

    try:
        try:
            candidates = [ipaddress.ip_address(hostname)]
        except ValueError:
            # Not an IP literal: resolve it and collect every address returned.
            infos = socket.getaddrinfo(hostname, None, type=socket.SOCK_STREAM)
            candidates = [
                # sockaddr[0] may carry an IPv6 zone id ("fe80::1%eth0").
                ipaddress.ip_address(info[4][0].split("%", 1)[0])
                for info in infos
            ]
    except Exception:
        # Deliberately broad: this is a security gate and must fail closed.
        return True

    if not candidates:
        return True
    return any(_is_forbidden(addr) for addr in candidates)


async def download_from_url(url: str, timeout: int = 30) -> tuple[Path, str]:
    """Stream a remote document into the upload directory.

    The body is written to disk chunk by chunk so the whole file is never
    held in memory. The download is rejected when the scheme is not
    HTTP(S), when the host (or the host of any redirect hop) is flagged by
    ``_is_ssrf_risk``, when the file exceeds ``MAX_FILE_SIZE_MB``, when the
    extension is not accepted, or when the content does not start with the
    Office/ZIP signature.

    Args:
        url: HTTP or HTTPS URL of the document.
        timeout: Timeout in seconds handed to the HTTP client.

    Returns:
        ``(temp_path, filename)``: where the file was stored, and the file
        name taken from Content-Disposition or the URL path, reduced to its
        final path component.

    Raises:
        TranslateEndpointError: with code ``URL_UNREACHABLE``,
            ``URL_DOWNLOAD_FAILED``, ``FILE_TOO_LARGE``, ``INVALID_FORMAT``
            or ``CORRUPTED_FILE``. Any partially written file is removed
            before the error propagates.
    """
    max_size_bytes = MAX_FILE_SIZE_MB * 1024 * 1024
    temp_path: Optional[Path] = None

    def _ssrf_blocked() -> TranslateEndpointError:
        return TranslateEndpointError(
            code=TranslateEndpointError.URL_UNREACHABLE,
            message="L'URL pointe vers une adresse interdite (adresse privee ou interne).",
            details={"reason": "ssrf_blocked"},
        )

    def _discard_partial_file() -> None:
        # Reads temp_path at call time, so it sees the path once assigned.
        if temp_path is not None and temp_path.exists():
            temp_path.unlink()

    parsed_url = urlparse(url)
    if parsed_url.scheme not in ("http", "https"):
        raise TranslateEndpointError(
            code=TranslateEndpointError.URL_UNREACHABLE,
            message="Seules les URLs HTTP/HTTPS sont acceptees.",
            details={"scheme": parsed_url.scheme or "none"},
        )

    hostname = parsed_url.hostname or ""
    # _is_ssrf_risk performs a blocking DNS lookup; keep it off the event loop.
    if not hostname or await asyncio.to_thread(_is_ssrf_risk, hostname):
        raise _ssrf_blocked()

    async def _guard_outgoing_request(request: "httpx.Request") -> None:
        # Runs before every request the client sends, including each redirect
        # hop, so a public URL cannot redirect the download to an internal host.
        target_host = request.url.host
        if target_host == hostname:
            return  # already vetted above
        if not target_host or await asyncio.to_thread(_is_ssrf_risk, target_host):
            raise _ssrf_blocked()

    try:
        async with httpx.AsyncClient(
            timeout=timeout,
            follow_redirects=True,
            max_redirects=10,
            event_hooks={"request": [_guard_outgoing_request]},
        ) as client:
            async with client.stream("GET", url) as response:
                if response.status_code != 200:
                    raise TranslateEndpointError(
                        code=TranslateEndpointError.URL_UNREACHABLE,
                        message=f"URL inaccessible (HTTP {response.status_code})",
                        details={"status_code": response.status_code, "url": url[:100]},
                    )

                # Early rejection based on the declared size; the streamed
                # byte count below is the authoritative check.
                content_length = response.headers.get("content-length")
                if content_length:
                    try:
                        declared_size: Optional[int] = int(content_length)
                    except ValueError:
                        declared_size = None  # malformed header, ignore it
                    if declared_size is not None and declared_size > max_size_bytes:
                        raise TranslateEndpointError(
                            code=TranslateEndpointError.FILE_TOO_LARGE,
                            message=f"Le fichier est trop volumineux ({round(declared_size / (1024 * 1024), 2)} Mo, max {MAX_FILE_SIZE_MB} Mo).",
                            details={
                                "size_mb": round(declared_size / (1024 * 1024), 2),
                                "max_mb": MAX_FILE_SIZE_MB,
                            },
                        )

                filename = None
                content_disp = response.headers.get("content-disposition", "")
                if content_disp:
                    filename = _parse_content_disposition(content_disp)

                if not filename:
                    filename = unquote(Path(parsed_url.path).name) or "downloaded_file"

                # The name comes from the remote server or a percent-decoded
                # URL: keep only its last path component so it cannot point
                # outside the upload directory.
                filename = Path(filename.replace("\\", "/")).name or "downloaded_file"

                extension = Path(filename).suffix.lower()
                if extension not in ACCEPTED_EXTENSIONS:
                    raise TranslateEndpointError(
                        code=TranslateEndpointError.INVALID_FORMAT,
                        details={
                            "detected_extension": extension or "none",
                            "accepted_formats": list(ACCEPTED_EXTENSIONS),
                        },
                    )

                unique_id = str(uuid.uuid4())[:8]
                safe_filename = f"{unique_id}_{filename}"
                temp_path = config.UPLOAD_DIR / safe_filename

                temp_path.parent.mkdir(parents=True, exist_ok=True)

                downloaded_bytes = 0

                async with aiofiles.open(temp_path, "wb") as f:
                    async for chunk in response.aiter_bytes(chunk_size=65536):
                        downloaded_bytes += len(chunk)

                        if downloaded_bytes > max_size_bytes:
                            # The context manager closes the file; the handler
                            # below then removes the partial download.
                            raise TranslateEndpointError(
                                code=TranslateEndpointError.FILE_TOO_LARGE,
                                details={
                                    "size_mb": round(
                                        downloaded_bytes / (1024 * 1024), 2
                                    ),
                                    "max_mb": MAX_FILE_SIZE_MB,
                                },
                            )

                        await f.write(chunk)

                # Verify the magic bytes of what was actually written.
                async with aiofiles.open(temp_path, "rb") as f:
                    header = await f.read(4)
                await validate_file_content(header, extension)

                return temp_path, filename

    except httpx.TimeoutException as exc:
        _discard_partial_file()
        raise TranslateEndpointError(
            code=TranslateEndpointError.URL_UNREACHABLE,
            message="Timeout lors du telechargement.",
            details={"timeout_seconds": timeout},
        ) from exc
    except httpx.RequestError as exc:
        _discard_partial_file()
        raise TranslateEndpointError(
            code=TranslateEndpointError.URL_DOWNLOAD_FAILED,
            message=f"Erreur de telechargement: {str(exc)}",
            details={"error": str(exc)},
        ) from exc
    except TranslateEndpointError:
        _discard_partial_file()
        raise
    except Exception as exc:
        _discard_partial_file()
        raise TranslateEndpointError(
            code=TranslateEndpointError.URL_DOWNLOAD_FAILED,
            message=f"Erreur inattendue lors du telechargement: {str(exc)}",
            details={"error": str(exc), "error_type": type(exc).__name__},
        ) from exc


# In-memory job registry: job ID -> mutable dict holding that job's state.
_translation_jobs: dict[str, dict] = {}
# Completed/failed jobs older than this many seconds are removed by
# _cleanup_old_jobs.
_JOB_TTL_SECONDS = 3600
# time.time() value of the most recent cleanup pass (0.0 until the first one).
_last_cleanup_ts: float = 0.0
_CLEANUP_INTERVAL_SECONDS = 300  # run cleanup every 5 minutes at most


def _cleanup_old_jobs() -> None:
    """Drop finished jobs whose age exceeds the TTL from the in-memory registry.

    Only jobs with status "completed" or "failed" that carry a
    ``completed_at`` or ``failed_at`` timestamp are considered. The pass is
    throttled: it does nothing if the previous pass ran less than
    ``_CLEANUP_INTERVAL_SECONDS`` ago, so the registry is not scanned on
    every request.
    """
    global _last_cleanup_ts

    now = time.time()
    ran_recently = now - _last_cleanup_ts < _CLEANUP_INTERVAL_SECONDS
    if ran_recently:
        return
    _last_cleanup_ts = now

    # Collect first, delete afterwards: the dict must not change while iterated.
    stale_ids = []
    for candidate_id, record in _translation_jobs.items():
        if record.get("status") not in ("completed", "failed"):
            continue
        finished_at = record.get("completed_at") or record.get("failed_at")
        if finished_at and _job_age_seconds(finished_at) > _JOB_TTL_SECONDS:
            stale_ids.append(candidate_id)

    for job_id in stale_ids:
        del _translation_jobs[job_id]
        logger.debug(f"Cleaned up expired job: {job_id}")


def _job_age_seconds(timestamp_str: str) -> float:
    """Compute how many seconds have elapsed since an ISO 8601 timestamp.

    Every "Z" in the input is rewritten to "+00:00" before parsing. Any
    failure while parsing or converting the value yields ``0.0``.
    """
    try:
        normalized = timestamp_str.replace("Z", "+00:00")
        moment = datetime.fromisoformat(normalized)
        elapsed = time.time() - moment.timestamp()
    except Exception:
        return 0.0
    return elapsed


# Strong references to in-flight background translation tasks. The event loop
# only keeps weak references to tasks, so an unreferenced task may be
# garbage-collected before it finishes.
_background_tasks: set[asyncio.Task] = set()


@router_v1.post(
    "/translate",
    response_model=TranslateResponse,
    responses={
        202: {"description": "Translation job accepted", "model": TranslateResponse},
        400: {"description": "Invalid request", "model": ErrorResponse},
        401: {"description": "Unauthorized", "model": ErrorResponse},
        403: {"description": "Pro feature required", "model": ErrorResponse},
        413: {"description": "File too large", "model": ErrorResponse},
        429: {"description": "Quota exceeded", "model": ErrorResponse},
    },
    status_code=202,
)
async def translate_document_v1(
    request: Request,
    file: Optional[UploadFile] = File(
        None, description="Document file (.xlsx, .docx, .pptx)"
    ),
    file_url: Optional[str] = Form(None, description="URL to download file (Pro only)"),
    source_lang: str = Form(default="auto", description="Source language code"),
    target_lang: str = Form(..., description="Target language code"),
    mode: Literal["classic", "llm"] = Form(
        default="classic", description="Translation mode"
    ),
    provider: Optional[str] = Form(default=None, description="Provider override"),
    webhook_url: Optional[str] = Form(None, description="Webhook URL for notification"),
    glossary_id: Optional[str] = Form(None, description="Glossary ID (Pro only)"),
    custom_prompt: Optional[str] = Form(None, description="Custom prompt (Pro only)"),
    prompt_id: Optional[str] = Form(None, description="Prompt ID from saved prompts (Pro only)"),
    current_user: Optional[Any] = Depends(get_authenticated_user),
):
    """
    Submit a document for translation.

    **Authentication:**
    - JWT Bearer token in Authorization header (web users)
    - X-API-Key header (automation users)

    **File Input:**
    - `file`: Upload file directly (multipart/form-data)
    - `file_url`: URL to download file from (Pro feature)

    **Parameters:**
    - `source_lang`: Source language code (default: auto-detect)
    - `target_lang`: Target language code (required)
    - `mode`: Translation mode - "classic" or "llm" (default: classic)
    - `provider`: Provider override (google, deepl, ollama, openai, openrouter)
    - `webhook_url`: URL to receive POST notification when complete
    - `glossary_id`: Glossary ID for LLM translation (Pro only)
    - `custom_prompt`: Custom system prompt (Pro only)
    - `prompt_id`: Saved prompt ID to use (Pro only). Takes priority over custom_prompt.

    **Webhook Notification:**
    If `webhook_url` is provided, a POST request will be sent when translation completes.

    **Webhook Payload (Success):**
    ```json
    {
        "event_id": "evt_abc123def456xyz",
        "translation_id": "tr_abc123def456",
        "status": "completed",
        "timestamp": "2024-01-15T10:30:00Z",
        "file_name": "report.xlsx",
        "source_lang": "en",
        "target_lang": "fr",
        "error_message": null
    }
    ```

    **Webhook Payload (Failure):**
    ```json
    {
        "event_id": "evt_abc123def456xyz",
        "translation_id": "tr_abc123def456",
        "status": "failed",
        "timestamp": "2024-01-15T10:30:00Z",
        "file_name": "report.xlsx",
        "source_lang": "en",
        "target_lang": "fr",
        "error_message": "Provider unavailable: connection timeout"
    }
    ```

    **Webhook Fields:**
    - `event_id`: Unique identifier for webhook deduplication (format: evt_xxxxxxxxxxxxxxxx)
    - `translation_id`: The translation job ID
    - `status`: "completed" or "failed"
    - `timestamp`: ISO 8601 UTC timestamp
    - `file_name`: Original file name
    - `source_lang`: Source language code
    - `target_lang`: Target language code
    - `error_message`: Error description (null if successful)

    **Webhook Behavior:**
    - Timeout: 10 seconds
    - Fire & Forget: Translation succeeds even if webhook fails
    - Retries: None (implement retry logic on your server if needed)

    **Returns:**
    - HTTP 202 with job ID and status "processing"
    """
    request_id = getattr(request.state, "request_id", str(uuid.uuid4())[:8])

    # Declared outside the try block so the finally clause can remove a stored
    # input file when the request fails before the job is handed off.
    input_path: Optional[Path] = None
    job_scheduled = False

    try:
        if not file and not file_url:
            raise TranslateEndpointError(
                code=TranslateEndpointError.MISSING_FILE,
                details={"hint": "Provide either 'file' or 'file_url' parameter"},
            )

        tier = "free"
        user_id = None
        if current_user:
            tier = _tier_for_quota(current_user.plan)
            user_id = current_user.id

        if file_url:
            if tier == "free":
                raise TranslateEndpointError(
                    code=TranslateEndpointError.PRO_FEATURE_REQUIRED,
                    message="L'ingestion par URL est reservee aux utilisateurs Pro.",
                    details={"feature": "file_url", "tier": tier},
                )

        # Story 3.12: Include prompt_id in Pro feature check
        if (glossary_id or custom_prompt or prompt_id) and tier == "free":
            raise TranslateEndpointError(
                code=TranslateEndpointError.PRO_FEATURE_REQUIRED,
                message="Les glossaires et prompts personnalises sont reserves aux utilisateurs Pro.",
                details={"feature": "glossary_id, custom_prompt, or prompt_id", "tier": tier},
            )

        # Story 3.10: Validate glossary access before creating the job
        if glossary_id and user_id:
            try:
                validate_glossary_access(glossary_id, user_id)
            except GlossaryNotFoundError as e:
                raise TranslateEndpointError(
                    code="GLOSSARY_NOT_FOUND",
                    message=str(e),
                    details={"glossary_id": glossary_id}
                ) from e

        # Story 3.12: Validate prompt access before creating the job
        if prompt_id and user_id:
            try:
                validate_prompt_access(prompt_id, user_id)
            except PromptNotFoundError as e:
                raise TranslateEndpointError(
                    code="PROMPT_NOT_FOUND",
                    message=str(e),
                    details={**e.details, "prompt_id": prompt_id} if e.details else {"prompt_id": prompt_id}
                ) from e

        if webhook_url:
            is_valid, error_msg, error_details = webhook_validator.validate(webhook_url)
            if not is_valid:
                raise TranslateEndpointError(
                    code="INVALID_WEBHOOK_URL",
                    message=error_msg,
                    details=error_details,
                )

        # Quota is enforced only for authenticated callers; -1 is reported as
        # the remaining count otherwise.
        if current_user:
            quota = await tier_quota_service.check_quota(user_id, tier)
            if not quota.allowed:
                retry_after = _seconds_until_midnight_utc()
                raise HTTPException(
                    status_code=429,
                    detail={
                        "error": "QUOTA_EXCEEDED",
                        "message": f"Limite quotidienne atteinte ({quota.current_usage}/{quota.limit} fichiers). Reessayez apres minuit UTC.",
                        "details": {
                            "current_usage": quota.current_usage,
                            "limit": quota.limit,
                            "tier": tier,
                            "reset_at": quota.reset_at_utc.isoformat(),
                        },
                    },
                    headers={"Retry-After": str(retry_after)},
                )
            rate_limit_remaining = quota.remaining
        else:
            rate_limit_remaining = -1

        try:
            LanguageValidator.validate(target_lang)
        except ValidationError as e:
            raise TranslateEndpointError(
                code="INVALID_FORMAT",
                message=f"Code langue cible invalide: {target_lang}",
                details={"field": "target_lang"},
            ) from e

        if source_lang and source_lang != "auto":
            try:
                LanguageValidator.validate(source_lang)
            except ValidationError as e:
                raise TranslateEndpointError(
                    code="INVALID_FORMAT",
                    message=f"Code langue source invalide: {source_lang}",
                    details={"field": "source_lang"},
                ) from e

        original_filename = None
        file_extension = None
        file_size = 0
        file_hash = None

        # A direct upload takes precedence when both file and file_url are sent.
        if file:
            validation_result = await file_validator.validate_async(file)
            if not validation_result.is_valid:
                error_msg = "; ".join(validation_result.errors)

                # Use structured error codes from validator
                if validation_result.error_code == "file_too_large":
                    raise TranslateEndpointError(
                        code=TranslateEndpointError.FILE_TOO_LARGE,
                        message=error_msg,
                        details={
                            "errors": validation_result.errors,
                            "max_size_mb": MAX_FILE_SIZE_MB,
                        },
                    )
                elif validation_result.error_code == "invalid_file_content":
                    raise TranslateEndpointError(
                        code=TranslateEndpointError.CORRUPTED_FILE,
                        message=error_msg,
                        details={"errors": validation_result.errors},
                    )
                else:
                    raise TranslateEndpointError(
                        code=TranslateEndpointError.INVALID_FORMAT,
                        message=error_msg,
                        details={"errors": validation_result.errors},
                    )

            original_filename = file.filename
            file_extension = validation_result.data.get("extension")
            file_size = validation_result.data.get("size_bytes", 0)

            input_filename = file_handler_util.generate_unique_filename(
                file.filename, "input"
            )
            input_path = config.UPLOAD_DIR / input_filename
            await file_handler_util.save_upload_file(file, input_path)

            file_hash = file_handler_util.calculate_sha256(input_path)
            if file_hash is None:
                # The stored file is removed by the finally clause.
                raise TranslateEndpointError(
                    code=TranslateEndpointError.CORRUPTED_FILE,
                    message="Impossible de calculer le hash du fichier. Fichier potentiellement corrompu.",
                    details={"error": "sha256_calculation_failed"},
                )

        elif file_url:
            input_path, original_filename = await download_from_url(file_url)
            file_extension = Path(original_filename).suffix.lower()
            file_size = input_path.stat().st_size
            file_hash = file_handler_util.calculate_sha256(input_path)
            if file_hash is None:
                # The downloaded file is removed by the finally clause.
                raise TranslateEndpointError(
                    code=TranslateEndpointError.CORRUPTED_FILE,
                    message="Impossible de calculer le hash du fichier telecharge.",
                    details={"error": "sha256_calculation_failed"},
                )

        job_id = f"tr_{uuid.uuid4().hex[:12]}"

        # Track file metadata in Redis with TTL
        await storage_tracker.track_file(
            job_id=job_id,
            metadata={
                "original_filename": original_filename,
                "file_size": file_size,
                "file_hash": file_hash,
                "input_path": str(input_path),
                "user_id": str(user_id) if user_id else None,
                "timestamp": datetime.now(timezone.utc).isoformat(),
            },
        )

        _cleanup_old_jobs()

        # NOTE(review): "provider" stores the mode name ("classic"/"llm") when
        # no override is given, while the job itself runs with provider_to_use
        # below — confirm this difference is intended.
        _translation_jobs[job_id] = {
            "id": job_id,
            "status": "queued",
            "progress_percent": 0,
            "current_step": "Initializing",
            "total_items": 0,
            "processed_items": 0,
            "error_message": None,
            "file_name": original_filename,
            "source_lang": source_lang,
            "target_lang": target_lang,
            "created_at": datetime.now(timezone.utc).isoformat(),
            "user_id": user_id,
            "input_path": str(input_path),
            "file_extension": file_extension,
            "provider": provider or mode,
            "webhook_url": webhook_url,
            "custom_prompt": custom_prompt,
            "glossary_id": glossary_id,
            "prompt_id": prompt_id,  # Story 3.12: Store prompt_id
        }

        provider_to_use = provider or ("openrouter" if mode == "llm" else "google")

        # Keep a strong reference until the task finishes so it cannot be
        # garbage-collected mid-execution; the callback drops it afterwards.
        task = asyncio.create_task(
            _run_translation_job(
                job_id=job_id,
                input_path=input_path,
                file_extension=file_extension,
                target_lang=target_lang,
                source_lang=source_lang,
                provider=provider_to_use,
                user_id=user_id,
                custom_prompt=custom_prompt,
                glossary_id=glossary_id,
                prompt_id=prompt_id,  # Story 3.12: Pass prompt_id
                webhook_url=webhook_url,
            )
        )
        _background_tasks.add(task)
        task.add_done_callback(_background_tasks.discard)
        # From here on the background job owns the input file.
        job_scheduled = True

        logger.info(
            f"[{request_id}] Created translation job {job_id} for {original_filename}"
        )

        return JSONResponse(
            status_code=202,
            content={
                "data": {
                    "id": job_id,
                    "status": "processing",
                    "file_name": original_filename,
                    "source_lang": source_lang,
                    "target_lang": target_lang,
                },
                "meta": {
                    "rate_limit_remaining": rate_limit_remaining,
                    "estimated_time_seconds": 15,
                },
            },
        )

    except TranslateEndpointError as e:
        # Map structured error codes to HTTP statuses; anything unlisted is 400.
        status_code = 400
        if e.code == TranslateEndpointError.FILE_TOO_LARGE:
            status_code = 413
        elif e.code == TranslateEndpointError.UNAUTHORIZED:
            status_code = 401
        elif e.code == TranslateEndpointError.PRO_FEATURE_REQUIRED:
            status_code = 403

        return JSONResponse(
            status_code=status_code,
            content=e.to_dict(),
        )
    except HTTPException:
        raise
    except Exception as e:
        # logger.exception records the traceback along with the message.
        logger.exception(f"[{request_id}] Unexpected error: {e}")
        # NOTE(review): unexpected server-side failures are reported as 400;
        # a 5xx status may be more accurate — kept as-is for compatibility.
        return JSONResponse(
            status_code=400,
            content={
                "error": "PROCESSING_ERROR",
                "message": "Erreur lors du traitement de la requete.",
                "details": {"error_type": type(e).__name__},
            },
        )
    finally:
        # Best-effort removal of a stored input file when the request failed
        # before the background job took ownership of it.
        if input_path is not None and not job_scheduled:
            try:
                if input_path.exists():
                    file_handler_util.cleanup_file(input_path)
            except Exception:
                logger.warning(
                    f"[{request_id}] Could not remove orphaned input file {input_path}"
                )


async def _run_translation_job(
    job_id: str,
    input_path: Path,
    file_extension: str,
    target_lang: str,
    source_lang: str,
    provider: str,
    user_id: Optional[str],
    custom_prompt: Optional[str],
    glossary_id: Optional[str],
    prompt_id: Optional[str] = None,  # Story 3.12: Add prompt_id parameter
    webhook_url: Optional[str] = None,
) -> None:
    """
    Run translation job in background with progress tracking.

    The job record is looked up in ``_translation_jobs``; if it is missing the
    function returns immediately (and no webhook is sent). Otherwise the
    function resolves the prompt/glossary, builds a translation provider from
    the admin settings (falling back to environment variables), runs the
    matching translator in a worker thread and records the outcome through
    ``ProgressTracker``.

    Failures are never propagated to the caller: any exception raised inside
    the main ``try`` is passed to ``tracker.set_error`` and logged.

    When ``webhook_url`` is set, a JSON notification is POSTed from the
    ``finally`` clause, so it is sent for success, failure, and the early
    returns caused by glossary/prompt lookup errors alike. Webhook delivery
    problems are only logged.

    Args:
        job_id: Unique job identifier
        input_path: Path to input file
        file_extension: File extension (.xlsx, .docx, .pptx)
        target_lang: Target language code
        source_lang: Source language code
        provider: Translation provider name
        user_id: Optional user ID for quota tracking
        custom_prompt: Optional custom prompt text (Pro only)
        glossary_id: Optional glossary ID for LLM translation (Pro only)
        prompt_id: Optional saved prompt ID - takes priority over custom_prompt (Pro only, Story 3.12)
        webhook_url: Optional webhook URL for completion notification
    """
    job = _translation_jobs.get(job_id)
    if not job:
        # Unknown job: nothing to track and nothing to notify.
        return

    tracker = ProgressTracker(job_id, _translation_jobs)

    try:
        job["status"] = "processing"
        tracker.update(10, "Validating file")

        # Output name is derived from the input name ("input_" -> "translated_")
        # and then passed through the filename helper.
        output_filename = file_handler_util.generate_unique_filename(
            input_path.name.replace("input_", "translated_"), "translated"
        )
        output_path = config.OUTPUT_DIR / output_filename

        # Function-scope imports (presumably to avoid import cycles at module
        # load time - confirm before hoisting them to the top of the file).
        from translators import excel_translator, word_translator, pptx_translator
        from services.translation_service import (
            OpenRouterTranslationProvider,
            OllamaTranslationProvider,
            translation_service,
        )
        from routes.admin_routes import load_settings as _load_admin_settings

        # Settings are re-read on every job.
        _admin_cfg = _load_admin_settings()

        # Helper: prefer value from admin settings JSON, fall back to env var
        def _cfg(admin_val: str | None, env_var: str, default: str = "") -> str:
            return (admin_val or "").strip() or os.getenv(env_var, default)

        api_key = _cfg(_admin_cfg.openrouter.api_key, "OPENROUTER_API_KEY")
        model = _cfg(_admin_cfg.openrouter.model, "OPENROUTER_MODEL", "deepseek/deepseek-v3.2")

        # Story 3.10: Retrieve and format glossary terms for LLM prompt
        # Note: a glossary_id supplied without a user_id is ignored here.
        glossary_terms = None
        if glossary_id and user_id:
            try:
                glossary_terms = get_glossary_terms(glossary_id, user_id)
                logger.info(f"Job {job_id}: Loaded {len(glossary_terms)} glossary terms")
            except GlossaryNotFoundError as e:
                # Abort the job; the finally clause below still sends the webhook.
                tracker.set_error(str(e))
                logger.error(f"Job {job_id}: Glossary error - {e}")
                return

        # Story 3.12: Retrieve prompt content if prompt_id provided
        # Priority: prompt_id > custom_prompt
        # Note: when prompt_id is set but user_id is not, the first branch is
        # skipped and custom_prompt (if any) is used instead.
        effective_prompt = None
        if prompt_id and user_id:
            try:
                effective_prompt = get_prompt_content(prompt_id, user_id)
                logger.info(f"Job {job_id}: Loaded prompt content from {prompt_id}")
            except PromptNotFoundError as e:
                # Abort the job; the finally clause below still sends the webhook.
                tracker.set_error(str(e))
                logger.error(f"Job {job_id}: Prompt error - {e}")
                return
        elif custom_prompt:
            # Use custom_prompt if no prompt_id
            effective_prompt = custom_prompt

        # Build the full prompt combining effective prompt and glossary
        full_prompt = build_full_prompt(effective_prompt, glossary_terms)

        # translation_provider stays None when the provider name matches no
        # branch below or when the required API key resolves to an empty string.
        # NOTE(review): what the translators do with a None provider is not
        # visible here - confirm that a silent fallback is the intended result.
        translation_provider = None
        _p = provider.lower()

        if _p in ("openrouter", "llm") and api_key:
            translation_provider = OpenRouterTranslationProvider(
                api_key, model, full_prompt
            )
        elif _p == "openrouter_premium":
            # The env fallback for the premium key is the same OPENROUTER_API_KEY
            # variable used for the main OpenRouter key.
            premium_key = _cfg(_admin_cfg.openrouter_premium.api_key, "OPENROUTER_API_KEY")
            premium_model = _cfg(_admin_cfg.openrouter_premium.model, "OPENROUTER_PREMIUM_MODEL", "anthropic/claude-3.5-haiku")
            if not premium_key:
                premium_key = api_key  # fall back to main openrouter key
            if premium_key:
                translation_provider = OpenRouterTranslationProvider(
                    premium_key, premium_model, full_prompt
                )
        elif _p == "openai":
            from services.translation_service import OpenAITranslationProvider
            openai_key = _cfg(_admin_cfg.openai.api_key, "OPENAI_API_KEY")
            openai_model = _cfg(_admin_cfg.openai.model, "OPENAI_MODEL", "gpt-4o-mini")
            if openai_key:
                translation_provider = OpenAITranslationProvider(
                    api_key=openai_key,
                    model=openai_model,
                    system_prompt=full_prompt,
                )
        elif _p == "deepl":
            deepl_key = _cfg(_admin_cfg.deepl.api_key, "DEEPL_API_KEY")
            if deepl_key:
                from services.translation_service import DeepLTranslationProvider
                translation_provider = DeepLTranslationProvider(deepl_key, full_prompt)
        elif _p == "zai":
            # "zai" reuses the OpenAI provider class with a custom base_url.
            from services.translation_service import OpenAITranslationProvider as _OAI
            zai_key = _cfg(_admin_cfg.zai.api_key, "ZAI_API_KEY")
            zai_model = _cfg(_admin_cfg.zai.model, "ZAI_MODEL", "grok-2-1212")
            zai_url = _cfg(_admin_cfg.zai.base_url, "ZAI_BASE_URL", "https://api.x.ai/v1")
            if zai_key:
                translation_provider = _OAI(
                    api_key=zai_key,
                    model=zai_model,
                    base_url=zai_url,
                    system_prompt=full_prompt,
                )
        elif _p == "ollama":
            # Ollama needs no API key, so the provider is always created.
            ollama_url = _cfg(_admin_cfg.ollama.base_url, "OLLAMA_BASE_URL", "http://localhost:11434")
            ollama_model = _cfg(_admin_cfg.ollama.model, "OLLAMA_MODEL", "llama3")
            # NOTE(review): ollama_model is passed twice positionally - confirm
            # against the OllamaTranslationProvider signature that this is intended.
            translation_provider = OllamaTranslationProvider(
                ollama_url,
                ollama_model,
                ollama_model,
                full_prompt,
            )

        tracker.update(20, "Preparing translation")

        def progress_callback(progress_info: dict) -> None:
            """Callback for translator progress updates with standardized key handling.

            Accepts several key spellings for the current position
            ("current", "slide", "sheet", "paragraph", "element") and for the
            total ("total", "total_slides", "total_sheets", "total_paragraphs"),
            defaulting each to 1 when none is present.
            """
            current = progress_info.get(
                "current",
                progress_info.get(
                    "slide",
                    progress_info.get(
                        "sheet",
                        progress_info.get("paragraph", progress_info.get("element", 1)),
                    ),
                ),
            )
            total = progress_info.get(
                "total",
                progress_info.get(
                    "total_slides",
                    progress_info.get(
                        "total_sheets", progress_info.get("total_paragraphs", 1)
                    ),
                ),
            )

            # Step label depends on the document type being translated.
            item_name = "Translating"
            if file_extension == ".pptx":
                item_name = "Translating slide"
            elif file_extension == ".xlsx":
                item_name = "Translating sheet"
            elif file_extension == ".docx":
                item_name = "Processing paragraph"

            # max_percent=95: the translator reaches current==total when its last
            # chunk finishes, but the file is not yet written.  set_completed()
            # pushes to 100% once the file is saved.
            tracker.update_item(current, total, item_name, max_percent=95)

        # Run synchronous translators in a thread pool to avoid blocking the event loop.
        # Without this, status polling requests from the frontend would time out during
        # translation, causing the "Connection lost" error and frozen progress bar.
        # Always call set_provider (even with None) to reset any previously-set
        # provider on the singleton translator instances between jobs.
        # NOTE(review): because the translator instances are shared, two jobs of
        # the same file type running concurrently could overwrite each other's
        # provider between set_provider() and the worker thread using it - confirm
        # whether jobs can overlap.
        if file_extension == ".xlsx":
            excel_translator.set_provider(translation_provider)
            await asyncio.to_thread(
                excel_translator.translate_file,
                input_path,
                output_path,
                target_lang,
                source_lang,
                progress_callback=progress_callback,
            )
        elif file_extension == ".docx":
            word_translator.set_provider(translation_provider)
            await asyncio.to_thread(
                word_translator.translate_file,
                input_path,
                output_path,
                target_lang,
                source_lang,
                progress_callback=progress_callback,
            )
        elif file_extension == ".pptx":
            pptx_translator.set_provider(translation_provider)
            await asyncio.to_thread(
                pptx_translator.translate_file,
                input_path,
                output_path,
                target_lang,
                source_lang,
                progress_callback=progress_callback,
            )
        else:
            # Handled by the generic except below, which marks the job as errored.
            raise ValueError(f"Unsupported file type: {file_extension}")

        # Quota is only counted once the translator returned without raising.
        # This runs before set_completed(), so an exception here is recorded on
        # the job via the except clause below.
        if user_id:
            await tier_quota_service.increment_on_success(user_id)

        tracker.set_completed(str(output_path))
        logger.info(f"Job {job_id}: Completed successfully")

    except Exception as e:
        # Background task boundary: record the failure on the job, never re-raise.
        tracker.set_error(str(e))
        logger.error(f"Job {job_id}: Failed - {e}")

    finally:
        # Best-effort completion notification; the payload reflects whatever the
        # job record contains at this point.
        if webhook_url:
            try:
                # Generate unique event_id for webhook deduplication
                event_id = f"evt_{uuid.uuid4().hex[:16]}"

                async with httpx.AsyncClient(timeout=10) as client:
                    response = await client.post(
                        webhook_url,
                        json={
                            "event_id": event_id,
                            "translation_id": job_id,
                            "status": job["status"],
                            "timestamp": datetime.now(timezone.utc).isoformat(),
                            "file_name": job.get("file_name"),
                            "source_lang": job.get("source_lang"),
                            "target_lang": job.get("target_lang"),
                            "error_message": job.get("error_message"),
                        },
                    )

                    # Log successful webhook delivery
                    if response.is_success:
                        logger.info(
                            f"Job {job_id}: Webhook notification sent successfully to {webhook_url} "
                            f"(status={response.status_code}, event_id={event_id})"
                        )
                    else:
                        # Log non-2xx response with body for debugging
                        # (only the first 500 bytes of the body are logged).
                        try:
                            response_body = await response.aread()
                            body_preview = response_body[:500].decode('utf-8', errors='replace')
                        except Exception:
                            body_preview = "<unable to read body>"
                        logger.warning(
                            f"Job {job_id}: Webhook returned non-success status "
                            f"(status={response.status_code}, url={webhook_url}, event_id={event_id}, "
                            f"response_body={body_preview})"
                        )

            # Webhook failures are logged only; there is no retry in this function.
            except httpx.TimeoutException:
                logger.warning(
                    f"Job {job_id}: Webhook notification timed out after 10s (url={webhook_url}, event_id={event_id})"
                )
            except httpx.RequestError as e:
                logger.warning(
                    f"Job {job_id}: Webhook notification failed - {type(e).__name__}: {e} "
                    f"(url={webhook_url}, event_id={event_id})"
                )
            except Exception as e:
                logger.warning(
                    f"Job {job_id}: Unexpected webhook error - {type(e).__name__}: {e} (event_id={event_id})"
                )


@router_v1.get(
    "/translations/{job_id}",
    response_model=TranslationStatusResponse,
    responses={
        200: {"description": "Translation status", "model": TranslationStatusResponse},
        403: {"description": "Job belongs to another user", "model": ErrorResponse},
        404: {"description": "Job not found", "model": ErrorResponse},
    },
)
async def get_translation_status(
    job_id: str,
    current_user: Optional[Any] = Depends(get_authenticated_user),
):
    """
    Get translation job status with real-time progress.

    Returns current status and progress of a translation job. When both the
    caller and the job owner are known and they differ, the request is
    rejected with 403 (`ACCESS_DENIED`), matching the download endpoint.

    **Status Values:**
    - `queued`: Job is waiting to be processed
    - `processing`: Job is actively being translated
    - `completed`: Translation finished successfully
    - `failed`: Translation encountered an error

    **Progress Fields:**
    - `progress_percent`: 0-100 indicating completion percentage
    - `current_step`: Human-readable description of current operation
    - `error_message`: Present only when status is "failed"

    **Meta:**
    - `estimated_remaining_seconds`: linear extrapolation from elapsed time and
      progress while the job is processing; `null` when it cannot be computed

    **Example Response (Processing):**
    ```json
    {
      "data": {
        "id": "tr_abc123",
        "status": "processing",
        "progress_percent": 45,
        "current_step": "Translating slide 5/10",
        "file_name": "presentation.pptx",
        "source_lang": "en",
        "target_lang": "fr",
        "created_at": "2024-01-15T10:30:00Z"
      },
      "meta": {}
    }
    ```
    """
    job = _translation_jobs.get(job_id)

    if not job:
        return JSONResponse(
            status_code=404,
            content={
                "error": "NOT_FOUND",
                "message": "Job de traduction non trouve.",
                "details": {"job_id": job_id},
            },
        )

    # Same ownership rule as the download endpoint: without it any caller who
    # knows a job id could read another user's file name and error details.
    # The check only applies when both the caller and the job owner are known.
    job_user_id = job.get("user_id")
    if current_user and job_user_id and str(job_user_id) != str(current_user.id):
        return JSONResponse(
            status_code=403,
            content={
                "error": "ACCESS_DENIED",
                "message": "Vous n'avez pas acces a cette traduction.",
                "details": {"job_id": job_id},
            },
        )

    status = job["status"]
    # A stored None is treated like "no progress yet" instead of breaking the
    # numeric comparison below.
    progress_percent = job.get("progress_percent") or 0

    response_data = {
        "id": job["id"],
        "status": status,
        "progress_percent": progress_percent,
        "current_step": job.get("current_step", "Unknown"),
        "file_name": job.get("file_name"),
        "source_lang": job.get("source_lang"),
        "target_lang": job.get("target_lang"),
        "created_at": job.get("created_at"),
    }

    estimated_remaining = None
    created_at_str = job.get("created_at")
    if status == "processing" and created_at_str:
        try:
            if progress_percent > 0:
                # fromisoformat() on older Python versions rejects a trailing
                # "Z", so normalise it to an explicit UTC offset first.
                created_at = datetime.fromisoformat(
                    created_at_str.replace("Z", "+00:00")
                )
                elapsed_seconds = (
                    datetime.now(timezone.utc) - created_at
                ).total_seconds()
                # Linear extrapolation: elapsed time covers progress_percent
                # of the whole job.
                total_estimated = elapsed_seconds / (progress_percent / 100)
                estimated_remaining = max(1, int(total_estimated - elapsed_seconds))
        except (AttributeError, TypeError, ValueError) as exc:
            # Malformed or naive timestamp / non-numeric progress: the estimate
            # is optional, so report it as unknown rather than failing the poll.
            logger.debug(
                f"Job {job_id}: could not compute remaining time - "
                f"{type(exc).__name__}: {exc}"
            )

    if status == "completed":
        response_data["completed_at"] = job.get("completed_at")
    elif status == "failed":
        response_data["failed_at"] = job.get("failed_at")
        response_data["error_message"] = job.get("error_message")

    return {
        "data": response_data,
        "meta": {"estimated_remaining_seconds": estimated_remaining},
    }


@router_v1.get("/translate/health")
async def translate_health():
    """Liveness probe for the translate endpoint; always returns a static payload."""
    payload = {
        "status": "healthy",
        "endpoint": "/api/v1/translate",
    }
    return payload


# Content-Type served for each supported Office Open XML extension
# (.xlsx, .docx, .pptx). All three share the same vendor prefix and differ
# only in the trailing document-kind segment.
MIME_TYPES = {
    f".{ext}": f"application/vnd.openxmlformats-officedocument.{subtype}"
    for ext, subtype in (
        ("xlsx", "spreadsheetml.sheet"),
        ("docx", "wordprocessingml.document"),
        ("pptx", "presentationml.presentation"),
    )
}


def _cleanup_files(input_path: Optional[str], output_path: Optional[str]) -> None:
    """Best-effort removal of a job's output and input files after a download.

    The two files are handled independently: a failure on one is logged as a
    warning and does not stop the attempt on the other. Empty/``None`` paths
    and files that no longer exist are skipped without logging.
    """
    # Output first, then input - the same order the two deletions have always run in.
    for label, raw_path in (("output", output_path), ("input", input_path)):
        try:
            if not raw_path:
                continue
            target = Path(raw_path)
            if target.exists():
                target.unlink()
                logger.info(f"Deleted {label} file: {raw_path}")
        except Exception as e:
            logger.warning(f"Failed to delete {label} file {raw_path}: {e}")


@router_v1.get(
    "/download/{job_id}",
    responses={
        200: {
            "description": "Translated file download",
            "content": {"application/octet-stream": {}},
        },
        404: {"description": "File not found or not ready", "model": ErrorResponse},
    },
)
async def download_translated_file(
    job_id: str,
    current_user: Optional[Any] = Depends(get_authenticated_user),
):
    """
    Download a translated file.

    Returns the translated file as a binary download with proper Content-Type
    and Content-Disposition headers. The file is automatically deleted after
    the download completes.

    **Status Requirements:**
    - Job must exist and have status "completed"
    - Job must have an output_path field

    **Error Codes:**
    - `INVALID_JOB_ID` (400): job_id does not match the expected `tr_...` format
    - `ACCESS_DENIED` (403): job belongs to a different authenticated user
    - `FILE_EXPIRED` (404): Job not found, expired, or no output file
    - `NOT_READY` (404): Job exists but translation is not complete

    **Response Headers:**
    - `Content-Type`: Appropriate MIME type for the file format
    - `Content-Disposition`: attachment with filename containing "_translated" suffix

    **Download file name:**
    Built from the original file name; when the job has no file name (or the
    name has no extension) the extension falls back to the job's recorded
    `file_extension`, then to the extension of the produced file.

    **Example:**
    ```
    GET /api/v1/download/tr_abc123def456
    → Returns file with Content-Disposition: attachment; filename="report_translated.xlsx"
    ```
    """
    if not JOB_ID_PATTERN.match(job_id):
        return JSONResponse(
            status_code=400,
            content={
                "error": "INVALID_JOB_ID",
                "message": "Format d'identifiant de travail invalide.",
                "details": {"job_id": job_id, "expected_format": "tr_xxxxxxxxxxxx"},
            },
        )

    job = _translation_jobs.get(job_id)

    if not job:
        return JSONResponse(
            status_code=404,
            content={
                "error": "FILE_EXPIRED",
                "message": "Le fichier traduit n'est plus disponible ou a expire.",
                "details": {"job_id": job_id, "status": "not_found"},
            },
        )

    # NOTE(review): this check only applies when both the caller and the job
    # owner are known; an unauthenticated caller is not blocked here - confirm
    # that anonymous downloads by job id are intended.
    job_user_id = job.get("user_id")
    if current_user and job_user_id and str(job_user_id) != str(current_user.id):
        return JSONResponse(
            status_code=403,
            content={
                "error": "ACCESS_DENIED",
                "message": "Vous n'avez pas acces a ce fichier.",
                "details": {"job_id": job_id},
            },
        )

    if job.get("status") != "completed":
        return JSONResponse(
            status_code=404,
            content={
                "error": "NOT_READY",
                "message": "La traduction est encore en cours.",
                "details": {
                    "job_id": job_id,
                    "status": job.get("status"),
                    "progress_percent": job.get("progress_percent", 0),
                },
            },
        )

    output_path_str = job.get("output_path")
    if not output_path_str:
        return JSONResponse(
            status_code=404,
            content={
                "error": "FILE_EXPIRED",
                "message": "Le fichier traduit n'est plus disponible ou a expire.",
                "details": {"job_id": job_id, "status": "no_output_path"},
            },
        )

    output_path = Path(output_path_str)
    if not output_path.exists():
        return JSONResponse(
            status_code=404,
            content={
                "error": "FILE_EXPIRED",
                "message": "Le fichier traduit n'est plus disponible ou a expire.",
                "details": {"job_id": job_id, "status": "file_deleted"},
            },
        )

    # Extension used when the original file name cannot provide one. `or`
    # (rather than a .get() default) also covers a key stored as None/"".
    fallback_extension = (
        job.get("file_extension") or output_path.suffix or ".xlsx"
    ).lower()

    # No .get() default here: a default of "document" would always be truthy,
    # make the fallback branch unreachable for a missing key, and yield an
    # extension-less "document_translated" served as application/octet-stream.
    original_filename = job.get("file_name")
    if original_filename:
        name_without_ext = Path(original_filename).stem
        extension = Path(original_filename).suffix.lower() or fallback_extension
    else:
        name_without_ext = "document"
        extension = fallback_extension
    download_filename = f"{name_without_ext}_translated{extension}"

    mime_type = MIME_TYPES.get(extension, "application/octet-stream")

    input_path_str = job.get("input_path")

    logger.info(f"Download requested for job {job_id}: {download_filename}")

    # The background task runs after the response has been sent, removing both
    # the uploaded input and the translated output from disk.
    return FileResponse(
        path=str(output_path),
        media_type=mime_type,
        filename=download_filename,
        background=BackgroundTask(_cleanup_files, input_path_str, output_path_str),
    )