office_translator/main.py

"""
Document Translation API
FastAPI application for translating complex documents while preserving formatting
SaaS-ready with rate limiting, validation, and robust error handling
"""
from fastapi import FastAPI, UploadFile, File, Form, HTTPException, Request, Depends, Header
from fastapi.responses import FileResponse, JSONResponse
from fastapi.middleware.cors import CORSMiddleware
from fastapi.staticfiles import StaticFiles
from fastapi.security import HTTPBasic, HTTPBasicCredentials
from pydantic import BaseModel
from contextlib import asynccontextmanager
from pathlib import Path
from typing import Optional
import asyncio
import logging
import os
import secrets
import hashlib
import time

from config import config
from translators import excel_translator, word_translator, pptx_translator
from utils import file_handler, handle_translation_error, DocumentProcessingError
from services.translation_service import _translation_cache

# Import auth routes
from routes.auth_routes import router as auth_router

# Import SaaS middleware
from middleware.rate_limiting import RateLimitMiddleware, RateLimitManager, RateLimitConfig
from middleware.security import SecurityHeadersMiddleware, RequestLoggingMiddleware, ErrorHandlingMiddleware
from middleware.cleanup import FileCleanupManager, MemoryMonitor, HealthChecker, create_cleanup_manager
from middleware.validation import FileValidator, LanguageValidator, ProviderValidator, InputSanitizer, ValidationError

# Configure structured logging
logging.basicConfig(
    level=getattr(logging, os.getenv("LOG_LEVEL", "INFO")),
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

# ============== Admin Authentication ==============
ADMIN_USERNAME = os.getenv("ADMIN_USERNAME")
ADMIN_PASSWORD_HASH = os.getenv("ADMIN_PASSWORD_HASH")  # SHA256 hash of password (preferred)
ADMIN_PASSWORD = os.getenv("ADMIN_PASSWORD")  # Plain password (use hash in production!)
ADMIN_TOKEN_SECRET = os.getenv("ADMIN_TOKEN_SECRET", secrets.token_hex(32))

# Validate admin credentials are configured
if not ADMIN_USERNAME:
    logger.warning("⚠️  ADMIN_USERNAME not set - admin endpoints will be disabled")
if not ADMIN_PASSWORD_HASH and not ADMIN_PASSWORD:
    logger.warning("⚠️  ADMIN_PASSWORD/ADMIN_PASSWORD_HASH not set - admin endpoints will be disabled")

# Redis connection for sessions (fallback to in-memory if not available)
REDIS_URL = os.getenv("REDIS_URL", "")
_redis_client = None

def get_redis_client():
    """Get Redis client for session storage"""
    global _redis_client
    if _redis_client is None and REDIS_URL:
        try:
            import redis
            _redis_client = redis.from_url(REDIS_URL, decode_responses=True)
            _redis_client.ping()
            logger.info("✅ Connected to Redis for session storage")
        except Exception as e:
            logger.warning(f"⚠️  Redis connection failed: {e}. Using in-memory sessions.")
            _redis_client = False  # Mark as failed
    return _redis_client if _redis_client else None

# In-memory fallback for sessions (not recommended for production)
_memory_sessions: dict = {}

def hash_password(password: str) -> str:
    """Hash password with SHA256"""
    return hashlib.sha256(password.encode()).hexdigest()

def verify_admin_password(password: str) -> bool:
    """Verify admin password"""
    if not ADMIN_PASSWORD_HASH and not ADMIN_PASSWORD:
        return False  # No credentials configured
    if ADMIN_PASSWORD_HASH:
        return hash_password(password) == ADMIN_PASSWORD_HASH
    return password == ADMIN_PASSWORD

def _get_session_key(token: str) -> str:
    """Get Redis key for session token"""
    return f"admin_session:{token}"

def create_admin_token() -> str:
    """Create a new admin session token with Redis or memory fallback"""
    token = secrets.token_urlsafe(32)
    expiry = int(time.time()) + (24 * 60 * 60)  # 24 hours

    redis_client = get_redis_client()
    if redis_client:
        try:
            redis_client.setex(_get_session_key(token), 24 * 60 * 60, str(expiry))
        except Exception as e:
            logger.warning(f"Redis session save failed: {e}")
            _memory_sessions[token] = expiry
    else:
        _memory_sessions[token] = expiry

    return token

def verify_admin_token(token: str) -> bool:
    """Verify admin token is valid and not expired"""
    redis_client = get_redis_client()

    if redis_client:
        try:
            expiry = redis_client.get(_get_session_key(token))
            if expiry and int(expiry) > time.time():
                return True
            return False
        except Exception as e:
            logger.warning(f"Redis session check failed: {e}")

    # Fallback to memory
    if token not in _memory_sessions:
        return False
    if time.time() > _memory_sessions[token]:
        del _memory_sessions[token]
        return False
    return True

def delete_admin_token(token: str):
    """Delete an admin session token"""
    redis_client = get_redis_client()
    if redis_client:
        try:
            redis_client.delete(_get_session_key(token))
        except Exception:
            pass
    if token in _memory_sessions:
        del _memory_sessions[token]

async def require_admin(authorization: Optional[str] = Header(None)) -> bool:
    """Dependency to require admin authentication"""
    if not ADMIN_USERNAME or (not ADMIN_PASSWORD_HASH and not ADMIN_PASSWORD):
        raise HTTPException(status_code=503, detail="Admin authentication not configured")

    if not authorization:
        raise HTTPException(status_code=401, detail="Authorization header required")

    # Expect "Bearer <token>"
    parts = authorization.split(" ")
    if len(parts) != 2 or parts[0].lower() != "bearer":
        raise HTTPException(status_code=401, detail="Invalid authorization format. Use: Bearer <token>")

    token = parts[1]
    if not verify_admin_token(token):
        raise HTTPException(status_code=401, detail="Invalid or expired token")

    return True

# Initialize SaaS components
rate_limit_config = RateLimitConfig(
    requests_per_minute=int(os.getenv("RATE_LIMIT_PER_MINUTE", "30")),
    requests_per_hour=int(os.getenv("RATE_LIMIT_PER_HOUR", "200")),
    translations_per_minute=int(os.getenv("TRANSLATIONS_PER_MINUTE", "10")),
    translations_per_hour=int(os.getenv("TRANSLATIONS_PER_HOUR", "50")),
    max_concurrent_translations=int(os.getenv("MAX_CONCURRENT_TRANSLATIONS", "5")),
)
rate_limit_manager = RateLimitManager(rate_limit_config)

cleanup_manager = create_cleanup_manager(config)
memory_monitor = MemoryMonitor(max_memory_percent=float(os.getenv("MAX_MEMORY_PERCENT", "80")))
health_checker = HealthChecker(cleanup_manager, memory_monitor)

file_validator = FileValidator(
    max_size_mb=config.MAX_FILE_SIZE_MB,
    allowed_extensions=config.SUPPORTED_EXTENSIONS
)


def build_full_prompt(system_prompt: str, glossary: str) -> str:
    """Combine system prompt and glossary into a single prompt for LLM translation."""
    parts = []

    # Add system prompt if provided
    if system_prompt and system_prompt.strip():
        parts.append(system_prompt.strip())

    # Add glossary if provided
    if glossary and glossary.strip():
        glossary_section = """
TECHNICAL GLOSSARY - Use these exact translations for the following terms:
{}

Always use the translations from this glossary when you encounter these terms.""".format(glossary.strip())
        parts.append(glossary_section)

    return "\n\n".join(parts) if parts else ""


# Lifespan context manager for startup/shutdown
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Handle startup and shutdown events"""
    # Startup
    logger.info("Starting Document Translation API...")
    config.ensure_directories()

    # Initialize database
    try:
        from database.connection import init_db, check_db_connection
        init_db()
        if check_db_connection():
            logger.info("✅ Database connection verified")
        else:
            logger.warning("⚠️  Database connection check failed")
    except Exception as e:
        logger.warning(f"⚠️  Database initialization skipped: {e}")

    await cleanup_manager.start()
    logger.info("API ready to accept requests")

    yield

    # Shutdown
    logger.info("Shutting down...")
    await cleanup_manager.stop()
    logger.info("Cleanup completed")


# Create FastAPI app with lifespan
app = FastAPI(
    title=config.API_TITLE,
    version=config.API_VERSION,
    description=config.API_DESCRIPTION,
    lifespan=lifespan
)

# Add middleware (order matters - first added is outermost)
app.add_middleware(ErrorHandlingMiddleware)
app.add_middleware(RequestLoggingMiddleware, log_body=False)
app.add_middleware(SecurityHeadersMiddleware, config={"enable_hsts": os.getenv("ENABLE_HSTS", "false").lower() == "true"})
app.add_middleware(RateLimitMiddleware, rate_limit_manager=rate_limit_manager)

# CORS - configure for production
# WARNING: Do not use "*" in production! Set CORS_ORIGINS to your actual frontend domains
_cors_env = os.getenv("CORS_ORIGINS", "")
if _cors_env == "*" or not _cors_env:
    logger.warning("⚠️  CORS_ORIGINS not properly configured. Using permissive settings for development only!")
    allowed_origins = ["*"]
else:
    allowed_origins = [origin.strip() for origin in _cors_env.split(",") if origin.strip()]
    logger.info(f"✅ CORS configured for origins: {allowed_origins}")

app.add_middleware(
    CORSMiddleware,
    allow_origins=allowed_origins,
    allow_credentials=True if allowed_origins != ["*"] else False,  # Can't use credentials with wildcard
    allow_methods=["GET", "POST", "DELETE", "OPTIONS"],
    allow_headers=["*"],
    expose_headers=["X-Request-ID", "X-Original-Filename", "X-File-Size-MB", "X-Target-Language"]
)

# Mount static files
static_dir = Path(__file__).parent / "static"
if static_dir.exists():
    app.mount("/static", StaticFiles(directory=str(static_dir)), name="static")

# Include auth routes
app.include_router(auth_router)


# Custom exception handler for ValidationError
@app.exception_handler(ValidationError)
async def validation_error_handler(request: Request, exc: ValidationError):
    """Handle validation errors with user-friendly messages"""
    return JSONResponse(
        status_code=400,
        content={
            "error": exc.code,
            "message": exc.message,
            "details": exc.details
        }
    )


@app.get("/")
async def root():
    """Root endpoint with API information"""
    return {
        "name": config.API_TITLE,
        "version": config.API_VERSION,
        "status": "operational",
        "supported_formats": list(config.SUPPORTED_EXTENSIONS),
        "endpoints": {
            "translate": "/translate",
            "health": "/health",
            "supported_languages": "/languages"
        }
    }


@app.get("/health")
async def health_check():
    """Health check endpoint with detailed system status"""
    health_status = await health_checker.check_health()
    status_code = 200 if health_status.get("status") == "healthy" else 503

    # Check database connection
    db_status = {"status": "not_configured"}
    try:
        from database.connection import check_db_connection
        if check_db_connection():
            db_status = {"status": "healthy"}
        else:
            db_status = {"status": "unhealthy"}
    except Exception as e:
        db_status = {"status": "error", "error": str(e)}

    # Check Redis connection
    redis_status = {"status": "not_configured"}
    redis_client = get_redis_client()
    if redis_client:
        try:
            redis_client.ping()
            redis_status = {"status": "healthy"}
        except Exception as e:
            redis_status = {"status": "unhealthy", "error": str(e)}
    elif redis_client is False:
        redis_status = {"status": "connection_failed"}

    return JSONResponse(
        status_code=status_code,
        content={
            "status": health_status.get("status", "unknown"),
            "translation_service": config.TRANSLATION_SERVICE,
            "database": db_status,
            "redis": redis_status,
            "memory": health_status.get("memory", {}),
            "disk": health_status.get("disk", {}),
            "cleanup_service": health_status.get("cleanup_service", {}),
            "rate_limits": {
                "requests_per_minute": rate_limit_config.requests_per_minute,
                "translations_per_minute": rate_limit_config.translations_per_minute,
            },
            "translation_cache": _translation_cache.stats()
        }
    )


@app.get("/ready")
async def readiness_check():
    """Kubernetes readiness probe - check if app can serve traffic"""
    issues = []

    # Check database
    try:
        from database.connection import check_db_connection, DATABASE_URL
        if DATABASE_URL:  # Only check if configured
            if not check_db_connection():
                issues.append("database_unavailable")
    except ImportError:
        pass  # Database module not available - OK for development
    except Exception as e:
        issues.append(f"database_error: {str(e)}")

    # Check Redis (optional but log if configured and unavailable)
    if REDIS_URL:
        redis_client = get_redis_client()
        if redis_client:
            try:
                redis_client.ping()
            except Exception:
                issues.append("redis_unavailable")
        elif redis_client is False:
            issues.append("redis_connection_failed")

    if issues:
        return JSONResponse(
            status_code=503,
            content={"ready": False, "issues": issues}
        )

    return {"ready": True}


@app.get("/languages")
async def get_supported_languages():
    """Get list of supported language codes"""
    return {
        "supported_languages": {
            "es": "Spanish",
            "fr": "French",
            "de": "German",
            "it": "Italian",
            "pt": "Portuguese",
            "ru": "Russian",
            "zh": "Chinese (Simplified)",
            "ja": "Japanese",
            "ko": "Korean",
            "ar": "Arabic",
            "hi": "Hindi",
            "nl": "Dutch",
            "pl": "Polish",
            "tr": "Turkish",
            "sv": "Swedish",
            "da": "Danish",
            "no": "Norwegian",
            "fi": "Finnish",
            "cs": "Czech",
            "el": "Greek",
            "th": "Thai",
            "vi": "Vietnamese",
            "id": "Indonesian",
            "uk": "Ukrainian",
            "ro": "Romanian",
            "hu": "Hungarian"
        },
        "note": "Supported languages may vary depending on the translation service configured"
    }


@app.post("/translate")
async def translate_document(
    request: Request,
    file: UploadFile = File(..., description="Document file to translate (.xlsx, .docx, or .pptx)"),
    target_language: str = Form(..., description="Target language code (e.g., 'es', 'fr', 'de')"),
    source_language: str = Form(default="auto", description="Source language code (default: auto-detect)"),
    provider: str = Form(default="openrouter", description="Translation provider (openrouter, google, ollama, deepl, libre, openai)"),
    translate_images: bool = Form(default=False, description="Translate images with multimodal Ollama/OpenAI model"),
    ollama_model: str = Form(default="", description="Ollama model to use (also used for vision if multimodal)"),
    system_prompt: str = Form(default="", description="Custom system prompt with context or instructions for LLM translation"),
    glossary: str = Form(default="", description="Technical glossary (format: source=target, one per line)"),
    libre_url: str = Form(default="https://libretranslate.com", description="LibreTranslate server URL"),
    openai_api_key: str = Form(default="", description="OpenAI API key"),
    openai_model: str = Form(default="gpt-4o-mini", description="OpenAI model to use (gpt-4o-mini is cheapest with vision)"),
    openrouter_api_key: str = Form(default="", description="OpenRouter API key"),
    openrouter_model: str = Form(default="deepseek/deepseek-chat", description="OpenRouter model (deepseek/deepseek-chat is best value)"),
    cleanup: bool = Form(default=True, description="Delete input file after translation")
):
    """
    Translate a document while preserving all formatting, layout, and embedded media

    **Supported File Types:**
    - Excel (.xlsx) - Preserves formulas, merged cells, styling, and images
    - Word (.docx) - Preserves headings, tables, images, headers/footers
    - PowerPoint (.pptx) - Preserves layouts, animations, and media

    **Parameters:**
    - **file**: The document file to translate
    - **target_language**: Target language code (e.g., 'es' for Spanish, 'fr' for French)
    - **source_language**: Source language code (optional, default: auto-detect)
    - **cleanup**: Whether to delete the uploaded file after translation (default: True)

    **Returns:**
    - Translated document file with preserved formatting
    """
    input_path = None
    output_path = None
    request_id = getattr(request.state, 'request_id', 'unknown')

    try:
        # Validate inputs
        sanitized_language = InputSanitizer.sanitize_language_code(target_language)
        LanguageValidator.validate(sanitized_language)
        ProviderValidator.validate(provider)

        # Validate file before processing
        validation_result = await file_validator.validate_async(file)
        if not validation_result.is_valid:
            raise ValidationError(
                message=f"File validation failed: {'; '.join(validation_result.errors)}",
                code="INVALID_FILE",
                details={"errors": validation_result.errors, "warnings": validation_result.warnings}
            )

        # Log any warnings
        if validation_result.warnings:
            logger.warning(f"[{request_id}] File validation warnings: {validation_result.warnings}")

        # Reset file position after validation read
        await file.seek(0)

        # Check rate limit for translations
        client_ip = request.client.host if request.client else "unknown"
        if not await rate_limit_manager.check_translation_limit(client_ip):
            raise HTTPException(
                status_code=429,
                detail="Translation rate limit exceeded. Please try again later."
            )

        # Validate file extension
        file_extension = file_handler.validate_file_extension(file.filename)
        logger.info(f"[{request_id}] Processing {file_extension} file: {file.filename}")

        # Validate file size
        file_handler.validate_file_size(file)

        # Generate unique filenames
        input_filename = file_handler.generate_unique_filename(file.filename, "input")
        output_filename = file_handler.generate_unique_filename(file.filename, "translated")

        # Save uploaded file
        input_path = config.UPLOAD_DIR / input_filename
        output_path = config.OUTPUT_DIR / output_filename

        await file_handler.save_upload_file(file, input_path)
        logger.info(f"[{request_id}] Saved input file to: {input_path}")

        # Track file for cleanup
        await cleanup_manager.track_file(input_path, ttl_minutes=30)
        await cleanup_manager.track_file(output_path, ttl_minutes=60)

        # Configure translation provider
        from services.translation_service import GoogleTranslationProvider, DeepLTranslationProvider, LibreTranslationProvider, OllamaTranslationProvider, OpenAITranslationProvider, OpenRouterTranslationProvider, translation_service

        if provider.lower() == "openrouter":
            api_key = openrouter_api_key.strip() if openrouter_api_key else os.getenv("OPENROUTER_API_KEY", "")
            if not api_key:
                raise HTTPException(status_code=400, detail="OpenRouter API key not provided. Get one at https://openrouter.ai/keys")
            model_to_use = openrouter_model.strip() if openrouter_model else "deepseek/deepseek-chat"
            custom_prompt = build_full_prompt(system_prompt, glossary)
            logger.info(f"Using OpenRouter model: {model_to_use}")
            if custom_prompt:
                logger.info(f"Custom system prompt provided ({len(custom_prompt)} chars)")
            translation_provider = OpenRouterTranslationProvider(api_key, model_to_use, custom_prompt)
        elif provider.lower() == "deepl":
            if not config.DEEPL_API_KEY:
                raise HTTPException(status_code=400, detail="DeepL API key not configured")
            translation_provider = DeepLTranslationProvider(config.DEEPL_API_KEY)
        elif provider.lower() == "libre":
            libre_server = libre_url.strip() if libre_url else "https://libretranslate.com"
            logger.info(f"Using LibreTranslate server: {libre_server}")
            translation_provider = LibreTranslationProvider(libre_server)
        elif provider.lower() == "openai":
            api_key = openai_api_key.strip() if openai_api_key else ""
            if not api_key:
                raise HTTPException(status_code=400, detail="OpenAI API key not provided")
            model_to_use = openai_model.strip() if openai_model else "gpt-4o-mini"
            # Combine system prompt and glossary
            custom_prompt = build_full_prompt(system_prompt, glossary)
            logger.info(f"Using OpenAI model: {model_to_use}")
            if custom_prompt:
                logger.info(f"Custom system prompt provided ({len(custom_prompt)} chars)")
            translation_provider = OpenAITranslationProvider(api_key, model_to_use, custom_prompt)
        elif provider.lower() == "ollama":
            # Use the same model for text and vision (multimodal models like gemma3, qwen3-vl)
            model_to_use = ollama_model.strip() if ollama_model else config.OLLAMA_MODEL
            # Combine system prompt and glossary
            custom_prompt = build_full_prompt(system_prompt, glossary)
            logger.info(f"Using Ollama model: {model_to_use} (text + vision)")
            if custom_prompt:
                logger.info(f"Custom system prompt provided ({len(custom_prompt)} chars)")
            translation_provider = OllamaTranslationProvider(config.OLLAMA_BASE_URL, model_to_use, model_to_use, custom_prompt)
        elif provider.lower() == "google":
            translation_provider = GoogleTranslationProvider()
        else:
            # Default to OpenRouter with DeepSeek (best value)
            api_key = openrouter_api_key.strip() if openrouter_api_key else os.getenv("OPENROUTER_API_KEY", "")
            if api_key:
                translation_provider = OpenRouterTranslationProvider(api_key, "deepseek/deepseek-chat", build_full_prompt(system_prompt, glossary))
            else:
                translation_provider = GoogleTranslationProvider()

        # Update the global translation service
        translation_service.provider = translation_provider

        # Store translate_images flag for translators to access
        translation_service.translate_images = translate_images

        # Translate based on file type
        if file_extension == ".xlsx":
            logger.info("Translating Excel file...")
            excel_translator.translate_file(input_path, output_path, target_language)
        elif file_extension == ".docx":
            logger.info("Translating Word document...")
            word_translator.translate_file(input_path, output_path, target_language)
        elif file_extension == ".pptx":
            logger.info("Translating PowerPoint presentation...")
            pptx_translator.translate_file(input_path, output_path, target_language)
        else:
            raise DocumentProcessingError(f"Unsupported file type: {file_extension}")

        logger.info(f"Translation completed: {output_path}")

        # Get file info
        output_info = file_handler.get_file_info(output_path)

        # Cleanup input file if requested
        if cleanup and input_path:
            file_handler.cleanup_file(input_path)
            logger.info(f"Cleaned up input file: {input_path}")

        # Return the translated file
        return FileResponse(
            path=output_path,
            filename=f"translated_{file.filename}",
            media_type="application/octet-stream",
            headers={
                "X-Original-Filename": file.filename,
                "X-File-Size-MB": str(output_info.get("size_mb", 0)),
                "X-Target-Language": target_language
            }
        )

    except HTTPException:
        # Re-raise HTTP exceptions
        raise
    except Exception as e:
        logger.error(f"Translation error: {str(e)}", exc_info=True)

        # Cleanup files on error
        if input_path:
            file_handler.cleanup_file(input_path)
        if output_path:
            file_handler.cleanup_file(output_path)

        raise handle_translation_error(e)


@app.delete("/cleanup/{filename}")
async def cleanup_translated_file(filename: str):
    """
    Cleanup a translated file after download

    **Parameters:**
    - **filename**: Name of the file to delete from the outputs directory
    """
    try:
        file_path = config.OUTPUT_DIR / filename

        if not file_path.exists():
            raise HTTPException(status_code=404, detail="File not found")

        file_handler.cleanup_file(file_path)

        return {"message": f"File {filename} deleted successfully"}

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Cleanup error: {str(e)}")
        raise HTTPException(status_code=500, detail="Error cleaning up file")


@app.post("/translate-batch")
async def translate_batch_documents(
    files: list[UploadFile] = File(..., description="Multiple document files to translate"),
    target_language: str = Form(..., description="Target language code"),
    source_language: str = Form(default="auto", description="Source language code")
):
    """
    Translate multiple documents in batch

    **Note:** This endpoint processes files sequentially. For large batches, consider
    calling the single file endpoint multiple times with concurrent requests.
    """
    results = []

    for file in files:
        try:
            # Process each file using the same logic as single file translation
            file_extension = file_handler.validate_file_extension(file.filename)
            file_handler.validate_file_size(file)

            input_filename = file_handler.generate_unique_filename(file.filename, "input")
            output_filename = file_handler.generate_unique_filename(file.filename, "translated")

            input_path = config.UPLOAD_DIR / input_filename
            output_path = config.OUTPUT_DIR / output_filename

            await file_handler.save_upload_file(file, input_path)

            # Translate based on file type
            if file_extension == ".xlsx":
                excel_translator.translate_file(input_path, output_path, target_language)
            elif file_extension == ".docx":
                word_translator.translate_file(input_path, output_path, target_language)
            elif file_extension == ".pptx":
                pptx_translator.translate_file(input_path, output_path, target_language)

            # Cleanup input file
            file_handler.cleanup_file(input_path)

            results.append({
                "filename": file.filename,
                "status": "success",
                "output_file": output_filename,
                "download_url": f"/download/{output_filename}"
            })

        except Exception as e:
            logger.error(f"Error processing {file.filename}: {str(e)}")
            results.append({
                "filename": file.filename,
                "status": "error",
                "error": str(e)
            })

    return {
        "total_files": len(files),
        "successful": len([r for r in results if r["status"] == "success"]),
        "failed": len([r for r in results if r["status"] == "error"]),
        "results": results
    }


@app.get("/download/{filename}")
async def download_file(filename: str):
    """
    Download a translated file by filename

    **Parameters:**
    - **filename**: Name of the file to download from the outputs directory
    """
    file_path = config.OUTPUT_DIR / filename

    if not file_path.exists():
        raise HTTPException(status_code=404, detail="File not found")

    return FileResponse(
        path=file_path,
        filename=filename,
        media_type="application/octet-stream"
    )


@app.get("/ollama/models")
async def list_ollama_models(base_url: Optional[str] = None):
    """
    List available Ollama models

    **Parameters:**
    - **base_url**: Ollama server URL (default: from config)
    """
    from services.translation_service import OllamaTranslationProvider

    url = base_url or config.OLLAMA_BASE_URL
    models = OllamaTranslationProvider.list_models(url)

    return {
        "ollama_url": url,
        "models": models,
        "count": len(models)
    }


@app.post("/ollama/configure")
async def configure_ollama(base_url: str = Form(...), model: str = Form(...)):
    """
    Configure Ollama settings

    **Parameters:**
    - **base_url**: Ollama server URL (e.g., http://localhost:11434)
    - **model**: Model name to use for translation (e.g., llama3, mistral)
    """
    config.OLLAMA_BASE_URL = base_url
    config.OLLAMA_MODEL = model

    return {
        "status": "success",
        "message": "Ollama configuration updated",
        "ollama_url": base_url,
        "model": model
    }


@app.post("/extract-texts")
async def extract_texts_from_document(
    file: UploadFile = File(..., description="Document file to extract texts from"),
):
    """
    Extract all translatable texts from a document for client-side translation (WebLLM).
    Returns a list of texts and a session ID to use for reconstruction.

    **Parameters:**
    - **file**: The document file to extract texts from

    **Returns:**
    - session_id: Unique ID to reference this extraction
    - texts: Array of texts to translate
    - file_type: Type of the document
    """
    import uuid
    import json

    try:
        # Validate file extension
        file_extension = file_handler.validate_file_extension(file.filename)
        logger.info(f"Extracting texts from {file_extension} file: {file.filename}")

        # Validate file size
        file_handler.validate_file_size(file)

        # Generate session ID
        session_id = str(uuid.uuid4())

        # Save uploaded file
        input_filename = f"session_{session_id}{file_extension}"
        input_path = config.UPLOAD_DIR / input_filename
        await file_handler.save_upload_file(file, input_path)

        # Extract texts based on file type
        texts = []

        if file_extension == ".xlsx":
            from openpyxl import load_workbook
            wb = load_workbook(input_path)
            for sheet in wb.worksheets:
                for row in sheet.iter_rows():
                    for cell in row:
                        if cell.value and isinstance(cell.value, str) and cell.value.strip():
                            texts.append({
                                "id": f"{sheet.title}!{cell.coordinate}",
                                "text": cell.value
                            })
            wb.close()
        elif file_extension == ".docx":
            from docx import Document
            doc = Document(input_path)
            para_idx = 0
            for para in doc.paragraphs:
                if para.text.strip():
                    texts.append({
                        "id": f"para_{para_idx}",
                        "text": para.text
                    })
                para_idx += 1
            # Also extract from tables
            table_idx = 0
            for table in doc.tables:
                for row_idx, row in enumerate(table.rows):
                    for cell_idx, cell in enumerate(row.cells):
                        if cell.text.strip():
                            texts.append({
                                "id": f"table_{table_idx}_r{row_idx}_c{cell_idx}",
                                "text": cell.text
                            })
                table_idx += 1
        elif file_extension == ".pptx":
            from pptx import Presentation
            prs = Presentation(input_path)
            for slide_idx, slide in enumerate(prs.slides):
                for shape_idx, shape in enumerate(slide.shapes):
                    if shape.has_text_frame:
                        for para_idx, para in enumerate(shape.text_frame.paragraphs):
                            for run_idx, run in enumerate(para.runs):
                                if run.text.strip():
                                    texts.append({
                                        "id": f"slide_{slide_idx}_shape_{shape_idx}_para_{para_idx}_run_{run_idx}",
                                        "text": run.text
                                    })

        # Save session metadata
        session_data = {
            "original_filename": file.filename,
            "file_extension": file_extension,
            "input_path": str(input_path),
            "text_count": len(texts)
        }
        session_file = config.UPLOAD_DIR / f"session_{session_id}.json"
        with open(session_file, "w", encoding="utf-8") as f:
            json.dump(session_data, f)

        logger.info(f"Extracted {len(texts)} texts from {file.filename}, session: {session_id}")

        return {
            "session_id": session_id,
            "texts": texts,
            "file_type": file_extension,
            "text_count": len(texts)
        }

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Text extraction error: {str(e)}", exc_info=True)
        raise HTTPException(status_code=500, detail=f"Failed to extract texts: {str(e)}")


@app.post("/reconstruct-document")
async def reconstruct_document(
    session_id: str = Form(..., description="Session ID from extract-texts"),
    translations: str = Form(..., description="JSON array of {id, translated_text} objects"),
    target_language: str = Form(..., description="Target language code"),
):
    """
    Reconstruct a document with translated texts.

    **Parameters:**
    - **session_id**: The session ID from extract-texts
    - **translations**: JSON array of translations with matching IDs
    - **target_language**: Target language for filename

    **Returns:**
    - Translated document file
    """
    import json

    try:
        # Load session data
        session_file = config.UPLOAD_DIR / f"session_{session_id}.json"
        if not session_file.exists():
            raise HTTPException(status_code=404, detail="Session not found or expired")

        with open(session_file, "r", encoding="utf-8") as f:
            session_data = json.load(f)

        input_path = Path(session_data["input_path"])
        file_extension = session_data["file_extension"]
        original_filename = session_data["original_filename"]

        if not input_path.exists():
            raise HTTPException(status_code=404, detail="Source file not found or expired")

        # Parse translations
        translation_list = json.loads(translations)
        translation_map = {t["id"]: t["translated_text"] for t in translation_list}

        # Generate output path
        output_filename = file_handler.generate_unique_filename(original_filename, "translated")
        output_path = config.OUTPUT_DIR / output_filename

        # Reconstruct based on file type
        if file_extension == ".xlsx":
            from openpyxl import load_workbook
            import shutil
            shutil.copy(input_path, output_path)
            wb = load_workbook(output_path)
            for sheet in wb.worksheets:
                for row in sheet.iter_rows():
                    for cell in row:
                        cell_id = f"{sheet.title}!{cell.coordinate}"
                        if cell_id in translation_map:
                            cell.value = translation_map[cell_id]
            wb.save(output_path)
            wb.close()

        elif file_extension == ".docx":
            from docx import Document
            import shutil
            shutil.copy(input_path, output_path)
            doc = Document(output_path)
            para_idx = 0
            for para in doc.paragraphs:
                para_id = f"para_{para_idx}"
                if para_id in translation_map and para.text.strip():
                    # Replace text while keeping formatting
                    for run in para.runs:
                        run.text = ""
                    if para.runs:
                        para.runs[0].text = translation_map[para_id]
                    else:
                        para.text = translation_map[para_id]
                para_idx += 1
            # Also handle tables
            table_idx = 0
            for table in doc.tables:
                for row_idx, row in enumerate(table.rows):
                    for cell_idx, cell in enumerate(row.cells):
                        cell_id = f"table_{table_idx}_r{row_idx}_c{cell_idx}"
                        if cell_id in translation_map:
                            # Clear and set new text
                            for para in cell.paragraphs:
                                for run in para.runs:
                                    run.text = ""
                            if cell.paragraphs and cell.paragraphs[0].runs:
                                cell.paragraphs[0].runs[0].text = translation_map[cell_id]
                            elif cell.paragraphs:
                                cell.paragraphs[0].text = translation_map[cell_id]
                table_idx += 1
            doc.save(output_path)

        elif file_extension == ".pptx":
            from pptx import Presentation
            import shutil
            shutil.copy(input_path, output_path)
            prs = Presentation(output_path)
            for slide_idx, slide in enumerate(prs.slides):
                for shape_idx, shape in enumerate(slide.shapes):
                    if shape.has_text_frame:
                        for para_idx, para in enumerate(shape.text_frame.paragraphs):
                            for run_idx, run in enumerate(para.runs):
                                run_id = f"slide_{slide_idx}_shape_{shape_idx}_para_{para_idx}_run_{run_idx}"
                                if run_id in translation_map:
                                    run.text = translation_map[run_id]
            prs.save(output_path)

        # Cleanup session files
        file_handler.cleanup_file(input_path)
        file_handler.cleanup_file(session_file)

        logger.info(f"Reconstructed document: {output_path}")

        return FileResponse(
            path=output_path,
            filename=f"translated_{original_filename}",
            media_type="application/octet-stream"
        )

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Reconstruction error: {str(e)}", exc_info=True)
        raise HTTPException(status_code=500, detail=f"Failed to reconstruct document: {str(e)}")


# ============== SaaS Management Endpoints ==============

class AdminLoginRequest(BaseModel):
    password: str

@app.post("/admin/login")
async def admin_login(request: AdminLoginRequest):
    """
    Admin login endpoint
    Returns a bearer token for authenticated admin access
    """
    if not verify_admin_password(request.password):
        logger.warning(f"Failed admin login attempt - wrong password")
        raise HTTPException(status_code=401, detail="Invalid credentials")

    token = create_admin_token()
    logger.info(f"Admin login successful")

    return {
        "status": "success",
        "access_token": token,
        "token_type": "bearer",
        "expires_in": 86400,  # 24 hours in seconds
        "message": "Login successful"
    }


@app.post("/admin/logout")
async def admin_logout(authorization: Optional[str] = Header(None)):
    """Logout and invalidate admin token"""
    if authorization:
        parts = authorization.split(" ")
        if len(parts) == 2 and parts[0].lower() == "bearer":
            token = parts[1]
            delete_admin_token(token)
            logger.info("Admin logout successful")

    return {"status": "success", "message": "Logged out"}


@app.get("/admin/verify")
async def verify_admin_session(is_admin: bool = Depends(require_admin)):
    """Verify admin token is still valid"""
    return {"status": "valid", "authenticated": True}


@app.get("/admin/dashboard")
async def get_admin_dashboard(is_admin: bool = Depends(require_admin)):
    """Get comprehensive admin dashboard data"""
    health_status = await health_checker.check_health()
    cleanup_stats = cleanup_manager.get_stats()
    rate_limit_stats = rate_limit_manager.get_stats()
    tracked_files = cleanup_manager.get_tracked_files()

    return {
        "timestamp": health_status.get("timestamp"),
        "uptime": health_status.get("uptime_human"),
        "status": health_status.get("status"),
        "issues": health_status.get("issues", []),
        "system": {
            "memory": health_status.get("memory", {}),
            "disk": health_status.get("disk", {}),
        },
        "translations": health_status.get("translations", {}),
        "cleanup": {
            **cleanup_stats,
            "tracked_files_count": len(tracked_files)
        },
        "rate_limits": rate_limit_stats,
        "config": {
            "max_file_size_mb": config.MAX_FILE_SIZE_MB,
            "supported_extensions": list(config.SUPPORTED_EXTENSIONS),
            "translation_service": config.TRANSLATION_SERVICE,
            "rate_limit_per_minute": rate_limit_config.requests_per_minute,
            "translations_per_minute": rate_limit_config.translations_per_minute
        }
    }


@app.get("/metrics")
async def get_metrics():
    """Get system metrics and statistics for monitoring"""
    health_status = await health_checker.check_health()
    cleanup_stats = cleanup_manager.get_stats()
    rate_limit_stats = rate_limit_manager.get_stats()

    return {
        "system": {
            "memory": health_status.get("memory", {}),
            "disk": health_status.get("disk", {}),
            "status": health_status.get("status", "unknown")
        },
        "cleanup": cleanup_stats,
        "rate_limits": rate_limit_stats,
        "config": {
            "max_file_size_mb": config.MAX_FILE_SIZE_MB,
            "supported_extensions": list(config.SUPPORTED_EXTENSIONS),
            "translation_service": config.TRANSLATION_SERVICE
        }
    }


@app.get("/rate-limit/status")
async def get_rate_limit_status(request: Request):
    """Get current rate limit status for the requesting client"""
    client_ip = request.client.host if request.client else "unknown"
    status = await rate_limit_manager.get_client_status(client_ip)

    return {
        "client_ip": client_ip,
        "limits": {
            "requests_per_minute": rate_limit_config.requests_per_minute,
            "requests_per_hour": rate_limit_config.requests_per_hour,
            "translations_per_minute": rate_limit_config.translations_per_minute,
            "translations_per_hour": rate_limit_config.translations_per_hour
        },
        "current_usage": status
    }


@app.post("/admin/cleanup/trigger")
async def trigger_cleanup(is_admin: bool = Depends(require_admin)):
    """Trigger manual cleanup of expired files (requires admin auth)"""
    try:
        cleaned = await cleanup_manager.cleanup_expired()
        return {
            "status": "success",
            "files_cleaned": cleaned,
            "message": f"Cleaned up {cleaned} expired files"
        }
    except Exception as e:
        logger.error(f"Manual cleanup failed: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Cleanup failed: {str(e)}")


@app.get("/admin/files/tracked")
async def get_tracked_files(is_admin: bool = Depends(require_admin)):
    """Get list of currently tracked files (requires admin auth)"""
    tracked = cleanup_manager.get_tracked_files()
    return {
        "count": len(tracked),
        "files": tracked
    }


@app.get("/admin/users")
async def get_admin_users(is_admin: bool = Depends(require_admin)):
    """Get all users with their usage stats (requires admin auth)"""
    from services.auth_service import load_users
    from models.subscription import PLANS

    users_data = load_users()
    users_list = []

    for user_id, user_data in users_data.items():
        plan = user_data.get("plan", "free")
        plan_info = PLANS.get(plan, PLANS["free"])

        users_list.append({
            "id": user_id,
            "email": user_data.get("email", ""),
            "name": user_data.get("name", ""),
            "plan": plan,
            "subscription_status": user_data.get("subscription_status", "active"),
            "docs_translated_this_month": user_data.get("docs_translated_this_month", 0),
            "pages_translated_this_month": user_data.get("pages_translated_this_month", 0),
            "extra_credits": user_data.get("extra_credits", 0),
            "created_at": user_data.get("created_at", ""),
            "plan_limits": {
                "docs_per_month": plan_info.get("docs_per_month", 0),
                "max_pages_per_doc": plan_info.get("max_pages_per_doc", 0),
            }
        })

    # Sort by created_at descending (newest first)
    users_list.sort(key=lambda x: x.get("created_at", ""), reverse=True)

    return {
        "total": len(users_list),
        "users": users_list
    }


@app.get("/admin/stats")
async def get_admin_stats(is_admin: bool = Depends(require_admin)):
    """Get comprehensive admin statistics (requires admin auth)"""
    from services.auth_service import load_users
    from models.subscription import PLANS

    users_data = load_users()

    # Calculate stats
    total_users = len(users_data)
    plan_distribution = {}
    total_docs_translated = 0
    total_pages_translated = 0
    active_users = 0  # Users who translated something this month

    for user_data in users_data.values():
        plan = user_data.get("plan", "free")
        plan_distribution[plan] = plan_distribution.get(plan, 0) + 1

        docs = user_data.get("docs_translated_this_month", 0)
        pages = user_data.get("pages_translated_this_month", 0)
        total_docs_translated += docs
        total_pages_translated += pages

        if docs > 0:
            active_users += 1

    # Get cache stats
    cache_stats = _translation_cache.get_stats()

    return {
        "users": {
            "total": total_users,
            "active_this_month": active_users,
            "by_plan": plan_distribution
        },
        "translations": {
            "docs_this_month": total_docs_translated,
            "pages_this_month": total_pages_translated
        },
        "cache": cache_stats,
        "config": {
            "translation_service": config.TRANSLATION_SERVICE,
            "max_file_size_mb": config.MAX_FILE_SIZE_MB,
            "supported_extensions": list(config.SUPPORTED_EXTENSIONS)
        }
    }


@app.post("/admin/config/provider")
async def update_default_provider(
    provider: str = Form(...),
    is_admin: bool = Depends(require_admin)
):
    """Update the default translation provider (requires admin auth)"""
    valid_providers = ["google", "openrouter", "ollama", "deepl", "libre", "openai"]
    if provider not in valid_providers:
        raise HTTPException(status_code=400, detail=f"Invalid provider. Must be one of: {valid_providers}")

    # Update config (in production, this would persist to database/env)
    config.TRANSLATION_SERVICE = provider

    return {
        "status": "success",
        "message": f"Default provider updated to {provider}",
        "provider": provider
    }


if __name__ == "__main__":
    import uvicorn
    uvicorn.run("main:app", host="0.0.0.0", port=8000, reload=True)