"""Configuration module for the Document Translation API.

SaaS-ready with comprehensive settings for production deployment.

All settings are read from environment variables (optionally populated from
a ``.env`` file via ``load_dotenv()``) once, when this module is imported.
"""

import os
from pathlib import Path
from urllib.parse import quote_plus

from dotenv import load_dotenv

# Populate os.environ from a .env file (if any) before the settings below
# are evaluated.
load_dotenv()


def _env_bool(name: str, default: str) -> bool:
    """Return True only if the variable (or *default*) is "true", ignoring case."""
    return os.getenv(name, default).lower() == "true"


def _env_int(name: str, default: str) -> int:
    """Parse an integer setting.

    Raises:
        ValueError: if the value is not a valid integer; the message names
            the offending variable so a bad deployment is easy to diagnose.
    """
    raw = os.getenv(name, default)
    try:
        return int(raw)
    except ValueError as err:
        raise ValueError(
            f"Environment variable {name} must be an integer, got {raw!r}"
        ) from err


def _env_float(name: str, default: str) -> float:
    """Parse a floating-point setting.

    Raises:
        ValueError: if the value is not a valid number; the message names
            the offending variable.
    """
    raw = os.getenv(name, default)
    try:
        return float(raw)
    except ValueError as err:
        raise ValueError(
            f"Environment variable {name} must be a number, got {raw!r}"
        ) from err


def _env_present(name: str) -> bool:
    """Return True if the variable is set to a non-blank value."""
    return bool(os.getenv(name, "").strip())


def _env_name() -> str:
    """Return the lower-cased deployment environment.

    ``ENV`` takes precedence over ``ENVIRONMENT``; the default is
    ``"development"``.
    """
    return os.getenv("ENV", os.getenv("ENVIRONMENT", "development")).lower()


class Config:
    """Application settings, evaluated once at import time.

    Every attribute is a class attribute computed from the process
    environment when the class body executes; changing an environment
    variable afterwards does not update the attribute.
    """

    # ============== Translation Service ==============
    TRANSLATION_SERVICE = os.getenv("TRANSLATION_SERVICE", "google")
    DEEPL_API_KEY = os.getenv("DEEPL_API_KEY", "")

    # Ollama Configuration
    OLLAMA_BASE_URL = os.getenv("OLLAMA_BASE_URL", "http://localhost:11434")
    OLLAMA_MODEL = os.getenv("OLLAMA_MODEL", "llama3")
    OLLAMA_VISION_MODEL = os.getenv("OLLAMA_VISION_MODEL", "llava")

    # ============== File Upload Configuration ==============
    MAX_FILE_SIZE_MB = _env_int("MAX_FILE_SIZE_MB", "50")
    MAX_FILE_SIZE_BYTES = MAX_FILE_SIZE_MB * 1024 * 1024

    # Directories (all siblings of this file)
    BASE_DIR = Path(__file__).parent
    UPLOAD_DIR = BASE_DIR / "uploads"
    OUTPUT_DIR = BASE_DIR / "outputs"
    TEMP_DIR = BASE_DIR / "temp"
    LOGS_DIR = BASE_DIR / "logs"

    # Supported file types
    SUPPORTED_EXTENSIONS = {".xlsx", ".docx", ".pptx", ".pdf"}

    # ============== Rate Limiting (SaaS) ==============
    RATE_LIMIT_ENABLED = _env_bool("RATE_LIMIT_ENABLED", "true")
    RATE_LIMIT_PER_MINUTE = _env_int("RATE_LIMIT_PER_MINUTE", "30")
    RATE_LIMIT_PER_HOUR = _env_int("RATE_LIMIT_PER_HOUR", "200")
    TRANSLATIONS_PER_MINUTE = _env_int("TRANSLATIONS_PER_MINUTE", "10")
    TRANSLATIONS_PER_HOUR = _env_int("TRANSLATIONS_PER_HOUR", "50")
    MAX_CONCURRENT_TRANSLATIONS = _env_int("MAX_CONCURRENT_TRANSLATIONS", "5")

    # ============== Cleanup Service ==============
    CLEANUP_ENABLED = _env_bool("CLEANUP_ENABLED", "true")
    CLEANUP_INTERVAL_MINUTES = _env_int("CLEANUP_INTERVAL_MINUTES", "5")
    FILE_TTL_MINUTES = _env_int("FILE_TTL_MINUTES", "60")
    INPUT_FILE_TTL_MINUTES = _env_int("INPUT_FILE_TTL_MINUTES", "30")
    OUTPUT_FILE_TTL_MINUTES = _env_int("OUTPUT_FILE_TTL_MINUTES", "120")

    # Disk space thresholds
    DISK_WARNING_THRESHOLD_GB = _env_float("DISK_WARNING_THRESHOLD_GB", "5.0")
    DISK_CRITICAL_THRESHOLD_GB = _env_float("DISK_CRITICAL_THRESHOLD_GB", "1.0")
    MAX_TOTAL_SIZE_GB = _env_float("MAX_TOTAL_SIZE_GB", "10.0")

    # ============== Security ==============
    ENABLE_HSTS = _env_bool("ENABLE_HSTS", "false")
    _CORS_ORIGINS_RAW = os.getenv("CORS_ORIGINS", "")
    # Comma-separated list, with blank entries and surrounding whitespace removed
    CORS_ORIGINS = [o.strip() for o in _CORS_ORIGINS_RAW.split(",") if o.strip()]
    # Raw value for "*" / empty checks (single source of truth)
    CORS_ORIGINS_RAW = _CORS_ORIGINS_RAW
    MAX_REQUEST_SIZE_MB = _env_int("MAX_REQUEST_SIZE_MB", "100")
    REQUEST_TIMEOUT_SECONDS = _env_int("REQUEST_TIMEOUT_SECONDS", "300")

    # ============== Monitoring ==============
    LOG_LEVEL = os.getenv("LOG_LEVEL", "INFO")
    LOG_FORMAT = os.getenv("LOG_FORMAT", "json")
    ENV = _env_name()
    ENABLE_REQUEST_LOGGING = _env_bool("ENABLE_REQUEST_LOGGING", "true")
    MAX_MEMORY_PERCENT = _env_float("MAX_MEMORY_PERCENT", "80")

    # ============== API Configuration ==============
    API_TITLE = "Document Translation API"
    API_VERSION = "1.0.0"
    # NOTE(review): SUPPORTED_EXTENSIONS includes ".pdf" but the format list
    # below does not mention PDF - confirm which one is out of date.
    API_DESCRIPTION = """
Advanced Document Translation API with strict formatting preservation.

## Supported Formats
- Excel (.xlsx) - Preserves cell formatting, formulas, merged cells, images
- Word (.docx) - Preserves styles, tables, images, headers/footers
- PowerPoint (.pptx) - Preserves layouts, animations, embedded media

## SaaS Features
- Rate limiting per client IP
- Automatic file cleanup
- Health monitoring
- Request logging

## API Versioning
All API endpoints are versioned under /api/v1/ prefix for backward compatibility.
    """

    @classmethod
    def ensure_directories(cls) -> None:
        """Create the upload, output, temp and logs directories if missing."""
        for directory in (cls.UPLOAD_DIR, cls.OUTPUT_DIR, cls.TEMP_DIR, cls.LOGS_DIR):
            directory.mkdir(exist_ok=True, parents=True)

    @classmethod
    def validate_required_env(cls) -> list[str]:
        """Validate required environment variables (Story 6.6 - NFR10).

        The environment is read at call time, not from the cached class
        attributes.

        Returns:
            In production (ENV=production): the names of the missing required
            variables; the app should exit if the list is non-empty.
            In any other environment: ``[]`` so defaults/warnings can be used
            (e.g. REDIS_URL optional if rate limit off).
        """
        if _env_name() != "production":
            return []

        missing: list[str] = []
        if not _env_present("JWT_SECRET_KEY"):
            missing.append("JWT_SECRET_KEY")
        if not _env_present("ADMIN_USERNAME"):
            missing.append("ADMIN_USERNAME")
        # Either the plain password or its hash satisfies the requirement.
        if not (_env_present("ADMIN_PASSWORD") or _env_present("ADMIN_PASSWORD_HASH")):
            missing.append("ADMIN_PASSWORD or ADMIN_PASSWORD_HASH")
        if not _env_present("ADMIN_TOKEN_SECRET"):
            missing.append("ADMIN_TOKEN_SECRET")
        # REDIS_URL is only required while rate limiting is switched on.
        if _env_bool("RATE_LIMIT_ENABLED", "true") and not _env_present("REDIS_URL"):
            missing.append("REDIS_URL")
        # DATABASE_URL counts as present when it can be built from POSTGRES_*.
        if not cls._get_database_url():
            missing.append("DATABASE_URL")
        return missing

    @classmethod
    def _get_database_url(cls) -> str:
        """Return DATABASE_URL or build from POSTGRES_* (AC #1 - Story 6.6).

        Returns:
            The stripped ``DATABASE_URL`` when it is set; otherwise a
            ``postgresql://`` URL assembled from ``POSTGRES_HOST``,
            ``POSTGRES_PORT``, ``POSTGRES_USER``, ``POSTGRES_PASSWORD`` and
            ``POSTGRES_DB``; otherwise ``""`` when host, user or database
            name is missing.
        """
        url = os.getenv("DATABASE_URL", "").strip()
        if url:
            return url

        host = os.getenv("POSTGRES_HOST", "").strip()
        user = os.getenv("POSTGRES_USER", "").strip()
        db = os.getenv("POSTGRES_DB", "").strip()
        if not (host and user and db):
            return ""

        # A variable that is set but blank must still fall back to the
        # default port; otherwise the URL would contain "host:/db".
        port = os.getenv("POSTGRES_PORT", "").strip() or "5432"
        password = os.getenv("POSTGRES_PASSWORD", "").strip()
        # Percent-encode both credentials so characters such as "@", ":" or
        # "/" cannot be mistaken for URL delimiters.
        return (
            f"postgresql://{quote_plus(user)}:{quote_plus(password)}"
            f"@{host}:{port}/{db}"
        )


config = Config()

# So that database/connection.py and alembic see DATABASE_URL when only POSTGRES_* is set (AC #1)
_effective_db_url = Config._get_database_url()
if _effective_db_url and not os.environ.get("DATABASE_URL", "").strip():
    os.environ["DATABASE_URL"] = _effective_db_url