Files
office_translator/config.py
sepehr ce8e150a61 feat: homelab deployment - NPM + IONOS DNS + monitoring + NAS backup
- Restructured docker-compose for Nginx Proxy Manager (no custom nginx)
- Added domain wordly.art configuration
- Added Prometheus + Grafana monitoring stack with pre-configured dashboards
- Added PostgreSQL backup script to NAS (daily/weekly/monthly rotation)
- Added alert rules for backend, system, and Docker metrics
- Updated deployment guide for NPM + IONOS DNS homelab setup
- Added marketing plan document
- PDF translator and watermark support
- Enhanced middleware, routes, and translator modules

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-10 11:43:28 +02:00

158 lines
6.5 KiB
Python

"""
Configuration module for the Document Translation API
SaaS-ready with comprehensive settings for production deployment
"""
import os
from pathlib import Path
from dotenv import load_dotenv
load_dotenv()
class Config:
    """Environment-driven settings for the Document Translation API.

    Every attribute is evaluated once, when the class body runs, from the
    process environment with the fallback shown next to it. Numeric settings
    go through ``int()``/``float()``, so a malformed value raises
    ``ValueError`` at that point. Boolean flags are true only when the
    variable equals ``"true"`` (case-insensitive).
    """

    # ============== Translation Service ==============
    TRANSLATION_SERVICE = os.getenv("TRANSLATION_SERVICE", "google")
    DEEPL_API_KEY = os.getenv("DEEPL_API_KEY", "")

    # Ollama Configuration
    OLLAMA_BASE_URL = os.getenv("OLLAMA_BASE_URL", "http://localhost:11434")
    OLLAMA_MODEL = os.getenv("OLLAMA_MODEL", "llama3")
    OLLAMA_VISION_MODEL = os.getenv("OLLAMA_VISION_MODEL", "llava")

    # ============== File Upload Configuration ==============
    MAX_FILE_SIZE_MB = int(os.getenv("MAX_FILE_SIZE_MB", "50"))
    # Derived from MAX_FILE_SIZE_MB; not configurable on its own.
    MAX_FILE_SIZE_BYTES = MAX_FILE_SIZE_MB * 1024 * 1024

    # Directories (all siblings of this module)
    BASE_DIR = Path(__file__).parent
    UPLOAD_DIR = BASE_DIR / "uploads"
    OUTPUT_DIR = BASE_DIR / "outputs"
    TEMP_DIR = BASE_DIR / "temp"
    LOGS_DIR = BASE_DIR / "logs"

    # Supported file types
    SUPPORTED_EXTENSIONS = {".xlsx", ".docx", ".pptx", ".pdf"}

    # ============== Rate Limiting (SaaS) ==============
    RATE_LIMIT_ENABLED = os.getenv("RATE_LIMIT_ENABLED", "true").lower() == "true"
    RATE_LIMIT_PER_MINUTE = int(os.getenv("RATE_LIMIT_PER_MINUTE", "30"))
    RATE_LIMIT_PER_HOUR = int(os.getenv("RATE_LIMIT_PER_HOUR", "200"))
    TRANSLATIONS_PER_MINUTE = int(os.getenv("TRANSLATIONS_PER_MINUTE", "10"))
    TRANSLATIONS_PER_HOUR = int(os.getenv("TRANSLATIONS_PER_HOUR", "50"))
    MAX_CONCURRENT_TRANSLATIONS = int(os.getenv("MAX_CONCURRENT_TRANSLATIONS", "5"))

    # ============== Cleanup Service ==============
    CLEANUP_ENABLED = os.getenv("CLEANUP_ENABLED", "true").lower() == "true"
    CLEANUP_INTERVAL_MINUTES = int(os.getenv("CLEANUP_INTERVAL_MINUTES", "5"))
    FILE_TTL_MINUTES = int(os.getenv("FILE_TTL_MINUTES", "60"))
    INPUT_FILE_TTL_MINUTES = int(os.getenv("INPUT_FILE_TTL_MINUTES", "30"))
    OUTPUT_FILE_TTL_MINUTES = int(os.getenv("OUTPUT_FILE_TTL_MINUTES", "120"))

    # Disk space thresholds
    DISK_WARNING_THRESHOLD_GB = float(os.getenv("DISK_WARNING_THRESHOLD_GB", "5.0"))
    DISK_CRITICAL_THRESHOLD_GB = float(os.getenv("DISK_CRITICAL_THRESHOLD_GB", "1.0"))
    MAX_TOTAL_SIZE_GB = float(os.getenv("MAX_TOTAL_SIZE_GB", "10.0"))

    # ============== Security ==============
    ENABLE_HSTS = os.getenv("ENABLE_HSTS", "false").lower() == "true"
    _CORS_ORIGINS_RAW = os.getenv("CORS_ORIGINS", "")
    # Comma-separated list; blank entries and surrounding whitespace are dropped.
    CORS_ORIGINS = [o.strip() for o in _CORS_ORIGINS_RAW.split(",") if o.strip()]
    # Raw value for "*" / empty checks (single source of truth)
    CORS_ORIGINS_RAW = _CORS_ORIGINS_RAW
    MAX_REQUEST_SIZE_MB = int(os.getenv("MAX_REQUEST_SIZE_MB", "100"))
    REQUEST_TIMEOUT_SECONDS = int(os.getenv("REQUEST_TIMEOUT_SECONDS", "300"))

    # ============== Monitoring ==============
    LOG_LEVEL = os.getenv("LOG_LEVEL", "INFO")
    LOG_FORMAT = os.getenv("LOG_FORMAT", "json")
    # ENV takes precedence over ENVIRONMENT; both fall back to "development".
    ENV = os.getenv("ENV", os.getenv("ENVIRONMENT", "development")).lower()
    ENABLE_REQUEST_LOGGING = (
        os.getenv("ENABLE_REQUEST_LOGGING", "true").lower() == "true"
    )
    MAX_MEMORY_PERCENT = float(os.getenv("MAX_MEMORY_PERCENT", "80"))

    # ============== API Configuration ==============
    API_TITLE = "Document Translation API"
    API_VERSION = "1.0.0"
    # NOTE(review): SUPPORTED_EXTENSIONS contains ".pdf" but the format list
    # below does not mention PDF - confirm whether the description should.
    # The text is kept flush-left on purpose so the string carries no leading
    # whitespace on its lines.
    API_DESCRIPTION = """
Advanced Document Translation API with strict formatting preservation.
## Supported Formats
- Excel (.xlsx) - Preserves cell formatting, formulas, merged cells, images
- Word (.docx) - Preserves styles, tables, images, headers/footers
- PowerPoint (.pptx) - Preserves layouts, animations, embedded media
## SaaS Features
- Rate limiting per client IP
- Automatic file cleanup
- Health monitoring
- Request logging
## API Versioning
All API endpoints are versioned under /api/v1/ prefix for backward compatibility.
"""

    @classmethod
    def ensure_directories(cls) -> None:
        """Create the upload, output, temp and logs directories if missing.

        Parent directories are created as needed and existing directories are
        left untouched.
        """
        for directory in (cls.UPLOAD_DIR, cls.OUTPUT_DIR, cls.TEMP_DIR, cls.LOGS_DIR):
            directory.mkdir(exist_ok=True, parents=True)

    @classmethod
    def validate_required_env(cls) -> list[str]:
        """Validate required environment variables (Story 6.6 - NFR10).

        The environment is read at call time, not from the class attributes.

        Returns:
            In production (``ENV=production``, or ``ENVIRONMENT`` when ``ENV``
            is unset): the names of the required settings that are missing or
            blank; the app should exit if the list is non-empty. In any other
            environment: ``[]``, so defaults/warnings can be used instead.
        """
        env = os.getenv("ENV", os.getenv("ENVIRONMENT", "development")).lower()
        if env != "production":
            return []

        def _is_set(name: str) -> bool:
            # Whitespace-only values count as missing.
            return bool(os.getenv(name, "").strip())

        missing: list[str] = []
        if not _is_set("JWT_SECRET_KEY"):
            missing.append("JWT_SECRET_KEY")
        if not _is_set("ADMIN_USERNAME"):
            missing.append("ADMIN_USERNAME")
        # Either the plain password or its hash satisfies the requirement.
        if not _is_set("ADMIN_PASSWORD") and not _is_set("ADMIN_PASSWORD_HASH"):
            missing.append("ADMIN_PASSWORD or ADMIN_PASSWORD_HASH")
        if not _is_set("ADMIN_TOKEN_SECRET"):
            missing.append("ADMIN_TOKEN_SECRET")

        # REDIS_URL is only mandatory while rate limiting is switched on.
        rate_limit_on = os.getenv("RATE_LIMIT_ENABLED", "true").lower() == "true"
        if rate_limit_on and not _is_set("REDIS_URL"):
            missing.append("REDIS_URL")

        # A URL assembled from POSTGRES_* counts as a configured DATABASE_URL.
        if not cls._get_database_url():
            missing.append("DATABASE_URL")
        return missing

    @classmethod
    def _get_database_url(cls) -> str:
        """Return DATABASE_URL or build it from POSTGRES_* (AC #1 - Story 6.6).

        A non-blank ``DATABASE_URL`` wins and is returned stripped. Otherwise
        the URL is assembled from ``POSTGRES_HOST``, ``POSTGRES_PORT``,
        ``POSTGRES_USER``, ``POSTGRES_PASSWORD`` and ``POSTGRES_DB``: host,
        user and database name are mandatory, the port falls back to 5432 and
        the password may be empty.

        Returns:
            The connection URL, or ``""`` when neither source is sufficient.
        """
        url = os.getenv("DATABASE_URL", "").strip()
        if url:
            return url

        host = os.getenv("POSTGRES_HOST", "").strip()
        # ``or`` also covers POSTGRES_PORT being set but blank, which would
        # otherwise bypass the default and yield "host:/db".
        port = os.getenv("POSTGRES_PORT", "").strip() or "5432"
        user = os.getenv("POSTGRES_USER", "").strip()
        password = os.getenv("POSTGRES_PASSWORD", "").strip()
        db = os.getenv("POSTGRES_DB", "").strip()
        if not (host and user and db):
            return ""

        from urllib.parse import quote_plus

        # Escape both credentials: a raw "@", ":" or "/" in either one would
        # shift the userinfo/host boundary of the resulting URL.
        safe_user = quote_plus(user)
        safe_password = quote_plus(password)
        return f"postgresql://{safe_user}:{safe_password}@{host}:{port}/{db}"
# Shared settings instance for importers of this module.
config = Config()

# Publish the effective URL through the environment so that
# database/connection.py and alembic see DATABASE_URL when only POSTGRES_*
# is set (AC #1). A DATABASE_URL that is already non-blank is never replaced.
_effective_db_url = Config._get_database_url()
if _effective_db_url:
    if not os.environ.get("DATABASE_URL", "").strip():
        os.environ["DATABASE_URL"] = _effective_db_url