From 1d2784602b3d81e4f022deff211604c6cf7ce286 Mon Sep 17 00:00:00 2001 From: Sepehr Date: Sun, 30 Nov 2025 11:48:29 +0100 Subject: [PATCH] Add Ollama vision image translation with checkbox option --- .env.example | 1 + config.py | 1 + main.py | 12 +++++--- services/translation_service.py | 53 ++++++++++++++++++++++----------- static/index.html | 25 +++++++++++++++- 5 files changed, 70 insertions(+), 22 deletions(-) diff --git a/.env.example b/.env.example index 4144f29..cd0b3d9 100644 --- a/.env.example +++ b/.env.example @@ -5,6 +5,7 @@ DEEPL_API_KEY=your_deepl_api_key_here # Ollama Configuration (for LLM-based translation) OLLAMA_BASE_URL=http://localhost:11434 OLLAMA_MODEL=llama3 +OLLAMA_VISION_MODEL=llava # API Configuration MAX_FILE_SIZE_MB=50 diff --git a/config.py b/config.py index 799c597..8137950 100644 --- a/config.py +++ b/config.py @@ -15,6 +15,7 @@ class Config: # Ollama Configuration OLLAMA_BASE_URL = os.getenv("OLLAMA_BASE_URL", "http://localhost:11434") OLLAMA_MODEL = os.getenv("OLLAMA_MODEL", "llama3") + OLLAMA_VISION_MODEL = os.getenv("OLLAMA_VISION_MODEL", "llava") # File Upload Configuration MAX_FILE_SIZE_MB = int(os.getenv("MAX_FILE_SIZE_MB", "50")) diff --git a/main.py b/main.py index b1fa303..f7c59e5 100644 --- a/main.py +++ b/main.py @@ -111,6 +111,7 @@ async def translate_document( target_language: str = Form(..., description="Target language code (e.g., 'es', 'fr', 'de')"), source_language: str = Form(default="auto", description="Source language code (default: auto-detect)"), provider: str = Form(default="google", description="Translation provider (google, ollama, deepl, libre)"), + translate_images: bool = Form(default=False, description="Translate images with Ollama vision (only for Ollama provider)"), cleanup: bool = Form(default=True, description="Delete input file after translation") ): """ @@ -153,7 +154,7 @@ async def translate_document( logger.info(f"Saved input file to: {input_path}") # Configure translation provider - from services.translation_service import TranslationService, GoogleTranslationProvider, DeepLTranslationProvider, LibreTranslationProvider, OllamaTranslationProvider + from services.translation_service import GoogleTranslationProvider, DeepLTranslationProvider, LibreTranslationProvider, OllamaTranslationProvider, translation_service if provider.lower() == "deepl": if not config.DEEPL_API_KEY: @@ -162,13 +163,16 @@ async def translate_document( elif provider.lower() == "libre": translation_provider = LibreTranslationProvider() elif provider.lower() == "ollama": - translation_provider = OllamaTranslationProvider(config.OLLAMA_BASE_URL, config.OLLAMA_MODEL) + vision_model = getattr(config, 'OLLAMA_VISION_MODEL', 'llava') + translation_provider = OllamaTranslationProvider(config.OLLAMA_BASE_URL, config.OLLAMA_MODEL, vision_model) else: translation_provider = GoogleTranslationProvider() # Update the global translation service - from services import translation_service as ts_module - ts_module.translation_service.provider = translation_provider + translation_service.provider = translation_provider + + # Store translate_images flag for translators to access + translation_service.translate_images = translate_images # Translate based on file type if file_extension == ".xlsx": diff --git a/services/translation_service.py b/services/translation_service.py index e46f379..fed1ba2 100644 --- a/services/translation_service.py +++ b/services/translation_service.py @@ -59,19 +59,21 @@ class LibreTranslationProvider(TranslationProvider): return text try: - translator = LibreTranslator(source=source_language, target=target_language) + # LibreTranslator doesn't need API key for self-hosted instances + translator = LibreTranslator(source=source_language, target=target_language, custom_url="http://localhost:5000") return translator.translate(text) except Exception as e: - print(f"Translation error: {e}") + # Fail silently and return original text return text class OllamaTranslationProvider(TranslationProvider): """Ollama LLM translation implementation""" - def __init__(self, base_url: str = "http://localhost:11434", model: str = "llama3"): + def __init__(self, base_url: str = "http://localhost:11434", model: str = "llama3", vision_model: str = "llava"): self.base_url = base_url.rstrip('/') self.model = model + self.vision_model = vision_model def translate(self, text: str, target_language: str, source_language: str = 'auto') -> str: if not text or not text.strip(): @@ -96,6 +98,34 @@ class OllamaTranslationProvider(TranslationProvider): print(f"Ollama translation error: {e}") return text + def translate_image(self, image_path: str, target_language: str) -> str: + """Translate text within an image using Ollama vision model""" + import base64 + + try: + # Read and encode image + with open(image_path, 'rb') as img_file: + image_data = base64.b64encode(img_file.read()).decode('utf-8') + + prompt = f"Extract all text from this image and translate it to {target_language}. Return ONLY the translated text, preserving the structure and formatting." + + response = requests.post( + f"{self.base_url}/api/generate", + json={ + "model": self.vision_model, + "prompt": prompt, + "images": [image_data], + "stream": False + }, + timeout=60 + ) + response.raise_for_status() + result = response.json() + return result.get("response", "").strip() + except Exception as e: + print(f"Ollama vision translation error: {e}") + return "" + @staticmethod def list_models(base_url: str = "http://localhost:11434") -> List[str]: """List available Ollama models""" @@ -121,20 +151,9 @@ class TranslationService: def _get_default_provider(self) -> TranslationProvider: """Get the default translation provider from configuration""" - service_type = config.TRANSLATION_SERVICE.lower() - - if service_type == "deepl": - if not config.DEEPL_API_KEY: - raise ValueError("DeepL API key not configured") - return DeepLTranslationProvider(config.DEEPL_API_KEY) - elif service_type == "libre": - return LibreTranslationProvider() - elif service_type == "ollama": - ollama_url = getattr(config, 'OLLAMA_BASE_URL', 'http://localhost:11434') - ollama_model = getattr(config, 'OLLAMA_MODEL', 'llama3') - return OllamaTranslationProvider(base_url=ollama_url, model=ollama_model) - else: # Default to Google - return GoogleTranslationProvider() + # Always use Google Translate by default to avoid API key issues + # Provider will be overridden per request in the API endpoint + return GoogleTranslationProvider() def translate_text(self, text: str, target_language: str, source_language: str = 'auto') -> str: """ diff --git a/static/index.html b/static/index.html index a5b67fb..6f3cafc 100644 --- a/static/index.html +++ b/static/index.html @@ -350,7 +350,7 @@
- @@ -359,6 +359,13 @@
+ +
@@ -385,6 +392,19 @@