Add Ollama vision image translation with checkbox option

2025-11-30 11:48:29 +01:00
parent abe77e3b29
commit 1d2784602b
5 changed files with 70 additions and 22 deletions
--- a/services/translation_service.py
+++ b/services/translation_service.py
@@ -59,19 +59,21 @@ class LibreTranslationProvider(TranslationProvider):
            return text
        
        try:
-            translator = LibreTranslator(source=source_language, target=target_language)
+            # Self-hosted LibreTranslate instances do not need an API key
+            translator = LibreTranslator(source=source_language, target=target_language, custom_url="http://localhost:5000")
            return translator.translate(text)
        except Exception as e:
-            print(f"Translation error: {e}")
+            # Best-effort: swallow the error and return the original, untranslated text
            return text


 class OllamaTranslationProvider(TranslationProvider):
    """Ollama LLM translation implementation"""
    
-    def __init__(self, base_url: str = "http://localhost:11434", model: str = "llama3"):
+    def __init__(self, base_url: str = "http://localhost:11434", model: str = "llama3", vision_model: str = "llava"):
        self.base_url = base_url.rstrip('/')
        self.model = model
+        self.vision_model = vision_model
    
    def translate(self, text: str, target_language: str, source_language: str = 'auto') -> str:
        if not text or not text.strip():
@@ -96,6 +98,34 @@ class OllamaTranslationProvider(TranslationProvider):
            print(f"Ollama translation error: {e}")
            return text
    
+    def translate_image(self, image_path: str, target_language: str) -> str:
+        """Translate text within an image using Ollama vision model"""
+        import base64
+        
+        try:
+            # Read and encode image
+            with open(image_path, 'rb') as img_file:
+                image_data = base64.b64encode(img_file.read()).decode('utf-8')
+            
+            prompt = f"Extract all text from this image and translate it to {target_language}. Return ONLY the translated text, preserving the structure and formatting."
+            
+            response = requests.post(
+                f"{self.base_url}/api/generate",
+                json={
+                    "model": self.vision_model,
+                    "prompt": prompt,
+                    "images": [image_data],
+                    "stream": False
+                },
+                timeout=60
+            )
+            response.raise_for_status()
+            result = response.json()
+            return result.get("response", "").strip()
+        except Exception as e:
+            print(f"Ollama vision translation error: {e}")
+            return ""
+    
    @staticmethod
    def list_models(base_url: str = "http://localhost:11434") -> List[str]:
        """List available Ollama models"""
@@ -121,20 +151,9 @@ class TranslationService:
    
    def _get_default_provider(self) -> TranslationProvider:
        """Get the default translation provider from configuration"""
-        service_type = config.TRANSLATION_SERVICE.lower()
-        
-        if service_type == "deepl":
-            if not config.DEEPL_API_KEY:
-                raise ValueError("DeepL API key not configured")
-            return DeepLTranslationProvider(config.DEEPL_API_KEY)
-        elif service_type == "libre":
-            return LibreTranslationProvider()
-        elif service_type == "ollama":
-            ollama_url = getattr(config, 'OLLAMA_BASE_URL', 'http://localhost:11434')
-            ollama_model = getattr(config, 'OLLAMA_MODEL', 'llama3')
-            return OllamaTranslationProvider(base_url=ollama_url, model=ollama_model)
-        else:  # Default to Google
-            return GoogleTranslationProvider()
+        # Always default to Google Translate so that no API key is required here.
+        # NOTE(review): the API endpoint is expected to override the provider per request - confirm.
+        return GoogleTranslationProvider()
    
    def translate_text(self, text: str, target_language: str, source_language: str = 'auto') -> str:
        """