Add system prompt, glossary, presets for Ollama/WebLLM, image translation support

2025-11-30 16:45:41 +01:00
parent 465cab8a61
commit e48ea07e44
6 changed files with 497 additions and 51 deletions
--- a/main.py
+++ b/main.py
@@ -111,7 +111,9 @@ async def translate_document(
    target_language: str = Form(..., description="Target language code (e.g., 'es', 'fr', 'de')"),
    source_language: str = Form(default="auto", description="Source language code (default: auto-detect)"),
    provider: str = Form(default="google", description="Translation provider (google, ollama, deepl, libre)"),
-    translate_images: bool = Form(default=False, description="Translate images with Ollama vision (only for Ollama provider)"),
+    translate_images: bool = Form(default=False, description="Translate images with multimodal Ollama model"),
    ollama_model: str = Form(default="", description="Ollama model to use (also used for vision if multimodal)"),
    system_prompt: str = Form(default="", description="Custom system prompt with context, glossary, or instructions for LLM translation"),
    cleanup: bool = Form(default=True, description="Delete input file after translation")
 ):
    """
@@ -154,7 +156,7 @@ async def translate_document(
        logger.info(f"Saved input file to: {input_path}")
        # Configure translation provider
-        from services.translation_service import GoogleTranslationProvider, DeepLTranslationProvider, LibreTranslationProvider, OllamaTranslationProvider, WebLLMTranslationProvider, translation_service
+        from services.translation_service import GoogleTranslationProvider, DeepLTranslationProvider, LibreTranslationProvider, OllamaTranslationProvider, translation_service
        if provider.lower() == "deepl":
            if not config.DEEPL_API_KEY:
@@ -163,10 +165,13 @@ async def translate_document(
        elif provider.lower() == "libre":
            translation_provider = LibreTranslationProvider()
        elif provider.lower() == "ollama":
-            vision_model = getattr(config, 'OLLAMA_VISION_MODEL', 'llava')
+            # Use the same model for text and vision (multimodal models like gemma3, qwen3-vl)
-            translation_provider = OllamaTranslationProvider(config.OLLAMA_BASE_URL, config.OLLAMA_MODEL, vision_model)
+            model_to_use = ollama_model.strip() if ollama_model else config.OLLAMA_MODEL
-        elif provider.lower() == "webllm":
+            custom_prompt = system_prompt.strip() if system_prompt else ""
-            translation_provider = WebLLMTranslationProvider()
+            logger.info(f"Using Ollama model: {model_to_use} (text + vision)")
            if custom_prompt:
                logger.info(f"Custom system prompt provided ({len(custom_prompt)} chars)")
            translation_provider = OllamaTranslationProvider(config.OLLAMA_BASE_URL, model_to_use, model_to_use, custom_prompt)
        else:
            translation_provider = GoogleTranslationProvider()
--- a/services/translation_service.py
+++ b/services/translation_service.py
@@ -70,30 +70,65 @@ class LibreTranslationProvider(TranslationProvider):
 class OllamaTranslationProvider(TranslationProvider):
    """Ollama LLM translation implementation"""
-    def __init__(self, base_url: str = "http://localhost:11434", model: str = "llama3", vision_model: str = "llava"):
+    def __init__(self, base_url: str = "http://localhost:11434", model: str = "llama3", vision_model: str = "llava", system_prompt: str = ""):
        self.base_url = base_url.rstrip('/')
-        self.model = model
+        self.model = model.strip()  # Remove any leading/trailing whitespace
-        self.vision_model = vision_model
+        self.vision_model = vision_model.strip()
        self.custom_system_prompt = system_prompt  # Custom context, glossary, instructions
    def translate(self, text: str, target_language: str, source_language: str = 'auto') -> str:
        if not text or not text.strip():
            return text
        # Skip very short text or numbers only
        if len(text.strip()) < 2 or text.strip().isdigit():
            return text
        try:
-            prompt = f"Translate the following text to {target_language}. Return ONLY the translation, nothing else:\n\n{text}"
+            # Build system prompt with custom context if provided
            base_prompt = f"You are a translator. Translate the user's text to {target_language}. Return ONLY the translation, nothing else."
            if self.custom_system_prompt:
                system_content = f"""{base_prompt}
 ADDITIONAL CONTEXT AND INSTRUCTIONS:
 {self.custom_system_prompt}"""
            else:
                system_content = base_prompt
            # Use /api/chat endpoint (more compatible with all models)
            response = requests.post(
-                f"{self.base_url}/api/generate",
+                f"{self.base_url}/api/chat",
                json={
                    "model": self.model,
-                    "prompt": prompt,
+                    "messages": [
-                    "stream": False
+                        {
                            "role": "system",
                            "content": system_content
                        },
                        {
                            "role": "user",
                            "content": text
                        }
                    ],
                    "stream": False,
                    "options": {
                        "temperature": 0.3,
                        "num_predict": 500
                    }
                },
-                timeout=30
+                timeout=120  # 2 minutes timeout
            )
            response.raise_for_status()
            result = response.json()
-            return result.get("response", text).strip()
+            translated = result.get("message", {}).get("content", "").strip()
            return translated if translated else text
        except requests.exceptions.ConnectionError:
            print(f"Ollama error: Cannot connect to {self.base_url}. Is Ollama running?")
            return text
        except requests.exceptions.Timeout:
            print(f"Ollama error: Request timeout after 120s")
            return text
        except Exception as e:
            print(f"Ollama translation error: {e}")
            return text
@@ -107,21 +142,25 @@ class OllamaTranslationProvider(TranslationProvider):
            with open(image_path, 'rb') as img_file:
                image_data = base64.b64encode(img_file.read()).decode('utf-8')
-            prompt = f"Extract all text from this image and translate it to {target_language}. Return ONLY the translated text, preserving the structure and formatting."
+            # Use /api/chat for vision models too
            response = requests.post(
-                f"{self.base_url}/api/generate",
+                f"{self.base_url}/api/chat",
                json={
                    "model": self.vision_model,
-                    "prompt": prompt,
+                    "messages": [
-                    "images": [image_data],
+                        {
                            "role": "user",
                            "content": f"Extract all text from this image and translate it to {target_language}. Return ONLY the translated text, preserving the structure and formatting.",
                            "images": [image_data]
                        }
                    ],
                    "stream": False
                },
                timeout=60
            )
            response.raise_for_status()
            result = response.json()
-            return result.get("response", "").strip()
+            return result.get("message", {}).get("content", "").strip()
        except Exception as e:
            print(f"Ollama vision translation error: {e}")
            return ""
@@ -158,6 +197,7 @@ class TranslationService:
        else:
            # Auto-select provider based on configuration
            self.provider = self._get_default_provider()
        self.translate_images = False  # Flag to enable image translation
    def _get_default_provider(self) -> TranslationProvider:
        """Get the default translation provider from configuration"""
@@ -182,6 +222,26 @@ class TranslationService:
        return self.provider.translate(text, target_language, source_language)
    def translate_image(self, image_path: str, target_language: str) -> str:
        """
        Extract and translate the text of an image through the provider's vision model.

        Args:
            image_path: Path to image file
            target_language: Target language code

        Returns:
            Translated text from the image, or "" when image translation is
            disabled or the active provider is not an OllamaTranslationProvider
        """
        # Both must hold: the feature flag is on and the provider is the Ollama one
        image_capable = self.translate_images and isinstance(self.provider, OllamaTranslationProvider)
        if not image_capable:
            return ""
        return self.provider.translate_image(image_path, target_language)
    def translate_batch(self, texts: list[str], target_language: str, source_language: str = 'auto') -> list[str]:
        """
        Translate multiple text strings
--- a/static/index.html
+++ b/static/index.html
@@ -309,7 +309,7 @@
                </div>
                <div class="form-group">
                    <label for="ollama-model">Modèle Ollama</label>
-                    <input type="text" id="ollama-model" value="llama3" placeholder="llama3, mistral, etc.">
+                    <input type="text" id="ollama-model" value="llama3.2" placeholder="llama3.2, mistral, etc.">
                </div>
            </div>
            <button onclick="listOllamaModels()" class="btn-secondary">List Available Models</button>
@@ -318,6 +318,39 @@
            <div id="models-result"></div>
        </div>
        <!-- System Prompt for LLM Translation -->
        <div class="card">
            <h2>Translation Context (Ollama / WebLLM)</h2>
            <p style="font-size: 13px; color: #718096; margin-bottom: 15px;">
                Provide context, technical glossary, or specific instructions to improve translation quality.
            </p>
            <div class="form-group">
                <label for="system-prompt">System Prompt / Instructions</label>
                <textarea id="system-prompt" rows="4" style="width: 100%; padding: 10px 14px; border: 1px solid #cbd5e0; border-radius: 6px; font-size: 14px; font-family: inherit; resize: vertical;" placeholder="Example: You are translating HVAC technical documents. Use these terms:
 - Batterie (FR) = Coil (EN)
 - Groupe froid (FR) = Chiller (EN)
 - CTA (FR) = AHU (EN)"></textarea>
            </div>
            <div class="form-group">
                <label for="glossary">Technical Glossary (one per line: source=target)</label>
                <textarea id="glossary" rows="5" style="width: 100%; padding: 10px 14px; border: 1px solid #cbd5e0; border-radius: 6px; font-size: 13px; font-family: monospace; resize: vertical;" placeholder="batterie=coil
 groupe froid=chiller
 CTA=AHU
 échangeur=heat exchanger
 vanne 3 voies=3-way valve"></textarea>
            </div>
            <div style="display: flex; gap: 10px; flex-wrap: wrap;">
                <button onclick="loadPreset('hvac')" class="btn-secondary" style="font-size: 12px;">HVAC Preset</button>
                <button onclick="loadPreset('it')" class="btn-secondary" style="font-size: 12px;">IT Preset</button>
                <button onclick="loadPreset('legal')" class="btn-secondary" style="font-size: 12px;">Legal Preset</button>
                <button onclick="loadPreset('medical')" class="btn-secondary" style="font-size: 12px;">Medical Preset</button>
                <button onclick="clearPrompt()" class="btn-secondary" style="font-size: 12px; background: #dc2626;">Clear</button>
            </div>
        </div>
        <!-- File translation -->
        <div class="card">
            <h2>Document Translation</h2>
@@ -335,6 +368,8 @@
                <div class="form-group">
                    <label for="target-lang">Target Language</label>
                    <select id="target-lang">
                        <option value="en">English (en)</option>
                        <option value="fa">Persian / Farsi (fa)</option>
                        <option value="es">Espagnol (es)</option>
                        <option value="fr">Français (fr)</option>
                        <option value="de">Allemand (de)</option>
@@ -350,10 +385,10 @@
                <div class="form-group">
                    <label for="provider">Translation Service</label>
-                    <select id="provider" onchange="toggleImageTranslation()">
+                    <select id="provider" onchange="toggleProviderOptions()">
                        <option value="google">Google Translate (Default)</option>
                        <option value="ollama">Ollama LLM (Local Server)</option>
-                        <option value="webllm">WebLLM (Browser - No Server)</option>
+                        <option value="webllm">WebLLM (Browser - WebGPU)</option>
                        <option value="deepl">DeepL</option>
                        <option value="libre">LibreTranslate</option>
                    </select>
@@ -363,11 +398,11 @@
            <div class="form-group" id="image-translation-option" style="display: none;">
                <label style="display: flex; align-items: center; cursor: pointer;">
                    <input type="checkbox" id="translate-images" style="width: auto; margin-right: 10px;">
-                    <span>Translate images with Ollama Vision (requires llava model)</span>
+                    <span>Translate images with vision (use multimodal models: gemma3, qwen3-vl, llava, etc.)</span>
                </label>
            </div>
-            <div class="form-group" id="webllm-info" style="display: none; padding: 12px; background: #e0f2ff; border-radius: 6px; border-left: 4px solid #2563eb;">
+            <div class="form-group" id="webllm-options" style="display: none; padding: 12px; background: #e0f2ff; border-radius: 6px; border-left: 4px solid #2563eb;">
                <p style="margin: 0 0 10px 0; font-size: 13px; color: #1e40af;">
                    <strong>WebLLM Mode:</strong> Translation runs entirely in your browser using WebGPU. First use downloads the model.
                </p>
@@ -375,8 +410,8 @@
                    <div>
                        <label for="webllm-model" style="font-size: 12px; color: #4a5568; margin-bottom: 4px;">Select Model:</label>
                        <select id="webllm-model" style="width: 100%; padding: 6px; font-size: 13px; border: 1px solid #cbd5e0; border-radius: 4px;">
                            <option value="Llama-3.2-3B-Instruct-q4f32_1-MLC">Llama 3.2 3B (~2GB) - Recommended</option>
                            <option value="Llama-3.1-8B-Instruct-q4f32_1-MLC">Llama 3.1 8B (~4.5GB)</option>
                            <option value="Llama-3.2-3B-Instruct-q4f32_1-MLC">Llama 3.2 3B (~2GB)</option>
                            <option value="Phi-3.5-mini-instruct-q4f16_1-MLC">Phi 3.5 Mini (~2.5GB)</option>
                            <option value="Mistral-7B-Instruct-v0.3-q4f16_1-MLC">Mistral 7B (~4.5GB)</option>
                            <option value="gemma-2-2b-it-q4f16_1-MLC">Gemma 2 2B (~1.5GB)</option>
@@ -386,6 +421,7 @@
                        Clear Cache
                    </button>
                </div>
                <div id="webllm-status" style="margin-top: 10px; font-size: 12px; color: #4a5568;"></div>
            </div>
            <button onclick="translateFile()">Translate Document</button>
@@ -445,26 +481,193 @@
            }
        }
-        // Toggle image translation option based on provider
+        // Toggle provider options based on selection
-        function toggleImageTranslation() {
+        // Preset templates for different domains
-            const provider = document.getElementById('provider').value;
+        const presets = {
-            const imageOption = document.getElementById('image-translation-option');
+            hvac: {
-            const webllmInfo = document.getElementById('webllm-info');
+                prompt: `You are translating HVAC (Heating, Ventilation, Air Conditioning) technical documents. 
-            
+Use precise technical terminology. Maintain consistency with industry standards.
-            if (provider === 'ollama') {
+Keep unit measurements (kW, m³/h, Pa) unchanged.
-                imageOption.style.display = 'block';
+Translate component names according to the glossary provided.`,
-                webllmInfo.style.display = 'none';
+                glossary: `batterie=coil
-            } else if (provider === 'webllm') {
+groupe froid=chiller
-                imageOption.style.display = 'none';
+CTA=AHU (Air Handling Unit)
-                webllmInfo.style.display = 'block';
+échangeur=heat exchanger
-                document.getElementById('translate-images').checked = false;
+vanne 3 voies=3-way valve
-            } else {
+détendeur=expansion valve
-                imageOption.style.display = 'none';
+compresseur=compressor
-                webllmInfo.style.display = 'none';
+évaporateur=evaporator
-                document.getElementById('translate-images').checked = false;
+condenseur=condenser
 fluide frigorigène=refrigerant
 débit d'air=airflow
 pression statique=static pressure
 récupérateur=heat recovery unit
 ventilo-convecteur=fan coil unit
 gaine=duct
 diffuseur=diffuser
 registre=damper`
            },
            it: {
                prompt: `You are translating IT and software documentation.
 Keep technical terms, code snippets, and variable names unchanged.
 Translate UI labels and user-facing text appropriately.
 Maintain formatting markers like **bold** and \`code\`.`,
                glossary: `serveur=server
 base de données=database
 requête=query
 sauvegarde=backup
 mise à jour=update
 télécharger=download
 téléverser=upload
 mot de passe=password
 identifiant=username
 pare-feu=firewall
 réseau=network
 stockage=storage
 conteneur=container
 déploiement=deployment`
            },
            legal: {
                prompt: `You are translating legal documents.
 Use formal legal terminology. Be precise and unambiguous.
 Maintain references to laws, articles, and clauses in their original form.
 Use standard legal phrases for the target language.`,
                glossary: `contrat=contract
 clause=clause
 partie=party
 signataire=signatory
 résiliation=termination
 préavis=notice period
 dommages et intérêts=damages
 responsabilité=liability
 juridiction=jurisdiction
 arbitrage=arbitration
 avenant=amendment
 ayant droit=beneficiary`
            },
            medical: {
                prompt: `You are translating medical and healthcare documents.
 Use standard medical terminology (Latin/Greek roots when appropriate).
 Keep drug names, dosages, and medical codes unchanged.
 Be precise with anatomical terms and procedures.`,
                glossary: `patient=patient
 ordonnance=prescription
 posologie=dosage
 effet secondaire=side effect
 contre-indication=contraindication
 diagnostic=diagnosis
 symptôme=symptom
 traitement=treatment
 chirurgie=surgery
 anesthésie=anesthesia
 perfusion=infusion
 prélèvement=sample collection`
            }
        };
        /**
         * Copy a named preset into the system-prompt and glossary text areas.
         * Names that are not present in `presets` are ignored.
         */
        function loadPreset(presetName) {
            const selected = presets[presetName];
            if (!selected) {
                return;
            }
            const promptField = document.getElementById('system-prompt');
            promptField.value = selected.prompt;
            const glossaryField = document.getElementById('glossary');
            glossaryField.value = selected.glossary;
        }
        /** Empty both the system-prompt and the glossary text areas. */
        function clearPrompt() {
            for (const fieldId of ['system-prompt', 'glossary']) {
                document.getElementById(fieldId).value = '';
            }
        }
        /**
         * Build the custom prompt sent with a translation request: the
         * system-prompt text, followed by a GLOSSARY section when the glossary
         * text area contains anything other than whitespace.
         */
        function getFullSystemPrompt() {
            const basePrompt = document.getElementById('system-prompt').value || '';
            const glossaryText = document.getElementById('glossary').value || '';
            if (!glossaryText.trim()) {
                return basePrompt;
            }
            return basePrompt + '\n\nGLOSSARY (use these exact translations):\n' + glossaryText;
        }
        /**
         * Show the option panel that belongs to the selected provider and hide
         * the others. The image-translation checkbox is always unchecked when
         * the provider changes.
         */
        function toggleProviderOptions() {
            const selected = document.getElementById('provider').value;
            const imagePanel = document.getElementById('image-translation-option');
            const webllmPanel = document.getElementById('webllm-options');

            // Start from a clean slate: every panel hidden, checkbox cleared
            [imagePanel, webllmPanel].forEach((panel) => {
                panel.style.display = 'none';
            });
            document.getElementById('translate-images').checked = false;

            switch (selected) {
                case 'ollama':
                    imagePanel.style.display = 'block';
                    break;
                case 'webllm':
                    webllmPanel.style.display = 'block';
                    break;
                default:
                    break;
            }
        }
        // WebLLM engine instance. Stays null until initWebLLM() assigns the
        // engine returned by CreateMLCEngine; translateWithWebLLM() returns its
        // input unchanged while this is null.
        let webllmEngine = null;
        // Set to true by initWebLLM() once the engine has been created
        let webllmReady = false;
        /**
         * Initialize WebLLM: import the runtime and create an engine for a model.
         *
         * Progress and the final outcome are reported in the #webllm-status
         * element. On success the engine is stored in `webllmEngine` and
         * `webllmReady` is set to true.
         *
         * @param {string} modelId - Model identifier passed to CreateMLCEngine
         * @returns {Promise<boolean>} true when the engine was created, false on any error
         */
        async function initWebLLM(modelId) {
            const statusDiv = document.getElementById('webllm-status');
            // Status text is written with textContent, not innerHTML: progress
            // and error messages come from outside this page and must never be
            // parsed as markup.
            statusDiv.textContent = '⏳ Loading WebLLM...';
            try {
                // Dynamically import WebLLM
                const webllm = await import('https://esm.run/@mlc-ai/web-llm');
                statusDiv.textContent = '⏳ Downloading model (this may take a while on first use)...';
                webllmEngine = await webllm.CreateMLCEngine(modelId, {
                    initProgressCallback: (progress) => {
                        statusDiv.textContent = `⏳ ${progress.text}`;
                    }
                });
                webllmReady = true;
                statusDiv.textContent = '✅ Model loaded and ready!';
                return true;
            } catch (error) {
                statusDiv.textContent = `❌ Error: ${error.message}`;
                console.error('WebLLM init error:', error);
                return false;
            }
        }
        /**
         * Translate text with the in-browser WebLLM engine.
         *
         * The original text is returned unchanged when no engine is loaded,
         * when there is nothing to translate, when the model replies with an
         * empty message, or when the request fails. A translation is therefore
         * never replaced by an empty string.
         *
         * @param {string} text - Text to translate
         * @param {string} targetLang - Target language inserted into the system prompt
         * @returns {Promise<string>} The translation, or `text` as a fallback
         */
        async function translateWithWebLLM(text, targetLang) {
            if (!webllmEngine) return text;
            // Nothing to translate: do not spend a model call on blank input
            if (typeof text !== 'string' || !text.trim()) return text;
            try {
                // Build system prompt with custom context and glossary
                const basePrompt = `You are a translator. Translate the user's text to ${targetLang}. Return ONLY the translation, nothing else.`;
                const customPrompt = getFullSystemPrompt();
                const systemPrompt = customPrompt.trim()
                    ? `${basePrompt}
 ADDITIONAL CONTEXT AND INSTRUCTIONS:
 ${customPrompt}`
                    : basePrompt;
                const response = await webllmEngine.chat.completions.create({
                    messages: [
                        { role: "system", content: systemPrompt },
                        { role: "user", content: text }
                    ],
                    temperature: 0.3,
                    max_tokens: 500
                });
                // The reply may be missing or null; treat that like an empty answer
                const translated = (response.choices?.[0]?.message?.content ?? '').trim();
                return translated || text;
            } catch (error) {
                console.error('WebLLM translation error:', error);
                return text;
            }
        }
        // List Ollama models
        async function listOllamaModels() {
            const url = document.getElementById('ollama-url').value;
@@ -553,11 +756,19 @@
                return;
            }
            // Get Ollama model from configuration field (used for both text and vision)
            const ollamaModel = document.getElementById('ollama-model').value || 'llama3.2';
            // Get custom system prompt with glossary
            const systemPrompt = getFullSystemPrompt();
            const formData = new FormData();
            formData.append('file', fileInput.files[0]);
            formData.append('target_language', targetLang);
            formData.append('provider', provider);
            formData.append('translate_images', translateImages);
            formData.append('ollama_model', ollamaModel);
            formData.append('system_prompt', systemPrompt);
            loadingDiv.classList.add('active');
            progressContainer.classList.add('active');
--- a/translators/excel_translator.py
+++ b/translators/excel_translator.py
@@ -3,6 +3,8 @@ Excel Translation Module
 Translates Excel files while preserving all formatting, formulas, images, and layout
 """
 import re
 import tempfile
 import os
 from pathlib import Path
 from typing import Dict, Set
 from openpyxl import load_workbook
@@ -40,6 +42,10 @@ class ExcelTranslator:
            worksheet = workbook[sheet_name]
            self._translate_worksheet(worksheet, target_language)
            # Translate images if enabled
            if getattr(self.translation_service, 'translate_images', False):
                self._translate_images(worksheet, target_language)
            # Prepare translated sheet name (but don't rename yet)
            translated_sheet_name = self.translation_service.translate_text(
                sheet_name, target_language
@@ -155,6 +161,54 @@ class ExcelTranslator:
            return False
        return True
    def _translate_images(self, worksheet: Worksheet, target_language: str):
        """
        Translate text in worksheet images using the vision model and add it as comments.

        Each image is written to a temporary file and passed to the provider's
        ``translate_image``. When text comes back, it is stored as a comment on
        the cell referenced by the image anchor's ``_from`` marker; images
        whose anchor has no ``_from`` are skipped. Nothing happens unless the
        active provider is an ``OllamaTranslationProvider``. Errors are printed
        and the remaining images are still processed.

        Args:
            worksheet: Worksheet whose images are processed
            target_language: Target language code
        """
        from services.translation_service import OllamaTranslationProvider
        provider = self.translation_service.provider
        if not isinstance(provider, OllamaTranslationProvider):
            return
        try:
            from openpyxl.comments import Comment
            # `_images` is a private openpyxl attribute; treat its absence as "no images"
            images = getattr(worksheet, '_images', [])
            for idx, image in enumerate(images):
                tmp_path = None
                try:
                    # Get image data
                    image_data = image._data()
                    ext = image.format or 'png'
                    # Save to temp file; record the path first so cleanup
                    # happens even if the write fails
                    with tempfile.NamedTemporaryFile(suffix=f'.{ext}', delete=False) as tmp:
                        tmp_path = tmp.name
                        tmp.write(image_data)
                    # Translate with vision
                    translated_text = provider.translate_image(tmp_path, target_language)
                    if translated_text and translated_text.strip():
                        anchor = image.anchor
                        if hasattr(anchor, '_from'):
                            # Anchor indices are zero-based, cell references are one-based
                            cell_ref = f"{get_column_letter(anchor._from.col + 1)}{anchor._from.row + 1}"
                            cell = worksheet[cell_ref]
                            cell.comment = Comment(f"Image translation: {translated_text}", "Translator")
                            print(f"Added Excel image translation at {cell_ref}: {translated_text[:50]}...")
                except Exception as e:
                    print(f"Error translating Excel image {idx}: {e}")
                finally:
                    # Always remove the temp file, whatever happened above
                    if tmp_path is not None:
                        try:
                            os.unlink(tmp_path)
                        except OSError as e:
                            print(f"Could not remove temporary image file {tmp_path}: {e}")
        except Exception as e:
            print(f"Error processing Excel images: {e}")
 # Global translator instance
--- a/translators/pptx_translator.py
+++ b/translators/pptx_translator.py
@@ -9,6 +9,8 @@ from pptx.shapes.group import GroupShape
 from pptx.util import Inches, Pt
 from pptx.enum.shapes import MSO_SHAPE_TYPE
 from services.translation_service import translation_service
 import tempfile
 import os
 class PowerPointTranslator:
@@ -32,21 +34,23 @@ class PowerPointTranslator:
        presentation = Presentation(input_path)
        # Translate each slide
-        for slide in presentation.slides:
+        for slide_idx, slide in enumerate(presentation.slides):
-            self._translate_slide(slide, target_language)
+            self._translate_slide(slide, target_language, slide_idx + 1, input_path)
        # Save the translated presentation
        presentation.save(output_path)
        return output_path
-    def _translate_slide(self, slide, target_language: str):
+    def _translate_slide(self, slide, target_language: str, slide_num: int, input_path: Path):
        """
        Translate all text elements in a slide while preserving layout
        Args:
            slide: Slide to translate
            target_language: Target language code
            slide_num: Slide number for reference
            input_path: Path to source file for image extraction
        """
        # Translate notes (speaker notes)
        if slide.has_notes_slide:
@@ -56,15 +60,16 @@ class PowerPointTranslator:
        # Translate shapes in the slide
        for shape in slide.shapes:
-            self._translate_shape(shape, target_language)
+            self._translate_shape(shape, target_language, slide)
-    def _translate_shape(self, shape: BaseShape, target_language: str):
+    def _translate_shape(self, shape: BaseShape, target_language: str, slide=None):
        """
        Translate text in a shape based on its type
        Args:
            shape: Shape to translate
            target_language: Target language code
            slide: Parent slide for adding image translations
        """
        # Handle text-containing shapes
        if shape.has_text_frame:
@@ -74,20 +79,72 @@ class PowerPointTranslator:
        if shape.shape_type == MSO_SHAPE_TYPE.TABLE:
            self._translate_table(shape.table, target_language)
        # Handle pictures/images
        if shape.shape_type == MSO_SHAPE_TYPE.PICTURE:
            self._translate_image_shape(shape, target_language, slide)
        # Handle group shapes (shapes within shapes)
        if shape.shape_type == MSO_SHAPE_TYPE.GROUP:
            for sub_shape in shape.shapes:
-                self._translate_shape(sub_shape, target_language)
+                self._translate_shape(sub_shape, target_language, slide)
        # Handle smart art (contains multiple shapes)
        # Smart art is complex, but we can try to translate text within it
        if hasattr(shape, 'shapes'):
            try:
                for sub_shape in shape.shapes:
-                    self._translate_shape(sub_shape, target_language)
+                    self._translate_shape(sub_shape, target_language, slide)
            except:
                pass  # Some shapes may not support iteration
    def _translate_image_shape(self, shape, target_language: str, slide):
        """
        Translate text in an image using vision model and add as text box
        """
        if not getattr(self.translation_service, 'translate_images', False):
            return
        from services.translation_service import OllamaTranslationProvider
        if not isinstance(self.translation_service.provider, OllamaTranslationProvider):
            return
        try:
            # Get image blob
            image_blob = shape.image.blob
            ext = shape.image.ext
            # Save to temp file
            with tempfile.NamedTemporaryFile(suffix=f'.{ext}', delete=False) as tmp:
                tmp.write(image_blob)
                tmp_path = tmp.name
            # Translate with vision
            translated_text = self.translation_service.provider.translate_image(tmp_path, target_language)
            # Clean up
            os.unlink(tmp_path)
            if translated_text and translated_text.strip():
                # Add text box below the image with translation
                left = shape.left
                top = shape.top + shape.height + Inches(0.1)
                width = shape.width
                height = Inches(0.5)
                # Add text box
                textbox = slide.shapes.add_textbox(left, top, width, height)
                tf = textbox.text_frame
                p = tf.paragraphs[0]
                p.text = f"[{translated_text}]"
                p.font.size = Pt(10)
                p.font.italic = True
                print(f"Added image translation: {translated_text[:50]}...")
        except Exception as e:
            print(f"Error translating image: {e}")
    def _translate_text_frame(self, text_frame, target_language: str):
        """
        Translate text within a text frame while preserving formatting
--- a/translators/word_translator.py
+++ b/translators/word_translator.py
@@ -9,7 +9,11 @@ from docx.table import Table, _Cell
 from docx.oxml.text.paragraph import CT_P
 from docx.oxml.table import CT_Tbl
 from docx.section import Section
 from docx.shared import Inches, Pt
 from docx.oxml.ns import qn
 from services.translation_service import translation_service
 import tempfile
 import os
 class WordTranslator:
@@ -39,11 +43,66 @@ class WordTranslator:
        for section in document.sections:
            self._translate_section(section, target_language)
        # Translate images if enabled
        if getattr(self.translation_service, 'translate_images', False):
            self._translate_images(document, target_language, input_path)
        # Save the translated document
        document.save(output_path)
        return output_path
    def _translate_images(self, document: Document, target_language: str, input_path: Path):
        """
        Extract text from the document's images and append the translations.

        The .docx at ``input_path`` is opened as a zip archive. Every file
        under ``word/media/`` is written to a temporary file and passed to the
        provider's ``translate_image``. Each non-empty result is appended to
        the end of ``document`` as a paragraph of the form
        "[Image N translation: ...]". Nothing happens unless the active
        provider is an ``OllamaTranslationProvider``. Errors are printed and
        the remaining images are still processed.

        Args:
            document: Document that receives the translation paragraphs
            target_language: Target language code
            input_path: Path to the source .docx the images are read from
        """
        from services.translation_service import OllamaTranslationProvider
        # Only works with Ollama vision
        provider = self.translation_service.provider
        if not isinstance(provider, OllamaTranslationProvider):
            return
        try:
            import zipfile
            # Extract images from docx (it's a zip file)
            with zipfile.ZipFile(input_path, 'r') as zip_ref:
                # Skip directory entries: they carry no image data
                image_files = [
                    f for f in zip_ref.namelist()
                    if f.startswith('word/media/') and not f.endswith('/')
                ]
                for idx, image_file in enumerate(image_files):
                    tmp_path = None
                    try:
                        # Extract image
                        image_data = zip_ref.read(image_file)
                        # Create temp file; record the path first so cleanup
                        # happens even if the write fails
                        ext = os.path.splitext(image_file)[1]
                        with tempfile.NamedTemporaryFile(suffix=ext, delete=False) as tmp:
                            tmp_path = tmp.name
                            tmp.write(image_data)
                        # Translate image with vision
                        translated_text = provider.translate_image(tmp_path, target_language)
                        if translated_text and translated_text.strip():
                            # The translation goes at the end of the document
                            # with a numbered note, not next to the image
                            p = document.add_paragraph()
                            p.add_run(f"[Image {idx + 1} translation: ").bold = True
                            p.add_run(translated_text)
                            p.add_run("]").bold = True
                            print(f"Translated image {idx + 1}: {translated_text[:50]}...")
                    except Exception as e:
                        print(f"Error translating image {image_file}: {e}")
                    finally:
                        # Always remove the temp file, whatever happened above
                        if tmp_path is not None:
                            try:
                                os.unlink(tmp_path)
                            except OSError as e:
                                print(f"Could not remove temporary image file {tmp_path}: {e}")
        except Exception as e:
            print(f"Error processing images: {e}")
    def _translate_document_body(self, document: Document, target_language: str):
        """
        Translate all elements in the document body