Add system prompt, glossary, presets for Ollama/WebLLM, image translation support

2025-11-30 16:45:41 +01:00
parent 465cab8a61
commit e48ea07e44
6 changed files with 497 additions and 51 deletions
--- a/translators/excel_translator.py
+++ b/translators/excel_translator.py
@@ -3,6 +3,8 @@ Excel Translation Module
 Translates Excel files while preserving all formatting, formulas, images, and layout
 """
 import re
+import tempfile
+import os
 from pathlib import Path
 from typing import Dict, Set
 from openpyxl import load_workbook
@@ -40,6 +42,10 @@ class ExcelTranslator:
            worksheet = workbook[sheet_name]
            self._translate_worksheet(worksheet, target_language)
            
+            # Translate images if enabled
+            if getattr(self.translation_service, 'translate_images', False):
+                self._translate_images(worksheet, target_language)
+            
            # Prepare translated sheet name (but don't rename yet)
            translated_sheet_name = self.translation_service.translate_text(
                sheet_name, target_language
@@ -155,6 +161,54 @@ class ExcelTranslator:
            return False
        
        return True
+    
+    def _translate_images(self, worksheet: Worksheet, target_language: str):
+        """
+        Translate text in worksheet images and attach it as cell comments.
+
+        Does nothing unless the provider is an OllamaTranslationProvider.
+        Failures are printed per image so one bad image does not stop the rest.
+
+        Args:
+            worksheet: Worksheet whose embedded images are processed
+            target_language: Target language code
+        """
+        from openpyxl.comments import Comment
+        from services.translation_service import OllamaTranslationProvider
+
+        provider = self.translation_service.provider
+        if not isinstance(provider, OllamaTranslationProvider):
+            return
+
+        # _images is a private openpyxl attribute, so tolerate it being absent
+        for idx, image in enumerate(getattr(worksheet, '_images', None) or []):
+            tmp_path = None
+            try:
+                ext = image.format or 'png'
+                # translate_image takes a file path, so spill the bytes to disk
+                with tempfile.NamedTemporaryFile(suffix=f'.{ext}', delete=False) as tmp:
+                    tmp_path = tmp.name
+                    tmp.write(image._data())
+
+                translated_text = provider.translate_image(tmp_path, target_language)
+                if not (translated_text and translated_text.strip()):
+                    continue
+                # Anchor col/row are offset by one to form the A1-style reference
+                anchor = image.anchor
+                if hasattr(anchor, '_from'):
+                    cell_ref = f"{get_column_letter(anchor._from.col + 1)}{anchor._from.row + 1}"
+                    worksheet[cell_ref].comment = Comment(
+                        f"Image translation: {translated_text}", "Translator")
+                    print(f"Added Excel image translation at {cell_ref}: {translated_text[:50]}...")
+            except Exception as e:
+                print(f"Error translating Excel image {idx}: {e}")
+            finally:
+                # Always remove the temp file, even when translation raised
+                if tmp_path:
+                    try:
+                        os.unlink(tmp_path)
+                    except OSError as e:
+                        print(f"Error removing temp file {tmp_path}: {e}")


 # Global translator instance
--- a/translators/pptx_translator.py
+++ b/translators/pptx_translator.py
@@ -9,6 +9,8 @@ from pptx.shapes.group import GroupShape
 from pptx.util import Inches, Pt
 from pptx.enum.shapes import MSO_SHAPE_TYPE
 from services.translation_service import translation_service
+import tempfile
+import os


 class PowerPointTranslator:
@@ -32,21 +34,23 @@ class PowerPointTranslator:
        presentation = Presentation(input_path)
        
        # Translate each slide
-        for slide in presentation.slides:
-            self._translate_slide(slide, target_language)
+        for slide_idx, slide in enumerate(presentation.slides):
+            self._translate_slide(slide, target_language, slide_idx + 1, input_path)
        
        # Save the translated presentation
        presentation.save(output_path)
        
        return output_path
    
-    def _translate_slide(self, slide, target_language: str):
+    def _translate_slide(self, slide, target_language: str, slide_num: int, input_path: Path):
        """
        Translate all text elements in a slide while preserving layout
        
        Args:
            slide: Slide to translate
            target_language: Target language code
+            slide_num: Slide number for reference
+            input_path: Path to source file for image extraction
        """
        # Translate notes (speaker notes)
        if slide.has_notes_slide:
@@ -56,15 +60,16 @@ class PowerPointTranslator:
        
        # Translate shapes in the slide
        for shape in slide.shapes:
-            self._translate_shape(shape, target_language)
+            self._translate_shape(shape, target_language, slide)
    
-    def _translate_shape(self, shape: BaseShape, target_language: str):
+    def _translate_shape(self, shape: BaseShape, target_language: str, slide=None):
        """
        Translate text in a shape based on its type
        
        Args:
            shape: Shape to translate
            target_language: Target language code
+            slide: Parent slide for adding image translations
        """
        # Handle text-containing shapes
        if shape.has_text_frame:
@@ -74,20 +79,72 @@ class PowerPointTranslator:
        if shape.shape_type == MSO_SHAPE_TYPE.TABLE:
            self._translate_table(shape.table, target_language)
        
+        # Handle pictures/images
+        if shape.shape_type == MSO_SHAPE_TYPE.PICTURE:
+            self._translate_image_shape(shape, target_language, slide)
+        
        # Handle group shapes (shapes within shapes)
        if shape.shape_type == MSO_SHAPE_TYPE.GROUP:
            for sub_shape in shape.shapes:
-                self._translate_shape(sub_shape, target_language)
+                self._translate_shape(sub_shape, target_language, slide)
        
        # Handle smart art (contains multiple shapes)
        # Smart art is complex, but we can try to translate text within it
        if hasattr(shape, 'shapes'):
            try:
                for sub_shape in shape.shapes:
-                    self._translate_shape(sub_shape, target_language)
+                    self._translate_shape(sub_shape, target_language, slide)
            except:
                pass  # Some shapes may not support iteration
    
+    def _translate_image_shape(self, shape, target_language: str, slide):
+        """
+        Translate text in a picture shape and add it as a text box below it
+
+        Does nothing unless image translation is enabled on the translation
+        service and its provider is an OllamaTranslationProvider.
+
+        Args:
+            shape: Picture shape whose image is sent to the vision model
+            target_language: Target language code
+            slide: Slide that receives the text box; None skips the shape
+        """
+        if slide is None or not getattr(self.translation_service, 'translate_images', False):
+            return
+
+        from services.translation_service import OllamaTranslationProvider
+        provider = self.translation_service.provider
+        if not isinstance(provider, OllamaTranslationProvider):
+            return
+
+        tmp_path = None
+        try:
+            # translate_image takes a file path, so spill the blob to disk
+            with tempfile.NamedTemporaryFile(suffix=f'.{shape.image.ext}', delete=False) as tmp:
+                tmp_path = tmp.name
+                tmp.write(shape.image.blob)
+            translated_text = provider.translate_image(tmp_path, target_language)
+            if translated_text and translated_text.strip():
+                # Place the caption just below the image, matching its width.
+                # NOTE(review): callers iterate slide.shapes while this appends
+                # to it - confirm the new text box is not processed again.
+                top = shape.top + shape.height + Inches(0.1)
+                textbox = slide.shapes.add_textbox(shape.left, top, shape.width, Inches(0.5))
+                p = textbox.text_frame.paragraphs[0]
+                p.text = f"[{translated_text}]"
+                p.font.size = Pt(10)
+                p.font.italic = True
+                print(f"Added image translation: {translated_text[:50]}...")
+        except Exception as e:
+            print(f"Error translating image: {e}")
+        finally:
+            # Always remove the temp file, even when translation raised
+            if tmp_path:
+                try:
+                    os.unlink(tmp_path)
+                except OSError as e:
+                    print(f"Error removing temp file {tmp_path}: {e}")
+    
    def _translate_text_frame(self, text_frame, target_language: str):
        """
        Translate text within a text frame while preserving formatting
--- a/translators/word_translator.py
+++ b/translators/word_translator.py
@@ -9,7 +9,11 @@ from docx.table import Table, _Cell
 from docx.oxml.text.paragraph import CT_P
 from docx.oxml.table import CT_Tbl
 from docx.section import Section
+from docx.shared import Inches, Pt
+from docx.oxml.ns import qn
 from services.translation_service import translation_service
+import tempfile
+import os


 class WordTranslator:
@@ -39,11 +43,66 @@ class WordTranslator:
        for section in document.sections:
            self._translate_section(section, target_language)
        
+        # Translate images if enabled
+        if getattr(self.translation_service, 'translate_images', False):
+            self._translate_images(document, target_language, input_path)
+        
        # Save the translated document
        document.save(output_path)
        
        return output_path
    
+    def _translate_images(self, document: Document, target_language: str, input_path: Path):
+        """
+        Translate text found in the document's images and append the results
+
+        Images are read from the word/media/ folder of the source .docx and
+        each translation is added as a paragraph at the end of the document.
+        Does nothing unless the provider is an OllamaTranslationProvider.
+
+        Args:
+            document: Document that receives the translation paragraphs
+            target_language: Target language code
+            input_path: Path to the source .docx the images are read from
+        """
+        import zipfile
+        from services.translation_service import OllamaTranslationProvider
+
+        provider = self.translation_service.provider
+        if not isinstance(provider, OllamaTranslationProvider):
+            return
+
+        try:
+            # A .docx is a zip archive; embedded images live under word/media/
+            with zipfile.ZipFile(input_path, 'r') as zip_ref:
+                image_files = [f for f in zip_ref.namelist()
+                               if f.startswith('word/media/') and not f.endswith('/')]
+                for idx, image_file in enumerate(image_files):
+                    tmp_path = None
+                    try:
+                        ext = os.path.splitext(image_file)[1]
+                        with tempfile.NamedTemporaryFile(suffix=ext, delete=False) as tmp:
+                            tmp_path = tmp.name
+                            tmp.write(zip_ref.read(image_file))
+                        translated_text = provider.translate_image(tmp_path, target_language)
+                        if translated_text and translated_text.strip():
+                            p = document.add_paragraph()
+                            p.add_run(f"[Image {idx + 1} translation: ").bold = True
+                            p.add_run(translated_text)
+                            p.add_run("]").bold = True
+                            print(f"Translated image {idx + 1}: {translated_text[:50]}...")
+                    except Exception as e:
+                        print(f"Error translating image {image_file}: {e}")
+                    finally:
+                        # Always remove the temp file, even on failure
+                        if tmp_path:
+                            try:
+                                os.unlink(tmp_path)
+                            except OSError as e:
+                                print(f"Error removing temp file {tmp_path}: {e}")
+        except Exception as e:
+            print(f"Error processing images: {e}")
+    
    def _translate_document_body(self, document: Document, target_language: str):
        """
        Translate all elements in the document body