+
@@ -445,26 +481,193 @@
}
}
- // Toggle image translation option based on provider
- function toggleImageTranslation() {
- const provider = document.getElementById('provider').value;
- const imageOption = document.getElementById('image-translation-option');
- const webllmInfo = document.getElementById('webllm-info');
-
- if (provider === 'ollama') {
- imageOption.style.display = 'block';
- webllmInfo.style.display = 'none';
- } else if (provider === 'webllm') {
- imageOption.style.display = 'none';
- webllmInfo.style.display = 'block';
- document.getElementById('translate-images').checked = false;
- } else {
- imageOption.style.display = 'none';
- webllmInfo.style.display = 'none';
- document.getElementById('translate-images').checked = false;
+ /**
+  * Preset templates keyed by domain (hvac, it, legal, medical).
+  * Each preset carries a `prompt` (domain-specific instructions for the translator)
+  * and a `glossary` (one `source=target` pair per line). loadPreset() copies both
+  * values into the system-prompt and glossary form fields.
+  */
+ const presets = {
+ hvac: {
+ prompt: `You are translating HVAC (Heating, Ventilation, Air Conditioning) technical documents.
+Use precise technical terminology. Maintain consistency with industry standards.
+Keep unit measurements (kW, m³/h, Pa) unchanged.
+Translate component names according to the glossary provided.`,
+ glossary: `batterie=coil
+groupe froid=chiller
+CTA=AHU (Air Handling Unit)
+échangeur=heat exchanger
+vanne 3 voies=3-way valve
+détendeur=expansion valve
+compresseur=compressor
+évaporateur=evaporator
+condenseur=condenser
+fluide frigorigène=refrigerant
+débit d'air=airflow
+pression statique=static pressure
+récupérateur=heat recovery unit
+ventilo-convecteur=fan coil unit
+gaine=duct
+diffuseur=diffuser
+registre=damper`
+ },
+ it: {
+ prompt: `You are translating IT and software documentation.
+Keep technical terms, code snippets, and variable names unchanged.
+Translate UI labels and user-facing text appropriately.
+Maintain formatting markers like **bold** and \`code\`.`,
+ glossary: `serveur=server
+base de données=database
+requête=query
+sauvegarde=backup
+mise à jour=update
+télécharger=download
+téléverser=upload
+mot de passe=password
+identifiant=username
+pare-feu=firewall
+réseau=network
+stockage=storage
+conteneur=container
+déploiement=deployment`
+ },
+ legal: {
+ prompt: `You are translating legal documents.
+Use formal legal terminology. Be precise and unambiguous.
+Maintain references to laws, articles, and clauses in their original form.
+Use standard legal phrases for the target language.`,
+ glossary: `contrat=contract
+clause=clause
+partie=party
+signataire=signatory
+résiliation=termination
+préavis=notice period
+dommages et intérêts=damages
+responsabilité=liability
+juridiction=jurisdiction
+arbitrage=arbitration
+avenant=amendment
+ayant droit=beneficiary`
+ },
+ medical: {
+ prompt: `You are translating medical and healthcare documents.
+Use standard medical terminology (Latin/Greek roots when appropriate).
+Keep drug names, dosages, and medical codes unchanged.
+Be precise with anatomical terms and procedures.`,
+ glossary: `patient=patient
+ordonnance=prescription
+posologie=dosage
+effet secondaire=side effect
+contre-indication=contraindication
+diagnostic=diagnosis
+symptôme=symptom
+traitement=treatment
+chirurgie=surgery
+anesthésie=anesthesia
+perfusion=infusion
+prélèvement=sample collection`
+ }
+ };
+
+ /**
+  * Fill the system-prompt and glossary form fields from a named preset.
+  * If nothing is found in `presets` under that key, both fields are left unchanged.
+  * @param {string} presetName - Key into `presets` (e.g. 'hvac', 'it', 'legal', 'medical').
+  */
+ function loadPreset(presetName) {
+ const preset = presets[presetName];
+ if (preset) {
+ document.getElementById('system-prompt').value = preset.prompt;
+ document.getElementById('glossary').value = preset.glossary;
}
}
+ /**
+  * Reset both the system-prompt and glossary form fields to empty strings.
+  */
+ function clearPrompt() {
+ document.getElementById('system-prompt').value = '';
+ document.getElementById('glossary').value = '';
+ }
+
+ /**
+  * Build the custom system prompt from the form fields.
+  * Starts from the system-prompt field and, when the glossary field contains
+  * non-whitespace text, appends it under a "GLOSSARY" heading.
+  * @returns {string} The combined prompt; an empty string when both fields are empty.
+  */
+ function getFullSystemPrompt() {
+ let prompt = document.getElementById('system-prompt').value || '';
+ const glossary = document.getElementById('glossary').value || '';
+
+ // A whitespace-only glossary is treated as absent so no empty heading is emitted.
+ if (glossary.trim()) {
+ prompt += '\n\nGLOSSARY (use these exact translations):\n' + glossary;
+ }
+
+ return prompt;
+ }
+
+ /**
+  * Show the option panel that matches the selected provider.
+  * 'ollama' reveals the image-translation option, 'webllm' reveals the WebLLM
+  * options, and any other provider leaves both hidden.
+  * The translate-images checkbox is cleared on every call, including when
+  * 'ollama' is selected.
+  */
+ function toggleProviderOptions() {
+ const provider = document.getElementById('provider').value;
+ const imageOption = document.getElementById('image-translation-option');
+ const webllmOptions = document.getElementById('webllm-options');
+
+ // Hide all options first
+ imageOption.style.display = 'none';
+ webllmOptions.style.display = 'none';
+ document.getElementById('translate-images').checked = false;
+
+ if (provider === 'ollama') {
+ imageOption.style.display = 'block';
+ } else if (provider === 'webllm') {
+ webllmOptions.style.display = 'block';
+ }
+ }
+
+ // WebLLM engine instance; stays null until initWebLLM() has created an engine.
+ let webllmEngine = null;
+ // Set to true by initWebLLM() once the engine has been created successfully.
+ let webllmReady = false;
+
+ // Initialize WebLLM
+ /**
+  * Load the WebLLM library and create an engine for the given model.
+  * Progress and errors are written to the #webllm-status element. On success the
+  * engine is stored in `webllmEngine` and `webllmReady` is set to true.
+  *
+  * @param {string} modelId - Model identifier passed to `CreateMLCEngine`.
+  * @returns {Promise<boolean>} true when the engine was created, false on any error.
+  */
+ async function initWebLLM(modelId) {
+     const statusDiv = document.getElementById('webllm-status');
+     // Status strings include text coming from the library and from exceptions,
+     // so they are written as plain text (textContent) instead of being parsed as HTML.
+     statusDiv.textContent = '⏳ Loading WebLLM...';
+
+     try {
+         // Dynamically import WebLLM
+         const webllm = await import('https://esm.run/@mlc-ai/web-llm');
+
+         statusDiv.textContent = '⏳ Downloading model (this may take a while on first use)...';
+
+         webllmEngine = await webllm.CreateMLCEngine(modelId, {
+             initProgressCallback: (progress) => {
+                 statusDiv.textContent = `⏳ ${progress.text}`;
+             }
+         });
+
+         webllmReady = true;
+         statusDiv.textContent = '✅ Model loaded and ready!';
+         return true;
+     } catch (error) {
+         statusDiv.textContent = `❌ Error: ${error.message}`;
+         console.error('WebLLM init error:', error);
+         return false;
+     }
+ }
+
+ // Translate text with WebLLM
+ /**
+  * Translate text with the in-browser WebLLM engine.
+  * The system prompt is a fixed translator instruction, followed by the custom
+  * prompt and glossary from getFullSystemPrompt() when that is non-empty.
+  *
+  * @param {string} text - Text to translate; sent as the user message.
+  * @param {string} targetLang - Target language, interpolated into the system prompt.
+  * @returns {Promise<string>} The trimmed model output, or the original `text` when
+  *     no engine has been created or the request fails.
+  */
+ async function translateWithWebLLM(text, targetLang) {
+     if (!webllmEngine) return text;
+
+     try {
+         // The base instruction is written once and extended below, so the two
+         // variants of the prompt cannot drift apart.
+         let systemPrompt = `You are a translator. Translate the user's text to ${targetLang}. Return ONLY the translation, nothing else.`;
+
+         const customPrompt = getFullSystemPrompt();
+         if (customPrompt.trim()) {
+             systemPrompt += `\n\nADDITIONAL CONTEXT AND INSTRUCTIONS:\n${customPrompt}`;
+         }
+
+         const response = await webllmEngine.chat.completions.create({
+             messages: [
+                 { role: "system", content: systemPrompt },
+                 { role: "user", content: text }
+             ],
+             temperature: 0.3,
+             // NOTE(review): a 500-token cap may truncate long inputs — confirm callers send short segments.
+             max_tokens: 500
+         });
+
+         return response.choices[0].message.content.trim();
+     } catch (error) {
+         // Best effort: fall back to the untranslated text rather than failing the caller.
+         console.error('WebLLM translation error:', error);
+         return text;
+     }
+ }
// Liste des modèles Ollama
async function listOllamaModels() {
const url = document.getElementById('ollama-url').value;
@@ -553,11 +756,19 @@
return;
}
+ // Get Ollama model from configuration field (used for both text and vision)
+ const ollamaModel = document.getElementById('ollama-model').value || 'llama3.2';
+
+ // Get custom system prompt with glossary
+ const systemPrompt = getFullSystemPrompt();
+
const formData = new FormData();
formData.append('file', fileInput.files[0]);
formData.append('target_language', targetLang);
formData.append('provider', provider);
formData.append('translate_images', translateImages);
+ formData.append('ollama_model', ollamaModel);
+ formData.append('system_prompt', systemPrompt);
loadingDiv.classList.add('active');
progressContainer.classList.add('active');
diff --git a/translators/excel_translator.py b/translators/excel_translator.py
index 503561a..d49caa0 100644
--- a/translators/excel_translator.py
+++ b/translators/excel_translator.py
@@ -3,6 +3,8 @@ Excel Translation Module
Translates Excel files while preserving all formatting, formulas, images, and layout
"""
import re
+import tempfile
+import os
from pathlib import Path
from typing import Dict, Set
from openpyxl import load_workbook
@@ -40,6 +42,10 @@ class ExcelTranslator:
worksheet = workbook[sheet_name]
self._translate_worksheet(worksheet, target_language)
+ # Translate images if enabled
+ if getattr(self.translation_service, 'translate_images', False):
+ self._translate_images(worksheet, target_language)
+
# Prepare translated sheet name (but don't rename yet)
translated_sheet_name = self.translation_service.translate_text(
sheet_name, target_language
@@ -155,6 +161,54 @@ class ExcelTranslator:
return False
return True
+
+ def _translate_images(self, worksheet: Worksheet, target_language: str):
+     """
+     Translate text in worksheet images using the vision model and add it as cell comments.
+
+     Does nothing unless the active provider is an OllamaTranslationProvider.
+     Each image is written to a temporary file and passed to the provider's
+     translate_image(); a non-empty result is stored as a comment on the cell
+     at the image's anchor. Failures are printed and the image is skipped.
+
+     Args:
+         worksheet: Worksheet whose images are processed
+         target_language: Target language code
+     """
+     from services.translation_service import OllamaTranslationProvider
+
+     provider = self.translation_service.provider
+     if not isinstance(provider, OllamaTranslationProvider):
+         return
+
+     try:
+         from openpyxl.comments import Comment
+
+         # Get images from worksheet
+         images = getattr(worksheet, '_images', [])
+
+         for idx, image in enumerate(images):
+             tmp_path = None
+             try:
+                 # Get image data
+                 image_data = image._data()
+                 ext = image.format or 'png'
+
+                 # Save to temp file; record the path first so cleanup also
+                 # happens if the write itself fails
+                 with tempfile.NamedTemporaryFile(suffix=f'.{ext}', delete=False) as tmp:
+                     tmp_path = tmp.name
+                     tmp.write(image_data)
+
+                 # Translate with vision
+                 translated_text = provider.translate_image(tmp_path, target_language)
+
+                 if translated_text and translated_text.strip():
+                     # Attach the translation to the cell at the image's top-left anchor
+                     anchor = image.anchor
+                     if hasattr(anchor, '_from'):
+                         # Anchor indices are zero-based; cell references are one-based
+                         cell_ref = f"{get_column_letter(anchor._from.col + 1)}{anchor._from.row + 1}"
+                         cell = worksheet[cell_ref]
+                         cell.comment = Comment(f"Image translation: {translated_text}", "Translator")
+                         print(f"Added Excel image translation at {cell_ref}: {translated_text[:50]}...")
+
+             except Exception as e:
+                 print(f"Error translating Excel image {idx}: {e}")
+             finally:
+                 # Always remove the temp file, even when translation raised
+                 if tmp_path and os.path.exists(tmp_path):
+                     os.unlink(tmp_path)
+
+     except Exception as e:
+         print(f"Error processing Excel images: {e}")
# Global translator instance
diff --git a/translators/pptx_translator.py b/translators/pptx_translator.py
index 973bee3..3bd6388 100644
--- a/translators/pptx_translator.py
+++ b/translators/pptx_translator.py
@@ -9,6 +9,8 @@ from pptx.shapes.group import GroupShape
from pptx.util import Inches, Pt
from pptx.enum.shapes import MSO_SHAPE_TYPE
from services.translation_service import translation_service
+import tempfile
+import os
class PowerPointTranslator:
@@ -32,21 +34,23 @@ class PowerPointTranslator:
presentation = Presentation(input_path)
# Translate each slide
- for slide in presentation.slides:
- self._translate_slide(slide, target_language)
+ for slide_idx, slide in enumerate(presentation.slides):
+ self._translate_slide(slide, target_language, slide_idx + 1, input_path)
# Save the translated presentation
presentation.save(output_path)
return output_path
- def _translate_slide(self, slide, target_language: str):
+ def _translate_slide(self, slide, target_language: str, slide_num: int, input_path: Path):
"""
Translate all text elements in a slide while preserving layout
Args:
slide: Slide to translate
target_language: Target language code
+ slide_num: Slide number for reference
+ input_path: Path to source file for image extraction
"""
# Translate notes (speaker notes)
if slide.has_notes_slide:
@@ -56,15 +60,16 @@ class PowerPointTranslator:
# Translate shapes in the slide
for shape in slide.shapes:
- self._translate_shape(shape, target_language)
+ self._translate_shape(shape, target_language, slide)
- def _translate_shape(self, shape: BaseShape, target_language: str):
+ def _translate_shape(self, shape: BaseShape, target_language: str, slide=None):
"""
Translate text in a shape based on its type
Args:
shape: Shape to translate
target_language: Target language code
+ slide: Parent slide for adding image translations
"""
# Handle text-containing shapes
if shape.has_text_frame:
@@ -74,20 +79,72 @@ class PowerPointTranslator:
if shape.shape_type == MSO_SHAPE_TYPE.TABLE:
self._translate_table(shape.table, target_language)
+ # Handle pictures/images
+ if shape.shape_type == MSO_SHAPE_TYPE.PICTURE:
+ self._translate_image_shape(shape, target_language, slide)
+
# Handle group shapes (shapes within shapes)
if shape.shape_type == MSO_SHAPE_TYPE.GROUP:
for sub_shape in shape.shapes:
- self._translate_shape(sub_shape, target_language)
+ self._translate_shape(sub_shape, target_language, slide)
# Handle smart art (contains multiple shapes)
# Smart art is complex, but we can try to translate text within it
if hasattr(shape, 'shapes'):
try:
for sub_shape in shape.shapes:
- self._translate_shape(sub_shape, target_language)
+ self._translate_shape(sub_shape, target_language, slide)
except:
pass # Some shapes may not support iteration
+ def _translate_image_shape(self, shape, target_language: str, slide):
+     """
+     Translate text in a picture using the vision model and add it as a text box.
+
+     Does nothing unless image translation is enabled on the translation service,
+     the active provider is an OllamaTranslationProvider, and a parent slide is given.
+     A non-empty translation is placed in a new text box just below the picture.
+     Failures are printed and otherwise ignored.
+
+     Args:
+         shape: Picture shape whose image is translated
+         target_language: Target language code
+         slide: Parent slide that receives the text box
+     """
+     if not getattr(self.translation_service, 'translate_images', False):
+         return
+
+     from services.translation_service import OllamaTranslationProvider
+
+     provider = self.translation_service.provider
+     if not isinstance(provider, OllamaTranslationProvider):
+         return
+
+     # Without a parent slide there is nowhere to put the result, so skip
+     # before making the vision call.
+     if slide is None:
+         return
+
+     tmp_path = None
+     try:
+         # Get image blob
+         image_blob = shape.image.blob
+         ext = shape.image.ext
+
+         # Save to temp file; record the path first so cleanup also happens
+         # if the write itself fails
+         with tempfile.NamedTemporaryFile(suffix=f'.{ext}', delete=False) as tmp:
+             tmp_path = tmp.name
+             tmp.write(image_blob)
+
+         # Translate with vision
+         translated_text = provider.translate_image(tmp_path, target_language)
+
+         if translated_text and translated_text.strip():
+             # Add text box below the image with translation
+             left = shape.left
+             top = shape.top + shape.height + Inches(0.1)
+             width = shape.width
+             height = Inches(0.5)
+
+             # NOTE(review): the caller appears to be iterating slide.shapes while this
+             # appends to it — confirm the new text box is not itself picked up and re-translated.
+             textbox = slide.shapes.add_textbox(left, top, width, height)
+             tf = textbox.text_frame
+             p = tf.paragraphs[0]
+             p.text = f"[{translated_text}]"
+             p.font.size = Pt(10)
+             p.font.italic = True
+
+             print(f"Added image translation: {translated_text[:50]}...")
+
+     except Exception as e:
+         print(f"Error translating image: {e}")
+     finally:
+         # Always remove the temp file, even when translation raised
+         if tmp_path and os.path.exists(tmp_path):
+             os.unlink(tmp_path)
+
def _translate_text_frame(self, text_frame, target_language: str):
"""
Translate text within a text frame while preserving formatting
diff --git a/translators/word_translator.py b/translators/word_translator.py
index d06fd1e..41be052 100644
--- a/translators/word_translator.py
+++ b/translators/word_translator.py
@@ -9,7 +9,11 @@ from docx.table import Table, _Cell
from docx.oxml.text.paragraph import CT_P
from docx.oxml.table import CT_Tbl
from docx.section import Section
+from docx.shared import Inches, Pt
+from docx.oxml.ns import qn
from services.translation_service import translation_service
+import tempfile
+import os
class WordTranslator:
@@ -39,11 +43,66 @@ class WordTranslator:
for section in document.sections:
self._translate_section(section, target_language)
+ # Translate images if enabled
+ if getattr(self.translation_service, 'translate_images', False):
+ self._translate_images(document, target_language, input_path)
+
# Save the translated document
document.save(output_path)
return output_path
+ def _translate_images(self, document: Document, target_language: str, input_path: Path):
+     """
+     Extract text from images and add translations as paragraphs at the end of the document.
+
+     Does nothing unless the active provider is an OllamaTranslationProvider.
+     Every entry under word/media/ in the source .docx archive is written to a
+     temporary file and passed to the provider's translate_image(); a non-empty
+     result is appended to the document as "[Image N translation: ...]".
+     Failures are printed and the image is skipped.
+
+     Args:
+         document: Document that receives the translation paragraphs
+         target_language: Target language code
+         input_path: Path to the source .docx file the images are read from
+     """
+     from services.translation_service import OllamaTranslationProvider
+
+     # Only works with Ollama vision
+     provider = self.translation_service.provider
+     if not isinstance(provider, OllamaTranslationProvider):
+         return
+
+     try:
+         import zipfile
+
+         # Extract images from docx (it's a zip file)
+         with zipfile.ZipFile(input_path, 'r') as zip_ref:
+             image_files = [f for f in zip_ref.namelist() if f.startswith('word/media/')]
+
+             for idx, image_file in enumerate(image_files):
+                 tmp_path = None
+                 try:
+                     # Extract image
+                     image_data = zip_ref.read(image_file)
+
+                     # Create temp file; record the path first so cleanup also
+                     # happens if the write itself fails
+                     ext = os.path.splitext(image_file)[1]
+                     with tempfile.NamedTemporaryFile(suffix=ext, delete=False) as tmp:
+                         tmp_path = tmp.name
+                         tmp.write(image_data)
+
+                     # Translate image with vision
+                     translated_text = provider.translate_image(tmp_path, target_language)
+
+                     if translated_text and translated_text.strip():
+                         # The translation is appended at the end of the document,
+                         # labelled with the image's index in the archive listing
+                         p = document.add_paragraph()
+                         p.add_run(f"[Image {idx + 1} translation: ").bold = True
+                         p.add_run(translated_text)
+                         p.add_run("]").bold = True
+
+                         print(f"Translated image {idx + 1}: {translated_text[:50]}...")
+                 except Exception as e:
+                     print(f"Error translating image {image_file}: {e}")
+                 finally:
+                     # Always remove the temp file, even when translation raised
+                     if tmp_path and os.path.exists(tmp_path):
+                         os.unlink(tmp_path)
+
+     except Exception as e:
+         print(f"Error processing images: {e}")
+
def _translate_document_body(self, document: Document, target_language: str):
"""
Translate all elements in the document body