Add system prompt, glossary, presets for Ollama/WebLLM, image translation support

This commit is contained in:
Sepehr 2025-11-30 16:45:41 +01:00
parent 465cab8a61
commit e48ea07e44
6 changed files with 497 additions and 51 deletions

17
main.py
View File

@ -111,7 +111,9 @@ async def translate_document(
target_language: str = Form(..., description="Target language code (e.g., 'es', 'fr', 'de')"), target_language: str = Form(..., description="Target language code (e.g., 'es', 'fr', 'de')"),
source_language: str = Form(default="auto", description="Source language code (default: auto-detect)"), source_language: str = Form(default="auto", description="Source language code (default: auto-detect)"),
provider: str = Form(default="google", description="Translation provider (google, ollama, deepl, libre)"), provider: str = Form(default="google", description="Translation provider (google, ollama, deepl, libre)"),
translate_images: bool = Form(default=False, description="Translate images with Ollama vision (only for Ollama provider)"), translate_images: bool = Form(default=False, description="Translate images with multimodal Ollama model"),
ollama_model: str = Form(default="", description="Ollama model to use (also used for vision if multimodal)"),
system_prompt: str = Form(default="", description="Custom system prompt with context, glossary, or instructions for LLM translation"),
cleanup: bool = Form(default=True, description="Delete input file after translation") cleanup: bool = Form(default=True, description="Delete input file after translation")
): ):
""" """
@ -154,7 +156,7 @@ async def translate_document(
logger.info(f"Saved input file to: {input_path}") logger.info(f"Saved input file to: {input_path}")
# Configure translation provider # Configure translation provider
from services.translation_service import GoogleTranslationProvider, DeepLTranslationProvider, LibreTranslationProvider, OllamaTranslationProvider, WebLLMTranslationProvider, translation_service from services.translation_service import GoogleTranslationProvider, DeepLTranslationProvider, LibreTranslationProvider, OllamaTranslationProvider, translation_service
if provider.lower() == "deepl": if provider.lower() == "deepl":
if not config.DEEPL_API_KEY: if not config.DEEPL_API_KEY:
@ -163,10 +165,13 @@ async def translate_document(
elif provider.lower() == "libre": elif provider.lower() == "libre":
translation_provider = LibreTranslationProvider() translation_provider = LibreTranslationProvider()
elif provider.lower() == "ollama": elif provider.lower() == "ollama":
vision_model = getattr(config, 'OLLAMA_VISION_MODEL', 'llava') # Use the same model for text and vision (multimodal models like gemma3, qwen3-vl)
translation_provider = OllamaTranslationProvider(config.OLLAMA_BASE_URL, config.OLLAMA_MODEL, vision_model) model_to_use = ollama_model.strip() if ollama_model else config.OLLAMA_MODEL
elif provider.lower() == "webllm": custom_prompt = system_prompt.strip() if system_prompt else ""
translation_provider = WebLLMTranslationProvider() logger.info(f"Using Ollama model: {model_to_use} (text + vision)")
if custom_prompt:
logger.info(f"Custom system prompt provided ({len(custom_prompt)} chars)")
translation_provider = OllamaTranslationProvider(config.OLLAMA_BASE_URL, model_to_use, model_to_use, custom_prompt)
else: else:
translation_provider = GoogleTranslationProvider() translation_provider = GoogleTranslationProvider()

View File

@ -70,30 +70,65 @@ class LibreTranslationProvider(TranslationProvider):
class OllamaTranslationProvider(TranslationProvider): class OllamaTranslationProvider(TranslationProvider):
"""Ollama LLM translation implementation""" """Ollama LLM translation implementation"""
def __init__(self, base_url: str = "http://localhost:11434", model: str = "llama3", vision_model: str = "llava"): def __init__(self, base_url: str = "http://localhost:11434", model: str = "llama3", vision_model: str = "llava", system_prompt: str = ""):
self.base_url = base_url.rstrip('/') self.base_url = base_url.rstrip('/')
self.model = model self.model = model.strip() # Remove any leading/trailing whitespace
self.vision_model = vision_model self.vision_model = vision_model.strip()
self.custom_system_prompt = system_prompt # Custom context, glossary, instructions
def translate(self, text: str, target_language: str, source_language: str = 'auto') -> str:
    """
    Translate ``text`` through the Ollama ``/api/chat`` endpoint.

    Args:
        text: Text to translate. Empty, whitespace-only, single-character and
            digit-only values are returned unchanged without calling Ollama.
        target_language: Target language code inserted into the system prompt.
        source_language: Source language code. ``'auto'`` (the default) leaves
            detection to the model; any other value is named in the prompt.

    Returns:
        The translated text, or the original ``text`` when the request fails
        or the model returns an empty reply.
    """
    if not text or not text.strip():
        return text

    # Skip very short text or numbers only
    stripped = text.strip()
    if len(stripped) < 2 or stripped.isdigit():
        return text

    # Single source of truth for the request timeout and its error message.
    timeout_seconds = 120

    try:
        # Name the source language only when the caller actually supplied one.
        if source_language and source_language != 'auto':
            direction = f"from {source_language} to {target_language}"
        else:
            direction = f"to {target_language}"
        base_prompt = f"You are a translator. Translate the user's text {direction}. Return ONLY the translation, nothing else."

        # Append custom context (glossary, instructions) if provided
        if self.custom_system_prompt:
            system_content = (
                f"{base_prompt}\n\n"
                "ADDITIONAL CONTEXT AND INSTRUCTIONS:\n"
                f"{self.custom_system_prompt}"
            )
        else:
            system_content = base_prompt

        # A fixed 500-token cap silently truncates long passages; let the
        # budget grow with the input while keeping 500 as the floor.
        max_output_tokens = max(500, 2 * len(text))

        # Use /api/chat endpoint (more compatible with all models)
        response = requests.post(
            f"{self.base_url}/api/chat",
            json={
                "model": self.model,
                "messages": [
                    {"role": "system", "content": system_content},
                    {"role": "user", "content": text},
                ],
                "stream": False,
                "options": {
                    "temperature": 0.3,
                    "num_predict": max_output_tokens,
                },
            },
            timeout=timeout_seconds,
        )
        response.raise_for_status()
        result = response.json()
        translated = result.get("message", {}).get("content", "").strip()
        # An empty reply is treated as a failure: keep the source text.
        return translated if translated else text
    except requests.exceptions.ConnectionError:
        print(f"Ollama error: Cannot connect to {self.base_url}. Is Ollama running?")
        return text
    except requests.exceptions.Timeout:
        print(f"Ollama error: Request timeout after {timeout_seconds}s")
        return text
    except Exception as e:
        # Best-effort translation: any other failure keeps the source text.
        print(f"Ollama translation error: {e}")
        return text
@ -107,21 +142,25 @@ class OllamaTranslationProvider(TranslationProvider):
with open(image_path, 'rb') as img_file: with open(image_path, 'rb') as img_file:
image_data = base64.b64encode(img_file.read()).decode('utf-8') image_data = base64.b64encode(img_file.read()).decode('utf-8')
prompt = f"Extract all text from this image and translate it to {target_language}. Return ONLY the translated text, preserving the structure and formatting." # Use /api/chat for vision models too
response = requests.post( response = requests.post(
f"{self.base_url}/api/generate", f"{self.base_url}/api/chat",
json={ json={
"model": self.vision_model, "model": self.vision_model,
"prompt": prompt, "messages": [
"images": [image_data], {
"role": "user",
"content": f"Extract all text from this image and translate it to {target_language}. Return ONLY the translated text, preserving the structure and formatting.",
"images": [image_data]
}
],
"stream": False "stream": False
}, },
timeout=60 timeout=60
) )
response.raise_for_status() response.raise_for_status()
result = response.json() result = response.json()
return result.get("response", "").strip() return result.get("message", {}).get("content", "").strip()
except Exception as e: except Exception as e:
print(f"Ollama vision translation error: {e}") print(f"Ollama vision translation error: {e}")
return "" return ""
@ -158,6 +197,7 @@ class TranslationService:
else: else:
# Auto-select provider based on configuration # Auto-select provider based on configuration
self.provider = self._get_default_provider() self.provider = self._get_default_provider()
self.translate_images = False # Flag to enable image translation
def _get_default_provider(self) -> TranslationProvider: def _get_default_provider(self) -> TranslationProvider:
"""Get the default translation provider from configuration""" """Get the default translation provider from configuration"""
@ -182,6 +222,26 @@ class TranslationService:
return self.provider.translate(text, target_language, source_language) return self.provider.translate(text, target_language, source_language)
def translate_image(self, image_path: str, target_language: str) -> str:
    """
    Translate the text found in an image via the provider's vision model.

    Args:
        image_path: Path to image file
        target_language: Target language code

    Returns:
        The translated text, or an empty string when image translation is
        switched off or the active provider is not the Ollama provider.
    """
    # Image translation is opt-in, and only the Ollama provider implements it.
    enabled = self.translate_images
    if enabled and isinstance(self.provider, OllamaTranslationProvider):
        return self.provider.translate_image(image_path, target_language)
    return ""
def translate_batch(self, texts: list[str], target_language: str, source_language: str = 'auto') -> list[str]: def translate_batch(self, texts: list[str], target_language: str, source_language: str = 'auto') -> list[str]:
""" """
Translate multiple text strings Translate multiple text strings

View File

@ -309,7 +309,7 @@
</div> </div>
<div class="form-group"> <div class="form-group">
<label for="ollama-model">Modèle Ollama</label> <label for="ollama-model">Modèle Ollama</label>
<input type="text" id="ollama-model" value="llama3" placeholder="llama3, mistral, etc."> <input type="text" id="ollama-model" value="llama3.2" placeholder="llama3.2, mistral, etc.">
</div> </div>
</div> </div>
<button onclick="listOllamaModels()" class="btn-secondary">List Available Models</button> <button onclick="listOllamaModels()" class="btn-secondary">List Available Models</button>
@ -318,6 +318,39 @@
<div id="models-result"></div> <div id="models-result"></div>
</div> </div>
<!-- System Prompt for LLM Translation -->
<div class="card">
<h2>Translation Context (Ollama / WebLLM)</h2>
<p style="font-size: 13px; color: #718096; margin-bottom: 15px;">
Provide context, technical glossary, or specific instructions to improve translation quality.
</p>
<div class="form-group">
<label for="system-prompt">System Prompt / Instructions</label>
<textarea id="system-prompt" rows="4" style="width: 100%; padding: 10px 14px; border: 1px solid #cbd5e0; border-radius: 6px; font-size: 14px; font-family: inherit; resize: vertical;" placeholder="Example: You are translating HVAC technical documents. Use these terms:
- Batterie (FR) = Coil (EN)
- Groupe froid (FR) = Chiller (EN)
- CTA (FR) = AHU (EN)"></textarea>
</div>
<div class="form-group">
<label for="glossary">Technical Glossary (one per line: source=target)</label>
<textarea id="glossary" rows="5" style="width: 100%; padding: 10px 14px; border: 1px solid #cbd5e0; border-radius: 6px; font-size: 13px; font-family: monospace; resize: vertical;" placeholder="batterie=coil
groupe froid=chiller
CTA=AHU
échangeur=heat exchanger
vanne 3 voies=3-way valve"></textarea>
</div>
<div style="display: flex; gap: 10px; flex-wrap: wrap;">
<button onclick="loadPreset('hvac')" class="btn-secondary" style="font-size: 12px;">HVAC Preset</button>
<button onclick="loadPreset('it')" class="btn-secondary" style="font-size: 12px;">IT Preset</button>
<button onclick="loadPreset('legal')" class="btn-secondary" style="font-size: 12px;">Legal Preset</button>
<button onclick="loadPreset('medical')" class="btn-secondary" style="font-size: 12px;">Medical Preset</button>
<button onclick="clearPrompt()" class="btn-secondary" style="font-size: 12px; background: #dc2626;">Clear</button>
</div>
</div>
<!-- Traduction de fichier --> <!-- Traduction de fichier -->
<div class="card"> <div class="card">
<h2>Document Translation</h2> <h2>Document Translation</h2>
@ -335,6 +368,8 @@
<div class="form-group"> <div class="form-group">
<label for="target-lang">Target Language</label> <label for="target-lang">Target Language</label>
<select id="target-lang"> <select id="target-lang">
<option value="en">English (en)</option>
<option value="fa">Persian / Farsi (fa)</option>
<option value="es">Espagnol (es)</option> <option value="es">Espagnol (es)</option>
<option value="fr">Français (fr)</option> <option value="fr">Français (fr)</option>
<option value="de">Allemand (de)</option> <option value="de">Allemand (de)</option>
@ -350,10 +385,10 @@
<div class="form-group"> <div class="form-group">
<label for="provider">Translation Service</label> <label for="provider">Translation Service</label>
<select id="provider" onchange="toggleImageTranslation()"> <select id="provider" onchange="toggleProviderOptions()">
<option value="google">Google Translate (Default)</option> <option value="google">Google Translate (Default)</option>
<option value="ollama">Ollama LLM (Local Server)</option> <option value="ollama">Ollama LLM (Local Server)</option>
<option value="webllm">WebLLM (Browser - No Server)</option> <option value="webllm">WebLLM (Browser - WebGPU)</option>
<option value="deepl">DeepL</option> <option value="deepl">DeepL</option>
<option value="libre">LibreTranslate</option> <option value="libre">LibreTranslate</option>
</select> </select>
@ -363,11 +398,11 @@
<div class="form-group" id="image-translation-option" style="display: none;"> <div class="form-group" id="image-translation-option" style="display: none;">
<label style="display: flex; align-items: center; cursor: pointer;"> <label style="display: flex; align-items: center; cursor: pointer;">
<input type="checkbox" id="translate-images" style="width: auto; margin-right: 10px;"> <input type="checkbox" id="translate-images" style="width: auto; margin-right: 10px;">
<span>Translate images with Ollama Vision (requires llava model)</span> <span>Translate images with vision (use multimodal models: gemma3, qwen3-vl, llava, etc.)</span>
</label> </label>
</div> </div>
<div class="form-group" id="webllm-info" style="display: none; padding: 12px; background: #e0f2ff; border-radius: 6px; border-left: 4px solid #2563eb;"> <div class="form-group" id="webllm-options" style="display: none; padding: 12px; background: #e0f2ff; border-radius: 6px; border-left: 4px solid #2563eb;">
<p style="margin: 0 0 10px 0; font-size: 13px; color: #1e40af;"> <p style="margin: 0 0 10px 0; font-size: 13px; color: #1e40af;">
<strong>WebLLM Mode:</strong> Translation runs entirely in your browser using WebGPU. First use downloads the model. <strong>WebLLM Mode:</strong> Translation runs entirely in your browser using WebGPU. First use downloads the model.
</p> </p>
@ -375,8 +410,8 @@
<div> <div>
<label for="webllm-model" style="font-size: 12px; color: #4a5568; margin-bottom: 4px;">Select Model:</label> <label for="webllm-model" style="font-size: 12px; color: #4a5568; margin-bottom: 4px;">Select Model:</label>
<select id="webllm-model" style="width: 100%; padding: 6px; font-size: 13px; border: 1px solid #cbd5e0; border-radius: 4px;"> <select id="webllm-model" style="width: 100%; padding: 6px; font-size: 13px; border: 1px solid #cbd5e0; border-radius: 4px;">
<option value="Llama-3.2-3B-Instruct-q4f32_1-MLC">Llama 3.2 3B (~2GB) - Recommended</option>
<option value="Llama-3.1-8B-Instruct-q4f32_1-MLC">Llama 3.1 8B (~4.5GB)</option> <option value="Llama-3.1-8B-Instruct-q4f32_1-MLC">Llama 3.1 8B (~4.5GB)</option>
<option value="Llama-3.2-3B-Instruct-q4f32_1-MLC">Llama 3.2 3B (~2GB)</option>
<option value="Phi-3.5-mini-instruct-q4f16_1-MLC">Phi 3.5 Mini (~2.5GB)</option> <option value="Phi-3.5-mini-instruct-q4f16_1-MLC">Phi 3.5 Mini (~2.5GB)</option>
<option value="Mistral-7B-Instruct-v0.3-q4f16_1-MLC">Mistral 7B (~4.5GB)</option> <option value="Mistral-7B-Instruct-v0.3-q4f16_1-MLC">Mistral 7B (~4.5GB)</option>
<option value="gemma-2-2b-it-q4f16_1-MLC">Gemma 2 2B (~1.5GB)</option> <option value="gemma-2-2b-it-q4f16_1-MLC">Gemma 2 2B (~1.5GB)</option>
@ -386,6 +421,7 @@
Clear Cache Clear Cache
</button> </button>
</div> </div>
<div id="webllm-status" style="margin-top: 10px; font-size: 12px; color: #4a5568;"></div>
</div> </div>
<button onclick="translateFile()">Translate Document</button> <button onclick="translateFile()">Translate Document</button>
@ -445,26 +481,193 @@
} }
} }
// Toggle image translation option based on provider // Toggle provider options based on selection
function toggleImageTranslation() { // Preset templates for different domains
const provider = document.getElementById('provider').value; const presets = {
const imageOption = document.getElementById('image-translation-option'); hvac: {
const webllmInfo = document.getElementById('webllm-info'); prompt: `You are translating HVAC (Heating, Ventilation, Air Conditioning) technical documents.
Use precise technical terminology. Maintain consistency with industry standards.
if (provider === 'ollama') { Keep unit measurements (kW, m³/h, Pa) unchanged.
imageOption.style.display = 'block'; Translate component names according to the glossary provided.`,
webllmInfo.style.display = 'none'; glossary: `batterie=coil
} else if (provider === 'webllm') { groupe froid=chiller
imageOption.style.display = 'none'; CTA=AHU (Air Handling Unit)
webllmInfo.style.display = 'block'; échangeur=heat exchanger
document.getElementById('translate-images').checked = false; vanne 3 voies=3-way valve
} else { détendeur=expansion valve
imageOption.style.display = 'none'; compresseur=compressor
webllmInfo.style.display = 'none'; évaporateur=evaporator
document.getElementById('translate-images').checked = false; condenseur=condenser
fluide frigorigène=refrigerant
débit d'air=airflow
pression statique=static pressure
récupérateur=heat recovery unit
ventilo-convecteur=fan coil unit
gaine=duct
diffuseur=diffuser
registre=damper`
},
it: {
prompt: `You are translating IT and software documentation.
Keep technical terms, code snippets, and variable names unchanged.
Translate UI labels and user-facing text appropriately.
Maintain formatting markers like **bold** and \`code\`.`,
glossary: `serveur=server
base de données=database
requête=query
sauvegarde=backup
mise à jour=update
télécharger=download
téléverser=upload
mot de passe=password
identifiant=username
pare-feu=firewall
réseau=network
stockage=storage
conteneur=container
déploiement=deployment`
},
legal: {
prompt: `You are translating legal documents.
Use formal legal terminology. Be precise and unambiguous.
Maintain references to laws, articles, and clauses in their original form.
Use standard legal phrases for the target language.`,
glossary: `contrat=contract
clause=clause
partie=party
signataire=signatory
résiliation=termination
préavis=notice period
dommages et intérêts=damages
responsabilité=liability
juridiction=jurisdiction
arbitrage=arbitration
avenant=amendment
ayant droit=beneficiary`
},
medical: {
prompt: `You are translating medical and healthcare documents.
Use standard medical terminology (Latin/Greek roots when appropriate).
Keep drug names, dosages, and medical codes unchanged.
Be precise with anatomical terms and procedures.`,
glossary: `patient=patient
ordonnance=prescription
posologie=dosage
effet secondaire=side effect
contre-indication=contraindication
diagnostic=diagnosis
symptôme=symptom
traitement=treatment
chirurgie=surgery
anesthésie=anesthesia
perfusion=infusion
prélèvement=sample collection`
}
};
// Copy a named preset's prompt and glossary into the form fields.
// Unknown preset names leave the fields untouched.
function loadPreset(presetName) {
    const selected = presets[presetName];
    if (!selected) {
        return;
    }
    const promptField = document.getElementById('system-prompt');
    promptField.value = selected.prompt;
    const glossaryField = document.getElementById('glossary');
    glossaryField.value = selected.glossary;
}
// Empty both the system prompt and the glossary text areas.
function clearPrompt() {
    for (const fieldId of ['system-prompt', 'glossary']) {
        document.getElementById(fieldId).value = '';
    }
}
// Build the prompt sent to the LLM: the free-form instructions, followed by
// the glossary section when the glossary field is not blank.
function getFullSystemPrompt() {
    const instructions = document.getElementById('system-prompt').value || '';
    const glossaryText = document.getElementById('glossary').value || '';
    if (!glossaryText.trim()) {
        return instructions;
    }
    return instructions + '\n\nGLOSSARY (use these exact translations):\n' + glossaryText;
}
// Show only the option panel that belongs to the selected provider and
// reset the image-translation checkbox on every change.
function toggleProviderOptions() {
    const selectedProvider = document.getElementById('provider').value;
    const imagePanel = document.getElementById('image-translation-option');
    const webllmPanel = document.getElementById('webllm-options');

    // Each panel is visible for exactly one provider and hidden otherwise.
    imagePanel.style.display = selectedProvider === 'ollama' ? 'block' : 'none';
    webllmPanel.style.display = selectedProvider === 'webllm' ? 'block' : 'none';

    document.getElementById('translate-images').checked = false;
}
// WebLLM engine instance
let webllmEngine = null;
let webllmReady = false;

// Initialize WebLLM: import the library, then download and start `modelId`.
// Progress and errors are shown in the #webllm-status element.
// Resolves to true when the engine is ready, false when loading failed.
async function initWebLLM(modelId) {
    const statusDiv = document.getElementById('webllm-status');
    // Use textContent rather than innerHTML: progress text and error messages
    // come from the library/network and must not be parsed as HTML.
    const showStatus = (message) => {
        statusDiv.textContent = message;
    };

    showStatus('⏳ Loading WebLLM...');

    try {
        // Dynamically import WebLLM
        const webllm = await import('https://esm.run/@mlc-ai/web-llm');

        showStatus('⏳ Downloading model (this may take a while on first use)...');

        webllmEngine = await webllm.CreateMLCEngine(modelId, {
            initProgressCallback: (progress) => {
                showStatus(`⏳ ${progress.text}`);
            }
        });

        webllmReady = true;
        showStatus('✅ Model loaded and ready!');
        return true;
    } catch (error) {
        showStatus(`❌ Error: ${error.message}`);
        console.error('WebLLM init error:', error);
        return false;
    }
}
// Translate text with WebLLM.
// Returns the translation, or the original `text` when no engine is loaded,
// the request fails, or the model returns an empty reply.
async function translateWithWebLLM(text, targetLang) {
    if (!webllmEngine) return text;

    try {
        // Build system prompt with custom context and glossary
        const basePrompt = `You are a translator. Translate the user's text to ${targetLang}. Return ONLY the translation, nothing else.`;
        const customPrompt = getFullSystemPrompt();
        const systemPrompt = customPrompt.trim()
            ? basePrompt + '\n\nADDITIONAL CONTEXT AND INSTRUCTIONS:\n' + customPrompt
            : basePrompt;

        // A fixed 500-token cap silently truncates long passages; let the
        // budget grow with the input while keeping 500 as the floor.
        const maxTokens = Math.max(500, 2 * text.length);

        const response = await webllmEngine.chat.completions.create({
            messages: [
                { role: "system", content: systemPrompt },
                { role: "user", content: text }
            ],
            temperature: 0.3,
            max_tokens: maxTokens
        });

        // A missing or empty reply is treated as a failure: keep the source text.
        const translated = (response.choices?.[0]?.message?.content ?? '').trim();
        return translated || text;
    } catch (error) {
        console.error('WebLLM translation error:', error);
        return text;
    }
}
// Liste des modèles Ollama // Liste des modèles Ollama
async function listOllamaModels() { async function listOllamaModels() {
const url = document.getElementById('ollama-url').value; const url = document.getElementById('ollama-url').value;
@ -553,11 +756,19 @@
return; return;
} }
// Get Ollama model from configuration field (used for both text and vision)
const ollamaModel = document.getElementById('ollama-model').value || 'llama3.2';
// Get custom system prompt with glossary
const systemPrompt = getFullSystemPrompt();
const formData = new FormData(); const formData = new FormData();
formData.append('file', fileInput.files[0]); formData.append('file', fileInput.files[0]);
formData.append('target_language', targetLang); formData.append('target_language', targetLang);
formData.append('provider', provider); formData.append('provider', provider);
formData.append('translate_images', translateImages); formData.append('translate_images', translateImages);
formData.append('ollama_model', ollamaModel);
formData.append('system_prompt', systemPrompt);
loadingDiv.classList.add('active'); loadingDiv.classList.add('active');
progressContainer.classList.add('active'); progressContainer.classList.add('active');

View File

@ -3,6 +3,8 @@ Excel Translation Module
Translates Excel files while preserving all formatting, formulas, images, and layout Translates Excel files while preserving all formatting, formulas, images, and layout
""" """
import re import re
import tempfile
import os
from pathlib import Path from pathlib import Path
from typing import Dict, Set from typing import Dict, Set
from openpyxl import load_workbook from openpyxl import load_workbook
@ -40,6 +42,10 @@ class ExcelTranslator:
worksheet = workbook[sheet_name] worksheet = workbook[sheet_name]
self._translate_worksheet(worksheet, target_language) self._translate_worksheet(worksheet, target_language)
# Translate images if enabled
if getattr(self.translation_service, 'translate_images', False):
self._translate_images(worksheet, target_language)
# Prepare translated sheet name (but don't rename yet) # Prepare translated sheet name (but don't rename yet)
translated_sheet_name = self.translation_service.translate_text( translated_sheet_name = self.translation_service.translate_text(
sheet_name, target_language sheet_name, target_language
@ -155,6 +161,54 @@ class ExcelTranslator:
return False return False
return True return True
def _translate_images(self, worksheet: Worksheet, target_language: str):
    """
    Translate text in the worksheet's images and attach it as cell comments.

    Each image is written to a temporary file, sent to the provider's
    ``translate_image`` and, when text comes back, a comment holding the
    translation is set on the cell at the image's top-left anchor.
    Does nothing unless the active provider is the Ollama provider.

    Args:
        worksheet: Worksheet whose images are processed.
        target_language: Target language code.
    """
    from openpyxl.comments import Comment
    from services.translation_service import OllamaTranslationProvider

    provider = self.translation_service.provider
    if not isinstance(provider, OllamaTranslationProvider):
        return

    try:
        # Get images from worksheet
        images = getattr(worksheet, '_images', [])

        for idx, image in enumerate(images):
            tmp_path = None
            try:
                # Get image data
                image_data = image._data()
                ext = image.format or 'png'

                # Save to temp file; delete=False so the path can be reopened
                # by the provider after this handle is closed.
                with tempfile.NamedTemporaryFile(suffix=f'.{ext}', delete=False) as tmp:
                    tmp_path = tmp.name
                    tmp.write(image_data)

                # Translate with vision
                translated_text = provider.translate_image(tmp_path, target_language)

                if translated_text and translated_text.strip():
                    # Images without a cell-based anchor are skipped.
                    anchor = image.anchor
                    if hasattr(anchor, '_from'):
                        # Anchor indices are zero-based; cell references are one-based.
                        cell_ref = f"{get_column_letter(anchor._from.col + 1)}{anchor._from.row + 1}"
                        cell = worksheet[cell_ref]
                        # NOTE(review): this replaces any comment already on the cell.
                        cell.comment = Comment(f"Image translation: {translated_text}", "Translator")
                        print(f"Added Excel image translation at {cell_ref}: {translated_text[:50]}...")

            except Exception as e:
                # Best effort: one bad image must not stop the others.
                print(f"Error translating Excel image {idx}: {e}")
                continue
            finally:
                # Always remove the temp file, including when writing or
                # translating raised.
                if tmp_path is not None:
                    try:
                        os.unlink(tmp_path)
                    except OSError:
                        pass

    except Exception as e:
        print(f"Error processing Excel images: {e}")
# Global translator instance # Global translator instance

View File

@ -9,6 +9,8 @@ from pptx.shapes.group import GroupShape
from pptx.util import Inches, Pt from pptx.util import Inches, Pt
from pptx.enum.shapes import MSO_SHAPE_TYPE from pptx.enum.shapes import MSO_SHAPE_TYPE
from services.translation_service import translation_service from services.translation_service import translation_service
import tempfile
import os
class PowerPointTranslator: class PowerPointTranslator:
@ -32,21 +34,23 @@ class PowerPointTranslator:
presentation = Presentation(input_path) presentation = Presentation(input_path)
# Translate each slide # Translate each slide
for slide in presentation.slides: for slide_idx, slide in enumerate(presentation.slides):
self._translate_slide(slide, target_language) self._translate_slide(slide, target_language, slide_idx + 1, input_path)
# Save the translated presentation # Save the translated presentation
presentation.save(output_path) presentation.save(output_path)
return output_path return output_path
def _translate_slide(self, slide, target_language: str): def _translate_slide(self, slide, target_language: str, slide_num: int, input_path: Path):
""" """
Translate all text elements in a slide while preserving layout Translate all text elements in a slide while preserving layout
Args: Args:
slide: Slide to translate slide: Slide to translate
target_language: Target language code target_language: Target language code
slide_num: Slide number for reference
input_path: Path to source file for image extraction
""" """
# Translate notes (speaker notes) # Translate notes (speaker notes)
if slide.has_notes_slide: if slide.has_notes_slide:
@ -56,15 +60,16 @@ class PowerPointTranslator:
# Translate shapes in the slide # Translate shapes in the slide
for shape in slide.shapes: for shape in slide.shapes:
self._translate_shape(shape, target_language) self._translate_shape(shape, target_language, slide)
def _translate_shape(self, shape: BaseShape, target_language: str): def _translate_shape(self, shape: BaseShape, target_language: str, slide=None):
""" """
Translate text in a shape based on its type Translate text in a shape based on its type
Args: Args:
shape: Shape to translate shape: Shape to translate
target_language: Target language code target_language: Target language code
slide: Parent slide for adding image translations
""" """
# Handle text-containing shapes # Handle text-containing shapes
if shape.has_text_frame: if shape.has_text_frame:
@ -74,20 +79,72 @@ class PowerPointTranslator:
if shape.shape_type == MSO_SHAPE_TYPE.TABLE: if shape.shape_type == MSO_SHAPE_TYPE.TABLE:
self._translate_table(shape.table, target_language) self._translate_table(shape.table, target_language)
# Handle pictures/images
if shape.shape_type == MSO_SHAPE_TYPE.PICTURE:
self._translate_image_shape(shape, target_language, slide)
# Handle group shapes (shapes within shapes) # Handle group shapes (shapes within shapes)
if shape.shape_type == MSO_SHAPE_TYPE.GROUP: if shape.shape_type == MSO_SHAPE_TYPE.GROUP:
for sub_shape in shape.shapes: for sub_shape in shape.shapes:
self._translate_shape(sub_shape, target_language) self._translate_shape(sub_shape, target_language, slide)
# Handle smart art (contains multiple shapes) # Handle smart art (contains multiple shapes)
# Smart art is complex, but we can try to translate text within it # Smart art is complex, but we can try to translate text within it
if hasattr(shape, 'shapes'): if hasattr(shape, 'shapes'):
try: try:
for sub_shape in shape.shapes: for sub_shape in shape.shapes:
self._translate_shape(sub_shape, target_language) self._translate_shape(sub_shape, target_language, slide)
except: except:
pass # Some shapes may not support iteration pass # Some shapes may not support iteration
def _translate_image_shape(self, shape, target_language: str, slide):
"""
Translate text in an image using vision model and add as text box
"""
if not getattr(self.translation_service, 'translate_images', False):
return
from services.translation_service import OllamaTranslationProvider
if not isinstance(self.translation_service.provider, OllamaTranslationProvider):
return
try:
# Get image blob
image_blob = shape.image.blob
ext = shape.image.ext
# Save to temp file
with tempfile.NamedTemporaryFile(suffix=f'.{ext}', delete=False) as tmp:
tmp.write(image_blob)
tmp_path = tmp.name
# Translate with vision
translated_text = self.translation_service.provider.translate_image(tmp_path, target_language)
# Clean up
os.unlink(tmp_path)
if translated_text and translated_text.strip():
# Add text box below the image with translation
left = shape.left
top = shape.top + shape.height + Inches(0.1)
width = shape.width
height = Inches(0.5)
# Add text box
textbox = slide.shapes.add_textbox(left, top, width, height)
tf = textbox.text_frame
p = tf.paragraphs[0]
p.text = f"[{translated_text}]"
p.font.size = Pt(10)
p.font.italic = True
print(f"Added image translation: {translated_text[:50]}...")
except Exception as e:
print(f"Error translating image: {e}")
def _translate_text_frame(self, text_frame, target_language: str): def _translate_text_frame(self, text_frame, target_language: str):
""" """
Translate text within a text frame while preserving formatting Translate text within a text frame while preserving formatting

View File

@ -9,7 +9,11 @@ from docx.table import Table, _Cell
from docx.oxml.text.paragraph import CT_P from docx.oxml.text.paragraph import CT_P
from docx.oxml.table import CT_Tbl from docx.oxml.table import CT_Tbl
from docx.section import Section from docx.section import Section
from docx.shared import Inches, Pt
from docx.oxml.ns import qn
from services.translation_service import translation_service from services.translation_service import translation_service
import tempfile
import os
class WordTranslator: class WordTranslator:
@ -39,11 +43,66 @@ class WordTranslator:
for section in document.sections: for section in document.sections:
self._translate_section(section, target_language) self._translate_section(section, target_language)
# Translate images if enabled
if getattr(self.translation_service, 'translate_images', False):
self._translate_images(document, target_language, input_path)
# Save the translated document # Save the translated document
document.save(output_path) document.save(output_path)
return output_path return output_path
def _translate_images(self, document: Document, target_language: str, input_path: Path):
    """
    Translate text in the document's images and append the results as paragraphs.

    Every entry under ``word/media/`` in the source .docx archive is written
    to a temporary file and sent to the provider's ``translate_image``. Each
    non-empty result is appended at the end of ``document`` as
    "[Image N translation: ...]". Does nothing unless the active provider is
    the Ollama provider.

    Args:
        document: Document that receives the translation paragraphs.
        target_language: Target language code.
        input_path: Path to the source .docx file the images are read from.
    """
    import zipfile

    from services.translation_service import OllamaTranslationProvider

    # Only works with Ollama vision
    provider = self.translation_service.provider
    if not isinstance(provider, OllamaTranslationProvider):
        return

    try:
        # Extract images from docx (it's a zip file)
        with zipfile.ZipFile(input_path, 'r') as zip_ref:
            image_files = [f for f in zip_ref.namelist() if f.startswith('word/media/')]

            for idx, image_file in enumerate(image_files):
                tmp_path = None
                try:
                    # Extract image
                    image_data = zip_ref.read(image_file)

                    # Create temp file with the image's own extension;
                    # delete=False so the provider can reopen the path.
                    ext = os.path.splitext(image_file)[1]
                    with tempfile.NamedTemporaryFile(suffix=ext, delete=False) as tmp:
                        tmp_path = tmp.name
                        tmp.write(image_data)

                    # Translate image with vision
                    translated_text = provider.translate_image(tmp_path, target_language)

                    if translated_text and translated_text.strip():
                        # The paragraph goes at the end of the document, not
                        # next to the image, so it is labelled with the image number.
                        p = document.add_paragraph()
                        p.add_run(f"[Image {idx + 1} translation: ").bold = True
                        p.add_run(translated_text)
                        p.add_run("]").bold = True

                        print(f"Translated image {idx + 1}: {translated_text[:50]}...")

                except Exception as e:
                    # Best effort: one bad image must not stop the others.
                    print(f"Error translating image {image_file}: {e}")
                    continue
                finally:
                    # Always remove the temp file, including when writing or
                    # translating raised.
                    if tmp_path is not None:
                        try:
                            os.unlink(tmp_path)
                        except OSError:
                            pass

    except Exception as e:
        print(f"Error processing images: {e}")
def _translate_document_body(self, document: Document, target_language: str): def _translate_document_body(self, document: Document, target_language: str):
""" """
Translate all elements in the document body Translate all elements in the document body