Add system prompt, glossary, presets for Ollama/WebLLM, image translation support
This commit is contained in:
@@ -3,6 +3,8 @@ Excel Translation Module
|
||||
Translates Excel files while preserving all formatting, formulas, images, and layout
|
||||
"""
|
||||
import re
|
||||
import tempfile
|
||||
import os
|
||||
from pathlib import Path
|
||||
from typing import Dict, Set
|
||||
from openpyxl import load_workbook
|
||||
@@ -40,6 +42,10 @@ class ExcelTranslator:
|
||||
worksheet = workbook[sheet_name]
|
||||
self._translate_worksheet(worksheet, target_language)
|
||||
|
||||
# Translate images if enabled
|
||||
if getattr(self.translation_service, 'translate_images', False):
|
||||
self._translate_images(worksheet, target_language)
|
||||
|
||||
# Prepare translated sheet name (but don't rename yet)
|
||||
translated_sheet_name = self.translation_service.translate_text(
|
||||
sheet_name, target_language
|
||||
@@ -155,6 +161,54 @@ class ExcelTranslator:
|
||||
return False
|
||||
|
||||
return True
|
||||
|
||||
def _translate_images(self, worksheet: Worksheet, target_language: str):
    """
    Translate text in worksheet images using the vision model and add it as cell comments

    Each image in the worksheet is written to a temporary file and handed to
    the provider's ``translate_image``. When non-blank text comes back, it is
    stored as a comment on the cell referenced by the image anchor's ``_from``
    marker; images whose anchor has no ``_from`` are skipped.

    Does nothing unless the active provider is an OllamaTranslationProvider.
    Errors are printed, never raised; a failure on one image does not stop
    the remaining images from being processed.

    Args:
        worksheet: Worksheet whose images are processed
        target_language: Target language passed to the provider
    """
    from services.translation_service import OllamaTranslationProvider

    provider = self.translation_service.provider
    if not isinstance(provider, OllamaTranslationProvider):
        return

    try:
        # Get images from worksheet
        images = getattr(worksheet, '_images', [])

        for idx, image in enumerate(images):
            try:
                # Get image data
                image_data = image._data()
                ext = image.format or 'png'

                tmp_path = None
                try:
                    # Save to temp file (delete=False so the provider can
                    # reopen it by path after the handle is closed)
                    with tempfile.NamedTemporaryFile(suffix=f'.{ext}', delete=False) as tmp:
                        tmp_path = tmp.name
                        tmp.write(image_data)

                    # Translate with vision
                    translated_text = provider.translate_image(tmp_path, target_language)
                finally:
                    # Always clean up, even when writing or translating fails,
                    # so failed images do not leave temp files behind
                    if tmp_path is not None:
                        os.unlink(tmp_path)

                if not (translated_text and translated_text.strip()):
                    continue

                anchor = image.anchor
                if not hasattr(anchor, '_from'):
                    continue

                # Anchor markers are 0-based; cell references are 1-based
                cell_ref = f"{get_column_letter(anchor._from.col + 1)}{anchor._from.row + 1}"
                cell = worksheet[cell_ref]

                # Add as comment
                from openpyxl.comments import Comment
                cell.comment = Comment(f"Image translation: {translated_text}", "Translator")
                print(f"Added Excel image translation at {cell_ref}: {translated_text[:50]}...")

            except Exception as e:
                print(f"Error translating Excel image {idx}: {e}")
                continue

    except Exception as e:
        print(f"Error processing Excel images: {e}")
|
||||
|
||||
|
||||
# Global translator instance
|
||||
|
||||
@@ -9,6 +9,8 @@ from pptx.shapes.group import GroupShape
|
||||
from pptx.util import Inches, Pt
|
||||
from pptx.enum.shapes import MSO_SHAPE_TYPE
|
||||
from services.translation_service import translation_service
|
||||
import tempfile
|
||||
import os
|
||||
|
||||
|
||||
class PowerPointTranslator:
|
||||
@@ -32,21 +34,23 @@ class PowerPointTranslator:
|
||||
presentation = Presentation(input_path)
|
||||
|
||||
# Translate each slide
|
||||
for slide in presentation.slides:
|
||||
self._translate_slide(slide, target_language)
|
||||
for slide_idx, slide in enumerate(presentation.slides):
|
||||
self._translate_slide(slide, target_language, slide_idx + 1, input_path)
|
||||
|
||||
# Save the translated presentation
|
||||
presentation.save(output_path)
|
||||
|
||||
return output_path
|
||||
|
||||
def _translate_slide(self, slide, target_language: str):
|
||||
def _translate_slide(self, slide, target_language: str, slide_num: int, input_path: Path):
|
||||
"""
|
||||
Translate all text elements in a slide while preserving layout
|
||||
|
||||
Args:
|
||||
slide: Slide to translate
|
||||
target_language: Target language code
|
||||
slide_num: Slide number for reference
|
||||
input_path: Path to source file for image extraction
|
||||
"""
|
||||
# Translate notes (speaker notes)
|
||||
if slide.has_notes_slide:
|
||||
@@ -56,15 +60,16 @@ class PowerPointTranslator:
|
||||
|
||||
# Translate shapes in the slide
|
||||
for shape in slide.shapes:
|
||||
self._translate_shape(shape, target_language)
|
||||
self._translate_shape(shape, target_language, slide)
|
||||
|
||||
def _translate_shape(self, shape: BaseShape, target_language: str):
|
||||
def _translate_shape(self, shape: BaseShape, target_language: str, slide=None):
|
||||
"""
|
||||
Translate text in a shape based on its type
|
||||
|
||||
Args:
|
||||
shape: Shape to translate
|
||||
target_language: Target language code
|
||||
slide: Parent slide for adding image translations
|
||||
"""
|
||||
# Handle text-containing shapes
|
||||
if shape.has_text_frame:
|
||||
@@ -74,20 +79,72 @@ class PowerPointTranslator:
|
||||
if shape.shape_type == MSO_SHAPE_TYPE.TABLE:
|
||||
self._translate_table(shape.table, target_language)
|
||||
|
||||
# Handle pictures/images
|
||||
if shape.shape_type == MSO_SHAPE_TYPE.PICTURE:
|
||||
self._translate_image_shape(shape, target_language, slide)
|
||||
|
||||
# Handle group shapes (shapes within shapes)
|
||||
if shape.shape_type == MSO_SHAPE_TYPE.GROUP:
|
||||
for sub_shape in shape.shapes:
|
||||
self._translate_shape(sub_shape, target_language)
|
||||
self._translate_shape(sub_shape, target_language, slide)
|
||||
|
||||
# Handle smart art (contains multiple shapes)
|
||||
# Smart art is complex, but we can try to translate text within it
|
||||
if hasattr(shape, 'shapes'):
|
||||
try:
|
||||
for sub_shape in shape.shapes:
|
||||
self._translate_shape(sub_shape, target_language)
|
||||
self._translate_shape(sub_shape, target_language, slide)
|
||||
except:
|
||||
pass # Some shapes may not support iteration
|
||||
|
||||
def _translate_image_shape(self, shape, target_language: str, slide):
    """
    Translate text in an image using the vision model and add it as a text box

    The picture's image blob is written to a temporary file and handed to the
    provider's ``translate_image``. When non-blank text comes back, a text box
    containing ``[<translation>]`` (10pt, italic) is added to ``slide`` 0.1 inch
    below the picture, with the picture's left edge and width.

    Does nothing unless ``translate_images`` is enabled on the translation
    service and the active provider is an OllamaTranslationProvider.
    Errors are printed, never raised.

    Args:
        shape: Picture shape whose image is translated
        target_language: Target language passed to the provider
        slide: Slide that receives the text box with the translation
    """
    if not getattr(self.translation_service, 'translate_images', False):
        return

    from services.translation_service import OllamaTranslationProvider

    provider = self.translation_service.provider
    if not isinstance(provider, OllamaTranslationProvider):
        return

    try:
        # Get image blob
        image_blob = shape.image.blob
        ext = shape.image.ext

        tmp_path = None
        try:
            # Save to temp file (delete=False so the provider can reopen it
            # by path after the handle is closed)
            with tempfile.NamedTemporaryFile(suffix=f'.{ext}', delete=False) as tmp:
                tmp_path = tmp.name
                tmp.write(image_blob)

            # Translate with vision
            translated_text = provider.translate_image(tmp_path, target_language)
        finally:
            # Always clean up, even when writing or translating fails,
            # so failed images do not leave temp files behind
            if tmp_path is not None:
                os.unlink(tmp_path)

        if not (translated_text and translated_text.strip()):
            return

        # Place the text box just below the image, matching its width
        left = shape.left
        top = shape.top + shape.height + Inches(0.1)
        width = shape.width
        height = Inches(0.5)

        # Add text box
        textbox = slide.shapes.add_textbox(left, top, width, height)
        paragraph = textbox.text_frame.paragraphs[0]
        paragraph.text = f"[{translated_text}]"
        paragraph.font.size = Pt(10)
        paragraph.font.italic = True

        print(f"Added image translation: {translated_text[:50]}...")

    except Exception as e:
        print(f"Error translating image: {e}")
|
||||
|
||||
def _translate_text_frame(self, text_frame, target_language: str):
|
||||
"""
|
||||
Translate text within a text frame while preserving formatting
|
||||
|
||||
@@ -9,7 +9,11 @@ from docx.table import Table, _Cell
|
||||
from docx.oxml.text.paragraph import CT_P
|
||||
from docx.oxml.table import CT_Tbl
|
||||
from docx.section import Section
|
||||
from docx.shared import Inches, Pt
|
||||
from docx.oxml.ns import qn
|
||||
from services.translation_service import translation_service
|
||||
import tempfile
|
||||
import os
|
||||
|
||||
|
||||
class WordTranslator:
|
||||
@@ -39,11 +43,66 @@ class WordTranslator:
|
||||
for section in document.sections:
|
||||
self._translate_section(section, target_language)
|
||||
|
||||
# Translate images if enabled
|
||||
if getattr(self.translation_service, 'translate_images', False):
|
||||
self._translate_images(document, target_language, input_path)
|
||||
|
||||
# Save the translated document
|
||||
document.save(output_path)
|
||||
|
||||
return output_path
|
||||
|
||||
def _translate_images(self, document: Document, target_language: str, input_path: Path):
    """
    Translate text in the document's images and append the translations as paragraphs

    The source file at ``input_path`` is opened as a zip archive and every
    entry under ``word/media/`` is written to a temporary file and handed to
    the provider's ``translate_image``. When non-blank text comes back, a new
    paragraph ``[Image N translation: <text>]`` is added to ``document``,
    where N is the 1-based position of the entry in the archive listing.

    Does nothing unless the active provider is an OllamaTranslationProvider.
    Errors are printed, never raised; a failure on one image does not stop
    the remaining images from being processed.

    Args:
        document: Document that receives the translation paragraphs
        target_language: Target language passed to the provider
        input_path: Path to the source .docx file the images are read from
    """
    from services.translation_service import OllamaTranslationProvider

    # Only works with Ollama vision
    provider = self.translation_service.provider
    if not isinstance(provider, OllamaTranslationProvider):
        return

    try:
        import zipfile

        # Extract images from docx (it's a zip file)
        with zipfile.ZipFile(input_path, 'r') as zip_ref:
            image_files = [f for f in zip_ref.namelist() if f.startswith('word/media/')]

            for idx, image_file in enumerate(image_files):
                try:
                    # Extract image
                    image_data = zip_ref.read(image_file)
                    ext = os.path.splitext(image_file)[1]

                    tmp_path = None
                    try:
                        # Create temp file (delete=False so the provider can
                        # reopen it by path after the handle is closed)
                        with tempfile.NamedTemporaryFile(suffix=ext, delete=False) as tmp:
                            tmp_path = tmp.name
                            tmp.write(image_data)

                        # Translate image with vision
                        translated_text = provider.translate_image(tmp_path, target_language)
                    finally:
                        # Always clean up, even when writing or translating
                        # fails, so failed images do not leave temp files behind
                        if tmp_path is not None:
                            os.unlink(tmp_path)

                    if not (translated_text and translated_text.strip()):
                        continue

                    # The translation is added as a new paragraph with a
                    # note, not placed next to the image itself
                    p = document.add_paragraph()
                    p.add_run(f"[Image {idx + 1} translation: ").bold = True
                    p.add_run(translated_text)
                    p.add_run("]").bold = True

                    print(f"Translated image {idx + 1}: {translated_text[:50]}...")

                except Exception as e:
                    print(f"Error translating image {image_file}: {e}")
                    continue

    except Exception as e:
        print(f"Error processing images: {e}")
|
||||
|
||||
def _translate_document_body(self, document: Document, target_language: str):
|
||||
"""
|
||||
Translate all elements in the document body
|
||||
|
||||
Reference in New Issue
Block a user