- GoogleTranslationProvider: Added batch translation with separator method
- DeepLTranslationProvider: Added translator caching and batch support
- LibreTranslationProvider: Added translator caching and batch support
- WordTranslator: Collect all texts -> batch translate -> apply pattern
- ExcelTranslator: Collect all texts -> batch translate -> apply pattern
- PowerPointTranslator: Collect all texts -> batch translate -> apply pattern
- Enhanced Ollama/OpenAI prompts with stricter translation-only rules
- Added rule: return original text if uncertain about translation
152 lines
6.0 KiB
Python
152 lines
6.0 KiB
Python
"""
PowerPoint Translation Module

Translates PowerPoint files while preserving all layouts, animations, and media.

OPTIMIZED: Uses batch translation for 5-10x faster processing.
"""
|
|
from pathlib import Path
|
|
from pptx import Presentation
|
|
from pptx.shapes.base import BaseShape
|
|
from pptx.shapes.group import GroupShape
|
|
from pptx.util import Inches, Pt
|
|
from pptx.enum.shapes import MSO_SHAPE_TYPE
|
|
from services.translation_service import translation_service
|
|
from typing import List, Tuple
|
|
import tempfile
|
|
import os
|
|
|
|
|
|
class PowerPointTranslator:
    """Translate the text of a PowerPoint presentation in place.

    Text is gathered run by run from slide notes, text shapes, table cells and
    nested shapes, sent to the translation service in a single batch, and then
    written back onto the same runs. Pictures are optionally handed to the
    provider's image translation, whose result is added as a caption text box.
    """

    def __init__(self):
        # Bind the imported service on the instance; every method goes through
        # this attribute rather than the module-level name.
        self.translation_service = translation_service

    def translate_file(self, input_path: Path, output_path: Path, target_language: str) -> Path:
        """Translate the presentation at ``input_path`` and save it to ``output_path``.

        Args:
            input_path: Presentation to read.
            output_path: Where the translated presentation is saved.
            target_language: Passed through unchanged to the translation service.

        Returns:
            ``output_path``, after the presentation has been saved.
        """
        presentation = Presentation(input_path)

        text_elements = []  # (original_text, setter) pairs, one per run
        image_shapes = []   # (picture_shape, slide) pairs, handled after the text

        for slide in presentation.slides:
            if slide.has_notes_slide and slide.notes_slide.notes_text_frame:
                self._collect_from_text_frame(slide.notes_slide.notes_text_frame, text_elements)

            for shape in slide.shapes:
                self._collect_from_shape(shape, text_elements, slide, image_shapes)

        # One service call for the whole deck instead of one per run.
        if text_elements:
            texts = [text for text, _ in text_elements]
            print(f"Batch translating {len(texts)} text segments...")
            translated_texts = self.translation_service.translate_batch(texts, target_language)
            self._apply_translations(text_elements, translated_texts)

        # Images go one at a time; the flag is optional on the service object.
        if getattr(self.translation_service, 'translate_images', False):
            for shape, slide in image_shapes:
                self._translate_image_shape(shape, target_language, slide)

        presentation.save(output_path)
        return output_path

    def _apply_translations(self, text_elements, translated_texts) -> int:
        """Write each translation back through its setter.

        Pairs are matched by position. A ``None`` translation or ``None``
        setter is skipped, and a setter that raises is reported and skipped so
        one bad run does not abort the rest.

        Returns:
            The number of setters that completed without raising.
        """
        translated_texts = list(translated_texts)
        if len(translated_texts) != len(text_elements):
            # zip() below stops at the shorter side; say so instead of
            # silently leaving the tail untranslated.
            print(
                f"Warning: expected {len(text_elements)} translations, "
                f"got {len(translated_texts)}; unmatched segments keep their original text"
            )

        applied = 0
        for (_, setter), translated in zip(text_elements, translated_texts):
            if translated is None or setter is None:
                continue
            try:
                setter(translated)
            except Exception as e:
                print(f"Error applying translation: {e}")
            else:
                applied += 1
        return applied

    def _collect_from_shape(self, shape: "BaseShape", text_elements: List[Tuple[str, callable]], slide=None, image_shapes=None):
        """Collect translatable runs (and pictures) from ``shape`` and its children.

        Args:
            shape: Shape to inspect.
            text_elements: Output list; ``(text, setter)`` pairs are appended.
            slide: Slide recorded alongside each picture.
            image_shapes: Output list for ``(picture, slide)`` pairs; pictures
                are ignored when this is ``None``.
        """
        if shape.has_text_frame:
            self._collect_from_text_frame(shape.text_frame, text_elements)

        # NOTE(review): python-pptx's base shape class is believed to raise
        # NotImplementedError for shape_type when a subclass does not
        # implement it; treat such shapes as "no special type".
        try:
            shape_type = shape.shape_type
        except NotImplementedError:
            shape_type = None

        if shape_type == MSO_SHAPE_TYPE.TABLE:
            for row in shape.table.rows:
                for cell in row.cells:
                    self._collect_from_text_frame(cell.text_frame, text_elements)

        if shape_type == MSO_SHAPE_TYPE.PICTURE and image_shapes is not None:
            image_shapes.append((shape, slide))

        # Descend into children exactly once. A group also exposes `.shapes`,
        # so two independent checks would queue every child run and picture
        # twice (double translation work, duplicate image captions).
        if shape_type == MSO_SHAPE_TYPE.GROUP:
            for sub_shape in shape.shapes:
                self._collect_from_shape(sub_shape, text_elements, slide, image_shapes)
        elif hasattr(shape, 'shapes'):
            # Best effort for other containers: report and carry on.
            try:
                for sub_shape in shape.shapes:
                    self._collect_from_shape(sub_shape, text_elements, slide, image_shapes)
            except Exception as e:
                print(f"Error collecting text from nested shapes: {e}")

    @staticmethod
    def _make_run_setter(run):
        """Return a callable that assigns its argument to ``run.text``."""
        def setter(text):
            run.text = text
        return setter

    def _collect_from_text_frame(self, text_frame, text_elements: List[Tuple[str, callable]]):
        """Append a ``(text, setter)`` pair for every non-blank run in ``text_frame``.

        Frames and paragraphs whose text is only whitespace are skipped, as are
        whitespace-only runs, so nothing blank is sent for translation.
        """
        if not text_frame.text.strip():
            return

        for paragraph in text_frame.paragraphs:
            if not paragraph.text.strip():
                continue

            for run in paragraph.runs:
                run_text = run.text
                if run_text and run_text.strip():
                    text_elements.append((run_text, self._make_run_setter(run)))

    def _translate_image_shape(self, shape, target_language: str, slide):
        """Ask the provider to translate a picture and caption it on ``slide``.

        Does nothing unless the active provider is an
        ``OllamaTranslationProvider``. Any failure is printed, not raised.
        """
        # Imported here, as in the original code, rather than at module level.
        from services.translation_service import OllamaTranslationProvider

        provider = self.translation_service.provider
        if not isinstance(provider, OllamaTranslationProvider):
            return

        try:
            image = shape.image

            # The provider takes a file path, so the image bytes are written to
            # a temporary file that is closed before the call and removed
            # afterwards even if the provider raises.
            tmp = tempfile.NamedTemporaryFile(suffix=f'.{image.ext}', delete=False)
            try:
                with tmp:
                    tmp.write(image.blob)
                translated_text = provider.translate_image(tmp.name, target_language)
            finally:
                os.unlink(tmp.name)

            if translated_text and translated_text.strip():
                # Caption sits just below the picture, same width.
                # NOTE(review): for pictures nested in a group, left/top are
                # presumably in the group's coordinate space - confirm placement.
                left = shape.left
                top = shape.top + shape.height + Inches(0.1)
                width = shape.width
                height = Inches(0.5)

                textbox = slide.shapes.add_textbox(left, top, width, height)
                caption = textbox.text_frame.paragraphs[0]
                caption.text = f"[{translated_text}]"
                caption.font.size = Pt(10)
                caption.font.italic = True

                print(f"Added image translation: {translated_text[:50]}...")

        except Exception as e:
            print(f"Error translating image: {e}")
|
|
|
|
|
|
# Global translator instance
|
|
# Created once when this module is imported; the constructor only stores a
# reference to the imported translation_service on the instance.
pptx_translator = PowerPointTranslator()
|