216 lines
7.6 KiB
Python
216 lines
7.6 KiB
Python
"""
|
|
PowerPoint Translation Module
|
|
Translates PowerPoint files while preserving all layouts, animations, and media
|
|
"""
|
|
from pathlib import Path
|
|
from pptx import Presentation
|
|
from pptx.shapes.base import BaseShape
|
|
from pptx.shapes.group import GroupShape
|
|
from pptx.util import Inches, Pt
|
|
from pptx.enum.shapes import MSO_SHAPE_TYPE
|
|
from services.translation_service import translation_service
|
|
import tempfile
|
|
import os
|
|
|
|
|
|
class PowerPointTranslator:
    """Translate the text of PowerPoint presentations in place.

    Text is replaced run by run so that every run keeps its own character
    formatting. Speaker notes, text shapes, tables and grouped shapes are
    visited; pictures can optionally receive a translated caption text box.
    """

    def __init__(self):
        # Bind the shared module-level service so it can be swapped per instance.
        self.translation_service = translation_service

    def translate_file(self, input_path: Path, output_path: Path, target_language: str) -> Path:
        """
        Translate a PowerPoint presentation while preserving all formatting and structure

        Args:
            input_path: Path to input PowerPoint file
            output_path: Path to save translated PowerPoint file
            target_language: Target language code

        Returns:
            Path to the translated file
        """
        presentation = Presentation(input_path)

        # Slide numbers handed to _translate_slide are 1-based.
        for slide_num, slide in enumerate(presentation.slides, start=1):
            self._translate_slide(slide, target_language, slide_num, input_path)

        presentation.save(output_path)
        return output_path

    def _translate_slide(self, slide, target_language: str, slide_num: int, input_path: Path):
        """
        Translate all text elements in a slide while preserving layout

        Args:
            slide: Slide to translate
            target_language: Target language code
            slide_num: Slide number for reference (currently unused)
            input_path: Path to source file (currently unused)
        """
        # Translate notes (speaker notes)
        if slide.has_notes_slide:
            notes_frame = slide.notes_slide.notes_text_frame
            if notes_frame:
                self._translate_text_frame(notes_frame, target_language)

        # Snapshot the shapes first: _translate_image_shape appends caption
        # text boxes to slide.shapes, and iterating the live collection could
        # visit those new boxes and translate the captions a second time.
        for shape in list(slide.shapes):
            self._translate_shape(shape, target_language, slide)

    def _translate_shape(self, shape: "BaseShape", target_language: str, slide=None):
        """
        Translate text in a shape based on its type

        Child shapes of a group (or of any object exposing ``shapes``) are
        visited exactly once.

        Args:
            shape: Shape to translate
            target_language: Target language code
            slide: Parent slide for adding image translations
        """
        # Handle text-containing shapes
        if shape.has_text_frame:
            self._translate_text_frame(shape.text_frame, target_language)

        # python-pptx's BaseShape raises NotImplementedError from shape_type
        # for shape classes that do not implement it; treat those as
        # "no special handling" instead of aborting the whole file.
        try:
            shape_type = shape.shape_type
        except NotImplementedError:
            shape_type = None

        if shape_type == MSO_SHAPE_TYPE.TABLE:
            self._translate_table(shape.table, target_language)
        elif shape_type == MSO_SHAPE_TYPE.PICTURE:
            self._translate_image_shape(shape, target_language, slide)

        # Group shapes (and anything else that exposes child shapes) are
        # recursed into through this single path, so children are never
        # translated twice.
        sub_shapes = getattr(shape, "shapes", None)
        if sub_shapes is None:
            return
        try:
            children = list(sub_shapes)
        except TypeError:
            # Some objects expose ``shapes`` without supporting iteration.
            return
        for sub_shape in children:
            self._translate_shape(sub_shape, target_language, slide)

    def _translate_image_shape(self, shape, target_language: str, slide):
        """
        Translate text in an image using vision model and add as text box

        Does nothing unless the translation service has a truthy
        ``translate_images`` attribute, its provider is an
        ``OllamaTranslationProvider`` and a slide is available to receive the
        caption. Failures are reported with ``print`` and never propagate.

        Args:
            shape: Picture shape whose embedded image is sent to the provider
            target_language: Target language code
            slide: Slide that receives the caption text box
        """
        if not getattr(self.translation_service, 'translate_images', False):
            return

        # Without a slide there is nowhere to put the caption, so skip the
        # (potentially expensive) vision call altogether.
        if slide is None:
            return

        from services.translation_service import OllamaTranslationProvider

        provider = self.translation_service.provider
        if not isinstance(provider, OllamaTranslationProvider):
            return

        try:
            image = shape.image
            image_blob = image.blob
            ext = image.ext

            # delete=False because the provider receives the file by path
            # after it has been closed; removal is guaranteed by the finally.
            tmp = tempfile.NamedTemporaryFile(suffix=f'.{ext}', delete=False)
            try:
                with tmp:
                    tmp.write(image_blob)
                translated_text = provider.translate_image(tmp.name, target_language)
            finally:
                os.unlink(tmp.name)

            if translated_text and translated_text.strip():
                # Caption box: same left edge and width as the picture,
                # 0.1" below it, 0.5" high.
                left = shape.left
                top = shape.top + shape.height + Inches(0.1)
                width = shape.width
                height = Inches(0.5)

                textbox = slide.shapes.add_textbox(left, top, width, height)
                caption = textbox.text_frame.paragraphs[0]
                caption.text = f"[{translated_text}]"
                caption.font.size = Pt(10)
                caption.font.italic = True

                print(f"Added image translation: {translated_text[:50]}...")

        except Exception as e:
            # Image translation is best-effort; never fail the whole file.
            print(f"Error translating image: {e}")

    def _translate_text_frame(self, text_frame, target_language: str):
        """
        Translate text within a text frame while preserving formatting

        Args:
            text_frame: Text frame to translate
            target_language: Target language code
        """
        if not text_frame.text.strip():
            return

        for paragraph in text_frame.paragraphs:
            self._translate_paragraph(paragraph, target_language)

    def _translate_paragraph(self, paragraph, target_language: str):
        """
        Translate a paragraph while preserving run-level formatting

        Each non-blank run is translated on its own. The run's original
        leading and trailing whitespace is kept around the translation so
        adjacent runs do not get glued together, and a run is left untouched
        when the service returns nothing usable.

        Args:
            paragraph: Paragraph to translate
            target_language: Target language code
        """
        if not paragraph.text.strip():
            return

        for run in paragraph.runs:
            source = run.text
            if not source.strip():
                continue

            translated = self.translation_service.translate_text(source, target_language)

            # Keep the original text rather than blanking the run (or raising
            # on None) when the service yields no usable translation.
            if not isinstance(translated, str) or not translated.strip():
                continue

            leading = source[: len(source) - len(source.lstrip())]
            trailing = source[len(source.rstrip()):]
            run.text = f"{leading}{translated.strip()}{trailing}"

    def _translate_table(self, table, target_language: str):
        """
        Translate all cells in a table while preserving structure

        Args:
            table: Table to translate
            target_language: Target language code
        """
        for row in table.rows:
            for cell in row.cells:
                self._translate_text_frame(cell.text_frame, target_language)

    def _is_translatable(self, text: str) -> bool:
        """
        Determine if text should be translated

        Only emptiness, type and length are checked: the text must be a
        non-empty string with at least two characters after stripping.

        Args:
            text: Text to check

        Returns:
            True if text should be translated, False otherwise
        """
        if not text or not isinstance(text, str):
            return False

        # Very short text (fewer than two characters once stripped) is skipped.
        if len(text.strip()) < 2:
            return False

        return True
|
|
|
|
|
|
# Global translator instance
|
|
# Created once at import time; PowerPointTranslator.__init__ only binds the
# shared translation_service, so no presentation is opened here.
pptx_translator = PowerPointTranslator()
|