# office_translator/translators/pptx_translator.py

"""
PowerPoint Translation Module
Translates PowerPoint files while preserving all layouts, animations, and media
OPTIMIZED: Uses batch translation for 5-10x faster processing
"""
from pathlib import Path
from pptx import Presentation
from pptx.shapes.base import BaseShape
from pptx.shapes.group import GroupShape
from pptx.util import Inches, Pt
from pptx.enum.shapes import MSO_SHAPE_TYPE
from services.translation_service import translation_service
from typing import List, Tuple
import tempfile
import os


class PowerPointTranslator:
    """Translate the text of a PowerPoint presentation run by run.

    Text is gathered from slide notes, text frames, tables and grouped shapes,
    sent to the translation service in a single batch, and written back into
    the runs it came from. Only the text of existing runs is replaced; no
    runs, paragraphs or shapes are removed.
    """

    def __init__(self):
        # Use the shared module-level service for all translation calls.
        self.translation_service = translation_service

    def translate_file(self, input_path: Path, output_path: Path, target_language: str) -> Path:
        """Translate a presentation and save the result.

        Args:
            input_path: Presentation to read.
            output_path: Where the translated presentation is written.
            target_language: Passed through unchanged to the translation service.

        Returns:
            ``output_path``.
        """
        presentation = Presentation(input_path)

        text_elements = []  # (original_text, setter) pairs, one per non-blank run
        image_shapes = []  # (picture_shape, slide) pairs, handled after the text

        for slide in presentation.slides:
            # Check has_notes_slide first so the notes slide is only touched
            # when the slide already has one.
            if slide.has_notes_slide and slide.notes_slide.notes_text_frame:
                self._collect_from_text_frame(slide.notes_slide.notes_text_frame, text_elements)

            for shape in slide.shapes:
                self._collect_from_shape(shape, text_elements, slide, image_shapes)

        # One service call for every collected run.
        if text_elements:
            texts = [text for text, _ in text_elements]
            print(f"Batch translating {len(texts)} text segments...")
            translated_texts = list(self.translation_service.translate_batch(texts, target_language))

            # zip() below stops at the shorter sequence, so make a short (or
            # long) result visible instead of silently leaving text untranslated.
            if len(translated_texts) != len(texts):
                print(
                    f"Warning: expected {len(texts)} translations, "
                    f"got {len(translated_texts)}"
                )

            for (_original, setter), translated in zip(text_elements, translated_texts):
                if translated is None or setter is None:
                    continue
                try:
                    setter(translated)
                except Exception as e:
                    # Best effort: one failing run must not abort the whole file.
                    print(f"Error applying translation: {e}")

        # Images are translated one at a time, and only when the service opts in.
        if getattr(self.translation_service, 'translate_images', False):
            for shape, slide in image_shapes:
                self._translate_image_shape(shape, target_language, slide)

        presentation.save(output_path)

        return output_path

    def _collect_from_shape(self, shape: "BaseShape", text_elements: List[Tuple[str, callable]], slide=None, image_shapes=None):
        """Collect translatable runs (and pictures) from a shape and its children.

        Args:
            shape: Shape to inspect.
            text_elements: List that ``(text, setter)`` pairs are appended to.
            slide: Slide the shape belongs to; stored next to each picture.
            image_shapes: List that ``(shape, slide)`` pairs are appended to for
                pictures. Pictures are ignored when this is ``None``.
        """
        if shape.has_text_frame:
            self._collect_from_text_frame(shape.text_frame, text_elements)

        # has_table is used instead of comparing shape_type with TABLE so that
        # a table sitting in a placeholder is found as well. Shapes without the
        # attribute simply have no table.
        if getattr(shape, 'has_table', False):
            for row in shape.table.rows:
                for cell in row.cells:
                    self._collect_from_text_frame(cell.text_frame, text_elements)

        if image_shapes is not None and self._is_picture(shape):
            image_shapes.append((shape, slide))

        # Descend into child shapes exactly once. The children of a group shape
        # are reached through its `shapes` collection; visiting them a second
        # time would duplicate every run and every picture inside the group.
        sub_shapes = getattr(shape, 'shapes', None)
        if sub_shapes is not None:
            for sub_shape in sub_shapes:
                self._collect_from_shape(sub_shape, text_elements, slide, image_shapes)

    @staticmethod
    def _is_picture(shape) -> bool:
        """Return True when the shape reports itself as a picture."""
        try:
            return shape.shape_type == MSO_SHAPE_TYPE.PICTURE
        except NotImplementedError:
            # python-pptx's generic BaseShape does not implement shape_type;
            # such a shape is not a picture.
            return False

    @staticmethod
    def _make_run_setter(run):
        """Return a callable that writes its argument into ``run.text``."""
        def setter(text):
            run.text = text
        return setter

    def _collect_from_text_frame(self, text_frame, text_elements: List[Tuple[str, callable]]):
        """Append a ``(text, setter)`` pair for every non-blank run in a text frame.

        Frames, paragraphs and runs whose text is empty or whitespace-only are
        skipped. Calling a setter replaces the text of the run it was made for.
        """
        if not text_frame.text.strip():
            return

        for paragraph in text_frame.paragraphs:
            if not paragraph.text.strip():
                continue

            for run in paragraph.runs:
                if run.text and run.text.strip():
                    text_elements.append((run.text, self._make_run_setter(run)))

    def _translate_image_shape(self, shape, target_language: str, slide):
        """Translate the text in a picture and add it as a caption below the picture.

        Does nothing unless the service's provider is an
        ``OllamaTranslationProvider``. Any error is printed and swallowed so a
        single picture cannot abort the file.
        """
        # Imported here rather than at module level, as in the original code.
        from services.translation_service import OllamaTranslationProvider

        provider = self.translation_service.provider
        if not isinstance(provider, OllamaTranslationProvider):
            return

        try:
            image = shape.image

            # The provider is given a file path, so the image bytes go to a
            # temporary file. The file is closed before the provider reads it
            # and is always removed afterwards, even when translation fails.
            tmp = tempfile.NamedTemporaryFile(suffix=f'.{image.ext}', delete=False)
            try:
                with tmp:
                    tmp.write(image.blob)
                translated_text = provider.translate_image(tmp.name, target_language)
            finally:
                os.unlink(tmp.name)

            if translated_text and translated_text.strip():
                # Caption box: same left edge and width as the picture, placed
                # 0.1 inch below it.
                left = shape.left
                top = shape.top + shape.height + Inches(0.1)
                width = shape.width
                height = Inches(0.5)

                textbox = slide.shapes.add_textbox(left, top, width, height)
                p = textbox.text_frame.paragraphs[0]
                p.text = f"[{translated_text}]"
                p.font.size = Pt(10)
                p.font.italic = True

                print(f"Added image translation: {translated_text[:50]}...")

        except Exception as e:
            print(f"Error translating image: {e}")

# Global translator instance, created once when this module is imported.
# Its constructor only stores a reference to the shared translation_service.
pptx_translator = PowerPointTranslator()