# office_translator/translators/pptx_translator.py

"""
PowerPoint Translation Module
Translates PowerPoint files while preserving all layouts, animations, and media
"""
from pathlib import Path
from pptx import Presentation
from pptx.shapes.base import BaseShape
from pptx.shapes.group import GroupShape
from pptx.util import Inches, Pt
from pptx.enum.shapes import MSO_SHAPE_TYPE
from services.translation_service import translation_service
import tempfile
import os


class PowerPointTranslator:
    """Handles translation of PowerPoint presentations with strict formatting preservation

    Every slide is walked shape by shape and the text of each run is replaced
    with the output of the configured translation service. Text is replaced at
    run level so run-level formatting objects are never recreated.
    """

    def __init__(self):
        # Use the shared, module-level translation service instance
        self.translation_service = translation_service

    def translate_file(self, input_path: Path, output_path: Path, target_language: str) -> Path:
        """
        Translate a PowerPoint presentation while preserving all formatting and structure

        Args:
            input_path: Path to input PowerPoint file
            output_path: Path to save translated PowerPoint file
            target_language: Target language code

        Returns:
            Path to the translated file (the same object passed as output_path)
        """
        presentation = Presentation(input_path)

        # Slide numbers passed down are 1-based
        for slide_num, slide in enumerate(presentation.slides, start=1):
            self._translate_slide(slide, target_language, slide_num, input_path)

        # Save the translated presentation
        presentation.save(output_path)

        return output_path

    def _translate_slide(self, slide, target_language: str, slide_num: int, input_path: Path):
        """
        Translate all text elements in a slide while preserving layout

        Args:
            slide: Slide to translate
            target_language: Target language code
            slide_num: Slide number for reference (currently unused)
            input_path: Path to source file (currently unused)
        """
        # Translate notes (speaker notes)
        if slide.has_notes_slide:
            notes_frame = slide.notes_slide.notes_text_frame
            if notes_frame:
                self._translate_text_frame(notes_frame, target_language)

        # Iterate over a snapshot: image translation appends new text boxes to
        # slide.shapes, and those (already translated) boxes must not be
        # visited and translated again.
        for shape in list(slide.shapes):
            self._translate_shape(shape, target_language, slide)

    def _translate_shape(self, shape: "BaseShape", target_language: str, slide=None):
        """
        Translate text in a shape based on its type

        Text frames, tables, pictures and nested shapes are handled. Nested
        shapes are visited exactly once, so their text is never translated
        twice.

        Args:
            shape: Shape to translate
            target_language: Target language code
            slide: Parent slide for adding image translations
        """
        # Handle text-containing shapes
        if shape.has_text_frame:
            self._translate_text_frame(shape.text_frame, target_language)

        # Some shapes do not implement shape_type and raise NotImplementedError;
        # treat those as "no special handling" instead of aborting the file.
        try:
            shape_type = shape.shape_type
        except NotImplementedError:
            shape_type = None

        if shape_type == MSO_SHAPE_TYPE.TABLE:
            # Handle tables
            self._translate_table(shape.table, target_language)
        elif shape_type == MSO_SHAPE_TYPE.PICTURE:
            # Handle pictures/images
            self._translate_image_shape(shape, target_language, slide)

        # Handle any container exposing child shapes (group shapes and
        # anything else with a `shapes` collection) in a single pass.
        sub_shapes = getattr(shape, 'shapes', None)
        if sub_shapes is None:
            return

        try:
            # Snapshot the children: translating may add shapes to the slide
            children = list(sub_shapes)
        except TypeError:
            # Some shapes may not support iteration
            return

        for sub_shape in children:
            self._translate_shape(sub_shape, target_language, slide)

    def _translate_image_shape(self, shape, target_language: str, slide):
        """
        Translate text in an image using vision model and add as text box

        Does nothing unless the translation service has `translate_images`
        enabled and its provider is an OllamaTranslationProvider. Failures are
        reported on stdout and never propagate to the caller.

        Args:
            shape: Picture shape whose embedded image should be translated
            target_language: Target language code
            slide: Slide that receives the text box holding the translation
        """
        if not getattr(self.translation_service, 'translate_images', False):
            return

        # Without a parent slide there is nowhere to put the translation, so
        # skip the (expensive) vision call altogether.
        if slide is None:
            return

        # Imported locally, as this provider is only needed on this code path
        from services.translation_service import OllamaTranslationProvider

        provider = self.translation_service.provider
        if not isinstance(provider, OllamaTranslationProvider):
            return

        tmp_path = None
        try:
            # Get image blob
            image = shape.image
            image_blob = image.blob
            ext = image.ext

            # delete=False keeps the file on disk after the `with` block so it
            # can be handed to the provider by path; it is removed in `finally`.
            with tempfile.NamedTemporaryFile(suffix=f'.{ext}', delete=False) as tmp:
                tmp_path = tmp.name
                tmp.write(image_blob)

            # Translate with vision
            translated_text = provider.translate_image(tmp_path, target_language)

            if translated_text and translated_text.strip():
                # Place a text box just below the image, same width as the image.
                # NOTE(review): for pictures nested in a group, left/top may be
                # relative to the group rather than the slide - confirm placement.
                left = shape.left
                top = shape.top + shape.height + Inches(0.1)
                width = shape.width
                height = Inches(0.5)

                # Add text box
                textbox = slide.shapes.add_textbox(left, top, width, height)
                caption = textbox.text_frame.paragraphs[0]
                caption.text = f"[{translated_text}]"
                caption.font.size = Pt(10)
                caption.font.italic = True

                print(f"Added image translation: {translated_text[:50]}...")

        except Exception as e:
            # Best effort: a failing image must not abort the whole translation
            print(f"Error translating image: {e}")
        finally:
            # Always remove the temporary file, even when the vision call failed
            if tmp_path is not None:
                try:
                    os.unlink(tmp_path)
                except OSError as e:
                    print(f"Error removing temporary image file: {e}")

    def _translate_text_frame(self, text_frame, target_language: str):
        """
        Translate text within a text frame while preserving formatting

        Args:
            text_frame: Text frame to translate
            target_language: Target language code
        """
        # Nothing to do for empty / whitespace-only frames
        if not text_frame.text.strip():
            return

        # Translate each paragraph in the text frame
        for paragraph in text_frame.paragraphs:
            self._translate_paragraph(paragraph, target_language)

    def _translate_paragraph(self, paragraph, target_language: str):
        """
        Translate a paragraph while preserving run-level formatting

        Each non-blank run is sent to the translation service on its own and
        its text is replaced with the result; blank runs are left untouched.

        Args:
            paragraph: Paragraph to translate
            target_language: Target language code
        """
        if not paragraph.text.strip():
            return

        # Translate each run in the paragraph to preserve individual formatting
        for run in paragraph.runs:
            if not run.text.strip():
                continue
            run.text = self.translation_service.translate_text(
                run.text, target_language
            )

    def _translate_table(self, table, target_language: str):
        """
        Translate all cells in a table while preserving structure

        Args:
            table: Table to translate
            target_language: Target language code
        """
        for row in table.rows:
            for cell in row.cells:
                self._translate_text_frame(cell.text_frame, target_language)

    def _is_translatable(self, text: str) -> bool:
        """
        Determine if text should be translated

        Only the type and the stripped length are checked; the content itself
        (digits, punctuation, ...) is not inspected.

        Args:
            text: Text to check

        Returns:
            True if text is a string with at least two characters after
            stripping surrounding whitespace, False otherwise
        """
        if not text or not isinstance(text, str):
            return False

        # Very short fragments (fewer than 2 characters) are not translated
        return len(text.strip()) >= 2


# Global translator instance, created once at import time so that callers can
# share a single PowerPointTranslator via `pptx_translator`.
pptx_translator = PowerPointTranslator()