"""
Word Document Translation Module

Translates Word files while preserving all formatting, styles, tables, and images
"""

from pathlib import Path

from docx import Document
from docx.text.paragraph import Paragraph
from docx.table import Table, _Cell
from docx.oxml.text.paragraph import CT_P
from docx.oxml.table import CT_Tbl
from docx.section import Section

from services.translation_service import translation_service


class WordTranslator:
    """Handles translation of Word documents with strict formatting preservation"""

    # Section attributes that expose header/footer objects, in the order
    # they are translated.
    _HEADER_FOOTER_ATTRS = (
        "header",
        "footer",
        "first_page_header",
        "first_page_footer",
        "even_page_header",
        "even_page_footer",
    )

    def __init__(self):
        self.translation_service = translation_service

    def translate_file(self, input_path: Path, output_path: Path, target_language: str) -> Path:
        """
        Translate a Word document while preserving all formatting and structure

        The document body is translated first, then the headers and footers
        of every section, and finally the result is written to output_path.

        Args:
            input_path: Path to input Word file
            output_path: Path to save translated Word file
            target_language: Target language code

        Returns:
            Path to the translated file (the same object as output_path)
        """
        document = Document(input_path)

        # Translate main document body
        self._translate_document_body(document, target_language)

        # Translate headers and footers in all sections
        for section in document.sections:
            self._translate_section(section, target_language)

        # Save the translated document
        document.save(output_path)

        return output_path

    def _translate_document_body(self, document: Document, target_language: str):
        """
        Translate all top-level elements in the document body

        Only direct children of the body that are paragraphs or tables are
        handled; any other child element is left untouched.

        Args:
            document: Document to translate
            target_language: Target language code
        """
        for element in document.element.body:
            if isinstance(element, CT_P):
                # It's a paragraph
                self._translate_paragraph(Paragraph(element, document), target_language)
            elif isinstance(element, CT_Tbl):
                # It's a table
                self._translate_table(Table(element, document), target_language)

    def _translate_paragraph(self, paragraph: Paragraph, target_language: str):
        """
        Translate a paragraph while preserving all formatting

        Each run with non-blank text is translated on its own so that its
        character formatting stays attached to it. Paragraphs whose text is
        blank are skipped without calling the translation service.

        Args:
            paragraph: Paragraph to translate
            target_language: Target language code
        """
        if not paragraph.text.strip():
            return

        runs = paragraph.runs
        if runs:
            # Translate run by run; whitespace-only runs are left as they are.
            for run in runs:
                if run.text.strip():
                    run.text = self.translation_service.translate_text(
                        run.text, target_language
                    )
            return

        # The paragraph has text but no direct runs. Assigning to
        # paragraph.text replaces the paragraph content with a single run
        # holding the translated text.
        paragraph.text = self.translation_service.translate_text(
            paragraph.text, target_language
        )

    def _translate_table(self, table: Table, target_language: str):
        """
        Translate all cells in a table while preserving structure

        Merged cells are translated exactly once: python-docx returns the same
        underlying cell for every grid position a merged cell spans, so cells
        are de-duplicated on their underlying XML element.

        Args:
            table: Table to translate
            target_language: Target language code
        """
        # Holding the elements in the set keeps them alive for the whole
        # loop, so identity-based membership stays reliable.
        seen_cell_elements = set()

        for row in table.rows:
            for cell in row.cells:
                cell_element = cell._tc
                if cell_element in seen_cell_elements:
                    # Another grid position of an already-translated merged cell
                    continue
                seen_cell_elements.add(cell_element)
                self._translate_cell(cell, target_language)

    def _translate_cell(self, cell: _Cell, target_language: str):
        """
        Translate content within a table cell

        Args:
            cell: Cell to translate
            target_language: Target language code
        """
        for paragraph in cell.paragraphs:
            self._translate_paragraph(paragraph, target_language)

        # Handle nested tables
        for table in cell.tables:
            self._translate_table(table, target_language)

    def _translate_section(self, section: Section, target_language: str):
        """
        Translate headers and footers in a section

        Covers the primary, first-page and even-page header and footer.
        A header or footer that is linked to the previous section is skipped:
        it has no content of its own, and reading its content through
        python-docx would either resolve to an earlier section's definition
        (translating that text a second time) or add a new empty definition
        to the document.

        Args:
            section: Section to translate
            target_language: Target language code
        """
        for attr_name in self._HEADER_FOOTER_ATTRS:
            header_footer = getattr(section, attr_name)
            if header_footer is None or header_footer.is_linked_to_previous:
                continue
            self._translate_header_footer(header_footer, target_language)

    def _translate_header_footer(self, header_footer, target_language: str):
        """
        Translate the paragraphs and tables of one header or footer

        Args:
            header_footer: Header or footer object exposing paragraphs and tables
            target_language: Target language code
        """
        for paragraph in header_footer.paragraphs:
            self._translate_paragraph(paragraph, target_language)

        for table in header_footer.tables:
            self._translate_table(table, target_language)


# Global translator instance
word_translator = WordTranslator()