172 lines
6.4 KiB
Python
172 lines
6.4 KiB
Python
"""
|
|
Word Document Translation Module
|
|
Translates Word files while preserving all formatting, styles, tables, and images
|
|
"""
|
|
from pathlib import Path
|
|
from docx import Document
|
|
from docx.text.paragraph import Paragraph
|
|
from docx.table import Table, _Cell
|
|
from docx.oxml.text.paragraph import CT_P
|
|
from docx.oxml.table import CT_Tbl
|
|
from docx.section import Section
|
|
from services.translation_service import translation_service
|
|
|
|
|
|
class WordTranslator:
    """Translate the text of Word (.docx) documents while keeping their layout.

    Text is replaced in place, run by run, so the formatting attached to each
    run and the surrounding paragraph / table / section structure are left
    untouched. The translation itself is delegated to ``translation_service``.
    """

    def __init__(self):
        # Stored on the instance so every helper goes through the same backend.
        self.translation_service = translation_service

    def translate_file(self, input_path: Path, output_path: Path, target_language: str) -> Path:
        """
        Translate a Word document while preserving its formatting and structure.

        Args:
            input_path: Path to input Word file
            output_path: Path to save translated Word file
            target_language: Target language code

        Returns:
            Path to the translated file (the same object as ``output_path``)
        """
        document = Document(input_path)

        # Main story first, then the headers/footers owned by each section.
        self._translate_document_body(document, target_language)
        for section in document.sections:
            self._translate_section(section, target_language)

        document.save(output_path)
        return output_path

    def _translate_document_body(self, document: Document, target_language: str):
        """
        Translate every top-level paragraph and table of the document body.

        The body's child elements are visited in document order; children that
        are neither paragraphs nor tables are left alone.

        Args:
            document: Document to translate
            target_language: Target language code
        """
        for element in document.element.body:
            if isinstance(element, CT_P):
                self._translate_paragraph(Paragraph(element, document), target_language)
            elif isinstance(element, CT_Tbl):
                self._translate_table(Table(element, document), target_language)

    def _translate_paragraph(self, paragraph: Paragraph, target_language: str):
        """
        Translate a paragraph in place, one run at a time.

        Runs whose text is empty or whitespace-only are not sent to the
        translation service and keep their original text.

        Args:
            paragraph: Paragraph to translate
            target_language: Target language code
        """
        if not paragraph.text.strip():
            return

        # Read the runs once and reuse the list for the loop below.
        runs = paragraph.runs
        if not runs:
            # Visible text but no direct runs: fall back to replacing the
            # paragraph text as a whole.
            # NOTE(review): assigning ``paragraph.text`` may discard wrappers
            # such as hyperlinks that hold this text - confirm against the
            # python-docx version in use.
            paragraph.text = self.translation_service.translate_text(
                paragraph.text, target_language
            )
            return

        for run in runs:
            if run.text.strip():
                run.text = self.translation_service.translate_text(
                    run.text, target_language
                )

    def _translate_table(self, table: Table, target_language: str):
        """
        Translate every distinct cell of a table while preserving structure.

        python-docx reports a merged cell once for every grid position it
        spans (horizontally within a row and vertically across rows).
        Translating each occurrence would push already-translated text through
        the service again, so cells are de-duplicated on their underlying
        ``<w:tc>`` element.

        Args:
            table: Table to translate
            target_language: Target language code
        """
        # Keyed by id() and holding the element itself, so the element stays
        # alive and its id cannot be reused while the table is being walked.
        seen = {}
        for row in table.rows:
            for cell in row.cells:
                tc = cell._tc
                if id(tc) in seen:
                    continue
                seen[id(tc)] = tc
                self._translate_cell(cell, target_language)

    def _translate_cell(self, cell: _Cell, target_language: str):
        """
        Translate the paragraphs and nested tables inside a table cell.

        Args:
            cell: Cell to translate
            target_language: Target language code
        """
        for paragraph in cell.paragraphs:
            self._translate_paragraph(paragraph, target_language)

        # Nested tables recurse through the regular table path.
        for table in cell.tables:
            self._translate_table(table, target_language)

    def _translate_section(self, section: Section, target_language: str):
        """
        Translate the headers and footers defined by a section.

        Covers the primary, first-page and even-page header and footer, in
        that order.

        Args:
            section: Section to translate
            target_language: Target language code
        """
        parts = (
            section.header,
            section.footer,
            section.first_page_header,
            section.first_page_footer,
            section.even_page_header,
            section.even_page_footer,
        )
        for part in parts:
            self._translate_header_footer(part, target_language)

    def _translate_header_footer(self, part, target_language: str):
        """
        Translate one header or footer, unless it is inherited.

        A part that is linked to the previous section has no content of its
        own. Per the python-docx documentation, reading it resolves to the
        previous section's definition, which would be translated a second
        time, or creates an empty definition when none exists. Such parts are
        skipped.

        Args:
            part: Header/footer object taken from a section
            target_language: Target language code
        """
        if part is None or part.is_linked_to_previous:
            return

        for paragraph in part.paragraphs:
            self._translate_paragraph(paragraph, target_language)
        for table in part.tables:
            self._translate_table(table, target_language)
|
|
|
|
|
|
# Global translator instance
|
|
# Module-level shared instance, constructed once when this module is imported.
word_translator = WordTranslator()
|