"""
Excel Translation Module

Translates Excel files while preserving all formatting, formulas, images,
and layout.
"""

import re
from pathlib import Path
from typing import Dict, Set

from openpyxl import load_workbook
from openpyxl.worksheet.worksheet import Worksheet
from openpyxl.cell.cell import Cell
from openpyxl.utils import get_column_letter

from services.translation_service import translation_service

# Excel limits sheet titles to 31 characters.
_MAX_SHEET_TITLE_LENGTH = 31

# Characters Excel forbids in sheet titles; openpyxl raises ValueError when a
# title containing one of them is assigned.
_INVALID_SHEET_TITLE_CHARS = re.compile(r'[\\/*?:\[\]]')

# A double-quoted string literal inside a formula. An embedded quote is
# written as two consecutive quotes (""), so the literal only ends at a quote
# that is not part of such a pair. Group 1 is the still-escaped content.
_FORMULA_STRING_PATTERN = re.compile(r'"((?:[^"]|"")*)"')

# One left-to-right pass over a formula that recognises, in order:
#   * a string literal (matched so it is skipped, never rewritten),
#   * a quoted sheet reference  'Sheet name'!  (group 1, quotes doubled),
#   * a bare sheet reference    SheetName!     (group 2).
# The lookbehind keeps the bare form from matching the tail of a longer
# identifier or the sheet part of an external reference such as
# [Book.xlsx]Sheet1!A1.
_SHEET_REFERENCE_PATTERN = re.compile(
    r'"(?:[^"]|"")*"'
    r"|'((?:[^']|'')+)'!"
    r"|(?<![\w.\]])([^\W\d][\w.]*)!"
)


class ExcelTranslator:
    """Handles translation of Excel files with strict formatting preservation"""

    def __init__(self):
        self.translation_service = translation_service
        # Not used by the methods of this class; kept so the attribute
        # remains available to any external code that reads it.
        self.formula_pattern = re.compile(r'=.*')

    def translate_file(self, input_path: Path, output_path: Path,
                       target_language: str) -> Path:
        """
        Translate an Excel file while preserving all formatting and structure

        Cell text and string literals inside formulas are translated first.
        Sheet titles are translated as well, but the sheets are only renamed
        after every cell has been processed, and cell formulas that refer to
        a renamed sheet are rewritten to use the new title.

        Args:
            input_path: Path to input Excel file
            output_path: Path to save translated Excel file
            target_language: Target language code

        Returns:
            Path to the translated file

        Raises:
            Whatever ``load_workbook`` / ``Workbook.save`` raise for an
            unreadable input or unwritable output path.
        """
        # data_only=False keeps formulas instead of their cached results.
        workbook = load_workbook(input_path, data_only=False)
        try:
            original_names = list(workbook.sheetnames)

            # Titles compare case-insensitively in Excel, so track the
            # case-folded form of every title that is, or will be, in use.
            taken: Set[str] = {name.casefold() for name in original_names}
            sheet_name_mapping: Dict[str, str] = {}

            for sheet_name in original_names:
                sheet = workbook[sheet_name]
                # Chart sheets have no cells to iterate.
                if isinstance(sheet, Worksheet):
                    self._translate_worksheet(sheet, target_language)

                # Prepare the translated title (the rename happens later).
                translated_name = self.translation_service.translate_text(
                    sheet_name, target_language
                )
                if not isinstance(translated_name, str):
                    continue
                new_name = self._build_sheet_title(
                    translated_name, sheet_name, taken
                )
                if new_name:
                    sheet_name_mapping[sheet_name] = new_name
                    taken.add(new_name.casefold())

            if sheet_name_mapping:
                # Rewrite references before renaming: openpyxl does not
                # adjust formulas when a sheet title changes.
                self._update_sheet_references(workbook, sheet_name_mapping)
                for original_name, new_name in sheet_name_mapping.items():
                    workbook[original_name].title = new_name

            workbook.save(output_path)
        finally:
            # Close even when translation or saving fails.
            workbook.close()

        return output_path

    @staticmethod
    def _build_sheet_title(translated_name: str, original_name: str,
                           taken: Set[str]) -> str:
        """
        Turn a translated sheet name into a valid, unused sheet title

        Args:
            translated_name: Raw translation of the sheet title
            original_name: Current title of the sheet
            taken: Case-folded titles that are already in use or planned

        Returns:
            A title of at most 31 characters without forbidden characters
            that does not collide with ``taken``, or an empty string when the
            sheet should keep its current title.
        """
        title = _INVALID_SHEET_TITLE_CHARS.sub('_', translated_name).strip()
        # Excel also rejects titles that start or end with an apostrophe.
        title = title[:_MAX_SHEET_TITLE_LENGTH].strip("'")

        # Nothing usable, or a change that Excel would not see as a new name.
        if not title or title.casefold() == original_name.casefold():
            return ''

        if title.casefold() not in taken:
            return title

        # Append _1, _2, ... and shorten the base so the limit always holds.
        counter = 1
        while True:
            suffix = f"_{counter}"
            candidate = title[:_MAX_SHEET_TITLE_LENGTH - len(suffix)] + suffix
            if candidate.casefold() not in taken:
                return candidate
            counter += 1

    @staticmethod
    def _update_sheet_references(workbook, sheet_name_mapping: Dict[str, str]):
        """
        Point cell formulas at the new titles of sheets about to be renamed

        Only formulas stored in cells are rewritten. References held
        elsewhere in the workbook (for example defined names or chart data
        sources) and 3-D ranges such as ``First:Last!A1`` are not handled.

        Args:
            workbook: Workbook whose formulas are updated in place
            sheet_name_mapping: Current sheet title -> new sheet title
        """
        lookup = {
            old.casefold(): new for old, new in sheet_name_mapping.items()
        }

        def rewrite(match):
            quoted, bare = match.group(1), match.group(2)
            if quoted is not None:
                referenced = quoted.replace("''", "'")
            elif bare is not None:
                referenced = bare
            else:
                # String literal: text, not a reference.
                return match.group(0)

            new_name = lookup.get(referenced.casefold())
            if new_name is None:
                return match.group(0)
            # The quoted form is always valid; apostrophes are doubled.
            escaped = new_name.replace("'", "''")
            return f"'{escaped}'!"

        for worksheet in workbook.worksheets:
            for row in worksheet.iter_rows():
                for cell in row:
                    value = cell.value
                    if isinstance(value, str) and value.startswith('='):
                        updated = _SHEET_REFERENCE_PATTERN.sub(rewrite, value)
                        if updated != value:
                            cell.value = updated

    def _translate_worksheet(self, worksheet: Worksheet, target_language: str):
        """
        Translate all cells in a worksheet while preserving formatting

        Args:
            worksheet: Worksheet to translate
            target_language: Target language code
        """
        # Only cells that hold a value are handed to the cell translator.
        for row in worksheet.iter_rows():
            for cell in row:
                if cell.value is not None:
                    self._translate_cell(cell, target_language)

    def _translate_cell(self, cell: Cell, target_language: str):
        """
        Translate a single cell while preserving its formula and formatting

        Only ``cell.value`` is assigned; no other cell attribute is touched.

        Args:
            cell: Cell to translate
            target_language: Target language code
        """
        original_value = cell.value

        # Empty cells, numbers, dates and booleans remain unchanged.
        if not isinstance(original_value, str):
            return

        if original_value.startswith('='):
            self._translate_formula(cell, original_value, target_language)
            return

        # Whitespace-only text has nothing to translate.
        if not original_value.strip():
            return

        translated_text = self.translation_service.translate_text(
            original_value, target_language
        )
        # Keep the original text when the service returns nothing usable
        # instead of blanking the cell.
        if isinstance(translated_text, str) and translated_text:
            cell.value = translated_text

    def _translate_formula(self, cell: Cell, formula: str, target_language: str):
        """
        Translate text within a formula while preserving the formula structure

        Every double-quoted string literal is translated; everything outside
        the literals is left exactly as it was.

        Args:
            cell: Cell containing the formula
            formula: Formula string
            target_language: Target language code
        """
        # Identical literals are translated once per formula.
        translations: Dict[str, str] = {}

        def replace_literal(match):
            # Undo Excel's quote doubling before translating.
            text = match.group(1).replace('""', '"')
            if not text.strip():
                return match.group(0)

            if text not in translations:
                translated = self.translation_service.translate_text(
                    text, target_language
                )
                usable = isinstance(translated, str) and translated
                translations[text] = translated if usable else text

            # Re-escape quotes so the result is still one valid literal.
            escaped = translations[text].replace('"', '""')
            return f'"{escaped}"'

        # A single pass, so inserted translations are never scanned again.
        translated_formula = _FORMULA_STRING_PATTERN.sub(replace_literal, formula)
        if translated_formula != formula:
            cell.value = translated_formula

    def _should_translate(self, text: str) -> bool:
        """
        Determine if text should be translated

        Not called by the other methods in this module.

        Args:
            text: Text to check

        Returns:
            True if text should be translated, False otherwise
        """
        if not text or not isinstance(text, str):
            return False

        # Very short text (fewer than two characters once stripped) is skipped.
        if len(text.strip()) < 2:
            return False

        # Formulas are handled separately.
        if text.startswith('='):
            return False

        return True


# Global translator instance
excel_translator = ExcelTranslator()