""" Excel Translation Module Translates Excel files while preserving all formatting, formulas, images, and layout """ import re import tempfile import os from pathlib import Path from typing import Dict, Set from openpyxl import load_workbook from openpyxl.worksheet.worksheet import Worksheet from openpyxl.cell.cell import Cell from openpyxl.utils import get_column_letter from services.translation_service import translation_service class ExcelTranslator: """Handles translation of Excel files with strict formatting preservation""" def __init__(self): self.translation_service = translation_service self.formula_pattern = re.compile(r'=.*') def translate_file(self, input_path: Path, output_path: Path, target_language: str) -> Path: """ Translate an Excel file while preserving all formatting and structure Args: input_path: Path to input Excel file output_path: Path to save translated Excel file target_language: Target language code Returns: Path to the translated file """ # Load workbook with data_only=False to preserve formulas workbook = load_workbook(input_path, data_only=False) # First, translate all worksheet content sheet_name_mapping = {} for sheet_name in workbook.sheetnames: worksheet = workbook[sheet_name] self._translate_worksheet(worksheet, target_language) # Translate images if enabled if getattr(self.translation_service, 'translate_images', False): self._translate_images(worksheet, target_language) # Prepare translated sheet name (but don't rename yet) translated_sheet_name = self.translation_service.translate_text( sheet_name, target_language ) if translated_sheet_name and translated_sheet_name != sheet_name: # Truncate to Excel's 31 character limit and ensure uniqueness new_name = translated_sheet_name[:31] counter = 1 base_name = new_name[:28] if len(new_name) > 28 else new_name while new_name in sheet_name_mapping.values() or new_name in workbook.sheetnames: new_name = f"{base_name}_{counter}" counter += 1 sheet_name_mapping[sheet_name] = new_name # Now rename sheets (after all content is translated) for original_name, new_name in sheet_name_mapping.items(): workbook[original_name].title = new_name # Save the translated workbook workbook.save(output_path) workbook.close() return output_path def _translate_worksheet(self, worksheet: Worksheet, target_language: str): """ Translate all cells in a worksheet while preserving formatting Args: worksheet: Worksheet to translate target_language: Target language code """ # Iterate through all cells that have values for row in worksheet.iter_rows(): for cell in row: if cell.value is not None: self._translate_cell(cell, target_language) def _translate_cell(self, cell: Cell, target_language: str): """ Translate a single cell while preserving its formula and formatting Args: cell: Cell to translate target_language: Target language code """ original_value = cell.value # Skip if cell is empty if original_value is None: return # Handle formulas if isinstance(original_value, str) and original_value.startswith('='): self._translate_formula(cell, original_value, target_language) # Handle regular text elif isinstance(original_value, str): translated_text = self.translation_service.translate_text( original_value, target_language ) cell.value = translated_text # Numbers, dates, booleans remain unchanged def _translate_formula(self, cell: Cell, formula: str, target_language: str): """ Translate text within a formula while preserving the formula structure Args: cell: Cell containing the formula formula: Formula string target_language: Target language code """ # Extract text strings from formula (text within quotes) string_pattern = re.compile(r'"([^"]*)"') strings = string_pattern.findall(formula) if not strings: return # Translate each string and replace in formula translated_formula = formula for original_string in strings: if original_string.strip(): # Only translate non-empty strings translated_string = self.translation_service.translate_text( original_string, target_language ) # Replace in formula, being careful with special regex characters translated_formula = translated_formula.replace( f'"{original_string}"', f'"{translated_string}"' ) cell.value = translated_formula def _should_translate(self, text: str) -> bool: """ Determine if text should be translated Args: text: Text to check Returns: True if text should be translated, False otherwise """ if not text or not isinstance(text, str): return False # Don't translate if it's only numbers, special characters, or very short if len(text.strip()) < 2: return False # Check if it's a formula (handled separately) if text.startswith('='): return False return True def _translate_images(self, worksheet: Worksheet, target_language: str): """ Translate text in images using vision model and add as comments """ from services.translation_service import OllamaTranslationProvider if not isinstance(self.translation_service.provider, OllamaTranslationProvider): return try: # Get images from worksheet images = getattr(worksheet, '_images', []) for idx, image in enumerate(images): try: # Get image data image_data = image._data() ext = image.format or 'png' # Save to temp file with tempfile.NamedTemporaryFile(suffix=f'.{ext}', delete=False) as tmp: tmp.write(image_data) tmp_path = tmp.name # Translate with vision translated_text = self.translation_service.provider.translate_image(tmp_path, target_language) # Clean up os.unlink(tmp_path) if translated_text and translated_text.strip(): # Add translation as a cell near the image anchor = image.anchor if hasattr(anchor, '_from'): cell_ref = f"{get_column_letter(anchor._from.col + 1)}{anchor._from.row + 1}" cell = worksheet[cell_ref] # Add as comment from openpyxl.comments import Comment cell.comment = Comment(f"Image translation: {translated_text}", "Translator") print(f"Added Excel image translation at {cell_ref}: {translated_text[:50]}...") except Exception as e: print(f"Error translating Excel image {idx}: {e}") continue except Exception as e: print(f"Error processing Excel images: {e}") # Global translator instance excel_translator = ExcelTranslator()