216 lines
8.2 KiB
Python
216 lines
8.2 KiB
Python
"""
|
|
Excel Translation Module
|
|
Translates Excel files while preserving all formatting, formulas, images, and layout
|
|
"""
|
|
import re
|
|
import tempfile
|
|
import os
|
|
from pathlib import Path
|
|
from typing import Dict, Set
|
|
from openpyxl import load_workbook
|
|
from openpyxl.worksheet.worksheet import Worksheet
|
|
from openpyxl.cell.cell import Cell
|
|
from openpyxl.utils import get_column_letter
|
|
from services.translation_service import translation_service
|
|
|
|
|
|
class ExcelTranslator:
    """Handles translation of Excel files with strict formatting preservation.

    Text cells, double-quoted string literals inside formulas, and sheet names
    are passed through ``translation_service.translate_text``. Non-string cell
    values and everything in a formula outside its string literals are left
    untouched.
    """

    # A double-quoted string literal inside a formula. Inside such a literal a
    # doubled quote ("") stands for one literal quote character.
    _FORMULA_STRING_PATTERN = re.compile(r'"((?:[^"]|"")*)"')

    # Characters that openpyxl refuses in a worksheet title.
    _INVALID_TITLE_CHARS = re.compile(r'[\\/*?:\[\]]')

    # Excel's limit on the length of a sheet name.
    _MAX_TITLE_LENGTH = 31

    def __init__(self):
        self.translation_service = translation_service
        # Not used by any method of this class; kept so existing callers that
        # read the attribute keep working.
        self.formula_pattern = re.compile(r'=.*')

    def translate_file(self, input_path: Path, output_path: Path, target_language: str) -> Path:
        """
        Translate an Excel file while preserving all formatting and structure

        Every worksheet's cells are translated first; sheet renames are only
        collected during that pass and applied afterwards, so sheets are looked
        up by their original names throughout.

        Args:
            input_path: Path to input Excel file
            output_path: Path to save translated Excel file
            target_language: Target language code

        Returns:
            Path to the translated file
        """
        # data_only=False keeps formulas as formulas rather than cached results
        workbook = load_workbook(input_path, data_only=False)
        try:
            sheet_name_mapping = {}
            # Names that are already used or already promised to another sheet
            taken_names = set(workbook.sheetnames)

            for sheet_name in workbook.sheetnames:
                worksheet = workbook[sheet_name]
                self._translate_worksheet(worksheet, target_language)

                # Translate images if enabled
                if getattr(self.translation_service, 'translate_images', False):
                    self._translate_images(worksheet, target_language)

                # Prepare translated sheet name (but don't rename yet)
                translated_sheet_name = self.translation_service.translate_text(
                    sheet_name, target_language
                )
                if not translated_sheet_name or translated_sheet_name == sheet_name:
                    continue

                title = self._sanitize_sheet_title(translated_sheet_name)
                # Nothing usable left, or the result only differs from the
                # current name by letter case: leave the sheet name alone.
                if not title or title.casefold() == sheet_name.casefold():
                    continue

                new_name = self._dedupe_sheet_title(title, taken_names)
                sheet_name_mapping[sheet_name] = new_name
                taken_names.add(new_name)

            # Now rename sheets (after all content is translated)
            # NOTE(review): formulas that reference a sheet by its old name are
            # not rewritten here - confirm cross-sheet references still resolve
            # after the rename.
            for original_name, new_name in sheet_name_mapping.items():
                workbook[original_name].title = new_name

            # Save the translated workbook
            workbook.save(output_path)
        finally:
            # Release the workbook even when translation or saving fails
            workbook.close()

        return output_path

    @classmethod
    def _sanitize_sheet_title(cls, name: str) -> str:
        """
        Turn a translated sheet name into a title that can be assigned to a sheet

        Characters that are not allowed in a sheet title are replaced with
        underscores, surrounding whitespace is removed, and the result is cut
        to the maximum title length.

        Args:
            name: Translated sheet name

        Returns:
            The cleaned title, or an empty string if nothing usable remains
        """
        cleaned = cls._INVALID_TITLE_CHARS.sub('_', name).strip()
        return cleaned[:cls._MAX_TITLE_LENGTH].strip()

    @classmethod
    def _dedupe_sheet_title(cls, title: str, taken) -> str:
        """
        Make a sheet title unique by appending ``_1``, ``_2``, ... when needed

        Names are compared case-insensitively. The base is shortened as far as
        required so that the suffixed title never exceeds the length limit.

        Args:
            title: Desired (already sanitized) title
            taken: Iterable of titles that must not be reused

        Returns:
            ``title`` itself if it is free, otherwise a suffixed variant
        """
        taken_folded = {name.casefold() for name in taken}
        candidate = title
        counter = 1
        while candidate.casefold() in taken_folded:
            suffix = f"_{counter}"
            candidate = title[:cls._MAX_TITLE_LENGTH - len(suffix)] + suffix
            counter += 1
        return candidate

    def _translate_worksheet(self, worksheet: "Worksheet", target_language: str):
        """
        Translate all cells in a worksheet while preserving formatting

        Args:
            worksheet: Worksheet to translate
            target_language: Target language code
        """
        # Only cells that hold a value are handed to _translate_cell
        for row in worksheet.iter_rows():
            for cell in row:
                if cell.value is not None:
                    self._translate_cell(cell, target_language)

    def _translate_cell(self, cell: "Cell", target_language: str):
        """
        Translate a single cell while preserving its formula and formatting

        Only ``cell.value`` is ever assigned. A string starting with ``=`` is
        treated as a formula; any other non-blank string is translated as a
        whole. If the service returns nothing, the original text is kept.

        Args:
            cell: Cell to translate
            target_language: Target language code
        """
        original_value = cell.value

        # Empty cells, numbers, dates, booleans remain unchanged
        if not isinstance(original_value, str):
            return

        # Handle formulas
        if original_value.startswith('='):
            self._translate_formula(cell, original_value, target_language)
            return

        # Whitespace-only text has nothing to translate (formula literals are
        # treated the same way)
        if not original_value.strip():
            return

        # Handle regular text
        translated_text = self.translation_service.translate_text(
            original_value, target_language
        )
        # Never replace existing content with an empty / missing translation
        if translated_text:
            cell.value = translated_text

    def _translate_formula(self, cell: "Cell", formula: str, target_language: str):
        """
        Translate text within a formula while preserving the formula structure

        Each double-quoted string literal is translated exactly once, in a
        single left-to-right pass, so a translated literal is never fed back
        into the translator. Doubled quotes inside a literal are unescaped
        before translation and re-escaped afterwards. Blank literals, and
        literals for which the service returns nothing, are left as they are.

        Args:
            cell: Cell containing the formula
            formula: Formula string
            target_language: Target language code
        """
        def translate_literal(match):
            original_string = match.group(1).replace('""', '"')
            if not original_string.strip():  # Only translate non-empty strings
                return match.group(0)

            translated_string = self.translation_service.translate_text(
                original_string, target_language
            )
            if not translated_string:
                return match.group(0)

            # Re-escape quotes so the literal stays well-formed in the formula
            return '"' + translated_string.replace('"', '""') + '"'

        translated_formula = self._FORMULA_STRING_PATTERN.sub(translate_literal, formula)
        if translated_formula != formula:
            cell.value = translated_formula

    def _should_translate(self, text: str) -> bool:
        """
        Determine if text should be translated

        This helper is not called by the other methods of this class.

        Args:
            text: Text to check

        Returns:
            False for empty or non-string input, for text shorter than two
            characters once stripped, and for text starting with ``=``;
            True otherwise
        """
        if not text or not isinstance(text, str):
            return False

        # Too short to be worth translating
        if len(text.strip()) < 2:
            return False

        # Check if it's a formula (handled separately)
        if text.startswith('='):
            return False

        return True

    def _translate_images(self, worksheet: "Worksheet", target_language: str):
        """
        Translate text in images using vision model and add as comments

        Does nothing unless the translation service's provider is an
        ``OllamaTranslationProvider``. Each image is written to a temporary
        file, passed to ``provider.translate_image``, and a non-blank result is
        attached as a comment to the cell the image is anchored at. Failures
        are printed and skipped so one bad image does not stop the others.

        Args:
            worksheet: Worksheet whose images should be translated
            target_language: Target language code
        """
        from services.translation_service import OllamaTranslationProvider

        provider = self.translation_service.provider
        if not isinstance(provider, OllamaTranslationProvider):
            return

        try:
            # Get images from worksheet
            images = getattr(worksheet, '_images', [])

            for idx, image in enumerate(images):
                try:
                    # Get image data
                    image_data = image._data()
                    ext = image.format or 'png'

                    tmp_path = None
                    try:
                        # Save to temp file; closed before the provider reads it
                        with tempfile.NamedTemporaryFile(suffix=f'.{ext}', delete=False) as tmp:
                            tmp_path = tmp.name
                            tmp.write(image_data)

                        # Translate with vision
                        translated_text = provider.translate_image(tmp_path, target_language)
                    finally:
                        # Clean up even when writing or translating fails
                        if tmp_path is not None:
                            os.unlink(tmp_path)

                    if not (translated_text and translated_text.strip()):
                        continue

                    anchor = image.anchor
                    if not hasattr(anchor, '_from'):
                        continue

                    # The anchor's col/row are shifted by one to build the
                    # cell reference (presumably they are zero-based)
                    cell_ref = f"{get_column_letter(anchor._from.col + 1)}{anchor._from.row + 1}"
                    cell = worksheet[cell_ref]

                    # Add as comment, keeping any comment text already there
                    from openpyxl.comments import Comment
                    note = f"Image translation: {translated_text}"
                    existing = cell.comment
                    if existing is not None and existing.text:
                        note = f"{existing.text}\n{note}"
                    cell.comment = Comment(note, "Translator")
                    print(f"Added Excel image translation at {cell_ref}: {translated_text[:50]}...")

                except Exception as e:
                    print(f"Error translating Excel image {idx}: {e}")
                    continue

        except Exception as e:
            print(f"Error processing Excel images: {e}")
|
|
|
|
|
|
# Global translator instance
|
|
# Constructed when this module is imported, so importing it runs ExcelTranslator.__init__.
excel_translator = ExcelTranslator()
|