- GoogleTranslationProvider: Added batch translation with separator method - DeepLTranslationProvider: Added translator caching and batch support - LibreTranslationProvider: Added translator caching and batch support - WordTranslator: Collect all texts -> batch translate -> apply pattern - ExcelTranslator: Collect all texts -> batch translate -> apply pattern - PowerPointTranslator: Collect all texts -> batch translate -> apply pattern - Enhanced Ollama/OpenAI prompts with stricter translation-only rules - Added rule: return original text if uncertain about translation
161 lines
7.1 KiB
Python
161 lines
7.1 KiB
Python
"""
|
|
Excel Translation Module
|
|
Translates Excel files while preserving all formatting, formulas, images, and layout
|
|
OPTIMIZED: Uses batch translation for 5-10x faster processing
|
|
"""
|
|
import re
|
|
import tempfile
|
|
import os
|
|
from pathlib import Path
|
|
from typing import Dict, Set, List, Tuple
|
|
from openpyxl import load_workbook
|
|
from openpyxl.worksheet.worksheet import Worksheet
|
|
from openpyxl.cell.cell import Cell
|
|
from openpyxl.utils import get_column_letter
|
|
from services.translation_service import translation_service
|
|
|
|
|
|
class ExcelTranslator:
|
|
"""Handles translation of Excel files with strict formatting preservation"""
|
|
|
|
def __init__(self):
|
|
self.translation_service = translation_service
|
|
self.formula_pattern = re.compile(r'=.*')
|
|
|
|
def translate_file(self, input_path: Path, output_path: Path, target_language: str) -> Path:
|
|
"""
|
|
Translate an Excel file while preserving all formatting and structure.
|
|
Uses batch translation for improved performance.
|
|
"""
|
|
workbook = load_workbook(input_path, data_only=False)
|
|
|
|
# Collect all translatable text elements
|
|
text_elements = [] # List of (text, setter_function)
|
|
sheet_names_to_translate = []
|
|
|
|
for sheet_name in workbook.sheetnames:
|
|
worksheet = workbook[sheet_name]
|
|
self._collect_from_worksheet(worksheet, text_elements)
|
|
sheet_names_to_translate.append(sheet_name)
|
|
|
|
# Add sheet names to translate
|
|
sheet_name_setters = []
|
|
for sheet_name in sheet_names_to_translate:
|
|
text_elements.append((sheet_name, None)) # None setter - handled separately
|
|
sheet_name_setters.append(sheet_name)
|
|
|
|
# Batch translate all texts at once
|
|
if text_elements:
|
|
texts = [elem[0] for elem in text_elements]
|
|
print(f"Batch translating {len(texts)} text segments...")
|
|
translated_texts = self.translation_service.translate_batch(texts, target_language)
|
|
|
|
# Apply translations to cells
|
|
sheet_name_offset = len(text_elements) - len(sheet_name_setters)
|
|
for i, ((original_text, setter), translated) in enumerate(zip(text_elements[:sheet_name_offset], translated_texts[:sheet_name_offset])):
|
|
if translated is not None and setter is not None:
|
|
try:
|
|
setter(translated)
|
|
except Exception as e:
|
|
print(f"Error applying translation: {e}")
|
|
|
|
# Apply sheet name translations
|
|
sheet_name_mapping = {}
|
|
for i, (sheet_name, translated) in enumerate(zip(sheet_name_setters, translated_texts[sheet_name_offset:])):
|
|
if translated and translated != sheet_name:
|
|
new_name = translated[:31]
|
|
counter = 1
|
|
base_name = new_name[:28] if len(new_name) > 28 else new_name
|
|
while new_name in sheet_name_mapping.values() or new_name in workbook.sheetnames:
|
|
new_name = f"{base_name}_{counter}"
|
|
counter += 1
|
|
sheet_name_mapping[sheet_name] = new_name
|
|
|
|
# Rename sheets
|
|
for original_name, new_name in sheet_name_mapping.items():
|
|
workbook[original_name].title = new_name
|
|
|
|
# Translate images if enabled (separate process)
|
|
if getattr(self.translation_service, 'translate_images', False):
|
|
for sheet_name in workbook.sheetnames:
|
|
self._translate_images(workbook[sheet_name], target_language)
|
|
|
|
workbook.save(output_path)
|
|
workbook.close()
|
|
|
|
return output_path
|
|
|
|
def _collect_from_worksheet(self, worksheet: Worksheet, text_elements: List[Tuple[str, callable]]):
|
|
"""Collect all translatable text from worksheet cells"""
|
|
for row in worksheet.iter_rows():
|
|
for cell in row:
|
|
if cell.value is not None:
|
|
self._collect_from_cell(cell, text_elements)
|
|
|
|
def _collect_from_cell(self, cell: Cell, text_elements: List[Tuple[str, callable]]):
|
|
"""Collect text from a cell"""
|
|
original_value = cell.value
|
|
|
|
if original_value is None:
|
|
return
|
|
|
|
# Handle formulas - collect text inside quotes
|
|
if isinstance(original_value, str) and original_value.startswith('='):
|
|
string_pattern = re.compile(r'"([^"]*)"')
|
|
strings = string_pattern.findall(original_value)
|
|
for s in strings:
|
|
if s.strip():
|
|
def make_formula_setter(c, orig_formula, orig_string):
|
|
def setter(translated):
|
|
c.value = orig_formula.replace(f'"{orig_string}"', f'"{translated}"')
|
|
return setter
|
|
text_elements.append((s, make_formula_setter(cell, original_value, s)))
|
|
# Handle regular text
|
|
elif isinstance(original_value, str) and original_value.strip():
|
|
def make_setter(c):
|
|
def setter(text):
|
|
c.value = text
|
|
return setter
|
|
text_elements.append((original_value, make_setter(cell)))
|
|
|
|
def _translate_images(self, worksheet: Worksheet, target_language: str):
|
|
"""Translate text in images using vision model"""
|
|
from services.translation_service import OllamaTranslationProvider
|
|
|
|
if not isinstance(self.translation_service.provider, OllamaTranslationProvider):
|
|
return
|
|
|
|
try:
|
|
images = getattr(worksheet, '_images', [])
|
|
|
|
for idx, image in enumerate(images):
|
|
try:
|
|
image_data = image._data()
|
|
ext = image.format or 'png'
|
|
|
|
with tempfile.NamedTemporaryFile(suffix=f'.{ext}', delete=False) as tmp:
|
|
tmp.write(image_data)
|
|
tmp_path = tmp.name
|
|
|
|
translated_text = self.translation_service.provider.translate_image(tmp_path, target_language)
|
|
os.unlink(tmp_path)
|
|
|
|
if translated_text and translated_text.strip():
|
|
anchor = image.anchor
|
|
if hasattr(anchor, '_from'):
|
|
cell_ref = f"{get_column_letter(anchor._from.col + 1)}{anchor._from.row + 1}"
|
|
cell = worksheet[cell_ref]
|
|
from openpyxl.comments import Comment
|
|
cell.comment = Comment(f"Image translation: {translated_text}", "Translator")
|
|
print(f"Added Excel image translation at {cell_ref}")
|
|
|
|
except Exception as e:
|
|
print(f"Error translating Excel image {idx}: {e}")
|
|
|
|
except Exception as e:
|
|
print(f"Error processing Excel images: {e}")
|
|
|
|
|
|
# Global translator instance
|
|
excel_translator = ExcelTranslator()
|