Enhance Excel translation functionality with robust formatting and error handling
This commit is contained in:
parent
11b85d6111
commit
cc1decc9ed
337
main.py
337
main.py
@ -5,130 +5,279 @@ import os
|
|||||||
from tqdm import tqdm
|
from tqdm import tqdm
|
||||||
import copy
|
import copy
|
||||||
import re
|
import re
|
||||||
|
import shutil
|
||||||
|
import logging
|
||||||
|
import xml.etree.ElementTree as ET
|
||||||
|
import zipfile
|
||||||
|
import tempfile
|
||||||
from openpyxl.utils import get_column_letter
|
from openpyxl.utils import get_column_letter
|
||||||
|
from openpyxl.styles import PatternFill, Border, Side, Alignment, Protection, Font
|
||||||
|
|
||||||
|
# Configure logging
|
||||||
|
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
|
||||||
|
|
||||||
async def translate_text(translator, text, target_language):
    """Translate ``text`` into ``target_language``, falling back to the input.

    Args:
        translator: Object exposing an awaitable ``translate(text, dest=...)``
            whose result carries the translated string in ``.text``.
        text: The text to translate.
        target_language: Language code forwarded to the translator as ``dest``.

    Returns:
        The translated text. ``text`` is returned unchanged when it is a blank
        string or when the translation attempt raises.
    """
    # A blank string has nothing to translate; skip the remote round trip.
    if isinstance(text, str) and not text.strip():
        return text

    try:
        translation = await translator.translate(text, dest=target_language)
        return translation.text
    except Exception as e:
        # Deliberate best-effort: one failed cell must not abort the whole
        # workbook, so log the problem and keep the original content.
        logging.error("Translation error: %s", e)
        return text  # Return original if translation fails
|
||||||
|
|
||||||
def is_formula(text):
    """Return True when ``text`` is a string beginning with ``=``.

    Any non-string value (numbers, dates, ``None``) yields False.
    """
    return isinstance(text, str) and text.startswith('=')
|
||||||
|
|
||||||
def copy_cell_format(source_cell, target_cell):
|
def should_translate(cell):
    """Decide whether a cell holds text worth sending to the translator.

    Args:
        cell: Object exposing the cell content as ``cell.value``.

    Returns:
        True only for non-blank string values that are not formulas.
        ``None``, numbers, dates, blank strings and formulas give False.
    """
    value = cell.value

    # Only translate string values (this also rejects None).
    if not isinstance(value, str):
        return False

    # Empty / whitespace-only text would cost an API call for nothing.
    if not value.strip():
        return False

    # Skip formulas so they keep working in the translated copy.
    return not is_formula(value)
|
||||||
target_cell.protection = copy.copy(source_cell.protection)
|
|
||||||
if source_cell.alignment:
|
|
||||||
target_cell.alignment = copy.copy(source_cell.alignment)
|
|
||||||
# Copy any hyperlink
|
|
||||||
if source_cell.hyperlink:
|
|
||||||
target_cell.hyperlink = source_cell.hyperlink
|
|
||||||
except Exception as e:
|
|
||||||
print(f"Error copying format for cell {source_cell.coordinate}: {e}")
|
|
||||||
|
|
||||||
def copy_sheet_properties(source_sheet, target_sheet):
|
def copy_cell_formatting(source_cell, target_cell):
    """Copy the visual formatting of ``source_cell`` onto ``target_cell``.

    Each style object (font, fill, border, alignment, protection) is
    duplicated as a whole with ``copy.copy`` rather than rebuilt field by
    field, so no attribute is lost and fills that are not pattern fills are
    carried over too. The number format and any hyperlink are copied as well.

    Args:
        source_cell: Cell to read the formatting from; ``None`` is a no-op.
        target_cell: Cell to apply the formatting to; ``None`` is a no-op.
    """
    if source_cell is None or target_cell is None:
        return

    # Cells without explicit styling only carry defaults; nothing to copy.
    if getattr(source_cell, "has_style", True):
        for attr in ("font", "fill", "border", "alignment", "protection"):
            style = getattr(source_cell, attr, None)
            if style is not None:
                # Copy so the two cells never share one style instance.
                setattr(target_cell, attr, copy.copy(style))

        number_format = getattr(source_cell, "number_format", None)
        if number_format:
            target_cell.number_format = number_format

    # Hyperlink
    hyperlink = getattr(source_cell, "hyperlink", None)
    if hyperlink:
        target_cell.hyperlink = copy.copy(hyperlink)
|
||||||
|
|
||||||
|
def copy_sheet_formatting(source_sheet, target_sheet):
    """Copy column and row dimension settings from one sheet to another.

    Width/height, the hidden flag and the outline level are copied for every
    dimension entry the source sheet holds. The dimension mappings are walked
    directly, so entries that lie outside the used cell range are kept too.

    Args:
        source_sheet: Sheet exposing ``column_dimensions`` and
            ``row_dimensions`` mappings to read from.
        target_sheet: Sheet whose dimension mappings create entries on first
            access; it receives the copied settings.
    """
    # Copy column dimensions (snapshot the items: lookups may add entries).
    for col_letter, source_dim in list(source_sheet.column_dimensions.items()):
        target_dim = target_sheet.column_dimensions[col_letter]
        if source_dim.width is not None:
            target_dim.width = source_dim.width
        target_dim.hidden = source_dim.hidden
        target_dim.outlineLevel = source_dim.outlineLevel

    # Copy row dimensions
    for row_idx, source_row in list(source_sheet.row_dimensions.items()):
        target_row = target_sheet.row_dimensions[row_idx]
        if source_row.height is not None:
            target_row.height = source_row.height
        target_row.hidden = source_row.hidden
        target_row.outlineLevel = source_row.outlineLevel
|
||||||
|
|
||||||
|
def _register_xml_namespaces(xml_path):
    """Register every namespace prefix declared in ``xml_path`` with ElementTree."""
    for _event, (prefix, uri) in ET.iterparse(xml_path, events=("start-ns",)):
        try:
            ET.register_namespace(prefix, uri)
        except ValueError:
            # ElementTree reserves prefixes of the form "ns<digits>".
            logging.debug("Skipping reserved namespace prefix %r", prefix)


async def _translate_table_headers(table_file, target_language, translator, translated_cache):
    """Translate the ``name`` of every ``tableColumn`` in one table XML file."""
    # Without this, ElementTree rewrites the document with generated
    # "ns0:"-style prefixes instead of the ones the file declared.
    _register_xml_namespaces(table_file)

    tree = ET.parse(table_file)
    root = tree.getroot()

    # Element tags are stored as "{namespace-uri}local-name".
    ns_prefix = root.tag[:root.tag.index('}') + 1] if root.tag.startswith('{') else ''

    for column in root.iter(f"{ns_prefix}tableColumn"):
        header_text = column.attrib.get('name')
        if not header_text or not header_text.strip():
            continue
        # Reuse the cell translations so headers stay in sync with the sheet.
        if header_text not in translated_cache:
            translated_cache[header_text] = await translate_text(
                translator, header_text, target_language
            )
        column.attrib['name'] = translated_cache[header_text]

    # Save the changes
    tree.write(table_file, encoding='UTF-8', xml_declaration=True)


async def process_table_xml_safely(zip_path, target_language, translator, translated_cache):
    """Translate table column headers stored in an .xlsx archive, in place.

    The archive is unpacked to a temporary directory, every ``*.xml`` file
    under ``xl/tables`` gets its ``tableColumn`` names translated, and the
    archive is rebuilt (compressed) and swapped in for ``zip_path``.

    The table's own ``name`` / ``displayName`` attributes are intentionally
    left untouched: they are identifiers (must be unique, without spaces, and
    may be referenced from formulas), so machine-translated text is not a safe
    replacement for them.

    Args:
        zip_path: Path of the .xlsx file to update.
        target_language: Language code handed to ``translate_text``.
        translator: Translator object handed to ``translate_text``.
        translated_cache: Dict mapping original text to translated text. It is
            consulted before translating and extended with new translations.

    Returns:
        None. When the archive holds no table XML files it is left unchanged.
    """
    # The context manager removes the temp directory on every exit path.
    with tempfile.TemporaryDirectory() as temp_dir:
        # Extract all files
        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
            zip_ref.extractall(temp_dir)

        # Find all table XML files
        tables_dir = os.path.join(temp_dir, 'xl', 'tables')
        table_files = [
            os.path.join(root, file)
            for root, _dirs, files in os.walk(tables_dir)
            for file in files
            if file.endswith('.xml')
        ]

        if not table_files:
            logging.info("No table XML files found in the Excel file")
            return

        logging.info(f"Found {len(table_files)} table XML files to process")

        # Process each table XML
        for table_file in table_files:
            logging.info(f"Processing table XML: {os.path.basename(table_file)}")
            await _translate_table_headers(
                table_file, target_language, translator, translated_cache
            )

        # Create a new zip file next to the original, then swap it in.
        new_zip_path = zip_path + '.new'
        try:
            with zipfile.ZipFile(new_zip_path, 'w', zipfile.ZIP_DEFLATED) as new_zip:
                for folder_path, _subfolders, files in os.walk(temp_dir):
                    for file in files:
                        absolute_path = os.path.join(folder_path, file)
                        relative_path = os.path.relpath(absolute_path, temp_dir)
                        new_zip.write(absolute_path, relative_path)

            # Replace the old zip with the new one
            os.replace(new_zip_path, zip_path)
        finally:
            # Only still present when building or swapping the archive failed.
            if os.path.exists(new_zip_path):
                os.remove(new_zip_path)
|
||||||
|
|
||||||
async def translate_excel(file_path: str, target_language: str):
    """Translate an Excel file while preserving formatting, including tables.

    The source file is copied to ``<name>_translated_<lang>.xlsx`` and the
    copy is edited in place: every translatable cell value is replaced by its
    translation, the workbook is saved, and the table header XML inside the
    saved archive is translated afterwards with the same cache.

    Args:
        file_path: Path of the workbook to translate.
        target_language: Language code to translate into.

    Returns:
        The path of the translated copy.

    Raises:
        FileNotFoundError: If ``file_path`` is not an existing file.
    """
    # Verify file exists (isfile also rejects directories, which cannot be
    # copied or opened as a workbook).
    if not os.path.isfile(file_path):
        logging.error(f"File not found: {file_path}")
        raise FileNotFoundError(f"The file {file_path} does not exist.")

    # Create a copy of the original file to work with
    base_name = os.path.splitext(file_path)[0]
    translated_file_path = f"{base_name}_translated_{target_language}.xlsx"

    logging.info("Creating a copy of the original file...")
    shutil.copy2(file_path, translated_file_path)

    # Open the copied file and modify it in-place
    workbook = load_workbook(translated_file_path)
    translator = Translator()

    # Track unique values to minimize API calls
    translated_cache = {}

    # Count total cells for progress bar
    total_cells = sum(
        sheet.max_row * sheet.max_column
        for sheet in workbook.worksheets
    )

    with tqdm(total=total_cells, desc=f"Translating to {target_language}") as progress_bar:
        # Process each sheet
        for sheet in workbook.worksheets:
            logging.info(f"Processing sheet: {sheet.title} ({sheet.max_row} rows × {sheet.max_column} columns)")

            for row in sheet.iter_rows():
                for cell in row:
                    progress_bar.update(1)

                    # Check if cell should be translated
                    if not should_translate(cell):
                        continue

                    original_text = str(cell.value)

                    # Translate each distinct text once, then reuse it.
                    if original_text not in translated_cache:
                        translated_cache[original_text] = await translate_text(
                            translator, original_text, target_language
                        )
                    cell.value = translated_cache[original_text]

    # Save the translated workbook
    workbook.save(translated_file_path)
    logging.info("Basic cell translation complete")

    # Process table XML files separately to fix table headers; this must run
    # after the save because it rewrites the archive on disk.
    logging.info("Processing table structures...")
    await process_table_xml_safely(translated_file_path, target_language, translator, translated_cache)

    logging.info(f"Translation complete! File saved as: {translated_file_path}")
    return translated_file_path
|
|
||||||
|
|
||||||
async def main():
|
async def main():
|
||||||
input_file = r"F:\Dev\excel-translator\data\sample\test_sample.xlsx"
|
input_file = r"F:\Dev\excel-translator\data\sample\test_sample.xlsx"
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user