# Listing metadata (from the code viewer this file was copied from): 139 lines, 5.8 KiB, Python
from openpyxl import load_workbook, Workbook
|
|
import asyncio
|
|
from googletrans import Translator
|
|
import os
|
|
from tqdm import tqdm
|
|
import copy
|
|
import re
|
|
from openpyxl.utils import get_column_letter
|
|
|
|
async def translate_text(translator, text, target_language):
    """Translate ``text`` into ``target_language``, falling back to the input.

    Awaits ``translator.translate(text, dest=target_language)`` and returns
    the ``.text`` attribute of the result.

    Args:
        translator: Object exposing an awaitable ``translate(text, dest=...)``.
        text: The string to translate.
        target_language: Destination language code passed as ``dest``.

    Returns:
        The translated string, or ``text`` unchanged if anything raised.
    """
    try:
        result = await translator.translate(text, dest=target_language)
        translated = result.text
    except Exception as exc:
        # Best effort: report the failure and keep the original wording
        # instead of aborting the caller.
        print(f"Error translating '{text}': {exc}")
        translated = text
    return translated
|
|
|
|
def is_formula(text):
    """Return True when ``text`` is a string beginning with ``=``.

    Non-string values (numbers, None, ...) are never formulas.
    """
    return isinstance(text, str) and text.startswith('=')
|
|
|
|
def copy_cell_format(source_cell, target_cell):
    """Replicate the styling of ``source_cell`` onto ``target_cell``.

    Style attributes are transferred one at a time rather than by assigning
    the whole style object. Nothing happens when ``source_cell.has_style`` is
    falsy, and attributes whose value is falsy are skipped. Any exception is
    printed together with the source coordinate and then swallowed, so
    attributes applied before the failure remain set.
    """
    if not source_cell.has_style:
        return

    # (attribute name, shallow-copy before assigning?) in application order.
    # number_format and hyperlink are assigned as-is; the rest are copied so
    # the two cells do not share a style object.
    transfer_plan = (
        ("font", True),
        ("border", True),
        ("fill", True),
        ("number_format", False),
        ("protection", True),
        ("alignment", True),
        ("hyperlink", False),
    )

    try:
        for attr_name, needs_copy in transfer_plan:
            attr_value = getattr(source_cell, attr_name)
            if not attr_value:
                continue
            if needs_copy:
                attr_value = copy.copy(attr_value)
            setattr(target_cell, attr_name, attr_value)
    except Exception as e:
        print(f"Error copying format for cell {source_cell.coordinate}: {e}")
|
|
|
|
def _copy_dimension(source_dim, target_dim, size_attr, span_attrs=()):
    """Copy one row/column dimension's size, visibility, outline and span."""
    size = getattr(source_dim, size_attr, None)
    if size:
        # Only an explicit size is carried over; otherwise the target keeps
        # whatever default it already has.
        setattr(target_dim, size_attr, size)
    for attr in ("hidden", "outlineLevel"):
        if hasattr(source_dim, attr):
            setattr(target_dim, attr, getattr(source_dim, attr))
    for attr in span_attrs:
        span_value = getattr(source_dim, attr, None)
        if span_value is not None:
            setattr(target_dim, attr, span_value)


def copy_sheet_properties(source_sheet, target_sheet):
    """Copy column and row dimensions from ``source_sheet`` to ``target_sheet``.

    Every entry stored in ``source_sheet.column_dimensions`` and
    ``source_sheet.row_dimensions`` is copied, rather than only the indices
    inside ``1..max_column`` / ``1..max_row``, so dimensions defined outside
    the used cell range are kept.

    For columns the ``min``/``max`` span is copied as well when present.
    NOTE(review): this assumes a dimension covering a column group is stored
    under its first column with ``min``/``max`` describing the span, as
    openpyxl does on load; without the span the width would apply to that
    first column only.

    Args:
        source_sheet: Sheet whose dimensions are read.
        target_sheet: Sheet whose ``column_dimensions`` / ``row_dimensions``
            are indexed by the same keys and updated in place.
    """
    for col_key, source_dim in source_sheet.column_dimensions.items():
        _copy_dimension(
            source_dim,
            target_sheet.column_dimensions[col_key],
            "width",
            span_attrs=("min", "max"),
        )

    for row_key, source_dim in source_sheet.row_dimensions.items():
        _copy_dimension(
            source_dim,
            target_sheet.row_dimensions[row_key],
            "height",
        )
|
|
|
|
async def _translate_cell_value(translator, value, target_language):
    """Return the value to write for one source cell value.

    Only non-blank, non-formula strings are sent to the translator. Numbers,
    dates, booleans, ``None``, formulas and whitespace-only strings are
    returned unchanged so they keep their original type in the output.
    """
    if not isinstance(value, str) or is_formula(value) or not value.strip():
        return value
    return await translate_text(translator, value, target_language)


async def translate_excel(file_path: str, target_language: str):
    """Translate the text cells of a workbook and save a formatted copy.

    Each worksheet of ``file_path`` is recreated in a new workbook with the
    same title, row/column dimensions, merged ranges and per-cell formatting.
    String cells are translated one by one; formulas and non-string values
    are copied as they are. The result is written next to the input as
    ``<name>_translated_<target_language>.xlsx``.

    Args:
        file_path: Path of the workbook to read.
        target_language: Language code handed to the translator and used in
            the output file name.

    Returns:
        None. The output path is printed once the file has been saved.
    """
    translator = Translator()
    workbook = load_workbook(file_path)

    translated_workbook = Workbook()
    # Drop the auto-created default sheet *before* copying, so a source sheet
    # that is itself called "Sheet" keeps its name instead of being created
    # as "Sheet1" and then losing its namesake.
    translated_workbook.remove(translated_workbook.active)

    # Count cells (not rows) so the total matches the per-cell updates below.
    total_cells = sum(
        len(row)
        for sheet in workbook.worksheets
        for row in sheet.iter_rows()
    )

    # The context manager closes the bar even if a sheet fails midway.
    with tqdm(total=total_cells, desc="Translating cells") as progress_bar:
        # Iterate worksheets (the same collection that was counted) rather
        # than sheet names, which may also name sheets that hold no cells.
        for original_sheet in workbook.worksheets:
            sheet_name = original_sheet.title
            translated_sheet = translated_workbook.create_sheet(title=sheet_name)

            print(f"Processing sheet: {sheet_name} with {original_sheet.max_column} columns")

            copy_sheet_properties(original_sheet, translated_sheet)

            for merged_cell_range in original_sheet.merged_cells:
                translated_sheet.merge_cells(str(merged_cell_range))

            for row in original_sheet.iter_rows():
                for cell in row:
                    progress_bar.update(1)

                    new_value = await _translate_cell_value(
                        translator, cell.value, target_language
                    )
                    if new_value is None:
                        # Still materialise the cell so its formatting can
                        # be copied onto it.
                        translated_cell = translated_sheet.cell(
                            row=cell.row, column=cell.column
                        )
                    else:
                        translated_cell = translated_sheet.cell(
                            row=cell.row, column=cell.column, value=new_value
                        )

                    copy_cell_format(cell, translated_cell)

    translated_file_path = os.path.splitext(file_path)[0] + f"_translated_{target_language}.xlsx"
    translated_workbook.save(translated_file_path)
    print(f"Translated file saved as: {translated_file_path}")
|
|
|
|
async def main(
    input_file: str = r"F:\Dev\excel-translator\data\sample\test_sample.xlsx",
    language: str = "fr",
):
    """Translate one workbook into one language.

    Args:
        input_file: Path of the workbook to translate. Defaults to the
            sample file this script was written against.
        language: Target language code. Defaults to ``"fr"`` (French).
    """
    await translate_excel(input_file, language)
|
|
|
|
if __name__ == "__main__":
    # Script entry point: run the async pipeline to completion.
    asyncio.run(main())