From 64e54819aa45e0d38b74856d6f3d5fb8cdf41f21 Mon Sep 17 00:00:00 2001
From: sepehr
Date: Sun, 6 Apr 2025 21:39:00 +0200
Subject: [PATCH] first commit

---
 __init__.py               |   1 +
 main.py                   | 149 ++++++++++++++++++++++++++++++++++++++
 models/__init__.py        |   1 +
 models/excel_models.py    |  27 +++++++
 services/__init__.py      |   1 +
 services/excel_service.py |  98 +++++++++++++++++++++++++
 services/translator.py    | 114 +++++++++++++++++++++++++++++
 utils/__init__.py         |   1 +
 utils/helpers.py          |  60 +++++++++++++++
 9 files changed, 452 insertions(+)
 create mode 100644 __init__.py
 create mode 100644 main.py
 create mode 100644 models/__init__.py
 create mode 100644 models/excel_models.py
 create mode 100644 services/__init__.py
 create mode 100644 services/excel_service.py
 create mode 100644 services/translator.py
 create mode 100644 utils/__init__.py
 create mode 100644 utils/helpers.py

Note: "index" lines are omitted for the five non-empty modules because
their content was revised and the recorded blob hashes would not match.

diff --git a/__init__.py b/__init__.py
new file mode 100644
index 0000000..82789f2
--- /dev/null
+++ b/__init__.py
@@ -0,0 +1 @@
+# This file is intentionally left blank.
\ No newline at end of file
diff --git a/main.py b/main.py
new file mode 100644
--- /dev/null
+++ b/main.py
@@ -0,0 +1,149 @@
+"""Translate the text cells of an Excel workbook using googletrans."""
+
+import asyncio
+import copy
+import os
+import re
+import sys
+
+from googletrans import Translator
+from openpyxl import load_workbook, Workbook
+from tqdm import tqdm
+
+# Used by main() when no arguments are given on the command line.
+DEFAULT_INPUT_FILE = r"F:\Dev\excel-translator\data\sample\test_sample.xlsx"
+DEFAULT_LANGUAGE = "fr"  # French
+
+
+async def translate_text(translator, text, target_language):
+    """Translate ``text`` into ``target_language``.
+
+    Best effort: if the translator raises, the error is printed and the
+    original text is returned so one failing cell does not stop the run.
+    """
+    try:
+        translation = await translator.translate(text, dest=target_language)
+        return translation.text
+    except Exception as e:
+        print(f"Error translating '{text}': {e}")
+        return text  # Return the original text if translation fails
+
+
+def is_formula(text):
+    """Check if a cell value is a formula (a string starting with '=')."""
+    return isinstance(text, str) and text.startswith('=')
+
+
+def copy_cell_format(source_cell, target_cell):
+    """Copy the formatting and hyperlink of ``source_cell`` to ``target_cell``.
+
+    Style attributes are copied individually instead of the whole style
+    index, which the original implementation notes as problematic. Errors
+    are printed and otherwise ignored because formatting is best effort.
+    """
+    try:
+        if source_cell.has_style:
+            target_cell.font = copy.copy(source_cell.font)
+            target_cell.border = copy.copy(source_cell.border)
+            target_cell.fill = copy.copy(source_cell.fill)
+            target_cell.number_format = source_cell.number_format
+            target_cell.protection = copy.copy(source_cell.protection)
+            target_cell.alignment = copy.copy(source_cell.alignment)
+        # The hyperlink does not depend on styling, so copy it even when
+        # the source cell carries no style.
+        if source_cell.hyperlink:
+            target_cell.hyperlink = source_cell.hyperlink
+    except Exception as e:
+        print(f"Error copying format: {e}")
+
+
+async def translate_excel(file_path: str, target_language: str):
+    """Write a translated copy of the workbook at ``file_path``.
+
+    Only non-empty text cells are translated. Formulas and non-text values
+    (numbers, dates, booleans) are copied unchanged so they keep their
+    type. Cell formatting, column widths, row heights and merged ranges
+    are carried over. The copy is saved next to the source file, named
+    after it with ``_translated_`` and the language code appended.
+
+    Args:
+        file_path: Path of the workbook to translate.
+        target_language: Language code passed to the translator as ``dest``.
+
+    Returns:
+        The path of the translated workbook.
+    """
+    translator = Translator()
+    workbook = load_workbook(file_path)
+    translated_workbook = Workbook()
+    # Drop the default sheet up front so a source sheet that is itself
+    # titled "Sheet" keeps its title instead of being renamed.
+    translated_workbook.remove(translated_workbook.active)
+
+    # The bar advances once per cell, so count cells rather than rows.
+    total_cells = sum(
+        len(row)
+        for sheet in workbook.worksheets
+        for row in sheet.iter_rows()
+    )
+
+    with tqdm(total=total_cells, desc="Translating cells") as progress_bar:
+        for original_sheet in workbook.worksheets:
+            translated_sheet = translated_workbook.create_sheet(
+                title=original_sheet.title
+            )
+
+            # Copy sheet properties (column dimensions, etc.)
+            for key, dimension in original_sheet.column_dimensions.items():
+                if hasattr(dimension, 'width') and dimension.width:
+                    translated_sheet.column_dimensions[key].width = dimension.width
+
+            for key, dimension in original_sheet.row_dimensions.items():
+                if hasattr(dimension, 'height') and dimension.height:
+                    translated_sheet.row_dimensions[key].height = dimension.height
+
+            # Copy merged cells
+            for merged_cell_range in original_sheet.merged_cells:
+                translated_sheet.merge_cells(str(merged_cell_range))
+
+            for row in original_sheet.iter_rows():
+                for cell in row:
+                    progress_bar.update(1)
+                    value = cell.value
+                    # Translate plain text only; formulas and non-string
+                    # values (including 0 and False) are copied verbatim.
+                    if (
+                        isinstance(value, str)
+                        and value.strip()
+                        and not is_formula(value)
+                    ):
+                        value = await translate_text(
+                            translator, value, target_language
+                        )
+                    translated_cell = translated_sheet.cell(
+                        row=cell.row, column=cell.column, value=value
+                    )
+                    copy_cell_format(cell, translated_cell)
+
+    translated_file_path = (
+        os.path.splitext(file_path)[0] + f"_translated_{target_language}.xlsx"
+    )
+    translated_workbook.save(translated_file_path)
+    print(f"Translated file saved as: {translated_file_path}")
+    return translated_file_path
+
+
+async def main():
+    """Translate the workbook named on the command line.
+
+    Usage: ``python main.py [input_file [language]]``. Missing arguments
+    fall back to ``DEFAULT_INPUT_FILE`` and ``DEFAULT_LANGUAGE``.
+    """
+    args = sys.argv[1:]
+    input_file = args[0] if args else DEFAULT_INPUT_FILE
+    language = args[1] if len(args) > 1 else DEFAULT_LANGUAGE
+    await translate_excel(input_file, language)
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
\ No newline at end of file
diff --git a/models/__init__.py b/models/__init__.py
new file mode 100644
index 0000000..82789f2
--- /dev/null
+++ b/models/__init__.py
@@ -0,0 +1 @@
+# This file is intentionally left blank.
\ No newline at end of file
diff --git a/models/excel_models.py b/models/excel_models.py
new file mode 100644
--- /dev/null
+++ b/models/excel_models.py
@@ -0,0 +1,27 @@
+"""Pydantic models describing the contents of an Excel workbook."""
+
+from typing import Any, Dict, List
+
+from pydantic import BaseModel
+
+
+class Cell(BaseModel):
+    """A single cell: its value plus a free-form formatting mapping."""
+
+    # Any rather than str: a cell may be empty (None) or hold a number,
+    # boolean or date, not only text.
+    value: Any = None
+    format: Dict[str, Any]
+
+
+class Sheet(BaseModel):
+    """A named worksheet holding rows of cells."""
+
+    name: str
+    cells: List[List[Cell]]
+
+
+class ExcelFile(BaseModel):
+    """A workbook as a list of sheets."""
+
+    sheets: List[Sheet]
\ No newline at end of file
diff --git a/services/__init__.py b/services/__init__.py
new file mode 100644
index 0000000..82789f2
--- /dev/null
+++ b/services/__init__.py
@@ -0,0 +1 @@
+# This file is intentionally left blank.
\ No newline at end of file
diff --git a/services/excel_service.py b/services/excel_service.py
new file mode 100644
--- /dev/null
+++ b/services/excel_service.py
@@ -0,0 +1,98 @@
+"""Read and write workbook contents as pydantic models."""
+
+from typing import Any, List
+
+from openpyxl import load_workbook, Workbook
+from openpyxl.styles import Alignment, Border, Font, PatternFill
+from pydantic import BaseModel
+
+
+class CellData(BaseModel):
+    """Value and style attributes of one cell.
+
+    Each style field holds the attribute dictionary of the matching
+    openpyxl style object, as captured by ``read_excel``.
+    """
+
+    # Any rather than str: cells may be empty or hold numbers and dates.
+    value: Any = None
+    font: dict
+    fill: dict
+    border: dict
+    alignment: dict
+
+
+class SheetData(BaseModel):
+    """A worksheet name together with its rows of cells."""
+
+    name: str
+    cells: List[List[CellData]]
+
+
+def _restore_style(style_class, attributes):
+    """Rebuild a style object from its attribute dictionary.
+
+    Returns None when the dictionary does not fit ``style_class``, so the
+    caller can leave the cell's default style in place.
+    """
+    try:
+        return style_class(**attributes)
+    except (TypeError, ValueError):
+        return None
+
+
+def read_excel(file_path: str) -> List[SheetData]:
+    """Load every worksheet of ``file_path`` into ``SheetData`` models."""
+    workbook = load_workbook(file_path)
+    sheets_data = []
+
+    for ws in workbook.worksheets:
+        cells_data = [
+            [
+                CellData(
+                    value=cell.value,
+                    # Copy each dictionary so the model does not alias
+                    # the live style object of the workbook.
+                    font=dict(vars(cell.font)),
+                    fill=dict(vars(cell.fill)),
+                    border=dict(vars(cell.border)),
+                    alignment=dict(vars(cell.alignment)),
+                )
+                for cell in row
+            ]
+            for row in ws.iter_rows()
+        ]
+        sheets_data.append(SheetData(name=ws.title, cells=cells_data))
+
+    return sheets_data
+
+
+def write_excel(file_path: str, sheets_data: List[SheetData]):
+    """Write ``sheets_data`` to a new workbook saved at ``file_path``."""
+    workbook = Workbook()
+    if sheets_data:
+        # Drop the default sheet first so a sheet that is itself named
+        # "Sheet" keeps its name. With no input sheets the default one is
+        # kept, because openpyxl cannot save a workbook without sheets.
+        workbook.remove(workbook.active)
+
+    for sheet_data in sheets_data:
+        ws = workbook.create_sheet(title=sheet_data.name)
+
+        for row_index, row in enumerate(sheet_data.cells, start=1):
+            for col_index, cell_data in enumerate(row, start=1):
+                cell = ws.cell(row=row_index, column=col_index, value=cell_data.value)
+                # Styles must be openpyxl style objects; assigning the raw
+                # dictionaries fails, so rebuild them first.
+                styles = (
+                    ("font", Font, cell_data.font),
+                    ("fill", PatternFill, cell_data.fill),
+                    ("border", Border, cell_data.border),
+                    ("alignment", Alignment, cell_data.alignment),
+                )
+                for attribute, style_class, attributes in styles:
+                    style = _restore_style(style_class, attributes)
+                    if style is not None:
+                        setattr(cell, attribute, style)
+
+    workbook.save(file_path)
\ No newline at end of file
diff --git a/services/translator.py b/services/translator.py
new file mode 100644
--- /dev/null
+++ b/services/translator.py
@@ -0,0 +1,114 @@
+"""Synchronous translation of Excel workbooks with googletrans."""
+
+import asyncio
+import copy
+import inspect
+import os
+from typing import List
+
+import openpyxl
+from googletrans import Translator
+from pydantic import BaseModel
+
+
+class Cell(BaseModel):
+    """A cell value together with its row and column position."""
+
+    value: str
+    row: int
+    column: int
+
+
+class Sheet(BaseModel):
+    """A named sheet as a flat list of cells."""
+
+    name: str
+    cells: List[Cell]
+
+
+class ExcelTranslator:
+    """Translate text, sheets and whole workbooks between two languages."""
+
+    def __init__(self, src_language: str, dest_language: str):
+        """Create a translator from ``src_language`` to ``dest_language``."""
+        self.translator = Translator()
+        self.src_language = src_language
+        self.dest_language = dest_language
+        self._loop = None  # Created on first use by _event_loop().
+
+    def translate_text(self, text: str) -> str:
+        """Return ``text`` translated from the source to the target language.
+
+        If the installed googletrans returns an awaitable from
+        ``translate`` (main.py awaits it), the result is run to completion
+        on a private event loop so this method stays synchronous. In that
+        case it must not be called from inside a running event loop.
+        """
+        translated = self.translator.translate(
+            text, src=self.src_language, dest=self.dest_language
+        )
+        if inspect.isawaitable(translated):
+            translated = self._event_loop().run_until_complete(translated)
+        return translated.text
+
+    def _event_loop(self):
+        """Return the private event loop, creating it on first use."""
+        # getattr: instances built without __init__ have no _loop yet.
+        loop = getattr(self, "_loop", None)
+        if loop is None or loop.is_closed():
+            loop = self._loop = asyncio.new_event_loop()
+        return loop
+
+    def translate_sheet(self, sheet: Sheet) -> Sheet:
+        """Return a copy of ``sheet`` with every cell value translated."""
+        translated_cells = [
+            Cell(
+                value=self.translate_text(cell.value),
+                row=cell.row,
+                column=cell.column,
+            )
+            for cell in sheet.cells
+        ]
+        return Sheet(name=sheet.name, cells=translated_cells)
+
+    def translate_workbook(self, file_path: str) -> None:
+        """Save a translated copy next to ``file_path`` as ``*_translated.xlsx``.
+
+        Only non-empty text cells are translated; formulas, other value
+        types and empty cells are copied as they are.
+        """
+        workbook = openpyxl.load_workbook(file_path)
+        translated_workbook = openpyxl.Workbook()
+        # Drop the default sheet so the output holds only the source sheets.
+        translated_workbook.remove(translated_workbook.active)
+
+        for sheet in workbook.worksheets:
+            translated_sheet = translated_workbook.create_sheet(title=sheet.title)
+
+            for row in sheet.iter_rows():
+                for cell in row:
+                    value = cell.value
+                    if (
+                        isinstance(value, str)
+                        and value.strip()
+                        and not value.startswith('=')
+                    ):
+                        value = self.translate_text(value)
+                    translated_cell = translated_sheet.cell(
+                        row=cell.row, column=cell.column, value=value
+                    )
+                    # Preserve cell formatting. Style objects are copied one
+                    # by one because the raw ``_style`` array stores indexes
+                    # into the source workbook's own style tables.
+                    if cell.has_style:
+                        translated_cell.font = copy.copy(cell.font)
+                        translated_cell.border = copy.copy(cell.border)
+                        translated_cell.fill = copy.copy(cell.fill)
+                        translated_cell.number_format = cell.number_format
+                        translated_cell.protection = copy.copy(cell.protection)
+                        translated_cell.alignment = copy.copy(cell.alignment)
+
+        # splitext instead of str.replace: a path without a literal ".xlsx"
+        # would otherwise be left unchanged and the source overwritten.
+        base, _ = os.path.splitext(file_path)
+        translated_workbook.save(f"{base}_translated.xlsx")
\ No newline at end of file
diff --git a/utils/__init__.py b/utils/__init__.py
new file mode 100644
index 0000000..82789f2
--- /dev/null
+++ b/utils/__init__.py
@@ -0,0 +1 @@
+# This file is intentionally left blank.
\ No newline at end of file
diff --git a/utils/helpers.py b/utils/helpers.py
new file mode 100644
--- /dev/null
+++ b/utils/helpers.py
@@ -0,0 +1,60 @@
+"""Logging and file helpers for the Excel translator."""
+
+import logging
+import os
+from typing import Any, Dict, NoReturn
+
+
+def setup_logging() -> None:
+    """Configure logging at INFO level with a timestamped message format."""
+    logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+
+
+def save_translated_file(file_path: str, data: Any) -> str:
+    """Save ``data`` beside ``file_path``, adding ``_translated`` to its name.
+
+    Args:
+        file_path: Path of the source workbook; used to derive the output path.
+        data: An object with a ``to_excel`` method, or a dict mapping sheet
+            names to such objects (the shape ``read_excel_file`` returns).
+
+    Returns:
+        The path the data was written to.
+    """
+    base, ext = os.path.splitext(file_path)
+    translated_file_path = f"{base}_translated{ext}"
+
+    if isinstance(data, dict):
+        import pandas as pd
+
+        # One output sheet per entry, mirroring read_excel_file's result.
+        with pd.ExcelWriter(translated_file_path) as writer:
+            for sheet_name, frame in data.items():
+                frame.to_excel(writer, sheet_name=str(sheet_name), index=False)
+    else:
+        # Assuming data is a DataFrame or similar structure
+        data.to_excel(translated_file_path, index=False)
+
+    logging.info("Translated file saved as: %s", translated_file_path)
+    return translated_file_path
+
+
+def read_excel_file(file_path: str) -> Dict[str, Any]:
+    """Read every sheet of ``file_path`` into a mapping keyed by sheet name."""
+    import pandas as pd
+
+    logging.info("Reading Excel file: %s", file_path)
+    return pd.read_excel(file_path, sheet_name=None)
+
+
+def translate_text(text: str, target_language: str) -> str:
+    """Placeholder: log the request and return ``text`` unchanged."""
+    # Placeholder for translation logic
+    logging.info("Translating text: %s to %s", text, target_language)
+    return text  # Replace with actual translation logic
+
+
+def handle_file_not_found(file_path: str) -> NoReturn:
+    """Log the missing path and raise ``FileNotFoundError``."""
+    logging.error("File not found: %s", file_path)
+    raise FileNotFoundError(f"The file {file_path} does not exist.")
\ No newline at end of file