From 8dd7b5ee6993cd31c3c10b47453818dcee044e1a Mon Sep 17 00:00:00 2001
From: sepehr <sepehr1151@gmail.com>
Date: Mon, 7 Apr 2025 14:30:27 +0200
Subject: [PATCH] Fix the code and add the option to choose the translator
 model

---
 main.py | 104 +++++++++++++++++++++++++++++++++++++++++++-------------
 1 file changed, 81 insertions(+), 23 deletions(-)

diff --git a/main.py b/main.py
index d10c0b4..39d2bd1 100644
--- a/main.py
+++ b/main.py
@@ -16,15 +16,15 @@ from pydantic_ai import Agent
 from pydantic_ai.models.openai import OpenAIModel
 from pydantic_ai.providers.openai import OpenAIProvider
 from pydantic import BaseModel, Field
-from typing import Optional
+from typing import Optional, Literal
 # Configure logging
 logging.basicConfig(level=None, format='%(asctime)s - %(levelname)s - %(message)s')
 
 
-
 class TranslationOutput(BaseModel):
     translated_text: str
-async def translate_with_ollama(text: str, target_language: str, model_name: str = "llama3.1:latest") -> str:
+
+async def translate_with_ollama(text: str, target_language: str, model_name: str = "llama3.1:8b") -> str:
     """Translate text using Ollama via PydanticAI
     
     Args:
@@ -42,7 +42,7 @@ async def translate_with_ollama(text: str, target_language: str, model_name: str
     try:
         # Create agent with Ollama
         ollama_model = OpenAIModel(
-            model_name=model_name, provider=OpenAIProvider(base_url='http://localhost:11434/v1')
+            model_name=model_name, provider=OpenAIProvider(base_url='http://172.30.124.125:11434/v1')
         )
         agent = Agent(ollama_model, result_type=TranslationOutput,retries=2)
         # Create the input
@@ -50,7 +50,7 @@ async def translate_with_ollama(text: str, target_language: str, model_name: str
         
         # Execute translation
         result = await agent.run(
-        f"Translate this text to this language: {text} to {target_language}. Only return the translation, nothing else. NO ADDITIONAL TEXT.AND AVIOID Text to be translated,ADDITIONAL REMARKS,TRY TO understand the contexte for the translation, and avoid literal translation. "
+        f"Translate this text to this language: {text} to {target_language}. Only return the translation, nothing else. NO ADDITIONAL TEXT.AND AVIOID Text to be translated,ADDITIONAL REMARKS,TRY TO understand the contexte for the translation, and avoid literal translation. DON'T TrANSLATE NAMES AND SURNAMES and copy just the name for output, "
         )
         # print(text)
         # print(result.data.translated_text)
@@ -60,15 +60,41 @@ async def translate_with_ollama(text: str, target_language: str, model_name: str
         logging.error(f"Translation error with Ollama: {e}")
         return text  # Return original text on error
     
-async def translate_text(translator, text, target_language):
-    """Translate text to target language"""
+async def translate_text_google(translator, text, target_language):
+    """Translate text to target language using Google Translate"""
     try:
         translation = await translator.translate(text, dest=target_language)
         return translation.text
     except Exception as e:
-        logging.error(f"Translation error: {e}")
+        logging.error(f"Translation error with Google Translate: {e}")
         return text  # Return original if translation fails
 
+async def translate_text(text, target_language, translation_method, translator=None, model_name="llama3.1:8b"):
+    """Unified translation function that uses either Google Translate or LLM
+    
+    Args:
+        text: Text to translate
+        target_language: Target language code
+        translation_method: 'google' or 'llm'
+        translator: Google translator instance (required if method is 'google')
+        model_name: LLM model name (used if method is 'llm')
+        
+    Returns:
+        Translated text
+    """
+    if not text or (isinstance(text, str) and text.strip() == ""):
+        return text
+        
+    if translation_method == "google":
+        if translator is None:
+            raise ValueError("Translator instance is required when using Google Translate")
+        return await translate_text_google(translator, text, target_language)
+    elif translation_method == "llm":
+        return await translate_with_ollama(text, target_language, model_name)
+    else:
+        logging.error(f"Unknown translation method: {translation_method}")
+        return text
+
 def is_formula(text):
     """Check if cell value is a formula"""
     if isinstance(text, str):
@@ -181,7 +207,7 @@ def copy_sheet_formatting(source_sheet, target_sheet):
             source_row = source_sheet.row_dimensions[row_idx]
             target_sheet.row_dimensions[row_idx].height = source_row.height
 
-async def process_table_xml_safely(zip_path, target_language, translator, translated_cache):
+async def process_table_xml_safely(zip_path, target_language, translator, translated_cache, translation_method, model_name):
     """Process table XML files in Excel to translate headers with proper ZIP handling"""
     # Create temp directory
     temp_dir = tempfile.mkdtemp()
@@ -222,7 +248,8 @@ async def process_table_xml_safely(zip_path, target_language, translator, transl
                     if original_text in translated_cache:
                         root.attrib[attr] = translated_cache[original_text]
                     else:
-                        translated_text = await translate_text(translator, original_text, target_language)
+                        translated_text = await translate_text(original_text, target_language, 
+                                                             translation_method, translator, model_name)
                         root.attrib[attr] = translated_text
                         translated_cache[original_text] = translated_text
             
@@ -239,8 +266,8 @@ async def process_table_xml_safely(zip_path, target_language, translator, transl
                         if header_text in translated_cache:
                             column.attrib['name'] = translated_cache[header_text]
                         else:
-                            # translated_header = await translate_text(translator, header_text, target_language)
-                            translated_header = await translate_with_ollama(header_text, target_language)
+                            translated_header = await translate_text(header_text, target_language,
+                                                                  translation_method, translator, model_name)
                             column.attrib['name'] = translated_header
                             translated_cache[header_text] = translated_header
             
@@ -263,23 +290,39 @@ async def process_table_xml_safely(zip_path, target_language, translator, transl
         # Clean up
         shutil.rmtree(temp_dir)
 
-async def translate_excel(file_path: str, target_language: str):
-    """Translate Excel file while preserving all formatting including tables"""
+async def translate_excel(file_path: str, target_language: str, translation_method: str = "google", 
+                         llm_model: str = "llama3.1:8b"):
+    """Translate Excel file while preserving all formatting including tables
+    
+    Args:
+        file_path: Path to Excel file
+        target_language: Target language code (e.g. 'fr', 'en')
+        translation_method: 'google' or 'llm'
+        llm_model: LLM model name (used if method is 'llm')
+    """
     # Verify file exists
     if not os.path.exists(file_path):
         logging.error(f"File not found: {file_path}")
         raise FileNotFoundError(f"The file {file_path} does not exist.")
+    
+    # Validate translation method
+    if translation_method not in ["google", "llm"]:
+        logging.error(f"Invalid translation method: {translation_method}")
+        raise ValueError("Translation method must be 'google' or 'llm'")
         
     # Create a copy of the original file to work with
     base_name = os.path.splitext(file_path)[0]
-    translated_file_path = f"{base_name}_translated_{target_language}.xlsx"
+    method_suffix = "gt" if translation_method == "google" else "llm"
+    translated_file_path = f"{base_name}_translated_{target_language}_{method_suffix}.xlsx"
     
     logging.info(f"Creating a copy of the original file...")
     shutil.copy2(file_path, translated_file_path)
     
     # Open the copied file and modify it in-place
     workbook = load_workbook(translated_file_path)
-    translator = Translator()
+    
+    # Initialize translator if using Google
+    translator = Translator() if translation_method == "google" else None
     
     # Track unique values to minimize API calls
     translated_cache = {}
@@ -290,7 +333,7 @@ async def translate_excel(file_path: str, target_language: str):
         for sheet in workbook.worksheets
     )
     
-    with tqdm(total=total_cells, desc=f"Translating to {target_language}") as progress_bar:
+    with tqdm(total=total_cells, desc=f"Translating to {target_language} using {translation_method}") as progress_bar:
         # Process each sheet
         for sheet in workbook.worksheets:
             logging.info(f"Processing sheet: {sheet.title} ({sheet.max_row} rows × {sheet.max_column} columns)")
@@ -309,8 +352,13 @@ async def translate_excel(file_path: str, target_language: str):
                         if original_text in translated_cache:
                             cell.value = translated_cache[original_text]
                         else:
-                            # translated_text = await translate_text(translator, original_text, target_language)
-                            translated_text = await translate_with_ollama(original_text, target_language)
+                            translated_text = await translate_text(
+                                original_text, 
+                                target_language, 
+                                translation_method, 
+                                translator,
+                                llm_model
+                            )
                             cell.value = translated_text
                             translated_cache[original_text] = translated_text
     
@@ -320,15 +368,25 @@ async def translate_excel(file_path: str, target_language: str):
     
     # Process table XML files separately to fix table headers
     logging.info("Processing table structures...")
-    await process_table_xml_safely(translated_file_path, target_language, translator, translated_cache)
+    await process_table_xml_safely(translated_file_path, target_language, 
+                                  translator, translated_cache, 
+                                  translation_method, llm_model)
     
     logging.info(f"Translation complete! File saved as: {translated_file_path}")
     return translated_file_path
 
 async def main():
-    input_file = r"F:\Dev\excel-translator\data\sample\test_sample.xlsx"
-    language = "fr"  # French
-    await translate_excel(input_file, language)
+    input_file = r"C:\Users\serameza\host-data\Excels\BOLT eCAT System Modeling Q&A.xlsx"
+    language = "en"  # English
+    
+    # Choose translation method: "google" or "llm"
+    translation_method = "llm"  # Change to "google" to use Google Translate instead
+    
+    # LLM model name (only used if translation_method is "llm")
+    # llm_model = "llama3.1:8b"
+    llm_model = "qwen2.5:14b"
+    
+    await translate_excel(input_file, language, translation_method, llm_model)
 
 if __name__ == "__main__":
     asyncio.run(main())
\ No newline at end of file