#!/usr/bin/env python3
"""
Migrate and translate all non-multilingual glossaries in the database to
multilingual glossaries.

It uses the free Google Translate provider to fill in the translations for the
target languages (de, es, it, pt, nl, ru, ja, ko, zh, ar, fa) in the
glossary_terms table. The 'en' entry is not machine-translated: it is
back-filled from each term's existing ``target`` value.
"""

import sys
import logging
from pathlib import Path

# Make the project root importable when this file is executed as a script.
ROOT = Path(__file__).resolve().parent.parent
sys.path.insert(0, str(ROOT))

from database.connection import get_sync_session
from database.models import Glossary, GlossaryTerm
from services.providers.google_provider import get_legacy_google_adapter

logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(message)s",
)
logger = logging.getLogger("translate_non_multilingual_glossaries")

# Languages requested from the translation provider for every term.
TARGET_LANGUAGES = ["de", "es", "it", "pt", "nl", "ru", "ja", "ko", "zh", "ar", "fa"]


def translate_term(adapter, text_to_translate: str, src_lang: str, target_langs: list[str]) -> dict:
    """Translate one text into each of ``target_langs``.

    Args:
        adapter: Object exposing
            ``translate(text, target_language=..., source_language=...)``.
        text_to_translate: The text to translate.
        src_lang: Language code passed to the adapter as ``source_language``.
        target_langs: Language codes to translate into.

    Returns:
        A dict mapping every requested language code to the stripped
        translation, or to ``""`` when that translation failed.
    """
    translations = {}
    for lang in target_langs:
        try:
            translated = adapter.translate(
                text_to_translate,
                target_language=lang,
                source_language=src_lang,
            )
            translations[lang] = translated.strip()
        except Exception as e:
            # Deliberate best-effort: one failing language must not abort the
            # whole migration. The empty string is treated as "missing" by the
            # caller, so the language is retried on the next run.
            logger.error("Failed to translate '%s' to %s: %s", text_to_translate, lang, e)
            translations[lang] = ""
    return translations


def _multilingual_name(name: str) -> str:
    """Return ``name`` rewritten to mark the glossary as multilingual."""
    if "Anglais" in name:
        return name.replace("Anglais", "Multilingue")
    if "English" in name:
        return name.replace("English", "Multilingue")
    return f"{name} → Multilingue"


def _migrate_glossary(session, adapter, glossary) -> None:
    """Convert one glossary to multilingual and fill in missing translations."""
    logger.info("Processing glossary '%s' (ID: %s)", glossary.name, glossary.id)

    # Only glossaries that are not yet 'multi' are renamed, so running the
    # script again does not append the suffix a second time.
    glossary_changed = False
    if glossary.target_language != "multi":
        glossary.target_language = "multi"
        glossary.name = _multilingual_name(glossary.name)
        glossary_changed = True

    src_lang = glossary.source_language or "fr"
    terms = session.query(GlossaryTerm).filter(GlossaryTerm.glossary_id == glossary.id).all()
    logger.info("Checking translations for %s terms in '%s'...", len(terms), glossary.name)

    updated_terms_count = 0
    changed_terms_count = 0
    for position, term in enumerate(terms, start=1):
        # Work on a copy and assign a new dict back.
        # NOTE(review): if `translations` is a plain JSON column (no
        # MutableDict), mutating the loaded dict in place and re-assigning the
        # same object is not picked up as a change by the ORM -- confirm
        # against the model definition.
        translations = dict(term.translations or {})
        term_changed = False

        # A language counts as missing when it is absent or empty.
        missing_langs = [lang for lang in TARGET_LANGUAGES if not translations.get(lang)]
        if missing_langs:
            logger.info(
                " [%s/%s] Translating '%s' to %s...",
                position,
                len(terms),
                term.source,
                missing_langs,
            )
            translations.update(translate_term(adapter, term.source, src_lang, missing_langs))
            updated_terms_count += 1
            term_changed = True

        # Keep the original default target (e.g. the English translation)
        # available under 'en'; skip it when there is nothing to copy.
        if not translations.get("en") and term.target:
            translations["en"] = term.target
            term_changed = True

        if term_changed:
            term.translations = translations
            changed_terms_count += 1

    # Persist whenever anything changed, not only when the provider was
    # called: the rename and the 'en' back-fill need saving too.
    if glossary_changed or changed_terms_count:
        session.commit()

    if updated_terms_count > 0:
        logger.info(
            "Glossary '%s' updated with %s translated terms.",
            glossary.name,
            updated_terms_count,
        )
    else:
        logger.info("No translations were missing for glossary '%s'.", glossary.name)


def main() -> int:
    """Migrate every glossary in the database; return the process exit code."""
    logger.info("Connecting to database and fetching all glossaries...")
    adapter = get_legacy_google_adapter()

    with get_sync_session() as session:
        # Find all glossaries
        all_glossaries = session.query(Glossary).all()
        if not all_glossaries:
            logger.info("No glossaries found in database.")
            return 0

        logger.info("Found %s glossaries to process.", len(all_glossaries))
        for glossary in all_glossaries:
            _migrate_glossary(session, adapter, glossary)

    logger.info("Migration complete!")
    return 0


if __name__ == '__main__':
    sys.exit(main())