From dde80f6bc3f67fd88ef14f0ac19bbeff736b2e4a Mon Sep 17 00:00:00 2001 From: sepehr Date: Sun, 28 Jun 2026 10:46:48 +0200 Subject: [PATCH] feat(glossaries): update script to translate missing terms in any glossary, even if target_language is already 'multi' --- ...ossaries_to_multilingual_by_translation.py | 46 +++++++++++-------- 1 file changed, 26 insertions(+), 20 deletions(-) diff --git a/scripts/migrate_glossaries_to_multilingual_by_translation.py b/scripts/migrate_glossaries_to_multilingual_by_translation.py index 0d4a0d5..2673ed5 100644 --- a/scripts/migrate_glossaries_to_multilingual_by_translation.py +++ b/scripts/migrate_glossaries_to_multilingual_by_translation.py @@ -36,39 +36,41 @@ def translate_term(adapter, text_to_translate: str, src_lang: str, target_langs: return translations def main(): - logger.info("Connecting to database and fetching non-multilingual glossaries...") + logger.info("Connecting to database and fetching all glossaries...") adapter = get_legacy_google_adapter() with get_sync_session() as session: - # Find glossaries where target_language != 'multi' - non_multi_glossaries = session.query(Glossary).filter(Glossary.target_language != 'multi').all() + # Find all glossaries + all_glossaries = session.query(Glossary).all() - if not non_multi_glossaries: - logger.info("All glossaries in database are already multilingual ('multi').") + if not all_glossaries: + logger.info("No glossaries found in database.") return 0 - logger.info(f"Found {len(non_multi_glossaries)} non-multilingual glossaries to convert.") + logger.info(f"Found {len(all_glossaries)} glossaries to process.") - for glossary in non_multi_glossaries: + for glossary in all_glossaries: logger.info(f"Processing glossary '{glossary.name}' (ID: {glossary.id})") - # Change target_language to 'multi' - glossary.target_language = 'multi' - - # Rename the glossary to indicate it's now multilingual (e.g. replace "Anglais" with "Multilingue") - if "Anglais" in glossary.name: - glossary.name = glossary.name.replace("Anglais", "Multilingue") - elif "English" in glossary.name: - glossary.name = glossary.name.replace("English", "Multilingue") - else: - glossary.name = f"{glossary.name} → Multilingue" + # Change target_language to 'multi' if not already + if glossary.target_language != 'multi': + glossary.target_language = 'multi' + + # Rename the glossary to indicate it's now multilingual (e.g. replace "Anglais" with "Multilingue") + if "Anglais" in glossary.name: + glossary.name = glossary.name.replace("Anglais", "Multilingue") + elif "English" in glossary.name: + glossary.name = glossary.name.replace("English", "Multilingue") + else: + glossary.name = f"{glossary.name} → Multilingue" src_lang = glossary.source_language or "fr" terms = session.query(GlossaryTerm).filter(GlossaryTerm.glossary_id == glossary.id).all() - logger.info(f"Translating {len(terms)} terms for '{glossary.name}'...") + logger.info(f"Checking translations for {len(terms)} terms in '{glossary.name}'...") + updated_terms_count = 0 for idx, term in enumerate(terms): translations = term.translations or {} @@ -80,6 +82,7 @@ def main(): logger.info(f" [{idx+1}/{len(terms)}] Translating '{term.source}' to {missing_langs}...") new_translations = translate_term(adapter, term.source, src_lang, missing_langs) translations.update(new_translations) + updated_terms_count += 1 # Ensure the original default target (e.g. English translation) is in the translations dict under 'en' if 'en' not in translations or not translations['en']: @@ -87,8 +90,11 @@ def main(): term.translations = translations - session.commit() - logger.info(f"Glossary '{glossary.name}' successfully converted to multilingual.") + if updated_terms_count > 0: + session.commit() + logger.info(f"Glossary '{glossary.name}' updated with {updated_terms_count} translated terms.") + else: + logger.info(f"No translations were missing for glossary '{glossary.name}'.") logger.info("Migration complete!") return 0