feat(glossaries): add migrate_glossaries_to_multilingual.py
All checks were successful
Deploy to Production / Build and Deploy (push) Successful in 5m10s
All checks were successful
Deploy to Production / Build and Deploy (push) Successful in 5m10s
Nouveau script dédié à la migration « multilingue uniquement » : - Supprime tout glossaire dont le nom ne contient pas '→ Multilingue' - Backup JSON intégral des glossaires + termes avant suppression - Mode --dry-run / confirmation interactive / --yes - Heuristique par nom uniquement (target_language peut être 'multi' partout après les migrations passées, donc non fiable). À utiliser après le dedup (user_id, name) pour finir la migration.
This commit is contained in:
266
scripts/migrate_glossaries_to_multilingual.py
Normal file
266
scripts/migrate_glossaries_to_multilingual.py
Normal file
@@ -0,0 +1,266 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
Delete every single-language glossary (e.g. "Français → Anglais") so that only
the multilingual versions ("→ Multilingue") remain.

⚠️  DESTRUCTIVE. By default the script asks for an interactive confirmation.
Use --yes for automated runs.

The script:
  1. Writes a JSON backup of ALL glossaries about to be deleted (with their terms)
  2. Deletes the non-multilingual glossaries; their terms are removed with an
     explicit DELETE issued just before the glossary row is deleted
  3. Leaves untouched every glossary whose name contains "→ Multilingue"

Usage:
    # Dry run (review only):
    DATABASE_URL=... python scripts/migrate_glossaries_to_multilingual.py --dry-run

    # Interactive confirmation:
    DATABASE_URL=... python scripts/migrate_glossaries_to_multilingual.py

    # No confirmation:
    DATABASE_URL=... python scripts/migrate_glossaries_to_multilingual.py --yes

    # Restrict to a single user:
    DATABASE_URL=... python scripts/migrate_glossaries_to_multilingual.py --user <user_id>
"""
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
# Project root: two directory levels above this file (the script is invoked as
# scripts/migrate_glossaries_to_multilingual.py).
ROOT = Path(__file__).resolve().parent.parent
# Put the project root first on sys.path so the project import below resolves
# when the script is run directly.
sys.path.insert(0, str(ROOT))

# In container, /app is the WORKDIR with the project root mounted.
if Path("/app").exists() and Path("/app/database").exists():
    sys.path.insert(0, "/app")
|
||||||
|
|
||||||
|
from sqlalchemy import text
|
||||||
|
|
||||||
|
from database.connection import get_sync_session
|
||||||
|
|
||||||
|
# Root logging configuration: INFO level, "<timestamp> [<LEVEL>] <message>" lines.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(message)s",
)
# Logger used by every function in this script.
logger = logging.getLogger("migrate_to_multilingual")

# Backups are written to /app/backups when /app exists, otherwise to <ROOT>/backups.
BACKUP_DIR = Path("/app/backups") if Path("/app").exists() else ROOT / "backups"
|
||||||
|
|
||||||
|
|
||||||
|
def find_non_multilingual(session, user_id: str | None = None) -> list[dict]:
    """Return every glossary that is NOT multilingual.

    Heuristic: a glossary is selected for deletion when its name does not
    contain "→ Multilingue". The name is the only signal used here because
    target_language may be 'multi' everywhere after earlier migrations.

    Multilingual glossaries are preserved; everything else (English, custom,
    etc.) is returned as a deletion candidate.

    Args:
        session: Database session exposing ``execute(statement, params)``.
        user_id: When truthy, restrict the search to this user's glossaries.

    Returns:
        One dict per matching glossary, ordered by user_id, name, created_at,
        with the keys listed in ``columns`` below.
    """
    columns = (
        "id",
        "user_id",
        "name",
        "source_language",
        "target_language",
        "template_id",
        "created_at",
        "updated_at",
    )

    query = """
        SELECT id, user_id, name, source_language, target_language, template_id,
               created_at, updated_at
        FROM glossaries
        WHERE name NOT LIKE '%→ Multilingue%'
    """
    bind: dict = {}
    if user_id:
        # Optional per-user filter, passed as a bound parameter.
        query += " AND user_id = :user_id"
        bind["user_id"] = user_id
    query += " ORDER BY user_id, name, created_at"

    fetched = session.execute(text(query), bind).fetchall()
    return [{column: getattr(row, column) for column in columns} for row in fetched]
|
||||||
|
|
||||||
|
|
||||||
|
def fetch_terms(session, glossary_id: str) -> list[dict]:
    """Return all terms of one glossary, ordered by term id.

    Args:
        session: Database session exposing ``execute(statement, params)``.
        glossary_id: Identifier of the glossary whose terms are read.

    Returns:
        One dict per term with the keys ``id``, ``source``, ``target`` and
        ``translations``.
    """
    statement = text(
        "SELECT id, source, target, translations "
        "FROM glossary_terms WHERE glossary_id = :id ORDER BY id"
    )
    terms: list[dict] = []
    for row in session.execute(statement, {"id": glossary_id}).fetchall():
        terms.append(
            {
                "id": row.id,
                "source": row.source,
                "target": row.target,
                # A falsy translations value is normalised to an empty dict.
                "translations": row.translations or {},
            }
        )
    return terms
|
||||||
|
|
||||||
|
|
||||||
|
def _json_default(value):
    """Serialise values that ``json.dumps`` cannot encode natively.

    Handles ``datetime`` (ISO 8601 string) and ``uuid.UUID`` (canonical string).
    NOTE(review): whether the id columns actually come back as UUID objects
    depends on the schema, which is not visible from this script — this hook
    only makes the backup robust if they do.

    Raises:
        TypeError: For any other unsupported type, so that unexpected data is
            never silently stringified into the backup.
    """
    import uuid  # local import: only this helper needs it

    if isinstance(value, datetime):
        return value.isoformat()
    if isinstance(value, uuid.UUID):
        return str(value)
    raise TypeError(f"Object of type {type(value).__name__} is not JSON serializable")


def write_backup(glossaries: list[dict], session) -> Path:
    """Backup the glossaries (with their terms) to a JSON file.

    Args:
        glossaries: Glossary dicts as returned by ``find_non_multilingual``;
            each must provide ``id``, ``created_at`` and ``updated_at``.
        session: Database session used to read each glossary's terms.

    Returns:
        Path of the JSON file written under ``BACKUP_DIR``.

    Raises:
        TypeError: If a row contains a value that cannot be serialised.
        OSError: If the backup directory or file cannot be written.
    """
    # One timestamp for both the payload and the file name, so they always agree.
    now = datetime.now(timezone.utc)

    groups = []
    for g in glossaries:
        terms = fetch_terms(session, g["id"])
        groups.append({
            **g,
            # Overrides the raw datetime values copied in by **g.
            "created_at": g["created_at"].isoformat() if g["created_at"] else None,
            "updated_at": g["updated_at"].isoformat() if g["updated_at"] else None,
            "terms": terms,
            "terms_count": len(terms),
        })

    payload = {
        "generated_at": now.isoformat(),
        "schema_version": 1,
        "note": (
            "Glossaires non-multilingues (« → Anglais », etc.) — supprimés lors de "
            "la migration vers le mode multilingue exclusif. Les termes sont "
            "intégralement conservés ici pour permettre une restauration manuelle."
        ),
        "total_to_delete": len(glossaries),
        "groups": groups,
    }

    out = BACKUP_DIR / f"glossary_migration_to_multilingual_{now.strftime('%Y%m%dT%H%M%SZ')}.json"
    out.parent.mkdir(parents=True, exist_ok=True)
    out.write_text(
        json.dumps(payload, indent=2, ensure_ascii=False, default=_json_default),
        encoding="utf-8",
    )
    return out
|
||||||
|
|
||||||
|
|
||||||
|
def delete_glossary(session, glossary_id: str) -> int:
    """Delete one glossary and its terms; return the number of terms counted.

    The terms are counted first, then removed, then the glossary row itself
    is removed. Nothing is committed here — the caller owns the transaction.

    Args:
        session: Database session exposing ``execute(statement, params)``.
        glossary_id: Identifier of the glossary to delete.

    Returns:
        Term count read just before the deletion (0 when the count is falsy).
    """
    bind = {"id": glossary_id}

    counted = session.execute(
        text("SELECT COUNT(*) FROM glossary_terms WHERE glossary_id = :id"),
        bind,
    ).scalar()

    # Terms first, then the glossary row that owns them.
    for statement in (
        "DELETE FROM glossary_terms WHERE glossary_id = :id",
        "DELETE FROM glossaries WHERE id = :id",
    ):
        session.execute(text(statement), bind)

    return counted or 0
|
||||||
|
|
||||||
|
|
||||||
|
def print_preview(glossaries: list[dict], terms_per: list[int]) -> None:
    """Log a summary of what would be deleted.

    Args:
        glossaries: Deletion candidates; each dict must provide ``name``,
            ``target_language``, ``template_id`` and ``created_at``.
        terms_per: Term count of each glossary, in the same order as
            ``glossaries``.
    """
    rule = "=" * 78

    for banner_line in (rule, "Aperçu de la migration vers « multilingue uniquement »", rule):
        logger.info(banner_line)

    logger.info("Glossaires à supprimer : %d", len(glossaries))
    logger.info("Termes concernés : %d", sum(terms_per))
    logger.info("")
    logger.info("Détail :")

    # One line per glossary, paired positionally with its term count.
    for glossary, term_count in zip(glossaries, terms_per):
        details = (
            glossary["name"],
            glossary["target_language"],
            glossary["template_id"],
            term_count,
            glossary["created_at"],
        )
        logger.info(" 🗑️ '%s' (target=%s, template=%s, %d termes, créé %s)", *details)

    logger.info(rule)
|
||||||
|
|
||||||
|
|
||||||
|
def _confirm_deletion(count: int) -> bool:
    """Ask the operator to confirm the deletion of *count* glossaries.

    Returns True only for an explicit "oui", "o", "yes" or "y" answer
    (case-insensitive, surrounding whitespace ignored). A closed stdin
    (EOFError) counts as a refusal.
    """
    try:
        answer = input(f"\nSupprimer {count} glossaire(s) ? [oui/non] : ").strip().lower()
    except EOFError:
        answer = ""
    return answer in ("oui", "o", "yes", "y")


def main() -> int:
    """Run the migration: preview, confirm, back up, then delete.

    Command-line flags:
        --user USER_ID  Restrict the migration to one user.
        --dry-run       Show what would be deleted without modifying anything.
        --yes           Skip the interactive confirmation.

    Returns:
        Process exit code: 0 when there was nothing to do, for a dry run, or
        when every glossary was deleted; 1 when the operator declined or when
        at least one deletion failed.
    """
    parser = argparse.ArgumentParser(
        description="Supprime tous les glossaires non-multilingues (ne garde que « → Multilingue »)."
    )
    parser.add_argument(
        "--user",
        metavar="USER_ID",
        help="Limite la migration à un seul utilisateur.",
    )
    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="Affiche ce qui serait supprimé sans rien modifier.",
    )
    parser.add_argument(
        "--yes",
        action="store_true",
        help="Ne demande pas de confirmation interactive.",
    )
    args = parser.parse_args()

    logger.info(
        "🔍 Recherche des glossaires non-multilingues%s…",
        f" pour user_id={args.user}" if args.user else "",
    )

    with get_sync_session() as session:
        to_delete = find_non_multilingual(session, user_id=args.user)
        if not to_delete:
            logger.info("✅ Aucun glossaire non-multilingue trouvé — rien à faire.")
            return 0

        # Pre-compute the per-glossary term counts shown in the preview.
        terms_per = [
            session.execute(
                text("SELECT COUNT(*) FROM glossary_terms WHERE glossary_id = :id"),
                {"id": g["id"]},
            ).scalar() or 0
            for g in to_delete
        ]

        print_preview(to_delete, terms_per)

        # --dry-run takes precedence over --yes: nothing is ever modified.
        if args.dry_run:
            logger.info("⚠️ Mode --dry-run : aucune suppression effectuée.")
            return 0

        if not args.yes and not _confirm_deletion(len(to_delete)):
            logger.info("Annulé par l'utilisateur.")
            return 1

        # Backup before any deletion; an exception here aborts the run
        # before anything has been removed.
        logger.info("💾 Backup en cours…")
        backup_path = write_backup(to_delete, session)
        logger.info(" Backup écrit : %s (%d octets)", backup_path, backup_path.stat().st_size)

        # Deletion: one transaction per glossary so that a failure on one
        # glossary does not undo the others.
        logger.info("🗑️ Suppression en cours…")
        deleted = 0
        terms_deleted = 0
        failed = 0
        for g in to_delete:
            try:
                tcount = delete_glossary(session, g["id"])
                session.commit()
            except Exception as e:
                session.rollback()
                failed += 1
                # logger.exception keeps the traceback for post-mortem analysis.
                logger.exception(" ✗ Échec pour '%s' : %s", g["name"], e)
            else:
                deleted += 1
                terms_deleted += tcount
                logger.info(" ✓ Supprimé '%s' (%d termes)", g["name"], tcount)

        logger.info("=" * 78)
        logger.info(
            "✅ Terminé : %d/%d glossaire(s) supprimé(s), %d termes supprimé(s).",
            deleted, len(to_delete), terms_deleted,
        )
        logger.info(" Backup conservé : %s", backup_path)
        logger.info("=" * 78)

        if failed:
            # Surface partial failure through the exit code so that automated
            # runs (--yes) can detect an incomplete migration.
            logger.warning(
                "⚠️ %d glossaire(s) n'ont pas pu être supprimé(s) — voir les erreurs ci-dessus.",
                failed,
            )
            return 1
        return 0
|
||||||
|
|
||||||
|
|
||||||
|
# Script entry point: main()'s return value becomes the process exit code.
if __name__ == "__main__":
    raise SystemExit(main())
|
||||||
Reference in New Issue
Block a user