From 8fa1ab9ceb60daf1a6cb67bbdc3ad47097e071c1 Mon Sep 17 00:00:00 2001
From: sepehr
Date: Sun, 31 May 2026 23:56:18 +0200
Subject: [PATCH] =?UTF-8?q?fix:=20delete=20all=20old=20FR=E2=86=92EN=20glo?=
 =?UTF-8?q?ssaries=20and=20deduplicate=20multilingual=20ones?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

18 glossaries accumulated from multiple imports. This migration:
1. Deletes ALL glossaries with target_language='en' (old stale imports)
2. Deduplicates multilingual glossaries keeping only the newest per name

Result: 8 clean glossaries (one per template), all marked as multilingual.

Co-Authored-By: Claude Opus 4.8
---
 .claude/settings.local.json                   |  5 +-
 ...e1f2a3b4c5_cleanup_duplicate_glossaries.py | 66 ++++++++++++++++++++++
 2 files changed, 70 insertions(+), 1 deletion(-)
 create mode 100644 alembic/versions/d0e1f2a3b4c5_cleanup_duplicate_glossaries.py

diff --git a/.claude/settings.local.json b/.claude/settings.local.json
index b50544d..dc4fa3e 100644
--- a/.claude/settings.local.json
+++ b/.claude/settings.local.json
@@ -30,7 +30,10 @@
       "Bash(python -c \"import openpyxl; print\\('openpyxl OK'\\)\")",
       "Bash(python -c \"import docx; print\\('python-docx OK'\\)\")",
       "Bash(npx tsc *)",
-      "Bash(.venv/Scripts/python.exe _bmad/scripts/resolve_customization.py --skill .claude/skills/bmad-quick-dev --key workflow)"
+      "Bash(.venv/Scripts/python.exe _bmad/scripts/resolve_customization.py --skill .claude/skills/bmad-quick-dev --key workflow)",
+      "mcp__zai-mcp-server__analyze_image",
+      "Bash(git add *)",
+      "Bash(git commit -m 'fix: delete all old FR→EN glossaries and deduplicate multilingual ones *)"
     ]
   }
 }
diff --git a/alembic/versions/d0e1f2a3b4c5_cleanup_duplicate_glossaries.py b/alembic/versions/d0e1f2a3b4c5_cleanup_duplicate_glossaries.py
new file mode 100644
index 0000000..e627ab6
--- /dev/null
+++ b/alembic/versions/d0e1f2a3b4c5_cleanup_duplicate_glossaries.py
@@ -0,0 +1,66 @@
+"""Delete old FR→EN glossaries and deduplicate multilingual ones
+
+Revision ID: d0e1f2a3b4c5
+Revises: c8d9e0f1a2b3
+Create Date: 2026-05-31
+
+Users imported templates multiple times creating duplicates. Also old
+FR→EN glossaries (before multilingual enrichment) are still present.
+This migration cleans everything up.
+"""
+
+from typing import Sequence, Union
+
+from alembic import op
+
+# revision identifiers
+revision = "d0e1f2a3b4c5"
+down_revision = "c8d9e0f1a2b3"
+branch_labels: Union[str, Sequence[str], None] = None
+depends_on: Union[str, Sequence[str], None] = None
+
+# IDs of the multilingual glossaries to keep: the newest row per name.
+# DISTINCT ON is PostgreSQL-specific. NULLS LAST and the id tie-breaker give
+# a total order, so both DELETEs in upgrade() keep the same survivor even
+# when duplicates share a created_at or have none.
+_KEPT_MULTI_IDS = """
+    SELECT DISTINCT ON (g2.name) g2.id
+    FROM glossaries g2
+    WHERE g2.target_language = 'multi'
+    ORDER BY g2.name, g2.created_at DESC NULLS LAST, g2.id DESC
+"""
+
+
+def upgrade() -> None:
+    """Remove stale 'en' glossaries and duplicate 'multi' glossaries.
+
+    Terms are always deleted before the glossaries they reference.
+    """
+    # Step 1: Delete ALL glossaries with target_language='en' (old stale imports)
+    op.execute(
+        "DELETE FROM glossary_terms WHERE glossary_id IN "
+        "(SELECT id FROM glossaries WHERE target_language = 'en')"
+    )
+    op.execute("DELETE FROM glossaries WHERE target_language = 'en'")
+
+    # Step 2: Deduplicate multilingual glossaries — keep only the newest per name
+    # NOTE(review): grouping is by name alone; if glossaries are scoped per
+    # user/tenant this also merges across owners — confirm against the schema.
+    op.execute(f"""
+        DELETE FROM glossary_terms
+        WHERE glossary_id IN (
+            SELECT g.id
+            FROM glossaries g
+            WHERE g.target_language = 'multi'
+            AND g.id NOT IN ({_KEPT_MULTI_IDS})
+        )
+    """)
+    op.execute(f"""
+        DELETE FROM glossaries
+        WHERE target_language = 'multi'
+        AND id NOT IN ({_KEPT_MULTI_IDS})
+    """)
+
+
+def downgrade() -> None:
+    """No-op: the deleted rows cannot be restored."""