Files
office_translator/alembic/versions/c8d9e0f1a2b3_rename_multilingual_glossaries.py
sepehr 32d75f8a84
Some checks failed
Deploy to Production / Build and Deploy (push) Has been cancelled
fix: delete stale FR→EN glossaries + convert all remaining to multilingual
Old glossaries imported before enrichment have target_language='en' and no
translations dict — they're stale duplicates. This migration:

1. Deletes old FR→EN glossaries with empty/null translations (no Persian, etc.)
2. Renames multilingual glossaries from 'Anglais' to 'Multilingue'
3. Sets ALL remaining FR glossaries to target_language='multi'
4. Users re-import from enriched templates to get Persian/Arabic/etc. terms

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-05-31 23:31:24 +02:00

98 lines
3.1 KiB
Python

"""Clean up glossaries: delete old FR→EN imports, rename multilingual ones
Revision ID: c8d9e0f1a2b3
Revises: b7c8d9e0f1a2
Create Date: 2026-05-31
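Old glossaries imported before multilingual enrichment have target_language='en'
and no translations. They are stale duplicates of the enriched templates.
This migration deletes them so users re-import the enriched versions, then
sets the remaining FR→EN glossaries to target_language='multi' and replaces
'Anglais' with 'Multilingue' in the names of multilingual glossaries.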
"""
from typing import Sequence, Union
from alembic import op
import sqlalchemy as sa
# Revision identifiers. These are module-level names that the migration tool
# reads to order this script in the revision chain, so they must keep these
# exact names.
revision = "c8d9e0f1a2b3"
# Parent revision; matches the "Revises:" line in the module docstring.
down_revision = "b7c8d9e0f1a2"
# No branch label and no cross-branch dependency for this revision.
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
    """Delete stale FR→EN glossaries and convert the remaining ones to 'multi'.

    Steps, in order:

    1. Delete every FR→EN glossary (and its terms) in which no term carries a
       non-empty ``translations`` JSON object. Glossaries with no terms at all
       also match.
    2. Set ``target_language = 'multi'`` on the FR→EN glossaries that remain.
    3. Replace 'Anglais' with 'Multilingue' in the name of every glossary whose
       ``target_language`` is 'multi'.

    The deletions are destructive and are not restored by ``downgrade()``.
    """
    # One predicate shared by both DELETE statements so they always target the
    # same glossaries. A glossary is stale when it is FR→EN and has no term
    # with a non-empty JSON object in ``translations``.
    #
    # - No LIMIT: an ``IN (... LIMIT 1)`` subquery yields a single arbitrary
    #   id, which would leave all other stale glossaries in place.
    # - ``jsonb_typeof`` is used instead of ``jsonb_object_keys`` because the
    #   latter raises on non-object values (JSON null, arrays, scalars).
    #   ``jsonb_typeof`` returns SQL NULL for a NULL column, so such terms
    #   simply do not count as translated.
    # - NOT EXISTS means a glossary is only removed when *none* of its terms
    #   is translated; a partially enriched glossary is kept.
    # NOTE(review): assumes glossary_terms.translations is a jsonb column,
    # as implied by the jsonb_* calls this predicate replaces.
    stale_glossary_ids = """
        SELECT g.id
        FROM glossaries g
        WHERE g.source_language = 'fr'
          AND g.target_language = 'en'
          AND NOT EXISTS (
              SELECT 1
              FROM glossary_terms gt
              WHERE gt.glossary_id = g.id
                AND jsonb_typeof(gt.translations) = 'object'
                AND gt.translations <> CAST('{}' AS jsonb)
          )
    """

    # 1a. Remove the terms first so the glossary rows can be deleted whether
    #     or not the foreign key cascades.
    op.execute(f"""
        DELETE FROM glossary_terms
        WHERE glossary_id IN ({stale_glossary_ids})
    """)

    # 1b. Remove the glossaries themselves. After 1a they have no terms left,
    #     so the same predicate still selects exactly the same rows.
    op.execute(f"""
        DELETE FROM glossaries
        WHERE id IN ({stale_glossary_ids})
    """)

    # 2. Every FR→EN glossary that survived the cleanup becomes multilingual.
    op.execute("""
        UPDATE glossaries
        SET target_language = 'multi'
        WHERE source_language = 'fr'
          AND target_language = 'en'
    """)

    # 3. Rename all multilingual glossaries in one pass. Running this after
    #    step 2 covers both the glossaries that were already 'multi' and the
    #    ones just converted, so a separate earlier rename is unnecessary.
    #    The doubled percent signs are kept as written; consecutive '%'
    #    wildcards in LIKE match the same strings as a single one.
    op.execute("""
        UPDATE glossaries
        SET name = REPLACE(name, 'Anglais', 'Multilingue')
        WHERE target_language = 'multi'
          AND name LIKE '%%Anglais%%'
    """)
def downgrade() -> None:
    """Revert the rename and the 'multi' conversion performed by ``upgrade()``.

    This is a best-effort, lossy downgrade:

    * Glossaries and terms deleted by ``upgrade()`` cannot be restored.
    * FR glossaries that were already 'multi' before the upgrade cannot be
      told apart from the converted ones, so they are set back to 'en' too.
    * Every occurrence of 'Multilingue' in a multilingual glossary name is
      replaced, including names that contained it before the upgrade.
    """
    # Revert names first, while target_language = 'multi' still identifies
    # the rows the upgrade renamed. The filter mirrors the upgrade's rename,
    # so glossaries with another target language are left alone.
    op.execute("""
        UPDATE glossaries
        SET name = REPLACE(name, 'Multilingue', 'Anglais')
        WHERE target_language = 'multi'
          AND name LIKE '%%Multilingue%%'
    """)

    # Only French-source glossaries were converted to 'multi' by the upgrade,
    # so only those are switched back. Multilingual glossaries with another
    # source language keep their target_language.
    op.execute("""
        UPDATE glossaries
        SET target_language = 'en'
        WHERE source_language = 'fr'
          AND target_language = 'multi'
    """)