Files
office_translator/alembic/versions/d0e1f2a3b4c5_cleanup_duplicate_glossaries.py
sepehr adc3583358
All checks were successful
Deploy to Production / Build and Deploy (push) Successful in 2m57s
fix(db): make migrations and glossary index SQLite-compatible
2026-06-14 19:01:07 +02:00

66 lines
2.1 KiB
Python

"""Delete old FR→EN glossaries and deduplicate multilingual ones
Revision ID: d0e1f2a3b4c5
Revises: c8d9e0f1a2b3
Create Date: 2026-05-31
Users imported templates multiple times, which created duplicate glossaries.
Old FR→EN glossaries (created before multilingual enrichment) are also still
present. This migration removes both.
"""
from typing import Sequence, Union
from alembic import op
# Revision identifiers used by Alembic: this revision's ID and the ID of the
# revision it is applied on top of.
revision: str = "d0e1f2a3b4c5"
down_revision: Union[str, None] = "c8d9e0f1a2b3"
# No branch labels and no dependencies on other revisions are declared.
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
    """Delete stale 'en' glossaries and deduplicate 'multi' glossaries.

    Step 1 (every dialect): delete all rows of ``glossaries`` whose
    ``target_language`` is ``'en'``, after deleting their ``glossary_terms``.

    Step 2 (skipped on SQLite): among rows of ``glossaries`` whose
    ``target_language`` is ``'multi'``, keep exactly one row per ``name`` --
    the one with the latest ``created_at`` -- and delete the others, after
    deleting their ``glossary_terms``.

    The rows to keep are selected by a single shared subquery with a fully
    deterministic ordering, so the term cleanup and the glossary cleanup
    always agree on which glossary survives.
    """
    dialect = op.get_bind().dialect.name

    # Step 1: Delete ALL glossaries with target_language='en' (old stale
    # imports). Terms go first, then the glossaries they belong to.
    # NOTE(review): this also removes any 'en' glossary a user created on
    # purpose -- confirm that no such rows are expected to exist.
    op.execute(
        "DELETE FROM glossary_terms WHERE glossary_id IN "
        "(SELECT id FROM glossaries WHERE target_language = 'en')"
    )
    op.execute("DELETE FROM glossaries WHERE target_language = 'en'")

    if dialect == "sqlite":
        # Per the original author's note, SQLite is only used for freshly
        # created test databases that contain no duplicate multilingual
        # glossaries, so the PostgreSQL-specific DISTINCT ON logic below is
        # skipped there.
        return

    # Step 2: Deduplicate multilingual glossaries -- keep only the newest
    # per name.
    #
    # The survivor query is defined once and reused by both DELETEs. Its
    # ORDER BY is made total on purpose:
    #   * NULLS LAST  -- in PostgreSQL, DESC puts NULLs first by default,
    #                    which would make a row without a timestamp win
    #                    over every dated row;
    #   * g2.id DESC  -- breaks ties between rows sharing the same
    #                    created_at. Without it DISTINCT ON may pick a
    #                    different row in each statement, deleting the terms
    #                    of the glossary that is kept.
    # NOTE(review): duplicates are grouped by name only. If glossaries are
    # owned per user/tenant, same-named glossaries of different owners are
    # merged too -- verify against the schema.
    survivors = """
        SELECT DISTINCT ON (g2.name) g2.id
        FROM glossaries g2
        WHERE g2.target_language = 'multi'
        ORDER BY g2.name, g2.created_at DESC NULLS LAST, g2.id DESC
    """

    # Delete terms for duplicates first, then the duplicates themselves.
    op.execute(f"""
        DELETE FROM glossary_terms
        WHERE glossary_id IN (
            SELECT g.id
            FROM glossaries g
            WHERE g.target_language = 'multi'
              AND g.id NOT IN ({survivors})
        )
    """)
    op.execute(f"""
        DELETE FROM glossaries
        WHERE target_language = 'multi'
          AND id NOT IN ({survivors})
    """)
def downgrade() -> None:
    """Do nothing: the data deleted by this migration cannot be restored."""
    return None