Files
office_translator/alembic/versions/b7c8d9e0f1a2_set_multilingual_target_language.py
sepehr adc3583358
All checks were successful
Deploy to Production / Build and Deploy (push) Successful in 2m57s
fix(db): make migrations and glossary index SQLite-compatible
2026-06-14 19:01:07 +02:00

84 lines
2.7 KiB
Python

"""Set multilingual glossaries target_language to 'multi'
Revision ID: b7c8d9e0f1a2
Revises: f6a7b8c9d0e1
Create Date: 2026-05-31
Glossary templates that were enriched with multilingual translations
(via enrich_glossary_templates.py) contain translations for 11 languages
(de, es, it, pt, nl, ru, ja, ko, zh, ar, fa) in each term's translations
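field. These should be marked as target_language='multi' instead of 'en'.

This migration treats a glossary as multilingual when at least one of its
terms has a translations object with five or more keys. It sets
target_language to 'multi' on those glossaries, then replaces "Anglais"
with "Multilingue" in the names of glossaries whose target_language is
'multi'.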
"""
from typing import Sequence, Union
from alembic import op
import sqlalchemy as sa
# Revision identifiers, used by Alembic. `revision` is this migration's own
# ID and `down_revision` is the ID of the migration it revises; both match
# the "Revision ID" / "Revises" lines in the module docstring.
revision = "b7c8d9e0f1a2"
down_revision = "f6a7b8c9d0e1"
# No branch labels and no dependencies on other revisions are declared.
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
    """Mark multilingual glossaries as ``'multi'`` and rename them.

    Two UPDATE statements are run against ``glossaries``, in this order:

    1. ``target_language`` is set to ``'multi'`` on every glossary that has
       at least one ``glossary_terms`` row whose ``translations`` value is a
       JSON object with five or more keys.
    2. ``'Anglais'`` is replaced with ``'Multilingue'`` in the name of every
       glossary whose ``target_language`` is ``'multi'``. This is not limited
       to the rows touched by step 1: a glossary that already had
       ``'multi'`` before this migration ran is renamed as well.

    Step 1 comes in two variants because the JSON helpers differ by backend.
    SQLite gets ``json_type`` / ``json_each``; every other dialect gets
    ``jsonb_typeof`` / ``jsonb_object_keys`` (presumably PostgreSQL with a
    ``jsonb`` column -- confirm against the schema).
    """
    running_on_sqlite = op.get_bind().dialect.name == "sqlite"

    # A term carrying 5+ translation keys is taken as the mark of a
    # multilingual template (enriched glossaries have 11 translations:
    # de, es, it, pt, nl, ru, ja, ko, zh, ar, fa).
    if running_on_sqlite:
        # SQLite has no jsonb_object_keys, so the keys are counted through
        # json_each instead.
        flag_multilingual_sql = """
UPDATE glossaries
SET target_language = 'multi'
WHERE id IN (
SELECT DISTINCT g.id
FROM glossaries g
JOIN glossary_terms gt ON gt.glossary_id = g.id
WHERE gt.translations IS NOT NULL
AND json_type(gt.translations, '$') = 'object'
AND (
SELECT count(*)
FROM json_each(gt.translations)
) >= 5
)
"""
    else:
        flag_multilingual_sql = """
UPDATE glossaries
SET target_language = 'multi'
WHERE id IN (
SELECT DISTINCT g.id
FROM glossaries g
JOIN glossary_terms gt ON gt.glossary_id = g.id
WHERE gt.translations IS NOT NULL
AND jsonb_typeof(gt.translations) = 'object'
AND (
SELECT count(*)
FROM jsonb_object_keys(gt.translations)
) >= 5
)
"""
    op.execute(flag_multilingual_sql)

    # With the flag in place, bring the display names in line with it:
    # "Anglais" becomes "Multilingue".
    rename_sql = """
UPDATE glossaries
SET name = REPLACE(name, 'Anglais', 'Multilingue')
WHERE target_language = 'multi'
AND name LIKE '%%Anglais%%'
"""
    op.execute(rename_sql)
def downgrade() -> None:
    """Best-effort reversal of ``upgrade()``.

    Two UPDATE statements are run against ``glossaries``, in this order:

    1. ``'Multilingue'`` is replaced with ``'Anglais'`` in the name of every
       glossary whose ``target_language`` is ``'multi'``, undoing the rename
       performed by ``upgrade()``.
    2. ``target_language`` is set back to ``'en'`` on every glossary whose
       ``target_language`` is ``'multi'``.

    The reversal is lossy. ``upgrade()`` does not record which rows it
    changed, so a glossary that was already ``'multi'`` or already had
    "Multilingue" in its name before the upgrade is reverted too.
    """
    # Undo the rename first: once target_language has been reset, the
    # affected rows can no longer be told apart by target_language = 'multi'.
    op.execute("""
UPDATE glossaries
SET name = REPLACE(name, 'Multilingue', 'Anglais')
WHERE target_language = 'multi'
AND name LIKE '%%Multilingue%%'
""")
    # Revert multilingual glossaries back to 'en'
    op.execute("""
UPDATE glossaries
SET target_language = 'en'
WHERE target_language = 'multi'
""")