Files
Momento/memento-note/scripts/generate-i18n-overrides.py
Antigravity 8c7ca69640
All checks were successful
Deploy to Production / Build and Deploy (push) Successful in 5s
fix: brainstorm infinite loop, ghost cursor, embedding ::vector cast, semantic search, billing stats, usage meter accordion
- Fix useBrainstormSocket: stable guestId via useRef, remove setState in cleanup
- Fix GhostCursor: direct DOM manipulation via refs, no useState re-renders
- Fix all SQL embedding queries: add ::vector cast on text columns
- Fix embedding truncation to 15000 chars (under 8192 token limit)
- Fix NoteEmbedding INSERT: remove non-existent updatedAt column
- Fix billing page: show all quota stats in grid instead of single metric
- Fix usage meter: accordion expand/collapse, per-feature detail
- Fix semantic search: rebuild 103 note embeddings, ::vector cast on vectorSearch
- Fix brainstorm expand/manual-idea/create: ::vector cast on embedding SQL
2026-05-16 18:50:34 +00:00

146 lines
4.6 KiB
Python

#!/usr/bin/env python3
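"""Generate flat i18n-overrides/<lang>.json files.

An override is generated for every key whose value in a target locale is still
identical to the English text (or missing) while the French locale differs.
"""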
from __future__ import annotations
import json
import re
import sys
import time
from pathlib import Path
from deep_translator import GoogleTranslator
# Absolute path of this script; every other location is derived from it.
_SCRIPT_PATH = Path(__file__).resolve()

# Directory one level above the folder containing this script.
ROOT = _SCRIPT_PATH.parents[1]

# Folder holding the nested per-language JSON files (en.json, fr.json, ...).
LOCALES = ROOT / "locales"

# Output folder, created next to this script, for the flat override files.
OUT_DIR = _SCRIPT_PATH.parent / "i18n-overrides"

# Locale file stem -> target language code handed to the translator.
LANG_TARGETS = {
    "ja": "ja",
    "ko": "ko",
    "zh": "zh-CN",
    "hi": "hi",
    "fa": "fa",
}

# Keys that are always added to the override set for every language except
# "fa" (see collect_override_keys).
MISSING_11 = [
    "ai.featureLocked",
    "ai.quotaExceeded",
    "profile.tab",
    "about.tab",
    "appearance.tab",
    "usageMeter.featureReformulate",
    "usageMeter.featureChat",
    "usageMeter.featureBrainstormCreate",
    "usageMeter.featureBrainstormExpand",
    "usageMeter.featureBrainstormEnrich",
    "billing.tab",
]

# Persian numerals ordered so that the character at index d represents digit d.
PERSIAN_DIGITS = "۰۱۲۳۴۵۶۷۸۹"
def flatten_leaves(obj: dict, prefix: str = "") -> dict[str, str]:
    """Flatten a nested mapping into ``{"dotted.path": "text"}`` pairs.

    Nested dicts are walked recursively and their keys joined with ``.``.
    Only string leaves are kept; values of any other type (numbers, lists,
    ``None``, ...) are ignored.

    Args:
        obj: The (possibly nested) mapping to flatten.
        prefix: Dotted path prepended to every key of *obj*; empty at the root.

    Returns:
        A flat dict from dotted key path to string value, in traversal order.
    """
    flat: dict[str, str] = {}
    for name, value in obj.items():
        dotted = name if not prefix else f"{prefix}.{name}"
        if isinstance(value, str):
            flat[dotted] = value
            continue
        if isinstance(value, dict):
            # Recurse and merge the subtree's leaves in their own order.
            for sub_path, sub_text in flatten_leaves(value, dotted).items():
                flat[sub_path] = sub_text
    return flat
def to_persian_digits(text: str) -> str:
    """Return *text* with every decimal digit replaced by its Persian numeral.

    Each character matched by the regex ``\\d`` is converted to its integer
    value and looked up by index in ``PERSIAN_DIGITS``.
    """

    def as_persian(match: "re.Match[str]") -> str:
        return PERSIAN_DIGITS[int(match.group())]

    return re.sub(r"\d", as_persian, text)
def collect_override_keys(en: dict[str, str], fr: dict[str, str], loc: dict[str, str], lang: str) -> list[str]:
    """Return the sorted keys of *en* that need a generated override for *lang*.

    A key qualifies when its French value differs from the English one while
    the value in *loc* is still identical to English; a key missing from *loc*
    counts as identical. For every language except ``"fa"`` the keys listed in
    ``MISSING_11`` are added as well.

    Only keys that exist in *en* are ever returned: the caller looks up the
    English source text for each returned key, so a ``MISSING_11`` entry that
    is absent from *en* is skipped (with a warning) instead of causing a
    ``KeyError`` later on.

    Args:
        en: Flattened English strings (the translation source).
        fr: Flattened French strings, used as evidence that a key is translatable.
        loc: Flattened strings of the locale being checked.
        lang: Locale code of *loc*.

    Returns:
        Sorted list of dotted keys to translate.
    """
    keys = {
        key
        for key, en_text in en.items()
        if key in fr and fr[key] != en_text and loc.get(key, en_text) == en_text
    }
    if lang != "fa":
        unknown = [key for key in MISSING_11 if key not in en]
        if unknown:
            print(f" warning: MISSING_11 keys not found in en, skipped: {', '.join(unknown)}", flush=True)
        keys.update(key for key in MISSING_11 if key in en)
    return sorted(keys)
def translate_unique(texts: list[str], target_code: str, batch_size: int = 30) -> dict[str, str]:
    """Translate English *texts* into *target_code* and return a source->result map.

    The texts are sent to ``GoogleTranslator`` in slices of *batch_size*. If a
    batch call raises, each text of that slice is translated individually,
    and a text whose individual call also raises is kept as is. A result
    that is not a list of the expected length is discarded in favour of the
    source texts, and any single result that is not a non-blank string falls
    back to its source text. Short pauses are taken between requests and a
    ``done/total`` progress line is printed after every batch.

    Args:
        texts: English source strings to translate.
        target_code: Language code passed to ``GoogleTranslator`` as target.
        batch_size: Number of texts per batch request.

    Returns:
        Dict mapping every source text to its translation (or to itself when
        no usable translation was obtained).
    """
    translator = GoogleTranslator(source="en", target=target_code)
    total = len(texts)
    translations: dict[str, str] = {}

    for start in range(0, total, batch_size):
        chunk = texts[start : start + batch_size]
        try:
            results = translator.translate_batch(chunk)
        except Exception as e:
            # Best effort: retry the failed batch one text at a time.
            print(f" batch error ({e}), per-item…", flush=True)
            results = []
            for item in chunk:
                try:
                    single = translator.translate(item)
                except Exception:
                    single = item
                results.append(single)
                time.sleep(0.2)

        well_formed = isinstance(results, list) and len(results) == len(chunk)
        if not well_formed:
            # Cannot pair results with sources reliably; keep the sources.
            results = chunk

        for source, result in zip(chunk, results):
            if isinstance(result, str) and result.strip():
                translations[source] = result
            else:
                translations[source] = source

        time.sleep(0.5)
        done = min(start + batch_size, total)
        print(f"{done}/{total}", flush=True)

    return translations
def post_process(lang: str, key: str, en_val: str, translated: str) -> str:
    """Apply language- and key-specific corrections to a translated string.

    * For ``"fa"``, digits in the translation are converted to Persian numerals.
    * Keys ending in ``.technology.ai`` get a fixed label for ja/zh/ko/hi/fa.
    * The key ``about.technology.ui`` gets a fixed label for the same languages.

    Any other combination returns the (possibly digit-converted) translation.

    Args:
        lang: Locale code the translation is for.
        key: Dotted i18n key of the string.
        en_val: English source text (accepted but not used here).
        translated: The machine translation to adjust.

    Returns:
        The final string to store for *key* in the *lang* overrides.
    """
    if lang == "fa" and re.search(r"\d", translated) is not None:
        translated = to_persian_digits(translated)

    # Momento note-taking: keep common product tokens
    if key.endswith(".technology.ai"):
        ai_labels = {"ja": "AI", "zh": "AI", "ko": "AI", "hi": "AI", "fa": "هوش مصنوعی"}
        if lang in ai_labels:
            return ai_labels[lang]

    if key == "about.technology.ui":
        ui_labels = {"ja": "UI", "ko": "UI", "zh": "界面", "hi": "UI", "fa": "رابط کاربری"}
        return ui_labels[lang] if lang in ui_labels else translated

    return translated
def main() -> int:
    """Generate one flat override JSON file per language in ``LANG_TARGETS``.

    Loads and flattens ``en.json`` and ``fr.json`` from ``LOCALES``, then for
    each target language: loads its locale file, collects the keys needing an
    override, translates each distinct English text once, post-processes the
    result per key and writes ``OUT_DIR/<lang>.json``.

    Returns:
        ``0`` once every language has been written.
    """

    def load_flat(file_name: str) -> dict[str, str]:
        raw = (LOCALES / file_name).read_text(encoding="utf-8")
        return flatten_leaves(json.loads(raw))

    en = load_flat("en.json")
    fr = load_flat("fr.json")
    OUT_DIR.mkdir(parents=True, exist_ok=True)

    for lang, google_target in LANG_TARGETS.items():
        loc = load_flat(f"{lang}.json")
        keys = collect_override_keys(en, fr, loc, lang)
        print(f"\n=== {lang}.json — {len(keys)} override keys", flush=True)

        # Group keys by English text so identical strings are translated once.
        keys_by_text: dict[str, list[str]] = {}
        for key in keys:
            source_text = en[key]
            if source_text not in keys_by_text:
                keys_by_text[source_text] = []
            keys_by_text[source_text].append(key)

        trans_map = translate_unique(list(keys_by_text), google_target)

        overrides: dict[str, str] = {}
        for source_text, grouped_keys in keys_by_text.items():
            translated = trans_map.get(source_text, source_text)
            for key in grouped_keys:
                overrides[key] = post_process(lang, key, en[key], translated)

        out_path = OUT_DIR / f"{lang}.json"
        serialized = json.dumps(overrides, ensure_ascii=False, indent=2) + "\n"
        out_path.write_text(serialized, encoding="utf-8")
        print(f" Wrote {out_path} ({len(overrides)} keys)", flush=True)

    return 0
# Run the generator only when executed as a script; main()'s return value
# becomes the process exit status.
if __name__ == "__main__":
    sys.exit(main())