feat: add multilingual glossary support (backend + frontend types)
All checks were successful
Deploy to Production / Build and Deploy (push) Successful in 1m31s
All checks were successful
Deploy to Production / Build and Deploy (push) Successful in 1m31s
Backend:
- Add source_language column to glossaries table
- Add translations JSON column to glossary_terms table
- Alembic migration for schema changes
- format_glossary_for_prompt is now language-aware: it extracts the correct translation per target language and falls back to the EN reference for templates with only FR→EN data
- CRUD routes accept/return source_language and translations
- Pydantic schemas updated

Frontend:
- Types updated: GlossaryTerm now has translations: Record<string, string>
- Glossary/GlossaryListItem now have source_language
- Added SUPPORTED_LANGUAGES constant (13 languages)

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
35
alembic/versions/d4a1f8e2b3c7_add_glossary_multilingual.py
Normal file
35
alembic/versions/d4a1f8e2b3c7_add_glossary_multilingual.py
Normal file
@@ -0,0 +1,35 @@
|
|||||||
|
"""Add multilingual support to glossaries
|
||||||
|
|
||||||
|
Revision ID: d4a1f8e2b3c7
|
||||||
|
Revises: cb71a958ad92
|
||||||
|
Create Date: 2026-05-16
|
||||||
|
|
||||||
|
Adds source_language to glossaries and translations JSON to glossary_terms.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from typing import Sequence, Union
|
||||||
|
|
||||||
|
from alembic import op
|
||||||
|
import sqlalchemy as sa
|
||||||
|
|
||||||
|
# Revision identifiers. Alembic reads these module-level names to place this
# migration in the chain: it applies on top of revision cb71a958ad92.
revision: str = "d4a1f8e2b3c7"
down_revision: Union[str, None] = "cb71a958ad92"
# No branch label and no cross-branch dependency for this migration.
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
|
||||||
|
|
||||||
|
|
||||||
|
def upgrade() -> None:
    """Add the multilingual glossary columns.

    - ``glossaries.source_language``: NOT NULL ``String(10)`` with a
      server-side default of ``"fr"``.
    - ``glossary_terms.translations``: nullable JSON column.
    """
    # The server default supplies a value for rows that already exist, which
    # the NOT NULL constraint on the new column requires.
    source_language_column = sa.Column(
        "source_language",
        sa.String(10),
        nullable=False,
        server_default="fr",
    )
    translations_column = sa.Column("translations", sa.JSON, nullable=True)

    op.add_column("glossaries", source_language_column)
    op.add_column("glossary_terms", translations_column)
|
||||||
|
|
||||||
|
|
||||||
|
def downgrade() -> None:
    """Drop the two multilingual columns, in the reverse order of upgrade()."""
    columns_to_drop = (
        ("glossary_terms", "translations"),
        ("glossaries", "source_language"),
    )
    for table_name, column_name in columns_to_drop:
        op.drop_column(table_name, column_name)
|
||||||
@@ -330,6 +330,7 @@ class Glossary(Base):
|
|||||||
String(36), ForeignKey("users.id", ondelete="CASCADE"), nullable=False
|
String(36), ForeignKey("users.id", ondelete="CASCADE"), nullable=False
|
||||||
)
|
)
|
||||||
name = Column(String(255), nullable=False)
|
name = Column(String(255), nullable=False)
|
||||||
|
source_language = Column(String(10), nullable=False, default="fr")
|
||||||
created_at = Column(DateTime, default=_utcnow)
|
created_at = Column(DateTime, default=_utcnow)
|
||||||
updated_at = Column(DateTime, default=_utcnow, onupdate=_utcnow)
|
updated_at = Column(DateTime, default=_utcnow, onupdate=_utcnow)
|
||||||
|
|
||||||
@@ -346,6 +347,7 @@ class Glossary(Base):
|
|||||||
"id": self.id,
|
"id": self.id,
|
||||||
"user_id": self.user_id,
|
"user_id": self.user_id,
|
||||||
"name": self.name,
|
"name": self.name,
|
||||||
|
"source_language": self.source_language,
|
||||||
"terms": [term.to_dict() for term in self.terms] if self.terms else [],
|
"terms": [term.to_dict() for term in self.terms] if self.terms else [],
|
||||||
"created_at": self.created_at.isoformat() if self.created_at else None,
|
"created_at": self.created_at.isoformat() if self.created_at else None,
|
||||||
"updated_at": self.updated_at.isoformat() if self.updated_at else None,
|
"updated_at": self.updated_at.isoformat() if self.updated_at else None,
|
||||||
@@ -365,6 +367,7 @@ class GlossaryTerm(Base):
|
|||||||
)
|
)
|
||||||
source = Column(String(500), nullable=False)
|
source = Column(String(500), nullable=False)
|
||||||
target = Column(String(500), nullable=False)
|
target = Column(String(500), nullable=False)
|
||||||
|
translations = Column(JSON, nullable=True, default=dict)
|
||||||
created_at = Column(DateTime, default=_utcnow)
|
created_at = Column(DateTime, default=_utcnow)
|
||||||
|
|
||||||
# Relationship
|
# Relationship
|
||||||
@@ -378,6 +381,7 @@ class GlossaryTerm(Base):
|
|||||||
"id": self.id,
|
"id": self.id,
|
||||||
"source": self.source,
|
"source": self.source,
|
||||||
"target": self.target,
|
"target": self.target,
|
||||||
|
"translations": self.translations or {},
|
||||||
"created_at": self.created_at.isoformat() if self.created_at else None,
|
"created_at": self.created_at.isoformat() if self.created_at else None,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -71,6 +71,7 @@ export function EditGlossaryDialog({
|
|||||||
id: `temp-${i}`,
|
id: `temp-${i}`,
|
||||||
source: t.source,
|
source: t.source,
|
||||||
target: t.target,
|
target: t.target,
|
||||||
|
translations: t.translations || {},
|
||||||
created_at: null,
|
created_at: null,
|
||||||
})),
|
})),
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -2,12 +2,14 @@ export interface GlossaryTerm {
|
|||||||
id: string;
|
id: string;
|
||||||
source: string;
|
source: string;
|
||||||
target: string;
|
target: string;
|
||||||
|
translations: Record<string, string>;
|
||||||
created_at: string | null;
|
created_at: string | null;
|
||||||
}
|
}
|
||||||
|
|
||||||
export interface Glossary {
|
export interface Glossary {
|
||||||
id: string;
|
id: string;
|
||||||
name: string;
|
name: string;
|
||||||
|
source_language: string;
|
||||||
terms: GlossaryTerm[];
|
terms: GlossaryTerm[];
|
||||||
created_at: string;
|
created_at: string;
|
||||||
updated_at: string;
|
updated_at: string;
|
||||||
@@ -16,6 +18,7 @@ export interface Glossary {
|
|||||||
/** Summary of a glossary as returned by the list endpoint (no term bodies). */
export interface GlossaryListItem {
  id: string;
  name: string;
  /** Source language code of the glossary; the backend falls back to "fr" when unset. */
  source_language: string;
  /** Number of terms in the glossary. */
  terms_count: number;
  created_at: string;
}
|
||||||
@@ -48,6 +51,7 @@ export interface GlossaryUpdateResponse {
|
|||||||
/** Payload for one glossary term when creating or updating a glossary. */
export interface GlossaryTermInput {
  /** Term in the glossary's source language. */
  source: string;
  /** Default target translation (kept for backward compatibility). */
  target: string;
  /**
   * Optional per-language translations keyed by language code,
   * e.g. { "en": "coil", "de": "Spule" }. The backend stores {} when omitted.
   */
  translations?: Record<string, string>;
}
|
||||||
|
|
||||||
export interface GlossaryTermInputWithId extends GlossaryTermInput {
|
export interface GlossaryTermInputWithId extends GlossaryTermInput {
|
||||||
@@ -56,16 +60,34 @@ export interface GlossaryTermInputWithId extends GlossaryTermInput {
|
|||||||
|
|
||||||
/** Request body for creating a glossary. */
export interface GlossaryCreateInput {
  name: string;
  /** Source language code; the backend schema defaults it to "fr" when omitted. */
  source_language?: string;
  /** Initial terms; the backend schema defaults to an empty list when omitted. */
  terms?: GlossaryTermInput[];
}
|
||||||
|
|
||||||
/** Request body for updating a glossary; every field is optional. */
export interface GlossaryUpdateInput {
  name?: string;
  /** New source language code; left unchanged by the backend when omitted. */
  source_language?: string;
  /** When provided, the backend deletes the existing terms and stores these instead. */
  terms?: GlossaryTermInput[];
}
|
||||||
|
|
||||||
export const MAX_TERMS_PER_GLOSSARY = 500;
|
export const MAX_TERMS_PER_GLOSSARY = 500;
|
||||||
|
|
||||||
|
/**
 * Languages offered by the multilingual glossary feature (13 entries).
 *
 * - code: language code, in the same form as the keys of
 *   GlossaryTerm.translations (e.g. 'en', 'de').
 * - label: the language's name written in that language.
 * - flag: emoji flag shown next to the label. Note that 'pt' uses the
 *   Brazilian flag and 'en' the British one.
 */
export const SUPPORTED_LANGUAGES: { code: string; label: string; flag: string }[] = [
  { code: 'en', label: 'English', flag: '🇬🇧' },
  { code: 'fr', label: 'Français', flag: '🇫🇷' },
  { code: 'es', label: 'Español', flag: '🇪🇸' },
  { code: 'de', label: 'Deutsch', flag: '🇩🇪' },
  { code: 'pt', label: 'Português', flag: '🇧🇷' },
  { code: 'it', label: 'Italiano', flag: '🇮🇹' },
  { code: 'nl', label: 'Nederlands', flag: '🇳🇱' },
  { code: 'ru', label: 'Русский', flag: '🇷🇺' },
  { code: 'ja', label: '日本語', flag: '🇯🇵' },
  { code: 'ko', label: '한국어', flag: '🇰🇷' },
  { code: 'zh', label: '中文', flag: '🇨🇳' },
  { code: 'ar', label: 'العربية', flag: '🇸🇦' },
  { code: 'fa', label: 'فارسی', flag: '🇮🇷' },
];
|
||||||
|
|
||||||
// Generate unique IDs for React keys
|
// Generate unique IDs for React keys
|
||||||
let idCounter = 0;
|
let idCounter = 0;
|
||||||
export function generateTermId(): string {
|
export function generateTermId(): string {
|
||||||
|
|||||||
@@ -46,6 +46,7 @@ def _format_term(term: GlossaryTerm) -> dict:
|
|||||||
"id": term.id,
|
"id": term.id,
|
||||||
"source": term.source,
|
"source": term.source,
|
||||||
"target": term.target,
|
"target": term.target,
|
||||||
|
"translations": term.translations or {},
|
||||||
"created_at": term.created_at.isoformat() if term.created_at else None,
|
"created_at": term.created_at.isoformat() if term.created_at else None,
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -55,6 +56,7 @@ def _format_glossary(glossary: Glossary) -> dict:
|
|||||||
return {
|
return {
|
||||||
"id": glossary.id,
|
"id": glossary.id,
|
||||||
"name": glossary.name,
|
"name": glossary.name,
|
||||||
|
"source_language": glossary.source_language,
|
||||||
"terms": [_format_term(t) for t in glossary.terms] if glossary.terms else [],
|
"terms": [_format_term(t) for t in glossary.terms] if glossary.terms else [],
|
||||||
"created_at": glossary.created_at.isoformat() if glossary.created_at else None,
|
"created_at": glossary.created_at.isoformat() if glossary.created_at else None,
|
||||||
"updated_at": glossary.updated_at.isoformat() if glossary.updated_at else None,
|
"updated_at": glossary.updated_at.isoformat() if glossary.updated_at else None,
|
||||||
@@ -103,6 +105,7 @@ async def create_glossary(
|
|||||||
glossary = Glossary(
|
glossary = Glossary(
|
||||||
user_id=user.id,
|
user_id=user.id,
|
||||||
name=body.name,
|
name=body.name,
|
||||||
|
source_language=body.source_language,
|
||||||
created_at=datetime.now(timezone.utc),
|
created_at=datetime.now(timezone.utc),
|
||||||
updated_at=datetime.now(timezone.utc),
|
updated_at=datetime.now(timezone.utc),
|
||||||
)
|
)
|
||||||
@@ -112,6 +115,7 @@ async def create_glossary(
|
|||||||
glossary=glossary,
|
glossary=glossary,
|
||||||
source=term_data.source,
|
source=term_data.source,
|
||||||
target=term_data.target,
|
target=term_data.target,
|
||||||
|
translations=term_data.translations or {},
|
||||||
created_at=datetime.now(timezone.utc),
|
created_at=datetime.now(timezone.utc),
|
||||||
)
|
)
|
||||||
session.add(term)
|
session.add(term)
|
||||||
@@ -180,6 +184,7 @@ async def list_glossaries(
|
|||||||
GlossaryListItem(
|
GlossaryListItem(
|
||||||
id=g.id,
|
id=g.id,
|
||||||
name=g.name,
|
name=g.name,
|
||||||
|
source_language=g.source_language or "fr",
|
||||||
terms_count=len(g.terms) if g.terms else 0,
|
terms_count=len(g.terms) if g.terms else 0,
|
||||||
created_at=g.created_at,
|
created_at=g.created_at,
|
||||||
)
|
)
|
||||||
@@ -331,6 +336,9 @@ async def update_glossary(
|
|||||||
if body.name is not None:
|
if body.name is not None:
|
||||||
glossary.name = body.name
|
glossary.name = body.name
|
||||||
|
|
||||||
|
if body.source_language is not None:
|
||||||
|
glossary.source_language = body.source_language
|
||||||
|
|
||||||
if body.terms is not None:
|
if body.terms is not None:
|
||||||
# Delete existing terms
|
# Delete existing terms
|
||||||
session.query(GlossaryTerm).filter(
|
session.query(GlossaryTerm).filter(
|
||||||
@@ -343,6 +351,7 @@ async def update_glossary(
|
|||||||
glossary_id=glossary.id,
|
glossary_id=glossary.id,
|
||||||
source=term_data.source,
|
source=term_data.source,
|
||||||
target=term_data.target,
|
target=term_data.target,
|
||||||
|
translations=term_data.translations or {},
|
||||||
created_at=datetime.now(timezone.utc),
|
created_at=datetime.now(timezone.utc),
|
||||||
)
|
)
|
||||||
session.add(term)
|
session.add(term)
|
||||||
|
|||||||
@@ -915,10 +915,13 @@ async def _run_translation_job(
|
|||||||
|
|
||||||
# Story 3.10: Retrieve and format glossary terms for LLM prompt
|
# Story 3.10: Retrieve and format glossary terms for LLM prompt
|
||||||
glossary_terms = None
|
glossary_terms = None
|
||||||
|
glossary_source_lang = "fr"
|
||||||
if glossary_id and user_id:
|
if glossary_id and user_id:
|
||||||
try:
|
try:
|
||||||
glossary_terms = get_glossary_terms(glossary_id, user_id)
|
glossary_data = get_glossary_terms(glossary_id, user_id)
|
||||||
logger.info(f"Job {job_id}: Loaded {len(glossary_terms)} glossary terms")
|
glossary_terms = glossary_data["terms"]
|
||||||
|
glossary_source_lang = glossary_data.get("source_language", "fr")
|
||||||
|
logger.info(f"Job {job_id}: Loaded {len(glossary_terms)} glossary terms (source: {glossary_source_lang})")
|
||||||
except GlossaryNotFoundError as e:
|
except GlossaryNotFoundError as e:
|
||||||
tracker.set_error(str(e))
|
tracker.set_error(str(e))
|
||||||
logger.error(f"Job {job_id}: Glossary error - {e}")
|
logger.error(f"Job {job_id}: Glossary error - {e}")
|
||||||
@@ -940,7 +943,10 @@ async def _run_translation_job(
|
|||||||
effective_prompt = custom_prompt
|
effective_prompt = custom_prompt
|
||||||
|
|
||||||
# Build the full prompt combining effective prompt and glossary
|
# Build the full prompt combining effective prompt and glossary
|
||||||
full_prompt = build_full_prompt(effective_prompt, glossary_terms)
|
full_prompt = build_full_prompt(
|
||||||
|
effective_prompt, glossary_terms,
|
||||||
|
source_lang=glossary_source_lang, target_lang=target_lang,
|
||||||
|
)
|
||||||
|
|
||||||
translation_provider = None
|
translation_provider = None
|
||||||
_p = provider.lower()
|
_p = provider.lower()
|
||||||
|
|||||||
@@ -17,6 +17,9 @@ class GlossaryTermCreate(BaseModel):
|
|||||||
target: str = Field(
|
target: str = Field(
|
||||||
..., min_length=1, max_length=500, description="Traduction cible"
|
..., min_length=1, max_length=500, description="Traduction cible"
|
||||||
)
|
)
|
||||||
|
translations: Optional[dict[str, str]] = Field(
|
||||||
|
None, description="Traductions multilingues: {\"en\": \"coil\", \"de\": \"Spule\", ...}"
|
||||||
|
)
|
||||||
|
|
||||||
@field_validator("source", "target")
|
@field_validator("source", "target")
|
||||||
@classmethod
|
@classmethod
|
||||||
@@ -30,6 +33,7 @@ class GlossaryTermResponse(BaseModel):
|
|||||||
id: str
|
id: str
|
||||||
source: str
|
source: str
|
||||||
target: str
|
target: str
|
||||||
|
translations: dict[str, str] = {}
|
||||||
created_at: Optional[datetime] = None
|
created_at: Optional[datetime] = None
|
||||||
|
|
||||||
model_config = {"from_attributes": True}
|
model_config = {"from_attributes": True}
|
||||||
@@ -39,6 +43,9 @@ class GlossaryCreate(BaseModel):
|
|||||||
"""Schema for creating a glossary."""
|
"""Schema for creating a glossary."""
|
||||||
|
|
||||||
name: str = Field(..., min_length=1, max_length=255, description="Nom du glossaire")
|
name: str = Field(..., min_length=1, max_length=255, description="Nom du glossaire")
|
||||||
|
source_language: str = Field(
|
||||||
|
default="fr", max_length=10, description="Langue source (ISO code)"
|
||||||
|
)
|
||||||
terms: list[GlossaryTermCreate] = Field(
|
terms: list[GlossaryTermCreate] = Field(
|
||||||
default_factory=list, description="Liste des termes"
|
default_factory=list, description="Liste des termes"
|
||||||
)
|
)
|
||||||
@@ -53,6 +60,7 @@ class GlossaryUpdate(BaseModel):
|
|||||||
"""Schema for updating a glossary (all fields optional)."""
|
"""Schema for updating a glossary (all fields optional)."""
|
||||||
|
|
||||||
name: Optional[str] = Field(None, min_length=1, max_length=255)
|
name: Optional[str] = Field(None, min_length=1, max_length=255)
|
||||||
|
source_language: Optional[str] = Field(None, max_length=10)
|
||||||
terms: Optional[list[GlossaryTermCreate]] = Field(None)
|
terms: Optional[list[GlossaryTermCreate]] = Field(None)
|
||||||
|
|
||||||
@field_validator("name")
|
@field_validator("name")
|
||||||
@@ -66,6 +74,7 @@ class GlossaryResponse(BaseModel):
|
|||||||
|
|
||||||
id: str
|
id: str
|
||||||
name: str
|
name: str
|
||||||
|
source_language: str = "fr"
|
||||||
terms: list[GlossaryTermResponse] = []
|
terms: list[GlossaryTermResponse] = []
|
||||||
created_at: Optional[datetime] = None
|
created_at: Optional[datetime] = None
|
||||||
updated_at: Optional[datetime] = None
|
updated_at: Optional[datetime] = None
|
||||||
@@ -78,6 +87,7 @@ class GlossaryListItem(BaseModel):
|
|||||||
|
|
||||||
id: str
|
id: str
|
||||||
name: str
|
name: str
|
||||||
|
source_language: str = "fr"
|
||||||
terms_count: int = Field(
|
terms_count: int = Field(
|
||||||
default=0, description="Nombre de termes dans le glossaire"
|
default=0, description="Nombre de termes dans le glossaire"
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -15,17 +15,17 @@ from utils.exceptions import GlossaryNotFoundError
|
|||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
def get_glossary_terms(glossary_id: str, user_id: str) -> List[Dict[str, str]]:
|
def get_glossary_terms(glossary_id: str, user_id: str) -> Dict[str, Any]:
|
||||||
"""
|
"""
|
||||||
Retrieve glossary terms for a specific glossary owned by a user.
|
Retrieve glossary terms and metadata for a specific glossary owned by a user.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
glossary_id: UUID of the glossary
|
glossary_id: UUID of the glossary
|
||||||
user_id: UUID of the user (must own the glossary)
|
user_id: UUID of the user (must own the glossary)
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
List of dictionaries with 'source' and 'target' keys
|
Dict with 'source_language' and 'terms' (list of dicts with source, target, translations)
|
||||||
|
|
||||||
Raises:
|
Raises:
|
||||||
GlossaryNotFoundError: If glossary doesn't exist or doesn't belong to user
|
GlossaryNotFoundError: If glossary doesn't exist or doesn't belong to user
|
||||||
"""
|
"""
|
||||||
@@ -36,28 +36,33 @@ def get_glossary_terms(glossary_id: str, user_id: str) -> List[Dict[str, str]]:
|
|||||||
.filter(Glossary.id == glossary_id, Glossary.user_id == user_id)
|
.filter(Glossary.id == glossary_id, Glossary.user_id == user_id)
|
||||||
.first()
|
.first()
|
||||||
)
|
)
|
||||||
|
|
||||||
if not glossary:
|
if not glossary:
|
||||||
raise GlossaryNotFoundError(
|
raise GlossaryNotFoundError(
|
||||||
message="Glossaire introuvable ou vous n'avez pas accès à cette ressource.",
|
message="Glossaire introuvable ou vous n'avez pas accès à cette ressource.",
|
||||||
details={"glossary_id": glossary_id}
|
details={"glossary_id": glossary_id}
|
||||||
)
|
)
|
||||||
|
|
||||||
# Get all terms for this glossary
|
|
||||||
terms = (
|
terms = (
|
||||||
session.query(GlossaryTerm)
|
session.query(GlossaryTerm)
|
||||||
.filter(GlossaryTerm.glossary_id == glossary_id)
|
.filter(GlossaryTerm.glossary_id == glossary_id)
|
||||||
.all()
|
.all()
|
||||||
)
|
)
|
||||||
|
|
||||||
# Format as list of dicts
|
result = [{
|
||||||
result = [{"source": term.source, "target": term.target} for term in terms]
|
"source": term.source,
|
||||||
|
"target": term.target,
|
||||||
|
"translations": term.translations or {}
|
||||||
|
} for term in terms]
|
||||||
|
|
||||||
logger.info(
|
logger.info(
|
||||||
f"Retrieved {len(result)} terms from glossary {glossary_id} for user {user_id}"
|
f"Retrieved {len(result)} terms from glossary {glossary_id} for user {user_id}"
|
||||||
)
|
)
|
||||||
|
|
||||||
return result
|
return {
|
||||||
|
"source_language": glossary.source_language or "fr",
|
||||||
|
"terms": result,
|
||||||
|
}
|
||||||
|
|
||||||
except GlossaryNotFoundError:
|
except GlossaryNotFoundError:
|
||||||
raise
|
raise
|
||||||
@@ -112,72 +117,101 @@ def validate_glossary_access(glossary_id: str, user_id: str) -> bool:
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def format_glossary_for_prompt(
    terms: List[Dict[str, Any]],
    source_lang: str = "fr",
    target_lang: str = "en",
) -> str:
    """
    Format glossary terms for injection into an LLM system prompt.

    For each term, the translation is chosen in this order:

    1. ``translations[target_lang]`` when present and non-empty: emitted as
       an exact translation.
    2. The default ``target`` field when the target language is English
       ("en", "EN", "en-US", "en_GB", ...): emitted as an exact translation,
       because the default target *is* the English rendering.
    3. The default ``target`` field for any other target language: emitted
       as an English reference that the LLM is asked to adapt, and a closing
       NOTE is appended to the prompt.

    Terms with an empty source, or with neither a specific nor a default
    translation, are skipped. Missing, null or non-string values are treated
    as empty instead of raising.

    Args:
        terms: List of dicts with 'source', 'target', and optional 'translations'
        source_lang: ISO code of the source language
        target_lang: ISO code of the target language

    Returns:
        Formatted string for LLM prompt, or "" when no term can be emitted
    """
    if not terms:
        return ""

    def _text(value: Any) -> str:
        # JSON columns may hold null or non-string values; treat them as missing.
        return value.strip() if isinstance(value, str) else ""

    def _quote(value: str) -> str:
        # Escape single quotes so the quoted term stays unambiguous in the prompt.
        return value.replace("'", "\\'")

    # Region-qualified or differently-cased English codes still mean the
    # default target is already the exact translation.
    lang_code = _text(target_lang).lower().replace("_", "-")
    target_is_english = lang_code.split("-", 1)[0] == "en"

    # Longest source first to avoid substring conflicts,
    # e.g. "machine learning" should match before "machine".
    sorted_terms = sorted(
        terms, key=lambda t: len(_text(t.get("source"))), reverse=True
    )

    entries = []
    has_fallback = False
    for term in sorted_terms:
        source = _text(term.get("source"))
        if not source:
            continue

        translations = term.get("translations")
        if not isinstance(translations, dict):
            translations = {}
        specific = _text(translations.get(target_lang))
        default_target = _text(term.get("target"))

        if specific:
            entries.append(f"- '{_quote(source)}' → '{_quote(specific)}'")
        elif default_target and target_is_english:
            # Legacy data: the default target is the English translation itself.
            entries.append(f"- '{_quote(source)}' → '{_quote(default_target)}'")
        elif default_target:
            entries.append(
                f"- '{_quote(source)}' → '{_quote(default_target)}' "
                f"(EN reference, adapt to {target_lang})"
            )
            has_fallback = True
        # If neither specific nor default, skip the term

    if not entries:
        return ""

    lines = [
        f"TERMINOLOGY GLOSSARY (translate from {source_lang} to {target_lang}):",
        "",
        *entries,
        "",
        "IMPORTANT: Always use these translations when the terms appear in the text.",
    ]

    if has_fallback:
        lines.append(
            "NOTE: Some entries show an English reference — translate to the correct "
            f"{target_lang} equivalent while preserving the intended meaning."
        )

    return "\n".join(lines)
|
||||||
|
|
||||||
|
|
||||||
def build_full_prompt(
    custom_prompt: Optional[str],
    glossary_terms: Optional[List[Dict[str, str]]],
    source_lang: str = "fr",
    target_lang: str = "en",
) -> str:
    """
    Assemble the system prompt from the user's prompt and the glossary section.

    The custom prompt (when non-empty) comes first, followed by the formatted
    glossary (when it renders to a non-empty string). The two sections are
    separated by a blank line.

    Args:
        custom_prompt: Optional custom system prompt from user
        glossary_terms: Optional list of glossary terms
        source_lang: ISO code of the source language
        target_lang: ISO code of the target language

    Returns:
        Combined prompt string, or "" when neither section is present
    """
    sections = [custom_prompt] if custom_prompt else []

    if glossary_terms:
        rendered_glossary = format_glossary_for_prompt(
            glossary_terms, source_lang, target_lang
        )
        if rendered_glossary:
            sections.append(rendered_glossary)

    # Joining an empty list already yields "", so no separate empty check is needed.
    return "\n\n".join(sections)
|
||||||
Reference in New Issue
Block a user