feat: revue de code, doc CODE_REVIEW, forfaits 2026, traduction LLM, providers avec modèle

Made-with: Cursor
This commit is contained in:
Sepehr Ramezani
2026-03-07 11:42:58 +01:00
parent 3d37ce4582
commit 473b3e26c7
181 changed files with 30617 additions and 7170 deletions

View File

@@ -0,0 +1,183 @@
"""
Glossary Service for Translation
Story 3.10: Glossaires - Application lors Traduction LLM
Provides functions to retrieve glossary terms and format them for LLM prompts.
"""
import logging
from typing import List, Dict, Any, Optional
from database.connection import get_sync_session
from database.models import Glossary, GlossaryTerm
from utils.exceptions import GlossaryNotFoundError
logger = logging.getLogger(__name__)
def get_glossary_terms(glossary_id: str, user_id: str) -> List[Dict[str, str]]:
    """
    Retrieve glossary terms for a specific glossary owned by a user.

    Args:
        glossary_id: UUID of the glossary
        user_id: UUID of the user (must own the glossary)

    Returns:
        List of dictionaries with 'source' and 'target' keys

    Raises:
        GlossaryNotFoundError: If no glossary matches both ``glossary_id``
            and ``user_id``, or if any other error occurs during the lookup
            (the underlying exception is chained as ``__cause__``).
    """
    try:
        with get_sync_session() as session:
            # Ownership check: the glossary must match both the id and the user
            glossary = (
                session.query(Glossary)
                .filter(Glossary.id == glossary_id, Glossary.user_id == user_id)
                .first()
            )
            if not glossary:
                raise GlossaryNotFoundError(
                    message="Glossaire introuvable ou vous n'avez pas accès à cette ressource.",
                    details={"glossary_id": glossary_id}
                )

            # Get all terms for this glossary
            terms = (
                session.query(GlossaryTerm)
                .filter(GlossaryTerm.glossary_id == glossary_id)
                .all()
            )

            # Format as list of dicts while the session is still open
            result = [{"source": term.source, "target": term.target} for term in terms]
            logger.info(
                f"Retrieved {len(result)} terms from glossary {glossary_id} for user {user_id}"
            )
            return result
    except GlossaryNotFoundError:
        # Already the domain error raised above: propagate unchanged
        raise
    except Exception as e:
        # logger.exception records the traceback; "from e" keeps the root
        # cause attached to the domain error handed back to callers.
        logger.exception(f"Error retrieving glossary {glossary_id}: {e}")
        raise GlossaryNotFoundError(
            message="Erreur lors de la récupération du glossaire.",
            details={"glossary_id": glossary_id, "error": str(e)}
        ) from e
def validate_glossary_access(glossary_id: str, user_id: str) -> bool:
    """
    Validate that a glossary exists and belongs to the user.

    This is a lightweight check that doesn't return the terms,
    useful for early validation before starting a translation job.

    Args:
        glossary_id: UUID of the glossary
        user_id: UUID of the user (must own the glossary)

    Returns:
        True if glossary exists and belongs to user

    Raises:
        GlossaryNotFoundError: If no glossary matches both ``glossary_id``
            and ``user_id``, or if any other error occurs during the lookup
            (the underlying exception is chained as ``__cause__``).
    """
    try:
        with get_sync_session() as session:
            # Ownership check: the glossary must match both the id and the user
            glossary = (
                session.query(Glossary)
                .filter(Glossary.id == glossary_id, Glossary.user_id == user_id)
                .first()
            )
            if not glossary:
                raise GlossaryNotFoundError(
                    message="Glossaire introuvable ou vous n'avez pas accès à cette ressource.",
                    details={"glossary_id": glossary_id}
                )
            return True
    except GlossaryNotFoundError:
        # Already the domain error raised above: propagate unchanged
        raise
    except Exception as e:
        # logger.exception records the traceback; "from e" keeps the root
        # cause attached to the domain error handed back to callers.
        logger.exception(f"Error validating glossary access {glossary_id}: {e}")
        raise GlossaryNotFoundError(
            message="Erreur lors de la validation du glossaire.",
            details={"glossary_id": glossary_id, "error": str(e)}
        ) from e
def format_glossary_for_prompt(terms: List[Dict[str, str]]) -> str:
    """
    Format glossary terms for injection into an LLM system prompt.

    The format is designed to be clear and unambiguous for LLMs:
    - Clear header explaining the purpose
    - One ``- 'source' → 'target'`` line per term
    - Explicit instruction to use these translations

    Entries whose source or target is missing, None, or blank after
    trimming are skipped.

    Args:
        terms: List of dictionaries with 'source' and 'target' keys

    Returns:
        Formatted string for LLM prompt, or an empty string when ``terms``
        is empty or contains no usable source/target pair.
    """
    if not terms:
        return ""

    # Normalise first: a missing key or a None value becomes "", and
    # surrounding whitespace is trimmed before the emptiness check.
    pairs = []
    for term in terms:
        source = (term.get("source") or "").strip()
        target = (term.get("target") or "").strip()
        if source and target:
            pairs.append((source, target))

    # Nothing usable: do not emit a header/footer around an empty glossary
    if not pairs:
        return ""

    # Sort terms by source length (longest first) to avoid substring conflicts
    # e.g., "machine learning" should be listed before "machine"
    pairs.sort(key=lambda pair: len(pair[0]), reverse=True)

    lines = [
        "TERMINOLOGY GLOSSARY (use these exact translations):",
        ""
    ]
    for source, target in pairs:
        # Escape single quotes so they cannot be confused with the delimiters
        source_escaped = source.replace("'", "\\'")
        target_escaped = target.replace("'", "\\'")
        # Explicit arrow separator between the quoted source and target
        lines.append(f"- '{source_escaped}' → '{target_escaped}'")

    lines.extend([
        "",
        "IMPORTANT: Always use these translations when the terms appear in the text."
    ])
    return "\n".join(lines)
def build_full_prompt(
    custom_prompt: Optional[str],
    glossary_terms: Optional[List[Dict[str, str]]]
) -> str:
    """
    Assemble the final prompt from the user's custom prompt and the glossary.

    The custom prompt (if any) comes first, followed by the formatted
    glossary section (if any), separated by a blank line.

    Args:
        custom_prompt: Optional custom system prompt from user
        glossary_terms: Optional list of glossary terms

    Returns:
        Combined prompt string; empty when neither part contributes text
    """
    # Only invoke the formatter when there are terms to format
    glossary_section = (
        format_glossary_for_prompt(glossary_terms) if glossary_terms else ""
    )
    # Drop empty/None sections, keeping custom prompt before glossary
    sections = [
        section for section in (custom_prompt, glossary_section) if section
    ]
    return "\n\n".join(sections)