feat: revue de code, doc CODE_REVIEW, forfaits 2026, traduction LLM, providers avec modèle

Made-with: Cursor
2026-03-07 11:42:58 +01:00
parent 3d37ce4582
commit 473b3e26c7
181 changed files with 30617 additions and 7170 deletions
--- a/services/glossary_service.py
+++ b/services/glossary_service.py
@@ -0,0 +1,183 @@
+"""
+Glossary Service for Translation
+Story 3.10: Glossaires - Application lors Traduction LLM
+
+Provides functions to retrieve glossary terms and format them for LLM prompts.
+"""
+
+import logging
+from typing import List, Dict, Any, Optional
+
+from database.connection import get_sync_session
+from database.models import Glossary, GlossaryTerm
+from utils.exceptions import GlossaryNotFoundError
+
+logger = logging.getLogger(__name__)
+
+
+def get_glossary_terms(glossary_id: str, user_id: str) -> List[Dict[str, str]]:
+    """
+    Retrieve the terms of a glossary owned by the given user.
+
+    The glossary is looked up by id and owner together, so a glossary that
+    belongs to another user is reported exactly like a missing one.
+
+    Args:
+        glossary_id: UUID of the glossary
+        user_id: UUID of the user (must own the glossary)
+
+    Returns:
+        List of dictionaries with 'source' and 'target' keys
+
+    Raises:
+        GlossaryNotFoundError: If glossary doesn't exist or doesn't belong to
+            user, or (chained to the original exception) if the lookup fails
+    """
+    try:
+        with get_sync_session() as session:
+            glossary = (
+                session.query(Glossary)
+                .filter(Glossary.id == glossary_id, Glossary.user_id == user_id)
+                .first()
+            )
+            if not glossary:
+                raise GlossaryNotFoundError(
+                    message="Glossaire introuvable ou vous n'avez pas accès à cette ressource.",
+                    details={"glossary_id": glossary_id}
+                )
+
+            terms = (
+                session.query(GlossaryTerm)
+                .filter(GlossaryTerm.glossary_id == glossary_id)
+                .all()
+            )
+            result = [{"source": term.source, "target": term.target} for term in terms]
+            logger.info(
+                "Retrieved %d terms from glossary %s for user %s",
+                len(result), glossary_id, user_id,
+            )
+            return result
+
+    except GlossaryNotFoundError:
+        raise
+    except Exception as e:
+        # logger.exception keeps the traceback; `from e` keeps the root cause
+        logger.exception("Error retrieving glossary %s: %s", glossary_id, e)
+        raise GlossaryNotFoundError(
+            message="Erreur lors de la récupération du glossaire.",
+            details={"glossary_id": glossary_id, "error": str(e)}
+        ) from e
+
+
+def validate_glossary_access(glossary_id: str, user_id: str) -> bool:
+    """
+    Validate that a glossary exists and belongs to the user.
+
+    This is a lightweight check that doesn't return the terms,
+    useful for early validation before starting a translation job.
+
+    Args:
+        glossary_id: UUID of the glossary
+        user_id: UUID of the user (must own the glossary)
+
+    Returns:
+        True if glossary exists and belongs to user; failures raise instead
+
+    Raises:
+        GlossaryNotFoundError: If glossary doesn't exist or doesn't belong to
+            user, or (chained to the original exception) if the lookup fails
+    """
+    try:
+        with get_sync_session() as session:
+            glossary = (
+                session.query(Glossary)
+                .filter(Glossary.id == glossary_id, Glossary.user_id == user_id)
+                .first()
+            )
+            if not glossary:
+                raise GlossaryNotFoundError(
+                    message="Glossaire introuvable ou vous n'avez pas accès à cette ressource.",
+                    details={"glossary_id": glossary_id}
+                )
+            return True
+
+    except GlossaryNotFoundError:
+        raise
+    except Exception as e:
+        # logger.exception keeps the traceback; `from e` keeps the root cause
+        logger.exception("Error validating glossary access %s: %s", glossary_id, e)
+        raise GlossaryNotFoundError(
+            message="Erreur lors de la validation du glossaire.",
+            details={"glossary_id": glossary_id, "error": str(e)}
+        ) from e
+
+
+def format_glossary_for_prompt(terms: List[Dict[str, str]]) -> str:
+    """
+    Format glossary terms for injection into an LLM system prompt.
+
+    The output is a header line, one "- 'source' → 'target'" line per usable
+    term (longest source first), and a closing instruction to apply them.
+    Terms whose source or target is missing, None or blank are skipped.
+
+    Args:
+        terms: List of dictionaries with 'source' and 'target' keys
+
+    Returns:
+        Formatted string for LLM prompt, or "" when no usable term remains
+    """
+    if not terms:
+        return ""
+
+    # Clean first: `or ""` tolerates None values, and blank entries are dropped
+    # here so an all-blank glossary cannot yield a header with no terms.
+    pairs = []
+    for term in terms:
+        source = (term.get("source") or "").strip()
+        target = (term.get("target") or "").strip()
+        if source and target:
+            pairs.append((source, target))
+    if not pairs:
+        return ""
+
+    # Longest source first (after stripping): "machine learning" before "machine"
+    pairs.sort(key=lambda pair: len(pair[0]), reverse=True)
+
+    lines = ["TERMINOLOGY GLOSSARY (use these exact translations):", ""]
+    for source, target in pairs:
+        # Escape single quotes so they cannot be confused with the delimiters
+        source_escaped = source.replace("'", "\\'")
+        target_escaped = target.replace("'", "\\'")
+        lines.append(f"- '{source_escaped}' → '{target_escaped}'")
+    lines.extend([
+        "",
+        "IMPORTANT: Always use these translations when the terms appear in the text."
+    ])
+    return "\n".join(lines)
+
+
+def build_full_prompt(
+    custom_prompt: Optional[str],
+    glossary_terms: Optional[List[Dict[str, str]]]
+) -> str:
+    """
+    Assemble the final prompt from the user's prompt and the glossary block.
+
+    Empty or missing pieces are left out; the remaining ones are separated
+    by one blank line, custom prompt first.
+
+    Args:
+        custom_prompt: Optional custom system prompt from user
+        glossary_terms: Optional list of glossary terms
+
+    Returns:
+        Combined prompt string ("" when neither piece contributes text)
+    """
+    # Only format the glossary when there are terms to format
+    glossary_section = ""
+    if glossary_terms:
+        glossary_section = format_glossary_for_prompt(glossary_terms)
+
+    # Keep the non-empty sections in order: custom prompt, then glossary
+    sections = [section for section in (custom_prompt, glossary_section) if section]
+    return "\n\n".join(sections)