Implement chatbot response handling and improve error logging

2025-03-08 00:23:14 +01:00 · 2025-03-08 00:23:14 +01:00 · d4518a89dd
commit d4518a89dd
parent e9a273d844
1 changed files with 269 additions and 61 deletions
--- a/gradio_chatbot.py
+++ b/gradio_chatbot.py
@ -44,6 +44,19 @@ default_model = "llama3.2"
 # Liste des modèles disponibles
 AVAILABLE_MODELS = ["llama3.1", "llama3.2","deepseek-r1:7b", "deepseek-r1:14b"]

+# Map each UI language label to the language name inserted into the LLM prompt
+LANGUAGE_MAPPING = {
+    "Français": "français",
+    "English": "English",
+    "Español": "español",
+    "Deutsch": "Deutsch",
+    "Italiano": "italiano",
+    "中文": "Chinese",
+    "日本語": "Japanese",
+    "العربية": "Arabic",
+    "فارسی": "Persian"  # Added Persian language
+}
+
 # Initialiser le chatbot RAG avec le modèle par défaut
 rag_bot = MultimodalRAGChatbot(
    qdrant_url=qdrant_url,
@ -77,8 +90,30 @@ def change_model(model_name):
        print(f"Erreur lors du changement de modèle: {e}")
        return f"❌ Erreur: {str(e)}"

+# Switch the chatbot to another Qdrant collection
+def change_collection(collection_name):
+    """Rebuild the global RAG chatbot against another Qdrant collection.
+
+    Args:
+        collection_name: Name of the Qdrant collection to use from now on.
+
+    Returns:
+        A status message for the UI: a "✅" confirmation on success, or a
+        "❌" message describing why the switch did not happen.
+    """
+    global rag_bot, qdrant_collection_name
+
+    # Reject empty or whitespace-only names before touching any state.
+    collection_name = (collection_name or "").strip()
+    if not collection_name:
+        return "❌ Erreur: le nom de la collection est vide"
+
+    try:
+        # Build the replacement first so a failure leaves both globals untouched.
+        new_bot = MultimodalRAGChatbot(
+            qdrant_url=qdrant_url,
+            qdrant_collection_name=collection_name,
+            ollama_model=rag_bot.llm.model,  # keep the model currently in use
+            embedding_model=embedding_model,
+            ollama_url=ollama_url
+        )
+    except Exception as e:
+        print(f"Erreur lors du changement de collection: {e}")
+        return f"❌ Erreur: {str(e)}"
+
+    # Commit both globals together, only once construction has succeeded.
+    rag_bot = new_bot
+    qdrant_collection_name = collection_name
+    print(f"Collection changée pour: {collection_name}")
+    return f"✅ Collection changée pour: {collection_name}"
+
 # Fonction de traitement des requêtes avec support du streaming dans Gradio
-def process_query(message, history, streaming, show_sources, max_images):
+def process_query(message, history, streaming, show_sources, max_images, language):
    global current_images, current_tables
    
    if not message.strip():
@ -104,6 +139,8 @@ def process_query(message, history, streaming, show_sources, max_images):
            prompt_template = ChatPromptTemplate.from_template("""
            Tu es un assistant documentaire spécialisé qui utilise toutes les informations disponibles dans le contexte fourni.

+            TRÈS IMPORTANT: Tu dois répondre EXCLUSIVEMENT en {language}. Ne réponds JAMAIS dans une autre langue.
+
            Instructions spécifiques:
            1. Pour chaque image mentionnée dans le contexte, inclue TOUJOURS dans ta réponse:
            - La légende/caption exacte de l'image
@ -120,10 +157,12 @@ def process_query(message, history, streaming, show_sources, max_images):
            - Reproduis-les fidèlement sans modification

            4. IMPORTANT: Ne pas inventer d'informations - si une donnée n'est pas explicitement fournie dans le contexte,
-            indique clairement "Cette information n'est pas disponible dans les documents fournis."
+            indique clairement que cette information n'est pas disponible dans les documents fournis.

            5. Cite précisément les sources pour chaque élément d'information (format: [Source, Page]).

+            6. CRUCIAL: Ta réponse doit être UNIQUEMENT et INTÉGRALEMENT en {language}, quelle que soit la langue de la question.
+
            Historique de conversation:
            {chat_history}

@ -133,13 +172,15 @@ def process_query(message, history, streaming, show_sources, max_images):
            Question: {question}

            Réponds de façon structurée et précise en intégrant activement les images, tableaux et équations disponibles dans le contexte.
+            Ta réponse doit être exclusivement en {language}.
            """)
            
            # 4. Formater les messages pour le LLM
            messages = prompt_template.format_messages(
                chat_history=history_text,
                context=context,
-                question=message
+                question=message,
+                language=LANGUAGE_MAPPING.get(language, "français")  # Use the mapped language value
            )
            
            # 5. Créer un handler de streaming personnalisé
@ -295,15 +336,65 @@ def display_tables():
        return None
    
    html = ""
-    for table in current_tables:
+    for idx, table in enumerate(current_tables):
+        # Convert raw table data to a proper HTML table
+        table_data = table['data']
+        table_html = ""
+        
+        # Try to convert the table data to a formatted HTML table
+        try:
+            # If it's a string representation, convert to DataFrame and then to HTML
+            if isinstance(table_data, str):
+                # Try to parse as markdown table or CSV
+                if '|' in table_data:
+                    # Clean up the table data - remove extra pipes and spaces
+                    rows = table_data.strip().split('\n')
+                    table_html = '<div class="table-container"><table>'
+                    
+                    for i, row in enumerate(rows):
+                        # Skip separator rows (---|---) in markdown tables
+                        if i == 1 and all(c in ':-|' for c in row):
+                            continue
+                            
+                        # Process each cell
+                        cells = row.split('|')
+                        
+                        # Remove empty cells from start/end (caused by leading/trailing |)
+                        if cells and cells[0].strip() == '':
+                            cells = cells[1:]
+                        if cells and cells[-1].strip() == '':
+                            cells = cells[:-1]
+                        
+                        # Create table row
+                        if cells:
+                            is_header = (i == 0)
+                            table_html += '<tr>'
+                            for cell in cells:
+                                cell_content = cell.strip()
+                                if is_header:
+                                    table_html += f'<th>{cell_content}</th>'
+                                else:
+                                    table_html += f'<td>{cell_content}</td>'
+                            table_html += '</tr>'
+                    
+                    table_html += '</table></div>'
+                else:
+                    # If not pipe-separated, wrap in pre for code formatting
+                    table_html = f'<pre>{table_data}</pre>'
+            else:
+                # For any other format, just use a pre tag
+                table_html = f'<pre>{table_data}</pre>'
+        except Exception as e:
+            # Fallback if conversion fails
+            print(f"Error formatting table {idx}: {e}")
+            table_html = f'<pre>{table_data}</pre>'
+        
+        # Create the table container with metadata - REMOVED description
        html += f"""
        <div style="margin-bottom: 20px; border: 1px solid #ddd; padding: 15px; border-radius: 8px;">
            <h3>{table['caption']}</h3>
            <p style="color:#666; font-size:0.9em;">Source: {table['source']}, Page: {table['page']}</p>
-            <p><strong>Description:</strong> {table['description']}</p>
-            <div style="background-color:#f5f5f5; padding:10px; border-radius:5px; overflow:auto;">
-                <pre>{table['data']}</pre>
-            </div>
+            {table_html}
        </div>
        """
    
@ -355,6 +446,25 @@ with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue")) as demo:
                )
                model_status = gr.Markdown(f"Modèle actuel: **{default_model}**")
                
+                # Sélecteur de langue
+                language_selector = gr.Dropdown(
+                    choices=["Français", "English", "Español", "Deutsch", "Italiano", "中文", "日本語", "العربية", "فارسی"],
+                    value="Français",
+                    label="Langue des réponses",
+                    info="Choisir la langue dans laquelle l'assistant répondra"
+                )
+                
+                # Sélecteur de collection Qdrant
+                collection_name_input = gr.Textbox(
+                    value=qdrant_collection_name,
+                    label="Collection Qdrant",
+                    info="Nom de la collection de documents à utiliser"
+                )
+                collection_status = gr.Markdown(f"Collection actuelle: **{qdrant_collection_name}**")
+                
+                # Apply collection button
+                apply_collection_btn = gr.Button("Appliquer la collection")
+                
                streaming = gr.Checkbox(
                    label="Mode streaming", 
                    value=True,
@ -390,16 +500,23 @@ with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue")) as demo:
        outputs=model_status
    )
    
+    # Connecter le changement de collection
+    apply_collection_btn.click(
+        fn=change_collection,
+        inputs=collection_name_input,
+        outputs=collection_status
+    )
+    
    # Configuration des actions
    msg.submit(
        process_query,
-        inputs=[msg, chat_interface, streaming, show_sources, max_images],
+        inputs=[msg, chat_interface, streaming, show_sources, max_images, language_selector],
        outputs=[chat_interface, source_info, image_gallery, tables_display]
    ).then(lambda: "", outputs=msg)
    
    submit_btn.click(
        process_query,
-        inputs=[msg, chat_interface, streaming, show_sources, max_images],
+        inputs=[msg, chat_interface, streaming, show_sources, max_images, language_selector],
        outputs=[chat_interface, source_info, image_gallery, tables_display]
    ).then(lambda: "", outputs=msg)
    
@ -415,53 +532,135 @@ with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue")) as demo:
    #chatbot {height: 600px; overflow-y: auto;}
    #sources_info {margin-top: 10px; color: #666;}
    
-    /* Style pour les équations */
+    /* Improved styles for equations */
    .katex { font-size: 1.1em !important; }
    .math-inline { background: #f8f9fa; padding: 2px 5px; border-radius: 4px; }
    .math-display { background: #f8f9fa; margin: 10px 0; padding: 10px; border-radius: 5px; overflow-x: auto; text-align: center; }
+    
+    /* Table styles */
+    table {
+        border-collapse: collapse;
+        width: 100%;
+        margin: 15px 0;
+        font-size: 0.9em;
+    }
+    table, th, td {
+        border: 1px solid #ddd;
+    }
+    th, td {
+        padding: 8px 12px;
+        text-align: left;
+    }
+    th {
+        background-color: #f2f2f2;
+    }
+    tr:nth-child(even) {
+        background-color: #f9f9f9;
+    }
+    .table-container {
+        overflow-x: auto;
+        margin-top: 10px;
+    }
    </style>
    
-    <!-- Chargement de KaTeX -->
+    <!-- Loading KaTeX -->
    <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/katex@0.16.8/dist/katex.min.css">
    <script src="https://cdn.jsdelivr.net/npm/katex@0.16.8/dist/katex.min.js"></script>
    <script src="https://cdn.jsdelivr.net/npm/katex@0.16.8/dist/contrib/auto-render.min.js"></script>
    
    <script>
-    // Fonction pour rendre les équations avec KaTeX
+    // Wrapper that runs KaTeX auto-render on a single element.
+    // It returns without doing anything when window.renderMathInElement is
+    // undefined. Otherwise it renders with dollar-sign and backslash-delimited
+    // math delimiters, non-throwing/non-strict options and two macros (R and N
+    // mapped to blackboard-bold). Any exception is logged to the console.
+    // NOTE(review): this is a top-level function declaration with the same name
+    // that KaTeX auto-render exposes on window, so the window lookup below may
+    // resolve to this wrapper itself rather than to KaTeX -- confirm, and if so
+    // rename the wrapper together with its callers.
+    // NOTE(review): the doubled backslashes assume the enclosing Python string
+    // literal is not raw -- confirm.
     function renderMathInElement(element) {
         if (!window.renderMathInElement) return;
         
-        window.renderMathInElement(element, {
-            delimiters: [
-                {left: '$$', right: '$$', display: true},
-                {left: '$', right: '$', display: false},
-                {left: '\\(', right: '\\)', display: false},
-                {left: '\\[', right: '\\]', display: true}
-            ],
-            throwOnError: false,
-            trust: true,
-            strict: false
-        });
+        try {
+            window.renderMathInElement(element, {
+                delimiters: [
+                    {left: '$$', right: '$$', display: true},
+                    {left: '$', right: '$', display: false},
+                    {left: '\\\\(', right: '\\\\)', display: false},
+                    {left: '\\\\[', right: '\\\\]', display: true}
+                ],
+                throwOnError: false,
+                trust: true,
+                strict: false,
+                macros: {
+                    "\\\\R": "\\\\mathbb{R}",
+                    "\\\\N": "\\\\mathbb{N}"
+                }
+            });
+        } catch (e) {
+            console.error("KaTeX rendering error:", e);
+        }
     }
    
-    // Fonction pour remplacer les underscores échappés qui posent problème
-    function fixUnderscores(element) {
-        const messages = element.querySelectorAll('.message');
-        messages.forEach(msg => {
-            const text = msg.innerHTML;
-            // Remplacer les patterns comme u_(i) par u_{i} pour une meilleure compatibilité LaTeX
-            const fixed = text.replace(/([a-zA-Z])_\(([^)]+)\)/g, '$1_{$2}');
-            
-            // Remplacer également les & qui peuvent causer des problèmes
-            const cleanAmpersand = fixed.replace(/&amp;/g, '');
-            
-            if (text !== cleanAmpersand) {
-                msg.innerHTML = cleanAmpersand;
+    // Prepare a fragment of message HTML for KaTeX rendering.
+    // Everything outside <pre> blocks goes through prepareLatexInText; the
+    // contents of <pre> blocks and the tags themselves are returned untouched.
+    // Empty or missing input is returned as-is.
+    function prepareTextForLatex(text) {
+        if (!text) return text;
+        
+        // Don't modify code blocks
+        if (text.indexOf('<pre>') !== -1) {
+            // The capturing group keeps the <pre> / </pre> delimiters in the
+            // split result, so join('') rebuilds the markup with its tags intact.
+            const parts = text.split(/(<pre>|<\/pre>)/);
+            for (let i = 0; i < parts.length; i++) {
+                // Layout is [text, <pre>, code, </pre>, text, ...]: only every
+                // fourth entry lies outside a code block.
+                if (i % 4 === 0) {
+                    parts[i] = prepareLatexInText(parts[i]);
+                }
             }
-        });
+            return parts.join('');
+        }
+        
+        return prepareLatexInText(text);
     }
    
-    // Observer les changements dans le chat
+    // Normalize one HTML/text fragment before KaTeX rendering and return it.
+    // Math spans are temporarily wrapped in <protect> markers, the spacing and
+    // subscript fix-ups run, then every marker is removed again.
+    // NOTE(review): the regex escapes below are doubled on the assumption that
+    // this script lives inside a non-raw Python string -- confirm.
+    function prepareLatexInText(text) {
+        // Mark display and inline math in a single pass. Display math is tried
+        // first, so its dollar signs are never re-matched as inline math and
+        // markers cannot end up nested.
+        text = text.replace(/(\\$\\$[^\\$]+\\$\\$|\\$[^\\$\\n]+\\$)/g, '<protect>$1</protect>');
+        
+        // Fix common LaTeX formatting issues outside protected regions
+        text = text.replace(/([^<]protect[^>]*)(\\$)([^\\s])/g, '$1$2 $3');  // Add space after $ if needed
+        text = text.replace(/([^\\s])(\\$)([^<]protect[^>]*)/g, '$1 $2$3');  // Add space before $ if needed
+        
+        // Handle subscripts: transform x_1 into x_{1} for better LaTeX compatibility
+        // NOTE(review): this pattern is applied to the whole fragment, so it
+        // also rewrites underscores outside math -- confirm that is intended.
+        text = text.replace(/([a-zA-Z])_([0-9a-zA-Z])/g, '$1_{$2}');
+        
+        // Remove each marker individually. A paired pattern built on "." cannot
+        // span line breaks and would leave stray markers around multi-line
+        // display math.
+        text = text.replace(/<\/?protect>/g, '');
+        
+        return text;
+    }
+    
+    // Prepare and render LaTeX inside one chat message element.
+    // Each matching descendant has its HTML normalized and is rendered on its
+    // own; the whole message is then rendered once more as a fallback.
+    // Failures are logged to the console and never thrown to the caller.
+    function processMessage(message) {
+        if (!message) {
+            return;
+        }
+        
+        try {
+            const targets = message.querySelectorAll('p, li, h1, h2, h3, h4, h5, span');
+            for (let k = 0; k < targets.length; k++) {
+                const node = targets[k];
+                const before = node.innerHTML;
+                const after = prepareTextForLatex(before);
+                
+                // Write back only when the preparation changed something
+                if (after !== before) {
+                    node.innerHTML = after;
+                }
+                
+                renderMathInElement(node);
+            }
+            
+            // Fallback pass over the full message
+            renderMathInElement(message);
+        } catch (e) {
+            console.error("Error processing message for LaTeX:", e);
+        }
+    }
+    
+    // Function to monitor for new messages
    function setupMathObserver() {
        const chatElement = document.getElementById('chatbot');
        if (!chatElement) {
@ -469,42 +668,51 @@ with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue")) as demo:
            return;
        }
        
+        // Process any existing messages
+        chatElement.querySelectorAll('.message').forEach(processMessage);
+        
+        // Set up observer for new content
        const observer = new MutationObserver((mutations) => {
-            mutations.forEach(mutation => {
-                if (mutation.type === 'childList' || mutation.type === 'subtree') {
-                    const messages = chatElement.querySelectorAll('.message');
-                    if (messages.length > 0) {
-                        // D'abord corriger les underscores problématiques
-                        fixUnderscores(chatElement);
-                        
-                        // Puis rendre les équations
-                        messages.forEach(msg => {
-                            renderMathInElement(msg);
-                        });
-                    }
+            for (const mutation of mutations) {
+                if (mutation.addedNodes.length > 0 || mutation.type === 'characterData') {
+                    chatElement.querySelectorAll('.message').forEach(processMessage);
+                    break;
                }
-            });
+            }
        });
        
        observer.observe(chatElement, { 
            childList: true, 
-            subtree: true,
-            characterData: true
+            subtree: true, 
+            characterData: true 
        });
        
-        // Rendre les équations déjà présentes
-        renderMathInElement(document);
+        console.log("LaTeX rendering observer set up successfully");
    }
    
-    // Initialisation lorsque la page est chargée
-    document.addEventListener('DOMContentLoaded', function() {
-        // Attendre que KaTeX soit chargé
+    // Start the math observer: immediately when window.renderMathInElement is
+    // defined, otherwise from the auto-render script's load event, or by
+    // retrying after 500 ms when that script tag cannot be found.
+    // NOTE(review): both the DOMContentLoaded and the load handlers call this,
+    // so setupMathObserver may run more than once -- confirm that is harmless.
+    function initializeRendering() {
         if (window.renderMathInElement) {
             setupMathObserver();
         } else {
-            // Attendre le chargement de KaTeX
-            document.querySelector('script[src*="auto-render.min.js"]').onload = setupMathObserver;
+            // If KaTeX isn't loaded yet, wait for it
+            const katexScript = document.querySelector('script[src*="auto-render.min.js"]');
+            if (katexScript) {
+                katexScript.onload = setupMathObserver;
+            } else {
+                // Last resort: try again later
+                setTimeout(initializeRendering, 500);
+            }
         }
+    }
+    
+    // Set up multiple trigger points to ensure it loads
+    document.addEventListener('DOMContentLoaded', function() {
+        setTimeout(initializeRendering, 800);
+    });
+    
+    window.addEventListener('load', function() {
+        setTimeout(initializeRendering, 1200);
    });
    </script>
    """)