Implement chatbot response handling and improve error logging

This commit is contained in:
sepehr 2025-03-08 00:23:14 +01:00
parent e9a273d844
commit d4518a89dd

View File

@ -44,6 +44,19 @@ default_model = "llama3.2"
# Liste des modèles disponibles
AVAILABLE_MODELS = ["llama3.1", "llama3.2","deepseek-r1:7b", "deepseek-r1:14b"]
# Maps each language label offered in the UI dropdown to the language name
# that is substituted into the LLM prompt.
LANGUAGE_MAPPING = {
    # Latin-script languages
    "Français": "français",
    "English": "English",
    "Español": "español",
    "Deutsch": "Deutsch",
    "Italiano": "italiano",
    # Other scripts are passed to the LLM under their English names
    "中文": "Chinese",
    "日本語": "Japanese",
    "العربية": "Arabic",
    "فارسی": "Persian",
}
# Initialiser le chatbot RAG avec le modèle par défaut
rag_bot = MultimodalRAGChatbot(
qdrant_url=qdrant_url,
@ -77,8 +90,30 @@ def change_model(model_name):
print(f"Erreur lors du changement de modèle: {e}")
return f"❌ Erreur: {str(e)}"
# Fonction pour changer de collection
def change_collection(collection_name):
    """Point the RAG chatbot at another Qdrant collection.

    A new ``MultimodalRAGChatbot`` is built for the requested collection,
    reusing the LLM model of the current bot. The module-level ``rag_bot``
    and ``qdrant_collection_name`` are replaced only after the new bot has
    been created, so a failed switch leaves the previous state untouched.

    Args:
        collection_name: Name of the Qdrant collection to use. Surrounding
            whitespace is ignored; an empty or missing name is rejected.

    Returns:
        A status message starting with "✅" on success or "❌" on failure.
    """
    global rag_bot, qdrant_collection_name

    # Reject blank input up front rather than building a bot for a
    # collection that has no name.
    cleaned_name = (collection_name or "").strip()
    if not cleaned_name:
        print("Erreur lors du changement de collection: nom de collection vide")
        return "❌ Erreur: le nom de la collection est vide"

    try:
        new_bot = MultimodalRAGChatbot(
            qdrant_url=qdrant_url,
            qdrant_collection_name=cleaned_name,
            ollama_model=rag_bot.llm.model,  # keep the currently selected model
            embedding_model=embedding_model,
            ollama_url=ollama_url,
        )
    except Exception as e:
        # UI boundary: report the failure to the user instead of crashing.
        print(f"Erreur lors du changement de collection: {e}")
        return f"❌ Erreur: {str(e)}"

    # Commit both globals together, only once construction has succeeded, so
    # the recorded collection name always matches the live bot.
    rag_bot = new_bot
    qdrant_collection_name = cleaned_name
    print(f"Collection changée pour: {cleaned_name}")
    return f"✅ Collection changée pour: {cleaned_name}"
# Fonction de traitement des requêtes avec support du streaming dans Gradio
def process_query(message, history, streaming, show_sources, max_images):
def process_query(message, history, streaming, show_sources, max_images, language):
global current_images, current_tables
if not message.strip():
@ -104,6 +139,8 @@ def process_query(message, history, streaming, show_sources, max_images):
prompt_template = ChatPromptTemplate.from_template("""
Tu es un assistant documentaire spécialisé qui utilise toutes les informations disponibles dans le contexte fourni.
TRÈS IMPORTANT: Tu dois répondre EXCLUSIVEMENT en {language}. Ne réponds JAMAIS dans une autre langue.
Instructions spécifiques:
1. Pour chaque image mentionnée dans le contexte, inclue TOUJOURS dans ta réponse:
- La légende/caption exacte de l'image
@ -120,10 +157,12 @@ def process_query(message, history, streaming, show_sources, max_images):
- Reproduis-les fidèlement sans modification
4. IMPORTANT: Ne pas inventer d'informations - si une donnée n'est pas explicitement fournie dans le contexte,
indique clairement "Cette information n'est pas disponible dans les documents fournis."
indique clairement que cette information n'est pas disponible dans les documents fournis.
5. Cite précisément les sources pour chaque élément d'information (format: [Source, Page]).
6. CRUCIAL: Ta réponse doit être UNIQUEMENT et INTÉGRALEMENT en {language}, quelle que soit la langue de la question.
Historique de conversation:
{chat_history}
@ -133,13 +172,15 @@ def process_query(message, history, streaming, show_sources, max_images):
Question: {question}
Réponds de façon structurée et précise en intégrant activement les images, tableaux et équations disponibles dans le contexte.
Ta réponse doit être exclusivement en {language}.
""")
# 4. Formater les messages pour le LLM
messages = prompt_template.format_messages(
chat_history=history_text,
context=context,
question=message
question=message,
language=LANGUAGE_MAPPING.get(language, "français") # Use the mapped language value
)
# 5. Créer un handler de streaming personnalisé
@ -295,15 +336,65 @@ def display_tables():
return None
html = ""
for table in current_tables:
for idx, table in enumerate(current_tables):
# Convert raw table data to a proper HTML table
table_data = table['data']
table_html = ""
# Try to convert the table data to a formatted HTML table
try:
# If it's a string representation, convert to DataFrame and then to HTML
if isinstance(table_data, str):
# Try to parse as markdown table or CSV
if '|' in table_data:
# Clean up the table data - remove extra pipes and spaces
rows = table_data.strip().split('\n')
table_html = '<div class="table-container"><table>'
for i, row in enumerate(rows):
# Skip separator rows (---|---) in markdown tables
if i == 1 and all(c in ':-|' for c in row):
continue
# Process each cell
cells = row.split('|')
# Remove empty cells from start/end (caused by leading/trailing |)
if cells and cells[0].strip() == '':
cells = cells[1:]
if cells and cells[-1].strip() == '':
cells = cells[:-1]
# Create table row
if cells:
is_header = (i == 0)
table_html += '<tr>'
for cell in cells:
cell_content = cell.strip()
if is_header:
table_html += f'<th>{cell_content}</th>'
else:
table_html += f'<td>{cell_content}</td>'
table_html += '</tr>'
table_html += '</table></div>'
else:
# If not pipe-separated, wrap in pre for code formatting
table_html = f'<pre>{table_data}</pre>'
else:
# For any other format, just use a pre tag
table_html = f'<pre>{table_data}</pre>'
except Exception as e:
# Fallback if conversion fails
print(f"Error formatting table {idx}: {e}")
table_html = f'<pre>{table_data}</pre>'
# Create the table container with metadata - REMOVED description
html += f"""
<div style="margin-bottom: 20px; border: 1px solid #ddd; padding: 15px; border-radius: 8px;">
<h3>{table['caption']}</h3>
<p style="color:#666; font-size:0.9em;">Source: {table['source']}, Page: {table['page']}</p>
<p><strong>Description:</strong> {table['description']}</p>
<div style="background-color:#f5f5f5; padding:10px; border-radius:5px; overflow:auto;">
<pre>{table['data']}</pre>
</div>
{table_html}
</div>
"""
@ -355,6 +446,25 @@ with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue")) as demo:
)
model_status = gr.Markdown(f"Modèle actuel: **{default_model}**")
# Language selector. The choices are derived from LANGUAGE_MAPPING so the
# dropdown can never offer a label that has no prompt-language mapping
# (an unmapped label would silently fall back to French in process_query).
language_selector = gr.Dropdown(
    choices=list(LANGUAGE_MAPPING),
    value="Français",
    label="Langue des réponses",
    info="Choisir la langue dans laquelle l'assistant répondra",
)
# Sélecteur de collection Qdrant
collection_name_input = gr.Textbox(
value=qdrant_collection_name,
label="Collection Qdrant",
info="Nom de la collection de documents à utiliser"
)
collection_status = gr.Markdown(f"Collection actuelle: **{qdrant_collection_name}**")
# Apply collection button
apply_collection_btn = gr.Button("Appliquer la collection")
streaming = gr.Checkbox(
label="Mode streaming",
value=True,
@ -390,16 +500,23 @@ with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue")) as demo:
outputs=model_status
)
# Connecter le changement de collection
apply_collection_btn.click(
fn=change_collection,
inputs=collection_name_input,
outputs=collection_status
)
# Configuration des actions
msg.submit(
process_query,
inputs=[msg, chat_interface, streaming, show_sources, max_images],
inputs=[msg, chat_interface, streaming, show_sources, max_images, language_selector],
outputs=[chat_interface, source_info, image_gallery, tables_display]
).then(lambda: "", outputs=msg)
submit_btn.click(
process_query,
inputs=[msg, chat_interface, streaming, show_sources, max_images],
inputs=[msg, chat_interface, streaming, show_sources, max_images, language_selector],
outputs=[chat_interface, source_info, image_gallery, tables_display]
).then(lambda: "", outputs=msg)
@ -415,53 +532,135 @@ with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue")) as demo:
#chatbot {height: 600px; overflow-y: auto;}
#sources_info {margin-top: 10px; color: #666;}
/* Style pour les équations */
/* Improved styles for equations */
.katex { font-size: 1.1em !important; }
.math-inline { background: #f8f9fa; padding: 2px 5px; border-radius: 4px; }
.math-display { background: #f8f9fa; margin: 10px 0; padding: 10px; border-radius: 5px; overflow-x: auto; text-align: center; }
/* Table styles */
table {
border-collapse: collapse;
width: 100%;
margin: 15px 0;
font-size: 0.9em;
}
table, th, td {
border: 1px solid #ddd;
}
th, td {
padding: 8px 12px;
text-align: left;
}
th {
background-color: #f2f2f2;
}
tr:nth-child(even) {
background-color: #f9f9f9;
}
.table-container {
overflow-x: auto;
margin-top: 10px;
}
</style>
<!-- Chargement de KaTeX -->
<!-- Loading KaTeX -->
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/katex@0.16.8/dist/katex.min.css">
<script src="https://cdn.jsdelivr.net/npm/katex@0.16.8/dist/katex.min.js"></script>
<script src="https://cdn.jsdelivr.net/npm/katex@0.16.8/dist/contrib/auto-render.min.js"></script>
<script>
// Fonction pour rendre les équations avec KaTeX
// Function to process math equations with KaTeX
function renderMathInElement(element) {
if (!window.renderMathInElement) return;
window.renderMathInElement(element, {
delimiters: [
{left: '$$', right: '$$', display: true},
{left: '$', right: '$', display: false},
{left: '\\(', right: '\\)', display: false},
{left: '\\[', right: '\\]', display: true}
],
throwOnError: false,
trust: true,
strict: false
});
try {
window.renderMathInElement(element, {
delimiters: [
{left: '$$', right: '$$', display: true},
{left: '$', right: '$', display: false},
{left: '\\\\(', right: '\\\\)', display: false},
{left: '\\\\[', right: '\\\\]', display: true}
],
throwOnError: false,
trust: true,
strict: false,
macros: {
"\\\\R": "\\\\mathbb{R}",
"\\\\N": "\\\\mathbb{N}"
}
});
} catch (e) {
console.error("KaTeX rendering error:", e);
}
}
// Fonction pour remplacer les underscores échappés qui posent problème
function fixUnderscores(element) {
const messages = element.querySelectorAll('.message');
messages.forEach(msg => {
const text = msg.innerHTML;
// Remplacer les patterns comme u_(i) par u_{i} pour une meilleure compatibilité LaTeX
const fixed = text.replace(/([a-zA-Z])_\(([^)]+)\)/g, '$1_{$2}');
// Remplacer également les & qui peuvent causer des problèmes
const cleanAmpersand = fixed.replace(/&amp;/g, '');
if (text !== cleanAmpersand) {
msg.innerHTML = cleanAmpersand;
// Function to fix and prepare text for LaTeX rendering
function prepareTextForLatex(text) {
if (!text) return text;
// Don't modify code blocks
if (text.indexOf('<pre>') !== -1) {
const parts = text.split(/<pre>|<\/pre>/);
for (let i = 0; i < parts.length; i++) {
// Only process odd-indexed parts (non-code)
if (i % 2 === 0) {
parts[i] = prepareLatexInText(parts[i]);
}
}
});
return parts.join('');
}
return prepareLatexInText(text);
}
// Observer les changements dans le chat
// Helper to process LaTeX in regular text.
// Takes a string (callers in this file pass an element's innerHTML) and returns
// a rewritten copy: math spans are wrapped in temporary <protect> tags, spacing
// around dollar signs is adjusted, single-character subscripts are braced, and
// the <protect> tags are removed again.
// NOTE(review): backslashes appear doubled throughout, presumably because this
// script is embedded in a non-raw Python string - confirm before editing a regex.
function prepareLatexInText(text) {
// Make sure dollar signs used for math have proper spacing
// First, protect existing well-formed math expressions
text = text.replace(/(\\$\\$[^\\$]+\\$\\$)/g, '<protect>$1</protect>'); // protect display math
// NOTE(review): this second pass can also match the inner "$...$" of a display
// span that the line above already wrapped, which nests <protect> tags; the
// non-greedy restore at the end may then leave stray tags behind - verify that
// display math survives a round trip.
text = text.replace(/(\\$[^\\$\\n]+\\$)/g, '<protect>$1</protect>'); // protect inline math
// Fix common LaTeX formatting issues outside protected regions
text = text.replace(/([^<]protect[^>]*)(\\$)([^\\s])/g, '$1$2 $3'); // Add space after $ if needed
text = text.replace(/([^\\s])(\\$)([^<]protect[^>]*)/g, '$1 $2$3'); // Add space before $ if needed
// Handle subscripts: transform x_1 into x_{1} for better LaTeX compatibility
// Only the first character after the underscore is braced (x_12 becomes x_{1}2).
// NOTE(review): this replace runs over the whole string, including the
// protected spans, HTML attributes and plain identifiers (snake_case becomes
// snake_{c}ase) - confirm that is intended.
text = text.replace(/([a-zA-Z])_([0-9a-zA-Z])/g, '$1_{$2}');
// Restore protected content
text = text.replace(/<protect>(.*?)<\/protect>/g, '$1');
return text;
}
// Enhanced message processor for KaTeX rendering.
// For one chat message element: rewrites the innerHTML of its text-bearing
// descendants through prepareTextForLatex, renders math in each of them, then
// renders math once more on the whole message. A missing message is ignored and
// any exception is logged to the console rather than propagated.
function processMessage(message) {
if (!message) return;
try {
// Get direct textual content when possible
// NOTE(review): the selector can match nested elements (e.g. a span inside a
// p). Rewriting a parent's innerHTML replaces its children, so later
// iterations may touch nodes that are no longer in the document - verify.
const elements = message.querySelectorAll('p, li, h1, h2, h3, h4, h5, span');
elements.forEach(el => {
const originalText = el.innerHTML;
const preparedText = prepareTextForLatex(originalText);
// Only update if changes were made
// NOTE(review): writing innerHTML (and the KaTeX rendering below) adds nodes
// under the chat element; the MutationObserver in setupMathObserver reacts to
// added nodes by reprocessing every message - confirm this cannot loop.
if (preparedText !== originalText) {
el.innerHTML = preparedText;
}
// Render equations in this element
renderMathInElement(el);
});
// Also try to render on the entire message as fallback
renderMathInElement(message);
} catch (e) {
console.error("Error processing message for LaTeX:", e);
}
}
// Function to monitor for new messages
function setupMathObserver() {
const chatElement = document.getElementById('chatbot');
if (!chatElement) {
@ -469,42 +668,51 @@ with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue")) as demo:
return;
}
// Process any existing messages
chatElement.querySelectorAll('.message').forEach(processMessage);
// Set up observer for new content
const observer = new MutationObserver((mutations) => {
mutations.forEach(mutation => {
if (mutation.type === 'childList' || mutation.type === 'subtree') {
const messages = chatElement.querySelectorAll('.message');
if (messages.length > 0) {
// D'abord corriger les underscores problématiques
fixUnderscores(chatElement);
// Puis rendre les équations
messages.forEach(msg => {
renderMathInElement(msg);
});
}
for (const mutation of mutations) {
if (mutation.addedNodes.length > 0 || mutation.type === 'characterData') {
chatElement.querySelectorAll('.message').forEach(processMessage);
break;
}
});
}
});
observer.observe(chatElement, {
childList: true,
subtree: true,
characterData: true
subtree: true,
characterData: true
});
// Rendre les équations déjà présentes
renderMathInElement(document);
console.log("LaTeX rendering observer set up successfully");
}
// Initialisation lorsque la page est chargée
document.addEventListener('DOMContentLoaded', function() {
// Attendre que KaTeX soit chargé
// Initialize once the document is fully loaded
function initializeRendering() {
if (window.renderMathInElement) {
setupMathObserver();
} else {
// Attendre le chargement de KaTeX
document.querySelector('script[src*="auto-render.min.js"]').onload = setupMathObserver;
// If KaTeX isn't loaded yet, wait for it
const katexScript = document.querySelector('script[src*="auto-render.min.js"]');
if (katexScript) {
katexScript.onload = setupMathObserver;
} else {
// Last resort: try again later
setTimeout(initializeRendering, 500);
}
}
}
// Set up multiple trigger points to ensure it loads
document.addEventListener('DOMContentLoaded', function() {
setTimeout(initializeRendering, 800);
});
window.addEventListener('load', function() {
setTimeout(initializeRendering, 1200);
});
</script>
""")