diff --git a/__pycache__/rag_chatbot.cpython-311.pyc b/__pycache__/rag_chatbot.cpython-311.pyc
index d6f59ab..dda7696 100644
Binary files a/__pycache__/rag_chatbot.cpython-311.pyc and b/__pycache__/rag_chatbot.cpython-311.pyc differ
diff --git a/app.py b/app.py
new file mode 100644
index 0000000..680783c
--- /dev/null
+++ b/app.py
@@ -0,0 +1,31 @@
+import gradio as gr
+from services.rag_service import initialize_rag_bot
+from components.chatbot import process_query, reset_conversation, change_model, change_collection
+from components.ui import build_interface, update_ui_language_elements
+
+def main():
+ """Main entry point for the chatbot application"""
+ # Initialize the RAG chatbot
+ initialize_rag_bot()
+
+    # Build the interface
+ interface = build_interface(
+ process_query_fn=process_query,
+ reset_conversation_fn=reset_conversation,
+ change_model_fn=change_model,
+ change_collection_fn=change_collection,
+        update_ui_language_fn=update_ui_language_elements  # hook for updating UI language
+ )
+
+    # Launch the Gradio app
+ interface.launch(
+ share=False,
+ inbrowser=True,
+ server_name="localhost",
+ server_port=7860
+ )
+
+if __name__ == "__main__":
+ main()
\ No newline at end of file
diff --git a/components/__init__.py b/components/__init__.py
new file mode 100644
index 0000000..c09ded8
--- /dev/null
+++ b/components/__init__.py
@@ -0,0 +1,2 @@
+from .chatbot import process_query, reset_conversation, change_model, change_collection
+from .callbacks import GradioStreamingHandler
diff --git a/components/callbacks.py b/components/callbacks.py
new file mode 100644
index 0000000..d060386
--- /dev/null
+++ b/components/callbacks.py
@@ -0,0 +1,12 @@
+import queue
+from langchain.callbacks.base import BaseCallbackHandler
+
+# Custom handler that captures streamed tokens
+class GradioStreamingHandler(BaseCallbackHandler):
+ def __init__(self):
+ self.tokens_queue = queue.Queue()
+ self.full_text = ""
+
+ def on_llm_new_token(self, token, **kwargs):
+ self.tokens_queue.put(token)
+ self.full_text += token
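+
+# Usage sketch (mirrors components/chatbot.py): attach the handler to a
+# streaming ChatOllama instance, run invoke() in a worker thread, and drain
+# tokens_queue from the UI loop.
+#   handler = GradioStreamingHandler()
+#   llm = ChatOllama(model="llama3.2", streaming=True, callbacks=[handler])
+#   llm.invoke(messages)  # on_llm_new_token() feeds tokens_queue as tokens arrive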
\ No newline at end of file
diff --git a/components/chatbot.py b/components/chatbot.py
new file mode 100644
index 0000000..e6433c6
--- /dev/null
+++ b/components/chatbot.py
@@ -0,0 +1,385 @@
+import traceback
+import threading
+import queue
+from langchain.prompts import ChatPromptTemplate
+from langchain_ollama import ChatOllama
+from rag_chatbot import MultimodalRAGChatbot
+from config.settings import QDRANT_URL, QDRANT_COLLECTION_NAME, EMBEDDING_MODEL, OLLAMA_URL, DEFAULT_MODEL
+from translations.lang_mappings import LANGUAGE_MAPPING
+from utils.image_utils import base64_to_image
+from components.callbacks import GradioStreamingHandler
+import re
+
+def clean_llm_response(text):
+    """Clean the LLM response by removing thinking tags and other unwanted markup."""
+    # Remove reasoning blocks (<think>...</think>) emitted by models such as deepseek-r1
+    text = re.sub(r'<think>.*?</think>', '', text, flags=re.DOTALL)
+    # Strip leading whitespace from the response
+    text = text.lstrip()
+    return text
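+
+# Example, assuming a reasoning model such as deepseek-r1:
+#   clean_llm_response("<think>internal reasoning</think>  Final answer")
+#   -> "Final answer"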
+
+# Initialize the chatbot
+rag_bot = MultimodalRAGChatbot(
+ qdrant_url=QDRANT_URL,
+ qdrant_collection_name=QDRANT_COLLECTION_NAME,
+ ollama_model=DEFAULT_MODEL,
+ embedding_model=EMBEDDING_MODEL,
+ ollama_url=OLLAMA_URL
+)
+print(f"Chatbot initialisé avec modèle: {DEFAULT_MODEL}")
+
+# Global state
+current_images = []
+current_tables = []
+
+# Utility functions
+def display_images(images_list=None):
+    """Build a list of (image, caption) tuples for the Gradio Gallery"""
+ images_to_use = images_list if images_list is not None else current_images
+
+ if not images_to_use:
+ return None
+
+ gallery = []
+ for img_data in images_to_use:
+ image = img_data["image"]
+ if image:
+ caption = f"{img_data['caption']} (Source: {img_data['source']}, Page: {img_data['page']})"
+ gallery.append((image, caption))
+
+ return gallery if gallery else None
+
+def display_tables(tables_list=None, language=None):
+    """Build the HTML used to display the retrieved tables"""
+    tables_to_use = tables_list if tables_list is not None else current_tables
+
+    if not tables_to_use:
+        return None
+
+    html = ""
+    for idx, table in enumerate(tables_to_use):
+        table_data = table['data']
+        table_html = ""
+
+        try:
+            if isinstance(table_data, str):
+                if '|' in table_data:
+                    # Convert a markdown table into an HTML table
+                    rows = table_data.strip().split('\n')
+                    table_html = '<table>'
+
+                    for i, row in enumerate(rows):
+                        # Skip the markdown separator row (e.g. |---|---|)
+                        if i == 1 and all(c in ':-|' for c in row):
+                            continue
+
+                        cells = row.split('|')
+
+                        # Drop the empty edge cells produced by leading/trailing pipes
+                        if cells and cells[0].strip() == '':
+                            cells = cells[1:]
+                        if cells and cells[-1].strip() == '':
+                            cells = cells[:-1]
+
+                        if cells:
+                            is_header = (i == 0)
+                            table_html += '<tr>'
+                            for cell in cells:
+                                cell_content = cell.strip()
+                                if is_header:
+                                    table_html += f'<th>{cell_content}</th>'
+                                else:
+                                    table_html += f'<td>{cell_content}</td>'
+                            table_html += '</tr>'
+
+                    table_html += '</table>'
+                else:
+                    table_html = f'<pre>{table_data}</pre>'
+            else:
+                table_html = f'<pre>{table_data}</pre>'
+        except Exception as e:
+            print(f"Error formatting table {idx}: {e}")
+            table_html = f'<pre>{table_data}</pre>'
+
+        html += f"""
+        <div>
+            <h4>{table.get('caption', 'Tableau')}</h4>
+            <p>Source: {table.get('source', 'N/A')}, Page: {table.get('page', 'N/A')}</p>
+            <p>Description: {table.get('description', '')}</p>
+            {table_html}
+        </div>
+        """
+
+    return html if html else None
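+
+# Example, assuming a markdown table payload (hypothetical values):
+#   display_tables([{'data': '| A | B |\n|---|---|\n| 1 | 2 |',
+#                    'caption': 'Demo', 'source': 'doc.pdf', 'page': 3}])
+# returns HTML with a <table> whose header cells are A and B plus one data row.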
+
+# Switch the Ollama model
+def change_model(model_name, language="Français"):
+ global rag_bot
+
+ try:
+ rag_bot = MultimodalRAGChatbot(
+ qdrant_url=QDRANT_URL,
+ qdrant_collection_name=QDRANT_COLLECTION_NAME,
+ ollama_model=model_name,
+ embedding_model=EMBEDDING_MODEL,
+ ollama_url=OLLAMA_URL
+ )
+ print(f"Modèle changé pour: {model_name}")
+ return f"✅ Modèle changé pour: {model_name}"
+ except Exception as e:
+ print(f"Erreur lors du changement de modèle: {e}")
+ return f"❌ Erreur: {str(e)}"
+
+# Switch the Qdrant collection
+def change_collection(collection_name, language="Français"):
+ global rag_bot
+
+ try:
+ rag_bot = MultimodalRAGChatbot(
+ qdrant_url=QDRANT_URL,
+ qdrant_collection_name=collection_name,
+ ollama_model=rag_bot.llm.model,
+ embedding_model=EMBEDDING_MODEL,
+ ollama_url=OLLAMA_URL
+ )
+ print(f"Collection changée pour: {collection_name}")
+ return f"✅ Collection changée pour: {collection_name}"
+ except Exception as e:
+ print(f"Erreur lors du changement de collection: {e}")
+ return f"❌ Erreur: {str(e)}"
+
+# Process a user query (generator yielding (history, sources, images, tables))
+def process_query(message, history, streaming, show_sources, max_images, language):
+ global current_images, current_tables
+
+    if not message.strip():
+        # process_query is a generator; a bare return would drop these outputs
+        yield history, "", None, None
+        return
+
+ current_images = []
+ current_tables = []
+ print(f"Traitement du message: {message}")
+ print(f"Streaming: {streaming}")
+
+ try:
+ if streaming:
+            # Streaming path: render tokens progressively in Gradio
+ history = history + [(message, "")]
+
+            # 1. Retrieve the relevant documents
+ docs = rag_bot._retrieve_relevant_documents(message)
+
+            # 2. Build the context and the chat history
+ context = rag_bot._format_documents(docs)
+ history_text = rag_bot._format_chat_history()
+
+            # 3. Build the prompt
+ prompt_template = ChatPromptTemplate.from_template("""
+ Tu es un assistant documentaire spécialisé qui utilise toutes les informations disponibles dans le contexte fourni.
+
+ TRÈS IMPORTANT: Tu dois répondre EXCLUSIVEMENT en {language}. Ne réponds JAMAIS dans une autre langue.
+
+ Instructions spécifiques:
+ 1. Pour chaque image mentionnée dans le contexte, inclue TOUJOURS dans ta réponse:
+ - La légende/caption exacte de l'image
+ - La source et le numéro de page
+ - Une description brève de ce qu'elle montre
+
+ 2. Pour chaque tableau mentionné dans le contexte, inclue TOUJOURS:
+ - Le titre/caption exact du tableau
+ - La source et le numéro de page
+ - Ce que contient et signifie le tableau
+
+ 3. Lorsque tu cites des équations mathématiques:
+ - Utilise la syntaxe LaTeX exacte comme dans le document ($...$ ou $$...$$)
+ - Reproduis-les fidèlement sans modification
+
+ 4. IMPORTANT: Ne pas inventer d'informations - si une donnée n'est pas explicitement fournie dans le contexte,
+ indique clairement que cette information n'est pas disponible dans les documents fournis.
+
+ 5. Cite précisément les sources pour chaque élément d'information (format: [Source, Page]).
+
+ 6. CRUCIAL: Ta réponse doit être UNIQUEMENT et INTÉGRALEMENT en {language}, quelle que soit la langue de la question.
+
+ Historique de conversation:
+ {chat_history}
+
+ Contexte (à utiliser pour répondre):
+ {context}
+
+ Question: {question}
+
+ Réponds de façon structurée et précise en intégrant activement les images, tableaux et équations disponibles dans le contexte.
+ Ta réponse doit être exclusivement en {language}.
+ """)
+
+            # 4. Format the messages for the LLM
+ messages = prompt_template.format_messages(
+ chat_history=history_text,
+ context=context,
+ question=message,
+ language=LANGUAGE_MAPPING.get(language, "français")
+ )
+
+            # 5. Create a custom streaming handler
+ handler = GradioStreamingHandler()
+
+            # 6. Create an LLM instance wired to the handler
+ streaming_llm = ChatOllama(
+ model=rag_bot.llm.model,
+ base_url=rag_bot.llm.base_url,
+ streaming=True,
+ callbacks=[handler]
+ )
+
+            # 7. Run generation in a worker thread so the UI is not blocked
+ def generate_response():
+ streaming_llm.invoke(messages)
+
+ thread = threading.Thread(target=generate_response)
+ thread.start()
+
+            # 8. Drain the tokens and update the interface
+ partial_response = ""
+
+            # Poll for tokens with a short timeout
+ while thread.is_alive() or not handler.tokens_queue.empty():
+ try:
+ token = handler.tokens_queue.get(timeout=0.05)
+ partial_response += token
+
+                    # Clean the response for display only (not for the internal history)
+ clean_response = clean_llm_response(partial_response)
+ history[-1] = (message, clean_response)
+ yield history, "", None, None
+ except queue.Empty:
+ continue
+
+            # After the loop, clean the full response for the internal history
+ partial_response = clean_llm_response(partial_response)
+ rag_bot.chat_history.append({"role": "user", "content": message})
+ rag_bot.chat_history.append({"role": "assistant", "content": partial_response})
+
+            # 9. Collect the sources, images and tables
+ texts, images, tables = rag_bot._process_documents(docs)
+
+            # Summarize the retrieved sources
+ source_info = ""
+ if texts:
+ source_info += f"📚 {len(texts)} textes • "
+ if images:
+ source_info += f"🖼️ {len(images)} images • "
+ if tables:
+ source_info += f"📊 {len(tables)} tableaux"
+
+ if source_info:
+ source_info = "Sources trouvées: " + source_info
+
+            # 10. Process the images
+ if show_sources and images:
+ images = images[:max_images]
+ for img in images:
+ img_data = img.get("image_data")
+ if img_data:
+ image = base64_to_image(img_data)
+ if image:
+ current_images.append({
+ "image": image,
+ "caption": img.get("caption", ""),
+ "source": img.get("source", ""),
+ "page": img.get("page", ""),
+ "description": img.get("description", "")
+ })
+
+            # 11. Process the tables
+ if show_sources and tables:
+ for table in tables:
+ current_tables.append({
+ "data": rag_bot.format_table(table.get("table_data", "")),
+ "caption": table.get("caption", ""),
+ "source": table.get("source", ""),
+ "page": table.get("page", ""),
+ "description": table.get("description", "")
+ })
+
+            # 12. Yield the final results
+ images_display = display_images()
+ tables_display = display_tables()
+ yield history, source_info, images_display, tables_display
+
+ else:
+            # Non-streaming path
+ print("Mode non-streaming activé")
+ source_info = ""
+
+ result = rag_bot.chat(message, stream=False)
+            # Strip thinking tags from the response
+ result["response"] = clean_llm_response(result["response"])
+ history = history + [(message, result["response"])]
+
+            # Update the internal history
+ rag_bot.chat_history.append({"role": "user", "content": message})
+ rag_bot.chat_history.append({"role": "assistant", "content": result["response"]})
+
+            # Summarize the sources
+ if "texts" in result:
+ source_info += f"📚 {len(result['texts'])} textes • "
+ if "images" in result:
+ source_info += f"🖼️ {len(result['images'])} images • "
+ if "tables" in result:
+ source_info += f"📊 {len(result['tables'])} tableaux"
+
+ if source_info:
+ source_info = "Sources trouvées: " + source_info
+
+            # Process images and tables for display
+ if show_sources and "images" in result and result["images"]:
+ images = result["images"][:max_images]
+ for img in images:
+ img_data = img.get("image_data")
+ if img_data:
+ image = base64_to_image(img_data)
+ if image:
+ current_images.append({
+ "image": image,
+ "caption": img.get("caption", ""),
+ "source": img.get("source", ""),
+ "page": img.get("page", ""),
+ "description": img.get("description", "")
+ })
+
+ if show_sources and "tables" in result and result["tables"]:
+ tables = result["tables"]
+ for table in tables:
+ current_tables.append({
+ "data": rag_bot.format_table(table.get("table_data", "")),
+ "caption": table.get("caption", ""),
+ "source": table.get("source", ""),
+ "page": table.get("page", ""),
+ "description": table.get("description", "")
+ })
+
+ yield history, source_info, display_images(), display_tables()
+
+ except Exception as e:
+ error_msg = f"Une erreur est survenue: {str(e)}"
+ traceback_text = traceback.format_exc()
+ print(error_msg)
+ print(traceback_text)
+ history = history + [(message, error_msg)]
+ yield history, "Erreur lors du traitement de la requête", None, None
+
+# Reset the conversation
+def reset_conversation():
+ global current_images, current_tables
+ current_images = []
+ current_tables = []
+
+ rag_bot.clear_history()
+
+ return [], "", None, None
\ No newline at end of file
diff --git a/components/ui.py b/components/ui.py
new file mode 100644
index 0000000..78fb7e3
--- /dev/null
+++ b/components/ui.py
@@ -0,0 +1,198 @@
+import gradio as gr
+from config.settings import DEFAULT_MODEL, QDRANT_COLLECTION_NAME, AVAILABLE_MODELS
+from translations.lang_mappings import UI_TRANSLATIONS, UI_SUPPORTED_LANGUAGES
+from utils.katex_script import KATEX_CSS_JS
+
+def update_ui_language_elements(language):
+    """Update the UI elements according to the selected language"""
+    pass  # TODO: implement as needed
+
+def build_interface(
+ process_query_fn,
+ reset_conversation_fn,
+ change_model_fn,
+ change_collection_fn,
+ update_ui_language_fn
+):
+    """Build the Gradio user interface."""
+ with gr.Blocks(css=KATEX_CSS_JS, theme=gr.themes.Soft(primary_hue="blue")) as interface:
+ gr.Markdown("# 📚 Assistant documentaire intelligent")
+
+ with gr.Row():
+ with gr.Column(scale=2):
+                # Main chatbot panel
+ chat_interface = gr.Chatbot(
+ height=600,
+ show_label=False,
+ layout="bubble",
+ elem_id="chatbot"
+ )
+
+ with gr.Row():
+ msg = gr.Textbox(
+ show_label=False,
+ placeholder="Posez votre question...",
+ container=False,
+ scale=4
+ )
+ submit_btn = gr.Button("Envoyer", variant="primary", scale=1)
+
+ clear_btn = gr.Button("Effacer la conversation")
+ source_info = gr.Markdown("", elem_id="sources_info")
+
+ with gr.Column(scale=1):
+ with gr.Accordion("Options", open=True):
+                    # Model selector
+ model_selector = gr.Dropdown(
+ choices=AVAILABLE_MODELS,
+ value=DEFAULT_MODEL,
+ label="Modèle Ollama",
+                        info="Choisir le modèle de langage à utiliser"
+ )
+ model_status = gr.Markdown(f"Modèle actuel: **{DEFAULT_MODEL}**")
+
+                    # Language selector
+ language_selector = gr.Dropdown(
+ choices=UI_SUPPORTED_LANGUAGES,
+ value=UI_SUPPORTED_LANGUAGES[0],
+ label="Langue des réponses",
+ info="Choisir la langue dans laquelle l'assistant répondra"
+ )
+
+                    # Qdrant collection selector
+ collection_name_input = gr.Textbox(
+ value=QDRANT_COLLECTION_NAME,
+ label="Collection Qdrant",
+ info="Nom de la collection de documents à utiliser"
+ )
+ collection_status = gr.Markdown(f"Collection actuelle: **{QDRANT_COLLECTION_NAME}**")
+
+                    # Apply-collection button
+ apply_collection_btn = gr.Button("Appliquer la collection")
+
+ streaming = gr.Checkbox(
+ label="Mode streaming",
+ value=True,
+ info="Voir les réponses s'afficher progressivement"
+ )
+ show_sources = gr.Checkbox(label="Afficher les sources", value=True)
+ max_images = gr.Slider(
+ minimum=1,
+ maximum=10,
+ value=3,
+ step=1,
+ label="Nombre max d'images"
+ )
+
+ gr.Markdown("---")
+
+ gr.Markdown("### 🖼️ Images pertinentes")
+ image_gallery = gr.Gallery(
+ label="Images pertinentes",
+ show_label=False,
+ columns=2,
+ height=300,
+ object_fit="contain"
+ )
+
+ gr.Markdown("### 📊 Tableaux")
+ tables_display = gr.HTML()
+
+        # Wire up model switching
+ model_selector.change(
+ fn=change_model_fn,
+ inputs=model_selector,
+ outputs=model_status
+ )
+
+        # Wire up collection switching
+ apply_collection_btn.click(
+ fn=change_collection_fn,
+ inputs=collection_name_input,
+ outputs=collection_status
+ )
+
+        # Clear the input box after submission
+ def clear_input():
+ return ""
+
+        # Wire up the main actions
+ msg.submit(
+ process_query_fn,
+ inputs=[msg, chat_interface, streaming, show_sources, max_images, language_selector],
+ outputs=[chat_interface, source_info, image_gallery, tables_display]
+ ).then(clear_input, None, msg)
+
+ submit_btn.click(
+ process_query_fn,
+ inputs=[msg, chat_interface, streaming, show_sources, max_images, language_selector],
+ outputs=[chat_interface, source_info, image_gallery, tables_display]
+ ).then(clear_input, None, msg)
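+
+        # Note: process_query_fn is a generator, so Gradio streams each yielded
+        # (history, sources, images, tables) tuple to these outputs as it arrives.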
+
+ clear_btn.click(
+ reset_conversation_fn,
+ outputs=[chat_interface, source_info, image_gallery, tables_display]
+ )
+
+        # KaTeX styling and design tweaks
+        gr.Markdown("""
+        """)
+
+ return interface
\ No newline at end of file
diff --git a/config/settings.py b/config/settings.py
new file mode 100644
index 0000000..cbefde5
--- /dev/null
+++ b/config/settings.py
@@ -0,0 +1,18 @@
+import gradio as gr
+
+# Configuration settings for the application
+
+# URLs and connection settings
+QDRANT_URL = "http://localhost:6333"
+QDRANT_COLLECTION_NAME = "my_custom_collection"
+OLLAMA_URL = "http://127.0.0.1:11434"
+
+# Model settings
+EMBEDDING_MODEL = "mxbai-embed-large"
+DEFAULT_MODEL = "llama3.2"
+
+# Available models
+AVAILABLE_MODELS = ["llama3.1", "llama3.2", "deepseek-r1:7b", "deepseek-r1:14b"]
+
+# Default theme
+DEFAULT_THEME = gr.themes.Soft(primary_hue="blue")
diff --git a/config/translations.py b/config/translations.py
new file mode 100644
index 0000000..ffc95c3
--- /dev/null
+++ b/config/translations.py
@@ -0,0 +1,50 @@
+# Language mapping so the LLM reliably understands the target language
+LANGUAGE_MAPPING = {
+ "Français": "français",
+ "English": "English",
+ "Español": "español",
+ "Deutsch": "Deutsch",
+ "Italiano": "italiano",
+ "中文": "Chinese",
+ "日本語": "Japanese",
+ "العربية": "Arabic",
+ "فارسی": "Persian"
+}
+
+# UI translation dictionary
+UI_TRANSLATIONS = {
+ "Français": {
+ "title": "📚 Assistant documentaire intelligent",
+ "placeholder": "Posez votre question...",
+ "send_btn": "Envoyer",
+ "clear_btn": "Effacer la conversation",
+ "model_selector": "Modèle Ollama",
+        "model_info": "Choisir le modèle de langage à utiliser",
+ "model_current": "Modèle actuel",
+ "language_selector": "Langue des réponses",
+ "language_info": "Choisir la langue dans laquelle l'assistant répondra",
+ "collection_input": "Collection Qdrant",
+ "collection_info": "Nom de la collection de documents à utiliser",
+ "collection_current": "Collection actuelle",
+ "apply_btn": "Appliquer la collection",
+ "streaming_label": "Mode streaming",
+ "streaming_info": "Voir les réponses s'afficher progressivement",
+ "sources_label": "Afficher les sources",
+ "max_images_label": "Nombre max d'images",
+ "images_title": "🖼️ Images pertinentes",
+ "tables_title": "📊 Tableaux",
+ "sources_found": "Sources trouvées",
+ "texts": "textes",
+ "images": "images",
+ "tables": "tableaux",
+ "error_msg": "Une erreur est survenue"
+ },
+ "English": {
+ "title": "📚 Intelligent Document Assistant",
+ "placeholder": "Ask your question...",
+ "send_btn": "Send",
+ "clear_btn": "Clear conversation",
+        # Add the remaining English translations here
+ }
+    # Add more languages as needed
+}
\ No newline at end of file
diff --git a/test_mistral.ipynb b/test_mistral.ipynb
new file mode 100644
index 0000000..c56da06
--- /dev/null
+++ b/test_mistral.ipynb
@@ -0,0 +1,142 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {},
+ "outputs": [
+ {
+ "ename": "SDKError",
+ "evalue": "API error occurred: Status 401\n{\n \"message\":\"Unauthorized\",\n \"request_id\":\"11390a73fd79bc1a934c5858569caa3a\"\n}",
+ "output_type": "error",
+ "traceback": [
+ "\u001b[31m---------------------------------------------------------------------------\u001b[39m",
+ "\u001b[31mSDKError\u001b[39m Traceback (most recent call last)",
+ "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[11]\u001b[39m\u001b[32m, line 8\u001b[39m\n\u001b[32m 4\u001b[39m api_key =\u001b[33m\"\u001b[39m\u001b[33mxmM3IG80Y97Hg8kJVUPy1ijyIhmS2H9j\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m 6\u001b[39m client = Mistral(api_key=api_key)\n\u001b[32m----> \u001b[39m\u001b[32m8\u001b[39m uploaded_pdf = \u001b[43mclient\u001b[49m\u001b[43m.\u001b[49m\u001b[43mfiles\u001b[49m\u001b[43m.\u001b[49m\u001b[43mupload\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 9\u001b[39m \u001b[43m \u001b[49m\u001b[43mfile\u001b[49m\u001b[43m=\u001b[49m\u001b[43m{\u001b[49m\n\u001b[32m 10\u001b[39m \u001b[43m \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mfile_name\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43m11_chapitre3.pdf\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[32m 11\u001b[39m \u001b[43m \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mcontent\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mopen\u001b[39;49m\u001b[43m(\u001b[49m\u001b[33;43mr\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mF:\u001b[39;49m\u001b[33;43m\\\u001b[39;49m\u001b[33;43mDev\u001b[39;49m\u001b[33;43m\\\u001b[39;49m\u001b[33;43mRag\u001b[39;49m\u001b[33;43m\\\u001b[39;49m\u001b[33;43mRag_Modeling\u001b[39;49m\u001b[33;43m\\\u001b[39;49m\u001b[33;43mdocument\u001b[39;49m\u001b[33;43m\\\u001b[39;49m\u001b[33;43m11_chapitre3.pdf\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mrb\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 12\u001b[39m \u001b[43m \u001b[49m\u001b[43m}\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 13\u001b[39m \u001b[43m \u001b[49m\u001b[43mpurpose\u001b[49m\u001b[43m=\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mocr\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\n\u001b[32m 14\u001b[39m \u001b[43m)\u001b[49m \n\u001b[32m 15\u001b[39m client.files.retrieve(file_id=uploaded_pdf.id) \n",
+ "\u001b[36mFile \u001b[39m\u001b[32mf:\\Dev\\Rag\\chat_bot_rag\\.venv\\Lib\\site-packages\\mistralai\\files.py:101\u001b[39m, in \u001b[36mFiles.upload\u001b[39m\u001b[34m(self, file, purpose, retries, server_url, timeout_ms, http_headers)\u001b[39m\n\u001b[32m 99\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m utils.match_response(http_res, \u001b[33m\"\u001b[39m\u001b[33m4XX\u001b[39m\u001b[33m\"\u001b[39m, \u001b[33m\"\u001b[39m\u001b[33m*\u001b[39m\u001b[33m\"\u001b[39m):\n\u001b[32m 100\u001b[39m http_res_text = utils.stream_to_text(http_res)\n\u001b[32m--> \u001b[39m\u001b[32m101\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m models.SDKError(\n\u001b[32m 102\u001b[39m \u001b[33m\"\u001b[39m\u001b[33mAPI error occurred\u001b[39m\u001b[33m\"\u001b[39m, http_res.status_code, http_res_text, http_res\n\u001b[32m 103\u001b[39m )\n\u001b[32m 104\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m utils.match_response(http_res, \u001b[33m\"\u001b[39m\u001b[33m5XX\u001b[39m\u001b[33m\"\u001b[39m, \u001b[33m\"\u001b[39m\u001b[33m*\u001b[39m\u001b[33m\"\u001b[39m):\n\u001b[32m 105\u001b[39m http_res_text = utils.stream_to_text(http_res)\n",
+ "\u001b[31mSDKError\u001b[39m: API error occurred: Status 401\n{\n \"message\":\"Unauthorized\",\n \"request_id\":\"11390a73fd79bc1a934c5858569caa3a\"\n}"
+ ]
+ }
+ ],
+ "source": [
+ "from mistralai import Mistral\n",
+ "import os\n",
+ "\n",
+    "api_key = os.environ.get(\"MISTRAL_API_KEY\")\n",
+ "\n",
+ "client = Mistral(api_key=api_key)\n",
+ "\n",
+ "uploaded_pdf = client.files.upload(\n",
+ " file={\n",
+ " \"file_name\": \"11_chapitre3.pdf\",\n",
+ " \"content\": open(r\"F:\\Dev\\Rag\\Rag_Modeling\\document\\11_chapitre3.pdf\", \"rb\"),\n",
+ " },\n",
+ " purpose=\"ocr\"\n",
+ ") \n",
+ "client.files.retrieve(file_id=uploaded_pdf.id) \n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [
+ {
+ "ename": "SDKError",
+ "evalue": "API error occurred: Status 401\n{\n \"message\":\"Unauthorized\",\n \"request_id\":\"bf40e3105e1f257ec16fc233e4d0396b\"\n}",
+ "output_type": "error",
+ "traceback": [
+ "\u001b[31m---------------------------------------------------------------------------\u001b[39m",
+ "\u001b[31mSDKError\u001b[39m Traceback (most recent call last)",
+ "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[10]\u001b[39m\u001b[32m, line 5\u001b[39m\n\u001b[32m 1\u001b[39m model = \u001b[33m\"\u001b[39m\u001b[33mmistral-large-latest\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m 3\u001b[39m client = Mistral(api_key=api_key)\n\u001b[32m----> \u001b[39m\u001b[32m5\u001b[39m chat_response = \u001b[43mclient\u001b[49m\u001b[43m.\u001b[49m\u001b[43mchat\u001b[49m\u001b[43m.\u001b[49m\u001b[43mcomplete\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 6\u001b[39m \u001b[43m \u001b[49m\u001b[43mmodel\u001b[49m\u001b[43m=\u001b[49m\u001b[43m \u001b[49m\u001b[43mmodel\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 7\u001b[39m \u001b[43m \u001b[49m\u001b[43mmessages\u001b[49m\u001b[43m \u001b[49m\u001b[43m=\u001b[49m\u001b[43m \u001b[49m\u001b[43m[\u001b[49m\n\u001b[32m 8\u001b[39m \u001b[43m \u001b[49m\u001b[43m{\u001b[49m\n\u001b[32m 9\u001b[39m \u001b[43m \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mrole\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43muser\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[32m 10\u001b[39m \u001b[43m \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mcontent\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mWhat is the best French cheese?\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[32m 11\u001b[39m \u001b[43m \u001b[49m\u001b[43m}\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 12\u001b[39m \u001b[43m \u001b[49m\u001b[43m]\u001b[49m\n\u001b[32m 13\u001b[39m \u001b[43m)\u001b[49m\n\u001b[32m 14\u001b[39m \u001b[38;5;28mprint\u001b[39m(chat_response.choices[\u001b[32m0\u001b[39m].message.content)\n",
+ "\u001b[36mFile \u001b[39m\u001b[32mf:\\Dev\\Rag\\chat_bot_rag\\.venv\\Lib\\site-packages\\mistralai\\chat.py:235\u001b[39m, in \u001b[36mChat.complete\u001b[39m\u001b[34m(self, model, messages, temperature, top_p, max_tokens, stream, stop, random_seed, response_format, tools, tool_choice, presence_penalty, frequency_penalty, n, prediction, safe_prompt, retries, server_url, timeout_ms, http_headers)\u001b[39m\n\u001b[32m 233\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m utils.match_response(http_res, \u001b[33m\"\u001b[39m\u001b[33m4XX\u001b[39m\u001b[33m\"\u001b[39m, \u001b[33m\"\u001b[39m\u001b[33m*\u001b[39m\u001b[33m\"\u001b[39m):\n\u001b[32m 234\u001b[39m http_res_text = utils.stream_to_text(http_res)\n\u001b[32m--> \u001b[39m\u001b[32m235\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m models.SDKError(\n\u001b[32m 236\u001b[39m \u001b[33m\"\u001b[39m\u001b[33mAPI error occurred\u001b[39m\u001b[33m\"\u001b[39m, http_res.status_code, http_res_text, http_res\n\u001b[32m 237\u001b[39m )\n\u001b[32m 238\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m utils.match_response(http_res, \u001b[33m\"\u001b[39m\u001b[33m5XX\u001b[39m\u001b[33m\"\u001b[39m, \u001b[33m\"\u001b[39m\u001b[33m*\u001b[39m\u001b[33m\"\u001b[39m):\n\u001b[32m 239\u001b[39m http_res_text = utils.stream_to_text(http_res)\n",
+ "\u001b[31mSDKError\u001b[39m: API error occurred: Status 401\n{\n \"message\":\"Unauthorized\",\n \"request_id\":\"bf40e3105e1f257ec16fc233e4d0396b\"\n}"
+ ]
+ }
+ ],
+ "source": [
+ "model = \"mistral-large-latest\"\n",
+ "\n",
+ "client = Mistral(api_key=api_key)\n",
+ "\n",
+ "chat_response = client.chat.complete(\n",
+ " model= model,\n",
+ " messages = [\n",
+ " {\n",
+ " \"role\": \"user\",\n",
+ " \"content\": \"What is the best French cheese?\",\n",
+ " },\n",
+ " ]\n",
+ ")\n",
+ "print(chat_response.choices[0].message.content)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "<_io.BufferedReader name='F:\\\\Dev\\\\Rag\\\\Rag_Modeling\\\\document\\\\11_chapitre3.pdf'>"
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "open(r\"F:\\Dev\\Rag\\Rag_Modeling\\document\\11_chapitre3.pdf\", \"rb\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "MISTRAL_API_KEY: None\n"
+ ]
+ }
+ ],
+ "source": [
+ "import os\n",
+ "print(\"MISTRAL_API_KEY:\", repr(os.environ.get(\"MISTRAL_API_KEY\")))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": ".venv",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.11"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/translations/lang_mappings.py b/translations/lang_mappings.py
new file mode 100644
index 0000000..5733043
--- /dev/null
+++ b/translations/lang_mappings.py
@@ -0,0 +1,105 @@
+# Language mapping so the LLM reliably understands the target language
+LANGUAGE_MAPPING = {
+ "Français": "français",
+ "English": "English",
+ "Español": "español",
+ "Deutsch": "Deutsch",
+ "Italiano": "italiano",
+ "中文": "Chinese",
+ "日本語": "Japanese",
+ "العربية": "Arabic"
+}
+
+# UI translation dictionary
+UI_TRANSLATIONS = {
+ "Français": {
+ "title": "📚 Assistant documentaire intelligent",
+ "placeholder": "Posez votre question...",
+ "send_btn": "Envoyer",
+ "clear_btn": "Effacer la conversation",
+ "model_selector": "Modèle Ollama",
+        "model_info": "Choisir le modèle de langage à utiliser",
+ "model_current": "Modèle actuel",
+ "language_selector": "Langue des réponses",
+ "language_info": "Choisir la langue dans laquelle l'assistant répondra",
+ "collection_input": "Collection Qdrant",
+ "collection_info": "Nom de la collection de documents à utiliser",
+ "collection_current": "Collection actuelle",
+ "apply_btn": "Appliquer la collection",
+ "streaming_label": "Mode streaming",
+ "streaming_info": "Voir les réponses s'afficher progressivement",
+ "sources_label": "Afficher les sources",
+ "max_images_label": "Nombre max d'images",
+ "images_title": "🖼️ Images pertinentes",
+ "tables_title": "📊 Tableaux",
+ "sources_found": "Sources trouvées",
+ "texts": "textes",
+ "images": "images",
+ "tables": "tableaux",
+ "error_msg": "Une erreur est survenue",
+ "processing_error": "Erreur lors du traitement de la requête",
+ "table_translation": "Traduction",
+ "table_description": "Ce tableau présente des données sur"
+ },
+ "English": {
+ "title": "📚 Intelligent Document Assistant",
+ "placeholder": "Ask your question...",
+ "send_btn": "Send",
+ "clear_btn": "Clear conversation",
+ "model_selector": "Ollama Model",
+ "model_info": "Choose the language model to use",
+ "model_current": "Current model",
+ "language_selector": "Response language",
+ "language_info": "Choose the language in which the assistant will respond",
+ "collection_input": "Qdrant Collection",
+ "collection_info": "Name of the document collection to use",
+ "collection_current": "Current collection",
+ "apply_btn": "Apply collection",
+ "streaming_label": "Streaming mode",
+ "streaming_info": "See responses appear progressively",
+ "sources_label": "Show sources",
+ "max_images_label": "Maximum number of images",
+ "images_title": "🖼️ Relevant images",
+ "tables_title": "📊 Tables",
+ "sources_found": "Sources found",
+ "texts": "texts",
+ "images": "images",
+ "tables": "tables",
+ "error_msg": "An error occurred",
+ "processing_error": "Error processing request",
+ "table_translation": "Translation",
+ "table_description": "This table presents data on"
+ },
+ "Español": {
+ "title": "📚 Asistente documental inteligente",
+ "placeholder": "Haz tu pregunta...",
+ "send_btn": "Enviar",
+ "clear_btn": "Borrar conversación",
+ "model_selector": "Modelo Ollama",
+ "model_info": "Elegir el modelo de lenguaje a utilizar",
+ "model_current": "Modelo actual",
+ "language_selector": "Idioma de respuesta",
+ "language_info": "Elegir el idioma en el que responderá el asistente",
+ "collection_input": "Colección Qdrant",
+ "collection_info": "Nombre de la colección de documentos a utilizar",
+ "collection_current": "Colección actual",
+ "apply_btn": "Aplicar colección",
+ "streaming_label": "Modo streaming",
+ "streaming_info": "Ver las respuestas aparecer progresivamente",
+ "sources_label": "Mostrar fuentes",
+ "max_images_label": "Número máximo de imágenes",
+ "images_title": "🖼️ Imágenes relevantes",
+ "tables_title": "📊 Tablas",
+ "sources_found": "Fuentes encontradas",
+ "texts": "textos",
+ "images": "imágenes",
+ "tables": "tablas",
+ "error_msg": "Se ha producido un error",
+ "processing_error": "Error al procesar la solicitud",
+ "table_translation": "Traducción",
+ "table_description": "Esta tabla presenta datos sobre"
+ }
+}
+
+# Languages supported by the interface
+UI_SUPPORTED_LANGUAGES = ["Français", "English", "Español"]
\ No newline at end of file
diff --git a/utils/__init__.py b/utils/__init__.py
new file mode 100644
index 0000000..96be681
--- /dev/null
+++ b/utils/__init__.py
@@ -0,0 +1 @@
+# Package initialization for display utilities
diff --git a/utils/conversion.py b/utils/conversion.py
new file mode 100644
index 0000000..6635e89
--- /dev/null
+++ b/utils/conversion.py
@@ -0,0 +1,15 @@
+import base64
+from io import BytesIO
+from PIL import Image
+
+def base64_to_image(base64_data):
+    """Convert a base64-encoded image into a PIL Image for direct display"""
+ try:
+ if not base64_data:
+ return None
+ image_bytes = base64.b64decode(base64_data)
+ image = Image.open(BytesIO(image_bytes))
+ return image
+ except Exception as e:
+ print(f"Erreur lors de la conversion d'image: {e}")
+ return None
\ No newline at end of file
diff --git a/utils/display.py b/utils/display.py
new file mode 100644
index 0000000..515152c
--- /dev/null
+++ b/utils/display.py
@@ -0,0 +1,40 @@
+from PIL import Image
+from io import BytesIO
+import base64
+
+def base64_to_image(base64_data):
+ """Convert base64 image data to PIL Image"""
+ try:
+ if not base64_data:
+ return None
+ image_bytes = base64.b64decode(base64_data)
+ return Image.open(BytesIO(image_bytes))
+ except Exception as e:
+ print(f"Image conversion error: {e}")
+ return None
+
+def display_images(current_images):
+ """Format images for Gradio gallery display"""
+ if not current_images:
+ return None
+ return [
+ (img["image"], f"{img['caption']} (Source: {img['source']}, Page: {img['page']})")
+ for img in current_images
+ if img.get("image")
+ ]
+
+def display_tables(current_tables):
+ """Format tables for HTML display"""
+ if not current_tables:
+ return None
+
+ html = ""
+ for table in current_tables:
+        html += f"""
+        <div>
+            <h4>{table['caption']}</h4>
+            <p>Source: {table['source']}, Page: {table['page']}</p>
+            {table.get('data', '')}
+        </div>
+        """
+ return html if html else None
diff --git a/utils/image_utils.py b/utils/image_utils.py
new file mode 100644
index 0000000..ba7937d
--- /dev/null
+++ b/utils/image_utils.py
@@ -0,0 +1,29 @@
+from io import BytesIO
+from PIL import Image
+import base64
+
+def base64_to_image(base64_data):
+    """Convert a base64-encoded image into a PIL Image for direct display"""
+ try:
+ if not base64_data:
+ return None
+ image_bytes = base64.b64decode(base64_data)
+ image = Image.open(BytesIO(image_bytes))
+ return image
+ except Exception as e:
+ print(f"Erreur lors de la conversion d'image: {e}")
+ return None
+
+def display_images(current_images):
+    """Prepare images for display in the Gradio gallery"""
+ if not current_images:
+ return None
+
+ gallery = []
+ for img_data in current_images:
+ image = img_data["image"]
+ if image:
+ caption = f"{img_data['caption']} (Source: {img_data['source']}, Page: {img_data['page']})"
+ gallery.append((image, caption))
+
+ return gallery if gallery else None
\ No newline at end of file
diff --git a/utils/katex_script.py b/utils/katex_script.py
new file mode 100644
index 0000000..7dec972
--- /dev/null
+++ b/utils/katex_script.py
@@ -0,0 +1,190 @@
+# KaTeX assets: stylesheet, core script, and the auto-render extension from the
+# jsDelivr CDN, configured for the $...$ and $$...$$ delimiters used in the documents.
+KATEX_CSS_JS = """
+<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/katex@0.16.9/dist/katex.min.css">
+<script defer src="https://cdn.jsdelivr.net/npm/katex@0.16.9/dist/katex.min.js"></script>
+<script defer src="https://cdn.jsdelivr.net/npm/katex@0.16.9/dist/contrib/auto-render.min.js"
+    onload="renderMathInElement(document.body, {delimiters: [
+        {left: '$$', right: '$$', display: true},
+        {left: '$', right: '$', display: false}
+    ]});"></script>
+"""
\ No newline at end of file
diff --git a/utils/table_utils.py b/utils/table_utils.py
new file mode 100644
index 0000000..3fa5b72
--- /dev/null
+++ b/utils/table_utils.py
@@ -0,0 +1,19 @@
+from translations.lang_mappings import UI_TRANSLATIONS
+
+def display_tables(current_tables, language=None):
+    """Simplified version that ignores the language parameter"""
+ if not current_tables:
+ return None
+
+ html = ""
+ for table in current_tables:
+ table_data = table.get('data', '')
+        html += f"""
+        <div>
+            <h4>{table.get('caption', 'Tableau')}</h4>
+            <p>Source: {table.get('source', 'N/A')}, Page: {table.get('page', 'N/A')}</p>
+            {table_data}
+        </div>
+        """
+
+ return html if html else None
\ No newline at end of file