Add utility modules and configuration settings for chatbot application
This commit is contained in:
parent
d4518a89dd
commit
cb43b1176f
Binary file not shown.
31
app.py
Normal file
31
app.py
Normal file
@ -0,0 +1,31 @@
|
||||
# filepath: f:\Dev\Rag\chat_bot_rag\app.py
|
||||
|
||||
import gradio as gr
|
||||
from services.rag_service import initialize_rag_bot
|
||||
from components.chatbot import process_query, reset_conversation, change_model, change_collection
|
||||
from components.ui import build_interface, update_ui_language_elements
|
||||
|
||||
def main():
    """Entry point: boot the RAG backend, build the UI, launch Gradio."""
    # Bring the retrieval-augmented chatbot up first so the UI callbacks
    # have a live backend to talk to.
    initialize_rag_bot()

    # Wire the interface to the callback implementations.
    app = build_interface(
        process_query_fn=process_query,
        reset_conversation_fn=reset_conversation,
        change_model_fn=change_model,
        change_collection_fn=change_collection,
        update_ui_language_fn=update_ui_language_elements,
    )

    # Serve locally only (share=False) and pop a browser tab.
    app.launch(
        share=False,
        inbrowser=True,
        server_name="localhost",
        server_port=7860,
    )


if __name__ == "__main__":
    main()
|
||||
2
components/__init__.py
Normal file
2
components/__init__.py
Normal file
@ -0,0 +1,2 @@
|
||||
from .chatbot import process_query, reset_conversation, change_model, change_collection
|
||||
from .callbacks import GradioStreamingHandler
|
||||
12
components/callbacks.py
Normal file
12
components/callbacks.py
Normal file
@ -0,0 +1,12 @@
|
||||
import queue
|
||||
from langchain.callbacks.base import BaseCallbackHandler
|
||||
|
||||
# Custom handler to capture streamed LLM tokens.
class GradioStreamingHandler(BaseCallbackHandler):
    """LangChain callback that collects tokens from a streaming LLM run.

    Tokens go onto a thread-safe queue (so a consumer on another thread can
    drain them as they arrive) and are also accumulated into ``full_text``.
    """

    def __init__(self):
        # Thread-safe FIFO drained by the UI update loop.
        self.tokens_queue = queue.Queue()
        # Running concatenation of every token received so far.
        self.full_text = ""

    def on_llm_new_token(self, token, **kwargs):
        """Invoked by LangChain once per generated token."""
        self.tokens_queue.put(token)
        self.full_text += token
||||
385
components/chatbot.py
Normal file
385
components/chatbot.py
Normal file
@ -0,0 +1,385 @@
|
||||
import traceback
|
||||
import threading
|
||||
import queue
|
||||
from langchain.prompts import ChatPromptTemplate
|
||||
from langchain_ollama import ChatOllama
|
||||
from rag_chatbot import MultimodalRAGChatbot
|
||||
from config.settings import QDRANT_URL, QDRANT_COLLECTION_NAME, EMBEDDING_MODEL, OLLAMA_URL, DEFAULT_MODEL
|
||||
from translations.lang_mappings import LANGUAGE_MAPPING
|
||||
from utils.image_utils import base64_to_image
|
||||
from langchain.callbacks.base import BaseCallbackHandler
|
||||
import re
|
||||
|
||||
def clean_llm_response(text):
    """Clean an LLM response by removing think-tags and leading whitespace.

    Strips ``<think>...</think>`` reasoning blocks and, as a fix for
    streaming display, also strips an *unterminated* ``<think>...`` block
    (while tokens are still arriving the closing tag has not been emitted
    yet, and the old pattern let the raw reasoning leak into the UI).

    Parameters:
        text: raw model output (possibly a partial, still-streaming string).

    Returns:
        The cleaned text with leading whitespace removed.
    """
    # Remove think blocks; `(?:</think>|$)` makes the close-tag optional so a
    # block still being streamed is hidden too. DOTALL lets `.` span newlines.
    text = re.sub(r'<think>.*?(?:</think>|$)', '', text, flags=re.DOTALL)
    # Drop leading whitespace left behind by the removal.
    return text.lstrip()
|
||||
# Custom handler for streaming.
# NOTE(review): this class duplicates components/callbacks.py
# GradioStreamingHandler — consider importing it from there instead.
class GradioStreamingHandler(BaseCallbackHandler):
    """LangChain callback that collects streamed tokens for the Gradio UI."""

    def __init__(self):
        # Thread-safe FIFO drained by the yield loop in process_query().
        self.tokens_queue = queue.Queue()
        # Running concatenation of every token received so far.
        self.full_text = ""

    def on_llm_new_token(self, token, **kwargs):
        """Invoked by LangChain once per generated token."""
        self.tokens_queue.put(token)
        self.full_text += token
|
||||
|
||||
# Initialize the chatbot — module-level singleton shared by every callback
# below (and rebound by change_model()/change_collection()).
rag_bot = MultimodalRAGChatbot(
    qdrant_url=QDRANT_URL,
    qdrant_collection_name=QDRANT_COLLECTION_NAME,
    ollama_model=DEFAULT_MODEL,
    embedding_model=EMBEDDING_MODEL,
    ollama_url=OLLAMA_URL
)
print(f"Chatbot initialisé avec modèle: {DEFAULT_MODEL}")

# Global state: media extracted while answering the most recent query.
# Reset at the start of process_query() and by reset_conversation().
current_images = []  # list of dicts: image, caption, source, page, description
current_tables = []  # list of dicts: data (HTML-ready), caption, source, page, description

# Utility functions
|
||||
def display_images(images_list=None):
    """Build the (image, caption) pairs consumed by a Gradio Gallery.

    Falls back to the module-level ``current_images`` cache when no explicit
    list is given. Entries whose ``image`` is falsy are skipped. Returns
    ``None`` when there is nothing to show.
    """
    source = current_images if images_list is None else images_list
    if not source:
        return None

    pairs = [
        (
            entry["image"],
            f"{entry['caption']} (Source: {entry['source']}, Page: {entry['page']})",
        )
        for entry in source
        if entry["image"]
    ]
    return pairs or None
|
||||
|
||||
def display_tables(tables_list=None, language=None):
    """Build the HTML used to display extracted tables.

    Each table dict carries ``data`` (markdown pipe-table or plain text),
    ``caption``, ``source``, ``page`` and ``description``. Falls back to the
    module-level ``current_tables`` cache when no list is given.

    Parameters:
        tables_list: optional explicit list of table dicts.
        language: accepted for interface compatibility; not used here.

    Returns:
        An HTML string, or None when there is nothing to show.
    """
    tables_to_use = tables_list if tables_list is not None else current_tables

    if not tables_to_use:
        return None

    html = ""
    for idx, table in enumerate(tables_to_use):
        table_data = table['data']
        table_html = ""

        try:
            if isinstance(table_data, str):
                if '|' in table_data:
                    # Looks like a markdown pipe table: convert row by row.
                    rows = table_data.strip().split('\n')
                    table_html = '<div class="table-container"><table>'

                    for i, row in enumerate(rows):
                        # BUGFIX: the separator row commonly contains spaces
                        # (e.g. "| --- | --- |"); the old charset ':-|' did not
                        # include ' ', so such separators rendered as data rows.
                        if i == 1 and all(c in ':-| ' for c in row):
                            continue

                        cells = row.split('|')

                        # Drop empty edge cells produced by leading/trailing '|'.
                        if cells and cells[0].strip() == '':
                            cells = cells[1:]
                        if cells and cells[-1].strip() == '':
                            cells = cells[:-1]

                        if cells:
                            is_header = (i == 0)  # first row is the header
                            table_html += '<tr>'
                            for cell in cells:
                                cell_content = cell.strip()
                                if is_header:
                                    table_html += f'<th>{cell_content}</th>'
                                else:
                                    table_html += f'<td>{cell_content}</td>'
                            table_html += '</tr>'

                    table_html += '</table></div>'
                else:
                    # Plain text with no pipes: show verbatim.
                    table_html = f'<pre>{table_data}</pre>'
            else:
                # Non-string payload: show its repr verbatim.
                table_html = f'<pre>{table_data}</pre>'
        except Exception as e:
            # Best-effort: fall back to raw display rather than dropping the table.
            print(f"Error formatting table {idx}: {e}")
            table_html = f'<pre>{table_data}</pre>'

        html += f"""
<div style="margin-bottom: 20px; border: 1px solid #ddd; padding: 15px; border-radius: 8px;">
<h3>{table.get('caption', 'Tableau')}</h3>
<p style="color:#666; font-size:0.9em;">Source: {table.get('source', 'N/A')}, Page: {table.get('page', 'N/A')}</p>
<p><strong>Description:</strong> {table.get('description', '')}</p>
{table_html}
</div>
"""

    return html if html else None
|
||||
|
||||
# Callback: switch the Ollama model backing the chatbot.
def change_model(model_name, language="Français"):
    """Rebuild the global chatbot around *model_name*.

    Keeps the current Qdrant settings. Returns a status string (✅/❌) shown
    in the UI; the old chatbot is kept if construction fails.
    """
    global rag_bot

    try:
        replacement = MultimodalRAGChatbot(
            qdrant_url=QDRANT_URL,
            qdrant_collection_name=QDRANT_COLLECTION_NAME,
            ollama_model=model_name,
            embedding_model=EMBEDDING_MODEL,
            ollama_url=OLLAMA_URL,
        )
    except Exception as e:
        print(f"Erreur lors du changement de modèle: {e}")
        return f"❌ Erreur: {str(e)}"

    rag_bot = replacement
    print(f"Modèle changé pour: {model_name}")
    return f"✅ Modèle changé pour: {model_name}"
|
||||
|
||||
# Callback: switch the Qdrant collection queried by the chatbot.
def change_collection(collection_name, language="Français"):
    """Rebuild the global chatbot against *collection_name*.

    The currently selected Ollama model is carried over. Returns a status
    string (✅/❌) for the UI; the old chatbot is kept if construction fails.
    """
    global rag_bot

    try:
        # Read the active model before rebinding the global.
        active_model = rag_bot.llm.model
        replacement = MultimodalRAGChatbot(
            qdrant_url=QDRANT_URL,
            qdrant_collection_name=collection_name,
            ollama_model=active_model,
            embedding_model=EMBEDDING_MODEL,
            ollama_url=OLLAMA_URL,
        )
    except Exception as e:
        print(f"Erreur lors du changement de collection: {e}")
        return f"❌ Erreur: {str(e)}"

    rag_bot = replacement
    print(f"Collection changée pour: {collection_name}")
    return f"✅ Collection changée pour: {collection_name}"
|
||||
|
||||
# Query-processing function
def process_query(message, history, streaming, show_sources, max_images, language):
    """Answer *message* with the RAG pipeline, yielding UI updates.

    This is a generator: each yield is a 4-tuple
    ``(history, source_info, images_display, tables_display)`` matching the
    outputs wired up in components/ui.py.

    Parameters:
        message: the user's question.
        history: Gradio chat history as (user, assistant) tuples.
        streaming: when True, tokens are streamed into the chat as they arrive.
        show_sources: when True, collect source images/tables for the side panel.
        max_images: cap on gallery images (cast to int — Gradio sliders may
            deliver floats, and list slicing requires an int).
        language: UI language name, translated via LANGUAGE_MAPPING.
    """
    global current_images, current_tables

    if not message.strip():
        # BUGFIX: this function contains `yield`, so it is a generator and a
        # plain `return value` here would produce an *empty* generator —
        # Gradio would never receive these outputs. Yield first, then stop.
        yield history, "", None, None
        return

    current_images = []
    current_tables = []
    print(f"Traitement du message: {message}")
    print(f"Streaming: {streaming}")

    try:
        if streaming:
            # Streaming mode inside Gradio: show the question immediately
            # with an empty answer slot that fills in token by token.
            history = history + [(message, "")]

            # 1. Retrieve the relevant documents
            docs = rag_bot._retrieve_relevant_documents(message)

            # 2. Prepare the context and the conversation history
            context = rag_bot._format_documents(docs)
            history_text = rag_bot._format_chat_history()

            # 3. Prepare the prompt
            prompt_template = ChatPromptTemplate.from_template("""
Tu es un assistant documentaire spécialisé qui utilise toutes les informations disponibles dans le contexte fourni.

TRÈS IMPORTANT: Tu dois répondre EXCLUSIVEMENT en {language}. Ne réponds JAMAIS dans une autre langue.

Instructions spécifiques:
1. Pour chaque image mentionnée dans le contexte, inclue TOUJOURS dans ta réponse:
   - La légende/caption exacte de l'image
   - La source et le numéro de page
   - Une description brève de ce qu'elle montre

2. Pour chaque tableau mentionné dans le contexte, inclue TOUJOURS:
   - Le titre/caption exact du tableau
   - La source et le numéro de page
   - Ce que contient et signifie le tableau

3. Lorsque tu cites des équations mathématiques:
   - Utilise la syntaxe LaTeX exacte comme dans le document ($...$ ou $$...$$)
   - Reproduis-les fidèlement sans modification

4. IMPORTANT: Ne pas inventer d'informations - si une donnée n'est pas explicitement fournie dans le contexte,
indique clairement que cette information n'est pas disponible dans les documents fournis.

5. Cite précisément les sources pour chaque élément d'information (format: [Source, Page]).

6. CRUCIAL: Ta réponse doit être UNIQUEMENT et INTÉGRALEMENT en {language}, quelle que soit la langue de la question.

Historique de conversation:
{chat_history}

Contexte (à utiliser pour répondre):
{context}

Question: {question}

Réponds de façon structurée et précise en intégrant activement les images, tableaux et équations disponibles dans le contexte.
Ta réponse doit être exclusivement en {language}.
""")

            # 4. Format the messages for the LLM
            messages = prompt_template.format_messages(
                chat_history=history_text,
                context=context,
                question=message,
                language=LANGUAGE_MAPPING.get(language, "français")
            )

            # 5. Create a custom streaming handler
            handler = GradioStreamingHandler()

            # 6. Create an LLM wired to our handler
            streaming_llm = ChatOllama(
                model=rag_bot.llm.model,
                base_url=rag_bot.llm.base_url,
                streaming=True,
                callbacks=[handler]
            )

            # 7. Run generation in a thread so the UI loop is not blocked
            def generate_response():
                streaming_llm.invoke(messages)

            thread = threading.Thread(target=generate_response)
            thread.start()

            # 8. Drain tokens and update the interface
            partial_response = ""

            # Poll with a short timeout until generation ends AND the queue
            # is empty (both conditions, or trailing tokens would be lost).
            while thread.is_alive() or not handler.tokens_queue.empty():
                try:
                    token = handler.tokens_queue.get(timeout=0.05)
                    partial_response += token

                    # Clean only the displayed copy (not the internal history)
                    clean_response = clean_llm_response(partial_response)
                    history[-1] = (message, clean_response)
                    yield history, "", None, None
                except queue.Empty:
                    continue

            # After the loop, clean the full response for the internal history
            partial_response = clean_llm_response(partial_response)
            rag_bot.chat_history.append({"role": "user", "content": message})
            rag_bot.chat_history.append({"role": "assistant", "content": partial_response})

            # 10. Split the retrieved documents into sources, images, tables
            texts, images, tables = rag_bot._process_documents(docs)

            # Build the source summary line
            source_info = ""
            if texts:
                source_info += f"📚 {len(texts)} textes • "
            if images:
                source_info += f"🖼️ {len(images)} images • "
            if tables:
                source_info += f"📊 {len(tables)} tableaux"

            if source_info:
                source_info = "Sources trouvées: " + source_info

            # 11. Process images
            if show_sources and images:
                # int(): Gradio sliders may deliver floats; slicing needs an int
                images = images[:int(max_images)]
                for img in images:
                    img_data = img.get("image_data")
                    if img_data:
                        image = base64_to_image(img_data)
                        if image:
                            current_images.append({
                                "image": image,
                                "caption": img.get("caption", ""),
                                "source": img.get("source", ""),
                                "page": img.get("page", ""),
                                "description": img.get("description", "")
                            })

            # 12. Process tables
            if show_sources and tables:
                for table in tables:
                    current_tables.append({
                        "data": rag_bot.format_table(table.get("table_data", "")),
                        "caption": table.get("caption", ""),
                        "source": table.get("source", ""),
                        "page": table.get("page", ""),
                        "description": table.get("description", "")
                    })

            # 13. Yield the final results
            images_display = display_images()
            tables_display = display_tables()
            yield history, source_info, images_display, tables_display

        else:
            # Non-streaming mode: one-shot answer via the chatbot facade
            print("Mode non-streaming activé")
            source_info = ""

            result = rag_bot.chat(message, stream=False)
            # Strip <think> blocks from the answer
            result["response"] = clean_llm_response(result["response"])
            history = history + [(message, result["response"])]

            # Update the internal conversation history
            rag_bot.chat_history.append({"role": "user", "content": message})
            rag_bot.chat_history.append({"role": "assistant", "content": result["response"]})

            # Build the source summary line
            if "texts" in result:
                source_info += f"📚 {len(result['texts'])} textes • "
            if "images" in result:
                source_info += f"🖼️ {len(result['images'])} images • "
            if "tables" in result:
                source_info += f"📊 {len(result['tables'])} tableaux"

            if source_info:
                source_info = "Sources trouvées: " + source_info

            # Process images and tables for the side panel
            if show_sources and "images" in result and result["images"]:
                # int(): Gradio sliders may deliver floats; slicing needs an int
                images = result["images"][:int(max_images)]
                for img in images:
                    img_data = img.get("image_data")
                    if img_data:
                        image = base64_to_image(img_data)
                        if image:
                            current_images.append({
                                "image": image,
                                "caption": img.get("caption", ""),
                                "source": img.get("source", ""),
                                "page": img.get("page", ""),
                                "description": img.get("description", "")
                            })

            if show_sources and "tables" in result and result["tables"]:
                tables = result["tables"]
                for table in tables:
                    current_tables.append({
                        "data": rag_bot.format_table(table.get("table_data", "")),
                        "caption": table.get("caption", ""),
                        "source": table.get("source", ""),
                        "page": table.get("page", ""),
                        "description": table.get("description", "")
                    })

            yield history, source_info, display_images(), display_tables()

    except Exception as e:
        # Surface the error in the chat itself so the user sees it
        error_msg = f"Une erreur est survenue: {str(e)}"
        traceback_text = traceback.format_exc()
        print(error_msg)
        print(traceback_text)
        history = history + [(message, error_msg)]
        yield history, "Erreur lors du traitement de la requête", None, None
|
||||
|
||||
# Callback: reset the conversation and the side panels.
def reset_conversation():
    """Clear the chat history and cached media; return blank UI values."""
    global current_images, current_tables

    # Drop media collected for the previous answer.
    current_images, current_tables = [], []

    # Wipe the chatbot's internal conversation memory.
    rag_bot.clear_history()

    # Empty chat, empty source line, empty gallery and tables panel.
    return [], "", None, None
|
||||
198
components/ui.py
Normal file
198
components/ui.py
Normal file
@ -0,0 +1,198 @@
|
||||
import gradio as gr
|
||||
from config.settings import DEFAULT_MODEL, QDRANT_COLLECTION_NAME, AVAILABLE_MODELS
|
||||
from translations.lang_mappings import UI_TRANSLATIONS, UI_SUPPORTED_LANGUAGES
|
||||
from utils.katex_script import KATEX_CSS_JS
|
||||
|
||||
def update_ui_language_elements(language):
    """Update the UI element labels for the selected language.

    Currently a stub: translation strings exist (UI_TRANSLATIONS is imported
    above) but are not yet applied to the widgets.
    """
    pass  # TODO: implement as needed
|
||||
|
||||
def build_interface(
    process_query_fn,
    reset_conversation_fn,
    change_model_fn,
    change_collection_fn,
    update_ui_language_fn
):
    """Build the Gradio user interface.

    Callback implementations are injected so this module stays free of
    chatbot state:
        process_query_fn      -- generator answering a user message
        reset_conversation_fn -- clears the chat and the side panels
        change_model_fn       -- swaps the Ollama model
        change_collection_fn  -- swaps the Qdrant collection
        update_ui_language_fn -- accepted but not wired to any event below;
                                 presumably for future relabeling — confirm.

    Returns the gr.Blocks object, ready for .launch().
    """
    with gr.Blocks(css=KATEX_CSS_JS, theme=gr.themes.Soft(primary_hue="blue")) as interface:
        gr.Markdown("# 📚 Assistant documentaire intelligent")

        with gr.Row():
            with gr.Column(scale=2):
                # Main chatbot panel
                chat_interface = gr.Chatbot(
                    height=600,
                    show_label=False,
                    layout="bubble",
                    elem_id="chatbot"
                )

                with gr.Row():
                    msg = gr.Textbox(
                        show_label=False,
                        placeholder="Posez votre question...",
                        container=False,
                        scale=4
                    )
                    submit_btn = gr.Button("Envoyer", variant="primary", scale=1)

                clear_btn = gr.Button("Effacer la conversation")
                source_info = gr.Markdown("", elem_id="sources_info")

            with gr.Column(scale=1):
                with gr.Accordion("Options", open=True):
                    # Model selector
                    model_selector = gr.Dropdown(
                        choices=AVAILABLE_MODELS,
                        value=DEFAULT_MODEL,
                        label="Modèle Ollama",
                        info="Choisir le modèle de language à utiliser"
                    )
                    model_status = gr.Markdown(f"Modèle actuel: **{DEFAULT_MODEL}**")

                    # Answer-language selector
                    language_selector = gr.Dropdown(
                        choices=UI_SUPPORTED_LANGUAGES,
                        value=UI_SUPPORTED_LANGUAGES[0],
                        label="Langue des réponses",
                        info="Choisir la langue dans laquelle l'assistant répondra"
                    )

                    # Qdrant collection selector
                    collection_name_input = gr.Textbox(
                        value=QDRANT_COLLECTION_NAME,
                        label="Collection Qdrant",
                        info="Nom de la collection de documents à utiliser"
                    )
                    collection_status = gr.Markdown(f"Collection actuelle: **{QDRANT_COLLECTION_NAME}**")

                    # Apply-collection button
                    apply_collection_btn = gr.Button("Appliquer la collection")

                    streaming = gr.Checkbox(
                        label="Mode streaming",
                        value=True,
                        info="Voir les réponses s'afficher progressivement"
                    )
                    show_sources = gr.Checkbox(label="Afficher les sources", value=True)
                    max_images = gr.Slider(
                        minimum=1,
                        maximum=10,
                        value=3,
                        step=1,
                        label="Nombre max d'images"
                    )

                gr.Markdown("---")

                gr.Markdown("### 🖼️ Images pertinentes")
                image_gallery = gr.Gallery(
                    label="Images pertinentes",
                    show_label=False,
                    columns=2,
                    height=300,
                    object_fit="contain"
                )

                gr.Markdown("### 📊 Tableaux")
                tables_display = gr.HTML()

        # Wire the model change
        model_selector.change(
            fn=change_model_fn,
            inputs=model_selector,
            outputs=model_status
        )

        # Wire the collection change
        apply_collection_btn.click(
            fn=change_collection_fn,
            inputs=collection_name_input,
            outputs=collection_status
        )

        # Helper: clear the input box after a message is dispatched
        def clear_input():
            return ""

        # Main actions: Enter key and Send button share the same pipeline,
        # then empty the textbox once the query has been handed off.
        msg.submit(
            process_query_fn,
            inputs=[msg, chat_interface, streaming, show_sources, max_images, language_selector],
            outputs=[chat_interface, source_info, image_gallery, tables_display]
        ).then(clear_input, None, msg)

        submit_btn.click(
            process_query_fn,
            inputs=[msg, chat_interface, streaming, show_sources, max_images, language_selector],
            outputs=[chat_interface, source_info, image_gallery, tables_display]
        ).then(clear_input, None, msg)

        clear_btn.click(
            reset_conversation_fn,
            outputs=[chat_interface, source_info, image_gallery, tables_display]
        )

        # KaTeX styling and design tweaks (raw HTML/CSS/JS payload rendered
        # into the page; loads KaTeX from CDN and auto-renders $...$ math)
        gr.Markdown("""
        <style>
        .gradio-container {max-width: 1200px !important}
        #chatbot {height: 600px; overflow-y: auto;}
        #sources_info {margin-top: 10px; color: #666;}

        /* Improved styles for equations */
        .katex { font-size: 1.1em !important; }
        .math-inline { background: #f8f9fa; padding: 2px 5px; border-radius: 4px; }
        .math-display { background: #f8f9f9; margin: 10px 0; padding: 10px; border-radius: 5px; overflow-x: auto; text-align: center; }

        /* Table styles */
        table {
            border-collapse: collapse;
            width: 100%;
            margin: 15px 0;
            font-size: 0.9em;
        }
        table, th, td {
            border: 1px solid #ddd;
        }
        th, td {
            padding: 8px 12px;
            text-align: left;
        }
        th {
            background-color: #f2f2f2;
        }
        tr:nth-child(even) {
            background-color: #f9f9f9;
        }
        .table-container {
            overflow-x: auto;
            margin-top: 10px;
        }
        </style>

        <!-- Loading KaTeX -->
        <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/katex@0.16.8/dist/katex.min.css">
        <script src="https://cdn.jsdelivr.net/npm/katex@0.16.8/dist/katex.min.js"></script>
        <script src="https://cdn.jsdelivr.net/npm/katex@0.16.8/dist/contrib/auto-render.min.js"></script>

        <script>
        // Script pour rendre les équations mathématiques avec KaTeX
        document.addEventListener('DOMContentLoaded', function() {
            setTimeout(function() {
                if (window.renderMathInElement) {
                    renderMathInElement(document.body, {
                        delimiters: [
                            {left: '$$', right: '$$', display: true},
                            {left: '$', right: '$', display: false}
                        ],
                        throwOnError: false
                    });
                }
            }, 1000);
        });
        </script>
        """)

    return interface
|
||||
18
config/settings.py
Normal file
18
config/settings.py
Normal file
@ -0,0 +1,18 @@
|
||||
import gradio as gr

# Configuration settings for the application

# URLs and connection settings
QDRANT_URL = "http://localhost:6333"  # Qdrant vector-store endpoint
QDRANT_COLLECTION_NAME = "my_custom_collection"  # default document collection
OLLAMA_URL = "http://127.0.0.1:11434"  # local Ollama server

# Model settings
EMBEDDING_MODEL = "mxbai-embed-large"  # embedding model used for retrieval
DEFAULT_MODEL = "llama3.2"  # LLM selected at startup

# Available models (offered in the UI model dropdown)
AVAILABLE_MODELS = ["llama3.1", "llama3.2", "deepseek-r1:7b", "deepseek-r1:14b"]

# Default theme
# NOTE(review): DEFAULT_THEME is not referenced by the code visible in this
# change-set (components/ui.py builds its own Soft theme inline) — confirm
# before relying on or removing it.
DEFAULT_THEME = gr.themes.Soft(primary_hue="blue")
|
||||
50
config/translations.py
Normal file
50
config/translations.py
Normal file
@ -0,0 +1,50 @@
|
||||
# Language mapping so the LLM receives an unambiguous language name.
# NOTE(review): components/chatbot.py and components/ui.py import
# LANGUAGE_MAPPING / UI_TRANSLATIONS from `translations.lang_mappings`,
# not from `config.translations` — confirm which module is the live copy
# before editing this one.
LANGUAGE_MAPPING = {
    "Français": "français",
    "English": "English",
    "Español": "español",
    "Deutsch": "Deutsch",
    "Italiano": "italiano",
    "中文": "Chinese",
    "日本語": "Japanese",
    "العربية": "Arabic",
    "فارسی": "Persian"
}

# Translation dictionary for the interface widgets.
UI_TRANSLATIONS = {
    "Français": {
        "title": "📚 Assistant documentaire intelligent",
        "placeholder": "Posez votre question...",
        "send_btn": "Envoyer",
        "clear_btn": "Effacer la conversation",
        "model_selector": "Modèle Ollama",
        "model_info": "Choisir le modèle de language à utiliser",
        "model_current": "Modèle actuel",
        "language_selector": "Langue des réponses",
        "language_info": "Choisir la langue dans laquelle l'assistant répondra",
        "collection_input": "Collection Qdrant",
        "collection_info": "Nom de la collection de documents à utiliser",
        "collection_current": "Collection actuelle",
        "apply_btn": "Appliquer la collection",
        "streaming_label": "Mode streaming",
        "streaming_info": "Voir les réponses s'afficher progressivement",
        "sources_label": "Afficher les sources",
        "max_images_label": "Nombre max d'images",
        "images_title": "🖼️ Images pertinentes",
        "tables_title": "📊 Tableaux",
        "sources_found": "Sources trouvées",
        "texts": "textes",
        "images": "images",
        "tables": "tableaux",
        "error_msg": "Une erreur est survenue"
    },
    "English": {
        "title": "📚 Intelligent Document Assistant",
        "placeholder": "Ask your question...",
        "send_btn": "Send",
        "clear_btn": "Clear conversation",
        # TODO: add the remaining English keys (the French dict above is the
        # complete reference set)
    }
    # Add other languages as needed
}
|
||||
142
test_mistral.ipynb
Normal file
142
test_mistral.ipynb
Normal file
@ -0,0 +1,142 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"ename": "SDKError",
|
||||
"evalue": "API error occurred: Status 401\n{\n \"message\":\"Unauthorized\",\n \"request_id\":\"11390a73fd79bc1a934c5858569caa3a\"\n}",
|
||||
"output_type": "error",
|
||||
"traceback": [
|
||||
"\u001b[31m---------------------------------------------------------------------------\u001b[39m",
|
||||
"\u001b[31mSDKError\u001b[39m Traceback (most recent call last)",
|
||||
"\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[11]\u001b[39m\u001b[32m, line 8\u001b[39m\n\u001b[32m 4\u001b[39m api_key =\u001b[33m\"\u001b[39m\u001b[33mxmM3IG80Y97Hg8kJVUPy1ijyIhmS2H9j\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m 6\u001b[39m client = Mistral(api_key=api_key)\n\u001b[32m----> \u001b[39m\u001b[32m8\u001b[39m uploaded_pdf = \u001b[43mclient\u001b[49m\u001b[43m.\u001b[49m\u001b[43mfiles\u001b[49m\u001b[43m.\u001b[49m\u001b[43mupload\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 9\u001b[39m \u001b[43m \u001b[49m\u001b[43mfile\u001b[49m\u001b[43m=\u001b[49m\u001b[43m{\u001b[49m\n\u001b[32m 10\u001b[39m \u001b[43m \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mfile_name\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43m11_chapitre3.pdf\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[32m 11\u001b[39m \u001b[43m \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mcontent\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mopen\u001b[39;49m\u001b[43m(\u001b[49m\u001b[33;43mr\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mF:\u001b[39;49m\u001b[33;43m\\\u001b[39;49m\u001b[33;43mDev\u001b[39;49m\u001b[33;43m\\\u001b[39;49m\u001b[33;43mRag\u001b[39;49m\u001b[33;43m\\\u001b[39;49m\u001b[33;43mRag_Modeling\u001b[39;49m\u001b[33;43m\\\u001b[39;49m\u001b[33;43mdocument\u001b[39;49m\u001b[33;43m\\\u001b[39;49m\u001b[33;43m11_chapitre3.pdf\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mrb\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 12\u001b[39m \u001b[43m \u001b[49m\u001b[43m}\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 13\u001b[39m \u001b[43m 
\u001b[49m\u001b[43mpurpose\u001b[49m\u001b[43m=\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mocr\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\n\u001b[32m 14\u001b[39m \u001b[43m)\u001b[49m \n\u001b[32m 15\u001b[39m client.files.retrieve(file_id=uploaded_pdf.id) \n",
|
||||
"\u001b[36mFile \u001b[39m\u001b[32mf:\\Dev\\Rag\\chat_bot_rag\\.venv\\Lib\\site-packages\\mistralai\\files.py:101\u001b[39m, in \u001b[36mFiles.upload\u001b[39m\u001b[34m(self, file, purpose, retries, server_url, timeout_ms, http_headers)\u001b[39m\n\u001b[32m 99\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m utils.match_response(http_res, \u001b[33m\"\u001b[39m\u001b[33m4XX\u001b[39m\u001b[33m\"\u001b[39m, \u001b[33m\"\u001b[39m\u001b[33m*\u001b[39m\u001b[33m\"\u001b[39m):\n\u001b[32m 100\u001b[39m http_res_text = utils.stream_to_text(http_res)\n\u001b[32m--> \u001b[39m\u001b[32m101\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m models.SDKError(\n\u001b[32m 102\u001b[39m \u001b[33m\"\u001b[39m\u001b[33mAPI error occurred\u001b[39m\u001b[33m\"\u001b[39m, http_res.status_code, http_res_text, http_res\n\u001b[32m 103\u001b[39m )\n\u001b[32m 104\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m utils.match_response(http_res, \u001b[33m\"\u001b[39m\u001b[33m5XX\u001b[39m\u001b[33m\"\u001b[39m, \u001b[33m\"\u001b[39m\u001b[33m*\u001b[39m\u001b[33m\"\u001b[39m):\n\u001b[32m 105\u001b[39m http_res_text = utils.stream_to_text(http_res)\n",
|
||||
"\u001b[31mSDKError\u001b[39m: API error occurred: Status 401\n{\n \"message\":\"Unauthorized\",\n \"request_id\":\"11390a73fd79bc1a934c5858569caa3a\"\n}"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from mistralai import Mistral\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"api_key = \"REDACTED\"  # SECURITY: hard-coded Mistral API key removed; load it from the MISTRAL_API_KEY environment variable\n",
|
||||
"\n",
|
||||
"client = Mistral(api_key=api_key)\n",
|
||||
"\n",
|
||||
"uploaded_pdf = client.files.upload(\n",
|
||||
" file={\n",
|
||||
" \"file_name\": \"11_chapitre3.pdf\",\n",
|
||||
" \"content\": open(r\"F:\\Dev\\Rag\\Rag_Modeling\\document\\11_chapitre3.pdf\", \"rb\"),\n",
|
||||
" },\n",
|
||||
" purpose=\"ocr\"\n",
|
||||
") \n",
|
||||
"client.files.retrieve(file_id=uploaded_pdf.id) \n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"ename": "SDKError",
|
||||
"evalue": "API error occurred: Status 401\n{\n \"message\":\"Unauthorized\",\n \"request_id\":\"bf40e3105e1f257ec16fc233e4d0396b\"\n}",
|
||||
"output_type": "error",
|
||||
"traceback": [
|
||||
"\u001b[31m---------------------------------------------------------------------------\u001b[39m",
|
||||
"\u001b[31mSDKError\u001b[39m Traceback (most recent call last)",
|
||||
"\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[10]\u001b[39m\u001b[32m, line 5\u001b[39m\n\u001b[32m 1\u001b[39m model = \u001b[33m\"\u001b[39m\u001b[33mmistral-large-latest\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m 3\u001b[39m client = Mistral(api_key=api_key)\n\u001b[32m----> \u001b[39m\u001b[32m5\u001b[39m chat_response = \u001b[43mclient\u001b[49m\u001b[43m.\u001b[49m\u001b[43mchat\u001b[49m\u001b[43m.\u001b[49m\u001b[43mcomplete\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 6\u001b[39m \u001b[43m \u001b[49m\u001b[43mmodel\u001b[49m\u001b[43m=\u001b[49m\u001b[43m \u001b[49m\u001b[43mmodel\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 7\u001b[39m \u001b[43m \u001b[49m\u001b[43mmessages\u001b[49m\u001b[43m \u001b[49m\u001b[43m=\u001b[49m\u001b[43m \u001b[49m\u001b[43m[\u001b[49m\n\u001b[32m 8\u001b[39m \u001b[43m \u001b[49m\u001b[43m{\u001b[49m\n\u001b[32m 9\u001b[39m \u001b[43m \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mrole\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43muser\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[32m 10\u001b[39m \u001b[43m \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mcontent\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mWhat is the best French cheese?\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[32m 11\u001b[39m \u001b[43m \u001b[49m\u001b[43m}\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 12\u001b[39m \u001b[43m \u001b[49m\u001b[43m]\u001b[49m\n\u001b[32m 13\u001b[39m \u001b[43m)\u001b[49m\n\u001b[32m 14\u001b[39m \u001b[38;5;28mprint\u001b[39m(chat_response.choices[\u001b[32m0\u001b[39m].message.content)\n",
|
||||
"\u001b[36mFile \u001b[39m\u001b[32mf:\\Dev\\Rag\\chat_bot_rag\\.venv\\Lib\\site-packages\\mistralai\\chat.py:235\u001b[39m, in \u001b[36mChat.complete\u001b[39m\u001b[34m(self, model, messages, temperature, top_p, max_tokens, stream, stop, random_seed, response_format, tools, tool_choice, presence_penalty, frequency_penalty, n, prediction, safe_prompt, retries, server_url, timeout_ms, http_headers)\u001b[39m\n\u001b[32m 233\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m utils.match_response(http_res, \u001b[33m\"\u001b[39m\u001b[33m4XX\u001b[39m\u001b[33m\"\u001b[39m, \u001b[33m\"\u001b[39m\u001b[33m*\u001b[39m\u001b[33m\"\u001b[39m):\n\u001b[32m 234\u001b[39m http_res_text = utils.stream_to_text(http_res)\n\u001b[32m--> \u001b[39m\u001b[32m235\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m models.SDKError(\n\u001b[32m 236\u001b[39m \u001b[33m\"\u001b[39m\u001b[33mAPI error occurred\u001b[39m\u001b[33m\"\u001b[39m, http_res.status_code, http_res_text, http_res\n\u001b[32m 237\u001b[39m )\n\u001b[32m 238\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m utils.match_response(http_res, \u001b[33m\"\u001b[39m\u001b[33m5XX\u001b[39m\u001b[33m\"\u001b[39m, \u001b[33m\"\u001b[39m\u001b[33m*\u001b[39m\u001b[33m\"\u001b[39m):\n\u001b[32m 239\u001b[39m http_res_text = utils.stream_to_text(http_res)\n",
|
||||
"\u001b[31mSDKError\u001b[39m: API error occurred: Status 401\n{\n \"message\":\"Unauthorized\",\n \"request_id\":\"bf40e3105e1f257ec16fc233e4d0396b\"\n}"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"model = \"mistral-large-latest\"\n",
|
||||
"\n",
|
||||
"client = Mistral(api_key=api_key)\n",
|
||||
"\n",
|
||||
"chat_response = client.chat.complete(\n",
|
||||
" model= model,\n",
|
||||
" messages = [\n",
|
||||
" {\n",
|
||||
" \"role\": \"user\",\n",
|
||||
" \"content\": \"What is the best French cheese?\",\n",
|
||||
" },\n",
|
||||
" ]\n",
|
||||
")\n",
|
||||
"print(chat_response.choices[0].message.content)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"<_io.BufferedReader name='F:\\\\Dev\\\\Rag\\\\Rag_Modeling\\\\document\\\\11_chapitre3.pdf'>"
|
||||
]
|
||||
},
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"open(r\"F:\\Dev\\Rag\\Rag_Modeling\\document\\11_chapitre3.pdf\", \"rb\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"MISTRAL_API_KEY: None\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"print(\"MISTRAL_API_KEY:\", repr(os.environ.get(\"MISTRAL_API_KEY\")))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": ".venv",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.11"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
105
translations/lang_mappings.py
Normal file
105
translations/lang_mappings.py
Normal file
@ -0,0 +1,105 @@
|
||||
# Maps each UI language label to the language name injected into the LLM
# prompt (native spellings help the model follow the instruction).
LANGUAGE_MAPPING = {
    "Français": "français",
    "English": "English",
    "Español": "español",
    "Deutsch": "Deutsch",
    "Italiano": "italiano",
    "中文": "Chinese",
    "日本語": "Japanese",
    "العربية": "Arabic"
}

# Translation dictionary for the interface labels.
# Every language listed in UI_SUPPORTED_LANGUAGES must provide the same
# set of keys -- the UI reads labels directly by key, so a missing key
# would raise KeyError at render time.
UI_TRANSLATIONS = {
    "Français": {
        "title": "📚 Assistant documentaire intelligent",
        "placeholder": "Posez votre question...",
        "send_btn": "Envoyer",
        "clear_btn": "Effacer la conversation",
        "model_selector": "Modèle Ollama",
        "model_info": "Choisir le modèle de language à utiliser",
        "model_current": "Modèle actuel",
        "language_selector": "Langue des réponses",
        "language_info": "Choisir la langue dans laquelle l'assistant répondra",
        "collection_input": "Collection Qdrant",
        "collection_info": "Nom de la collection de documents à utiliser",
        "collection_current": "Collection actuelle",
        "apply_btn": "Appliquer la collection",
        "streaming_label": "Mode streaming",
        "streaming_info": "Voir les réponses s'afficher progressivement",
        "sources_label": "Afficher les sources",
        "max_images_label": "Nombre max d'images",
        "images_title": "🖼️ Images pertinentes",
        "tables_title": "📊 Tableaux",
        "sources_found": "Sources trouvées",
        "texts": "textes",
        "images": "images",
        "tables": "tableaux",
        "error_msg": "Une erreur est survenue",
        "processing_error": "Erreur lors du traitement de la requête",
        "table_translation": "Traduction",
        "table_description": "Ce tableau présente des données sur"
    },
    "English": {
        "title": "📚 Intelligent Document Assistant",
        "placeholder": "Ask your question...",
        "send_btn": "Send",
        "clear_btn": "Clear conversation",
        "model_selector": "Ollama Model",
        "model_info": "Choose the language model to use",
        "model_current": "Current model",
        "language_selector": "Response language",
        "language_info": "Choose the language in which the assistant will respond",
        "collection_input": "Qdrant Collection",
        "collection_info": "Name of the document collection to use",
        "collection_current": "Current collection",
        "apply_btn": "Apply collection",
        "streaming_label": "Streaming mode",
        "streaming_info": "See responses appear progressively",
        "sources_label": "Show sources",
        "max_images_label": "Maximum number of images",
        "images_title": "🖼️ Relevant images",
        "tables_title": "📊 Tables",
        "sources_found": "Sources found",
        "texts": "texts",
        "images": "images",
        "tables": "tables",
        "error_msg": "An error occurred",
        "processing_error": "Error processing request",
        "table_translation": "Translation",
        "table_description": "This table presents data on"
    },
    "Español": {
        "title": "📚 Asistente documental inteligente",
        "placeholder": "Haz tu pregunta...",
        "send_btn": "Enviar",
        "clear_btn": "Borrar conversación",
        "model_selector": "Modelo Ollama",
        "model_info": "Elegir el modelo de lenguaje a utilizar",
        "model_current": "Modelo actual",
        "language_selector": "Idioma de respuesta",
        "language_info": "Elegir el idioma en el que responderá el asistente",
        "collection_input": "Colección Qdrant",
        "collection_info": "Nombre de la colección de documentos a utilizar",
        "collection_current": "Colección actual",
        "apply_btn": "Aplicar colección",
        "streaming_label": "Modo streaming",
        "streaming_info": "Ver las respuestas aparecer progresivamente",
        "sources_label": "Mostrar fuentes",
        "max_images_label": "Número máximo de imágenes",
        "images_title": "🖼️ Imágenes relevantes",
        "tables_title": "📊 Tablas",
        "sources_found": "Fuentes encontradas",
        "texts": "textos",
        "images": "imágenes",
        "tables": "tablas",
        "error_msg": "Se ha producido un error",
        "processing_error": "Error al procesar la solicitud",
        "table_translation": "Traducción",
        "table_description": "Esta tabla presenta datos sobre"
    }
}

# Languages the interface itself can be displayed in; each entry must be
# a key of UI_TRANSLATIONS (note: a subset of LANGUAGE_MAPPING).
UI_SUPPORTED_LANGUAGES = ["Français", "English", "Español"]
|
||||
1
utils/__init__.py
Normal file
1
utils/__init__.py
Normal file
@ -0,0 +1 @@
|
||||
# Package initialization for display utilities
|
||||
15
utils/conversion.py
Normal file
15
utils/conversion.py
Normal file
@ -0,0 +1,15 @@
|
||||
import base64
|
||||
from io import BytesIO
|
||||
from PIL import Image
|
||||
|
||||
def base64_to_image(base64_data):
    """Decode a base64-encoded image into a PIL Image.

    Returns None for an empty payload or when decoding/parsing fails, so
    callers can hand the result straight to the display layer.
    """
    if not base64_data:
        return None
    try:
        raw_bytes = base64.b64decode(base64_data)
        return Image.open(BytesIO(raw_bytes))
    except Exception as e:
        print(f"Erreur lors de la conversion d'image: {e}")
        return None
|
||||
40
utils/display.py
Normal file
40
utils/display.py
Normal file
@ -0,0 +1,40 @@
|
||||
from PIL import Image
|
||||
from io import BytesIO
|
||||
import base64
|
||||
|
||||
def base64_to_image(base64_data):
    """Convert base64 image data to a PIL Image (None if empty or invalid)."""
    if not base64_data:
        return None
    try:
        decoded = base64.b64decode(base64_data)
        buffer = BytesIO(decoded)
        return Image.open(buffer)
    except Exception as e:
        print(f"Image conversion error: {e}")
        return None
|
||||
|
||||
def display_images(current_images):
    """Format retrieved images for Gradio gallery display.

    Parameters:
        current_images: list of dicts expected to carry "image", "caption",
            "source" and "page" entries -- assumed produced by the RAG
            retrieval pipeline (TODO confirm against caller).

    Returns:
        A list of (image, caption) tuples for the gallery, or None when
        there is nothing displayable.
    """
    if not current_images:
        return None
    gallery = [
        (
            img["image"],
            # .get() defaults instead of direct indexing so a partially
            # filled entry degrades to "N/A" rather than raising KeyError.
            f"{img.get('caption', '')} (Source: {img.get('source', 'N/A')}, Page: {img.get('page', 'N/A')})",
        )
        for img in current_images
        if img.get("image")
    ]
    # Return None (not []) when every entry lacked an image, consistent
    # with the empty-input path above and with utils/image_utils.py.
    return gallery or None
|
||||
|
||||
def display_tables(current_tables):
    """Format retrieved tables as an HTML string for display.

    Parameters:
        current_tables: list of dicts expected to carry "caption",
            "source", "page" and "data" (pre-rendered HTML) entries.

    Returns:
        Concatenated HTML for all tables, or None when there are none.
    """
    if not current_tables:
        return None

    html = ""
    for table in current_tables:
        # .get() with defaults so a partially filled entry renders with
        # placeholders instead of raising KeyError (matches the defensive
        # style of utils/table_utils.py).
        html += f"""
        <div style="margin-bottom: 20px; border: 1px solid #ddd; padding: 15px; border-radius: 8px;">
            <h3>{table.get('caption', 'Table')}</h3>
            <p style="color:#666; font-size:0.9em;">Source: {table.get('source', 'N/A')}, Page: {table.get('page', 'N/A')}</p>
            <div class="table-container">{table.get('data', '')}</div>
        </div>
        """
    return html if html else None
|
||||
29
utils/image_utils.py
Normal file
29
utils/image_utils.py
Normal file
@ -0,0 +1,29 @@
|
||||
from io import BytesIO
|
||||
from PIL import Image
|
||||
import base64
|
||||
|
||||
def base64_to_image(base64_data):
    """Convert a base64-encoded image into a PIL Image for direct display.

    Returns None when the payload is empty or cannot be decoded/parsed.
    """
    try:
        if not base64_data:
            return None
        return Image.open(BytesIO(base64.b64decode(base64_data)))
    except Exception as err:
        print(f"Erreur lors de la conversion d'image: {err}")
        return None
|
||||
|
||||
def display_images(current_images):
    """Prepare retrieved images for display in the Gradio gallery.

    Parameters:
        current_images: list of dicts expected to carry "image", "caption",
            "source" and "page" entries.

    Returns:
        A list of (image, caption) tuples, or None when nothing is
        displayable.
    """
    if not current_images:
        return None

    gallery = []
    for img_data in current_images:
        # .get() instead of ["image"]: an entry without an image must be
        # skipped, not raise KeyError (matches utils/display.py).
        image = img_data.get("image")
        if image:
            caption = f"{img_data['caption']} (Source: {img_data['source']}, Page: {img_data['page']})"
            gallery.append((image, caption))

    return gallery if gallery else None
|
||||
190
utils/katex_script.py
Normal file
190
utils/katex_script.py
Normal file
@ -0,0 +1,190 @@
|
||||
KATEX_CSS_JS = """
|
||||
<style>
|
||||
.gradio-container {max-width: 1200px !important}
|
||||
#chatbot {height: 600px; overflow-y: auto;}
|
||||
#sources_info {margin-top: 10px; color: #666;}
|
||||
|
||||
/* Improved styles for equations */
|
||||
.katex { font-size: 1.1em !important; }
|
||||
.math-inline { background: #f8f9fa; padding: 2px 5px; border-radius: 4px; }
|
||||
.math-display { background: #f8f9f9; margin: 10px 0; padding: 10px; border-radius: 5px; overflow-x: auto; text-align: center; }
|
||||
|
||||
/* Table styles */
|
||||
table {
|
||||
border-collapse: collapse;
|
||||
width: 100%;
|
||||
margin: 15px 0;
|
||||
font-size: 0.9em;
|
||||
}
|
||||
table, th, td {
|
||||
border: 1px solid #ddd;
|
||||
}
|
||||
th, td {
|
||||
padding: 8px 12px;
|
||||
text-align: left;
|
||||
}
|
||||
th {
|
||||
background-color: #f2f2f2;
|
||||
}
|
||||
tr:nth-child(even) {
|
||||
background-color: #f9f9f9;
|
||||
}
|
||||
.table-container {
|
||||
overflow-x: auto;
|
||||
margin-top: 10px;
|
||||
}
|
||||
</style>
|
||||
|
||||
<!-- Loading KaTeX -->
|
||||
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/katex@0.16.8/dist/katex.min.css">
|
||||
<script src="https://cdn.jsdelivr.net/npm/katex@0.16.8/dist/katex.min.js"></script>
|
||||
<script src="https://cdn.jsdelivr.net/npm/katex@0.16.8/dist/contrib/auto-render.min.js"></script>
|
||||
|
||||
<script>
|
||||
// Function to process math equations with KaTeX
|
||||
function renderMathInElement(element) {
|
||||
if (!window.renderMathInElement) return;
|
||||
|
||||
try {
|
||||
window.renderMathInElement(element, {
|
||||
delimiters: [
|
||||
{left: '$$', right: '$$', display: true},
|
||||
{left: '$', right: '$', display: false},
|
||||
{left: '\\\\(', right: '\\\\)', display: false},
|
||||
{left: '\\\\[', right: '\\\\]', display: true}
|
||||
],
|
||||
throwOnError: false,
|
||||
trust: true,
|
||||
strict: false,
|
||||
macros: {
|
||||
"\\\\R": "\\\\mathbb{R}",
|
||||
"\\\\N": "\\\\mathbb{N}"
|
||||
}
|
||||
});
|
||||
} catch (e) {
|
||||
console.error("KaTeX rendering error:", e);
|
||||
}
|
||||
}
|
||||
|
||||
// Function to fix and prepare text for LaTeX rendering
|
||||
function prepareTextForLatex(text) {
|
||||
if (!text) return text;
|
||||
|
||||
// Don't modify code blocks
|
||||
if (text.indexOf('<pre>') !== -1) {
|
||||
const parts = text.split(/<pre>|<\/pre>/);
|
||||
for (let i = 0; i < parts.length; i++) {
|
||||
// Only process odd-indexed parts (non-code)
|
||||
if (i % 2 === 0) {
|
||||
parts[i] = prepareLatexInText(parts[i]);
|
||||
}
|
||||
}
|
||||
return parts.join('');
|
||||
}
|
||||
|
||||
return prepareLatexInText(text);
|
||||
}
|
||||
|
||||
// Helper to process LaTeX in regular text
|
||||
function prepareLatexInText(text) {
|
||||
// Make sure dollar signs used for math have proper spacing
|
||||
// First, protect existing well-formed math expressions
|
||||
text = text.replace(/(\\$\\$[^\\$]+\\$\\$)/g, '<protect>$1</protect>'); // protect display math
|
||||
text = text.replace(/(\\$[^\\$\\n]+\\$)/g, '<protect>$1</protect>'); // protect inline math
|
||||
|
||||
// Fix common LaTeX formatting issues outside protected regions
|
||||
text = text.replace(/([^<]protect[^>]*)(\\$)([^\\s])/g, '$1$2 $3'); // Add space after $ if needed
|
||||
text = text.replace(/([^\\s])(\\$)([^<]protect[^>]*)/g, '$1 $2$3'); // Add space before $ if needed
|
||||
|
||||
// Handle subscripts: transform x_1 into x_{1} for better LaTeX compatibility
|
||||
text = text.replace(/([a-zA-Z])_([0-9a-zA-Z])/g, '$1_{$2}');
|
||||
|
||||
// Restore protected content
|
||||
text = text.replace(/<protect>(.*?)<\/protect>/g, '$1');
|
||||
|
||||
return text;
|
||||
}
|
||||
|
||||
// Enhanced message processor for KaTeX rendering
|
||||
function processMessage(message) {
|
||||
if (!message) return;
|
||||
|
||||
try {
|
||||
// Get direct textual content when possible
|
||||
const elements = message.querySelectorAll('p, li, h1, h2, h3, h4, h5, span');
|
||||
elements.forEach(el => {
|
||||
const originalText = el.innerHTML;
|
||||
const preparedText = prepareTextForLatex(originalText);
|
||||
|
||||
// Only update if changes were made
|
||||
if (preparedText !== originalText) {
|
||||
el.innerHTML = preparedText;
|
||||
}
|
||||
|
||||
// Render equations in this element
|
||||
renderMathInElement(el);
|
||||
});
|
||||
|
||||
// Also try to render on the entire message as fallback
|
||||
renderMathInElement(message);
|
||||
} catch (e) {
|
||||
console.error("Error processing message for LaTeX:", e);
|
||||
}
|
||||
}
|
||||
|
||||
// Function to monitor for new messages
|
||||
function setupMathObserver() {
|
||||
const chatElement = document.getElementById('chatbot');
|
||||
if (!chatElement) {
|
||||
setTimeout(setupMathObserver, 500);
|
||||
return;
|
||||
}
|
||||
|
||||
// Process any existing messages
|
||||
chatElement.querySelectorAll('.message').forEach(processMessage);
|
||||
|
||||
// Set up observer for new content
|
||||
const observer = new MutationObserver((mutations) => {
|
||||
for (const mutation of mutations) {
|
||||
if (mutation.addedNodes.length > 0 || mutation.type === 'characterData') {
|
||||
chatElement.querySelectorAll('.message').forEach(processMessage);
|
||||
break;
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
observer.observe(chatElement, {
|
||||
childList: true,
|
||||
subtree: true,
|
||||
characterData: true
|
||||
});
|
||||
|
||||
console.log("LaTeX rendering observer set up successfully");
|
||||
}
|
||||
|
||||
// Initialize once the document is fully loaded
|
||||
function initializeRendering() {
|
||||
if (window.renderMathInElement) {
|
||||
setupMathObserver();
|
||||
} else {
|
||||
// If KaTeX isn't loaded yet, wait for it
|
||||
const katexScript = document.querySelector('script[src*="auto-render.min.js"]');
|
||||
if (katexScript) {
|
||||
katexScript.onload = setupMathObserver;
|
||||
} else {
|
||||
// Last resort: try again later
|
||||
setTimeout(initializeRendering, 500);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Set up multiple trigger points to ensure it loads
|
||||
document.addEventListener('DOMContentLoaded', function() {
|
||||
setTimeout(initializeRendering, 800);
|
||||
});
|
||||
|
||||
window.addEventListener('load', function() {
|
||||
setTimeout(initializeRendering, 1200);
|
||||
});
|
||||
</script>
|
||||
"""
|
||||
19
utils/table_utils.py
Normal file
19
utils/table_utils.py
Normal file
@ -0,0 +1,19 @@
|
||||
from translations.lang_mappings import UI_TRANSLATIONS
|
||||
|
||||
def display_tables(current_tables, language=None):
    """Render retrieved tables as a single HTML string.

    The *language* parameter is accepted for interface compatibility but
    ignored by this simplified version.

    Returns the concatenated HTML, or None when there are no tables.
    """
    if not current_tables:
        return None

    sections = []
    for entry in current_tables:
        content = entry.get('data', '')
        sections.append(f"""
        <div style="margin-bottom: 20px; border: 1px solid #ddd; padding: 15px; border-radius: 8px;">
            <h3>{entry.get('caption', 'Tableau')}</h3>
            <p style="color:#666; font-size:0.9em;">Source: {entry.get('source', 'N/A')}, Page: {entry.get('page', 'N/A')}</p>
            <pre>{content}</pre>
        </div>
        """)

    combined = "".join(sections)
    return combined if combined else None
|
||||
Loading…
x
Reference in New Issue
Block a user