Initial commit

This commit is contained in:
2025-03-01 08:15:30 +01:00
commit 0d396d9bd9
14 changed files with 689 additions and 0 deletions

View File

@@ -0,0 +1,19 @@
# Usage example
import sys
import os
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '../src/document_processing')))
from pdf_processor import process_pdf_document
# PDF to process. The built-in default is a machine-specific absolute Windows
# path, so an alternative document can be supplied as the first command-line
# argument; with no argument the script behaves exactly as before.
pdf_path = (
    sys.argv[1]
    if len(sys.argv) > 1
    else r"F:\Dev\Rag\Rag_Modeling\document\04Extrait_Methodologie_Experimentale.pdf"
)

# Run the processing call with OCR, table extraction and image extraction
# all enabled.
# NOTE(review): the unit of chunk_size / chunk_overlap (characters vs. tokens)
# is decided inside process_pdf_document — confirm there.
result = process_pdf_document(
    pdf_path,
    ocr_enabled=True,
    extract_tables=True,
    extract_images=True,
    chunk_size=1000,
    chunk_overlap=200,
)

# Access the individual parts of the result.
text_chunks = result["chunks"]
tables = result["tables"]
images = result["images"]