rag/test/pdf_processing_test.py
2025-03-01 08:15:30 +01:00

19 lines
579 B
Python

# Usage example
import sys
import os
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '../src/document_processing')))
from pdf_processor import process_pdf_document
# Document handed to the PDF processor.
# NOTE(review): this is an absolute, machine-specific Windows path — confirm it
# exists on the machine running this script.
pdf_path = r"F:\Dev\Rag\Rag_Modeling\document\04Extrait_Methodologie_Experimentale.pdf"

# Options forwarded to process_pdf_document as keyword arguments.
processing_options = {
    "ocr_enabled": True,
    "extract_tables": True,
    "extract_images": True,
    "chunk_size": 1000,
    "chunk_overlap": 200,
}
result = process_pdf_document(pdf_path, **processing_options)

# Pull the individual parts out of the result.
text_chunks, tables, images = result["chunks"], result["tables"], result["images"]