# Listing metadata: 19 lines, 579 B, Python
# Usage example: run the PDF processing pipeline on one document and pull the
# individual parts out of the returned result.
import sys
import os

# Put the ``src/document_processing`` directory (resolved relative to this
# file) at the front of the module search path so ``pdf_processor`` can be
# imported without installing the project as a package.
sys.path.insert(
    0,
    os.path.abspath(
        os.path.join(os.path.dirname(__file__), '../src/document_processing')
    ),
)

# Must come after the sys.path tweak above, hence not at the top of the file.
from pdf_processor import process_pdf_document  # noqa: E402

# NOTE(review): absolute, machine-specific Windows path; change it to a PDF
# that exists on your machine before running this example.
pdf_path = r"F:\Dev\Rag\Rag_Modeling\document\04Extrait_Methodologie_Experimentale.pdf"

# The keyword names suggest OCR, table and image extraction are switched on
# and chunking is configured; exact semantics and units are defined by
# ``process_pdf_document`` itself. TODO confirm against pdf_processor.
result = process_pdf_document(
    pdf_path,
    ocr_enabled=True,
    extract_tables=True,
    extract_images=True,
    chunk_size=1000,
    chunk_overlap=200,
)

# Access the different parts of the result.
text_chunks = result["chunks"]
tables = result["tables"]
images = result["images"]