Initial commit
This commit is contained in:
19
test/pdf_processing_test.py
Normal file
19
test/pdf_processing_test.py
Normal file
@@ -0,0 +1,19 @@
|
||||
# Usage example: run process_pdf_document on a sample PDF and pull the
# individual parts out of the returned result.
import os
import sys

# Put the sibling src/document_processing directory at the front of the import
# path so that `pdf_processor` can be imported when this file is run from test/.
_here = os.path.dirname(__file__)
_processing_dir = os.path.join(_here, '../src/document_processing')
sys.path.insert(0, os.path.abspath(_processing_dir))

from pdf_processor import process_pdf_document

# Absolute path to the sample document (machine-specific).
pdf_path = r"F:\Dev\Rag\Rag_Modeling\document\04Extrait_Methodologie_Experimentale.pdf"

# Options forwarded to process_pdf_document as keyword arguments.
# NOTE(review): chunk_size / chunk_overlap units (characters vs. tokens) are
# defined by pdf_processor and are not visible from this file -- confirm there.
_options = {
    "ocr_enabled": True,
    "extract_tables": True,
    "extract_images": True,
    "chunk_size": 1000,
    "chunk_overlap": 200,
}
result = process_pdf_document(pdf_path, **_options)

# Access the different parts of the result.
text_chunks, tables, images = (
    result["chunks"],
    result["tables"],
    result["images"],
)
|
||||
Reference in New Issue
Block a user