# 42 lines, 727 B, plaintext
# Core RAG and LLM libraries
|
|
langchain>=0.0.267
|
|
langchain-community>=0.0.10
|
|
transformers>=4.30.0
|
|
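# langchain_community is the same package as langchain-community>=0.0.10 above (pip normalizes "_" and "-"), so it is not listed twice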
|
|
|
|
# Document processing
|
|
unstructured>=0.10.0
|
|
pdf2image>=1.16.3
|
|
pypdf2>=3.0.0
|
|
pdfminer.six>=20221105
|
|
|
|
# OCR and image processing
|
|
pytesseract>=0.3.10
|
|
Pillow>=9.5.0
|
|
opencv-python>=4.8.0
|
|
|
|
# Table extraction
|
|
camelot-py>=0.11.0
|
|
tabula-py>=2.7.0
|
|
|
|
# Data manipulation
|
|
pandas>=2.0.0
|
|
numpy
|
|
|
|
# Visualization
|
|
matplotlib>=3.7.0
|
|
|
|
# Optional but commonly used with RAG
|
|
scikit-learn>=1.2.0
|
|
sentence-transformers>=2.2.2
|
|
|
|
# Vector database connections (common choices, uncomment as needed)
|
|
# chromadb>=0.4.6
|
|
# pinecone-client>=2.2.2
|
|
# qdrant-client>=1.3.0
|
|
# faiss-cpu>=1.7.4
|
|
|
|
# Utilities
|
|
tqdm>=4.65.0
|
|
python-dotenv>=1.0.0
|
|
pi_heif