diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..b46aa94 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,40 @@ +# Core RAG and LLM libraries +langchain>=0.0.267 +langchain-community>=0.0.10 +transformers>=4.30.0 + +# Document processing +unstructured>=0.10.0 +pdf2image>=1.16.3 +pypdf2>=3.0.0 +pdfminer.six>=20221105 + +# OCR and image processing +pytesseract>=0.3.10 +Pillow>=9.5.0 +opencv-python>=4.8.0 + +# Table extraction +camelot-py>=0.11.0 +tabula-py>=2.7.0 + +# Data manipulation +pandas>=2.0.0 +numpy>=1.24.0 + +# Visualization +matplotlib>=3.7.0 + +# Embeddings and ML utilities commonly used with RAG (installed by default; comment out if unused) +scikit-learn>=1.2.0 +sentence-transformers>=2.2.2 + +# Vector database connections (common choices, uncomment as needed) +# chromadb>=0.4.6 +# pinecone-client>=2.2.2 +# qdrant-client>=1.3.0 +# faiss-cpu>=1.7.4 + +# Utilities +tqdm>=4.65.0 +python-dotenv>=1.0.0 \ No newline at end of file