rishi002 committed on
Commit
1fd98cd
·
verified ·
1 Parent(s): 1b47abe

Create embeddings.py

Browse files
Files changed (1) hide show
  1. embeddings.py +19 -0
embeddings.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain_community.document_loaders import PyPDFLoader
2
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
3
+ import os
4
+
5
def load_pdf_files(directory):
    """Load every PDF file found directly inside ``directory``.

    Only the top level of ``directory`` is scanned; sub-directories are not
    searched. Each matching file is opened with ``PyPDFLoader`` and whatever
    its ``load()`` call returns is appended to a single flat list.

    Args:
        directory: Path of the folder to scan for PDF files.

    Returns:
        A list containing the loaded documents of all PDFs, in sorted
        filename order. Empty when the folder holds no PDF files.

    Raises:
        OSError: If ``directory`` cannot be listed (for example, it does
            not exist); propagated from ``os.listdir``.
    """
    documents = []
    # os.listdir() yields entries in arbitrary order; sorting keeps the
    # resulting document order reproducible between runs and machines.
    for filename in sorted(os.listdir(directory)):
        # Compare case-insensitively so "REPORT.PDF" is not silently skipped.
        if not filename.lower().endswith('.pdf'):
            continue
        file_path = os.path.join(directory, filename)
        # A directory that merely ends in ".pdf" is not a loadable document.
        if not os.path.isfile(file_path):
            continue
        documents.extend(PyPDFLoader(file_path).load())
    return documents
13
+
14
def create_chunks(documents, chunk_size=1000, chunk_overlap=200):
    """Split ``documents`` into smaller overlapping chunks.

    A ``RecursiveCharacterTextSplitter`` is configured with the given sizes
    and its ``split_documents`` result is returned unchanged.

    Args:
        documents: The documents to split, e.g. the list produced by
            ``load_pdf_files``.
        chunk_size: Value passed to the splitter as ``chunk_size``.
            Defaults to 1000, the value previously hard-coded here.
        chunk_overlap: Value passed to the splitter as ``chunk_overlap``.
            Defaults to 200, the value previously hard-coded here.

    Returns:
        Whatever ``RecursiveCharacterTextSplitter.split_documents`` returns
        for ``documents``.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return text_splitter.split_documents(documents)