# Hugging Face Space: EN-17272 & Efficacy Reports — RAG question-answering app.
# (Original header was page-scrape residue: "Spaces: Runtime error".)
# --- Document ingestion --------------------------------------------------
from langchain.document_loaders import PyPDFDirectoryLoader

# Load every PDF found under ./data into LangChain Document objects.
loader = PyPDFDirectoryLoader("./data")
docs = loader.load()

# --- Chunking ------------------------------------------------------------
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Small chunks (100 chars, 20-char overlap) keep retrieval granular.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=100,
    chunk_overlap=20,
    length_function=len,
)
documents = text_splitter.split_documents(docs)

# --- Embeddings / vector store -------------------------------------------
import os

# SECURITY FIX: the original hard-coded a live OpenAI API key in source.
# The key must come from the environment (e.g. a Space secret) instead.
if "OPENAI_API_KEY" not in os.environ:
    raise EnvironmentError(
        "OPENAI_API_KEY is not set; configure it as an environment secret."
    )

from langchain.embeddings.openai import OpenAIEmbeddings
embeddings = OpenAIEmbeddings()

from langchain.vectorstores import Chroma
persist_directory = "vector_db"

# BUG FIX: embed the *split* chunks (`documents`), not the raw `docs` —
# otherwise the text splitter above is dead code and chunks exceed the
# intended 100-character retrieval granularity.
vectordb = Chroma.from_documents(
    documents, embedding=embeddings, persist_directory=persist_directory
)
vectordb.persist()
vectordb = None

# Re-opening the persisted store is much quicker than re-embedding —
# that is the benefit of persist_directory.
vectordb = Chroma(persist_directory=persist_directory, embedding_function=embeddings)

# --- LLM + retrieval chain -----------------------------------------------
from langchain.chat_models import ChatOpenAI
llm = ChatOpenAI(model_name="gpt-3.5-turbo")

# Expose the vector store as a retriever for the QA chain.
doc_retriever = vectordb.as_retriever()

from langchain.chains import RetrievalQA
# "stuff" chain type: retrieved chunks are concatenated directly into the prompt.
EN_17272_qa = RetrievalQA.from_chain_type(llm, chain_type="stuff", retriever=doc_retriever)
def make_answer(query):
    """Answer *query* by running it through the EN-17272 RetrievalQA chain."""
    answer = EN_17272_qa.run(query)
    return answer
if __name__ == "__main__":
    # Build a Gradio interface in front of the QA chain.
    import gradio as gr

    # BUG FIXES vs. original:
    #  * gr.inputs.Textbox / gr.outputs.Textbox were removed in Gradio 3 —
    #    components are constructed directly as gr.Textbox.
    #  * keyword typos `labels=` and `descriptions=` raised TypeError;
    #    the correct parameter names are `label=` and `description=`.
    gr.Interface(
        fn=make_answer,
        inputs=gr.Textbox(lines=2, label="Input a question"),
        outputs=gr.Textbox(label="Answer"),
        title="EN-17272 & Efficacy Reports",
        description=(
            "EN-17272 & Efficacy reports is a generative model that gives "
            "answers based on the documents"
        ),
    ).launch()