Spaces:
Runtime error
Runtime error
File size: 1,925 Bytes
aa3ec35 3348ed6 2a677e6 3348ed6 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 |
import os

from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import PyPDFDirectoryLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma

# SECURITY: the OpenAI key must be supplied through the environment (for
# example a deployment secret), never committed to source. Any key that was
# previously hard-coded in this file should be treated as compromised and
# revoked.
if not os.environ.get("OPENAI_API_KEY"):
    raise RuntimeError(
        "OPENAI_API_KEY is not set; export it (or configure it as a "
        "deployment secret) before starting the app."
    )

# Load the PDF documents found under ./data.
loader = PyPDFDirectoryLoader("./data")
docs = loader.load()

# Split the loaded documents into overlapping chunks, measured in characters.
# NOTE(review): 100-character chunks are very small for retrieval; consider
# raising chunk_size if answers lack context.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=100,
    chunk_overlap=20,
    length_function=len,
)
documents = text_splitter.split_documents(docs)

embeddings = OpenAIEmbeddings()

# Build the vector store from the *split* chunks (previously the unsplit
# `docs` were indexed, which left the splitter output unused) and persist it.
persist_directory = "vector_db"
vectordb = Chroma.from_documents(
    documents,
    embedding=embeddings,
    persist_directory=persist_directory,
)
vectordb.persist()
vectordb = None

# Re-open the store from disk: loading the persisted vector store is much
# quicker than re-instantiating it, which is the benefit of persist_directory.
vectordb = Chroma(
    persist_directory=persist_directory,
    embedding_function=embeddings,
)

# Bring up the chat model.
llm = ChatOpenAI(model_name="gpt-3.5-turbo")

# Expose the document vector store as a retriever.
doc_retriever = vectordb.as_retriever()

# RetrievalQA chain that answers questions from the documents in the vector DB.
EN_17272_qa = RetrievalQA.from_chain_type(
    llm,
    chain_type="stuff",
    retriever=doc_retriever,
)
def make_answer(query):
    """Answer *query* by running it through the module-level RetrievalQA chain.

    Args:
        query: The question text entered by the user.

    Returns:
        Whatever ``EN_17272_qa.run`` returns for the query.
    """
    answer = EN_17272_qa.run(query)
    return answer
if __name__ == "__main__":
    # Build and launch the Gradio interface around make_answer.
    import gradio as gr

    gr.Interface(
        fn=make_answer,
        # gr.Textbox replaces the deprecated gr.inputs / gr.outputs namespaces.
        inputs=[gr.Textbox(lines=2, label="Input a question")],
        # The keyword is `label`; the former `labels=` raised a TypeError.
        outputs=gr.Textbox(label="Answer"),
        title="EN-17272 & Efficacy Reports",
        # The keyword is `description`; `descriptions=` is not an Interface
        # parameter, so the text was never displayed.
        description="EN-17272 & Efficacy reports is a generative model that gives answers based on the documents",
    ).launch()