import os

import gradio as gr
from langchain.chains import ConversationalRetrievalChain
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.llms import OpenAI
from langchain.prompts import PromptTemplate
from langchain.vectorstores import Chroma


def load_document(document):
    """Load a PDF, embed its pages and index them in a Chroma vector store."""
    if not document or not document.name.endswith(".pdf"):
        return "Merci de fournir un document PDF"
    loader = PyPDFLoader(document.name)
    docs = loader.load()

    global k
    k = len(docs)  # number of pages; reused so the retriever returns every page

    # Create embeddings
    embeddings = OpenAIEmbeddings(openai_api_key=os.environ["OpenaiKey"])

    # Write the pages into the vector store, one id per page
    global docsearch
    docsearch = Chroma.from_documents(
        docs,
        embeddings,
        ids=["page" + str(d.metadata["page"]) for d in docs],
    )

    global chat_history
    chat_history = []
    return "Encodage créé"


def get_chat_history(inputs) -> str:
    """Render the (question, answer) history as plain text."""
    res = []
    for human, ai in inputs:
        res.append(f"Question : {human}\nRéponse : {ai}")
    return "\n".join(res)
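
# Illustrative sketch (not part of the app flow): what get_chat_history
# produces for a hypothetical two-turn history. The tuples below are
# made-up examples, not data from the app.
#
#   history = [
#       ("De quoi parle le document ?", "Il s'agit d'un rapport annuel."),
#       ("Combien de pages ?", "Le document compte 12 pages."),
#   ]
#   print(get_chat_history(history))
#   # Question : De quoi parle le document ?
#   # Réponse : Il s'agit d'un rapport annuel.
#   # Question : Combien de pages ?
#   # Réponse : Le document compte 12 pages.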

def question_document(question):
    """Answer a question about the indexed document, keeping the chat history."""
    if "docsearch" not in globals():
        return "Merci d'encoder un document PDF"

    # Define the LLMs
    turbo = ChatOpenAI(
        model_name="gpt-3.5-turbo",
        temperature=0,
        openai_api_key=os.environ["OpenaiKey"],
    )
    davinci = OpenAI(  # alternative completion model, unused below
        model_name="text-davinci-003",
        openai_api_key=os.environ["OpenaiKey"],
    )

    # Customized map_reduce prompts (kept for reference, currently disabled)
    # question_template = """{context}
    # Include the number at the start of the above text in your answer; it is
    # the page number of this extract in the source document. Label it "page".
    # Also make sure to answer in the same language as the following question.
    # QUESTION : {question}
    # ANSWER :
    # """
    # combine_template = """{summaries}
    # Note that the above text is based on transient extracts from one source
    # document, so do not mention different documents, extracts, passages,
    # portions or texts. There is only one, entire document.
    # Also make sure to answer in the same language as the following question.
    # QUESTION : {question}
    # ANSWER :
    # """
    # question_prompt = PromptTemplate(template=question_template, input_variables=["context", "question"])
    # combine_prompt = PromptTemplate(template=combine_template, input_variables=["summaries", "question"])

    # Define the chain
    # chain_type_kwargs = {"combine_prompt": combine_prompt, "question_prompt": question_prompt}  # , "return_intermediate_steps": True}
    # qa = RetrievalQAWithSourcesChain.from_chain_type(llm=llm, chain_type="map_reduce", chain_type_kwargs=chain_type_kwargs, retriever=docsearch.as_retriever(), return_source_documents=True)
    vectordbkwargs = {"search_distance": 10}  # note: the retriever-based chain ignores this key
    search_kwargs = {"k": k}  # retrieve every page of the document
    qa = ConversationalRetrievalChain.from_llm(
        llm=turbo,
        chain_type="map_reduce",
        retriever=docsearch.as_retriever(search_kwargs=search_kwargs),
        get_chat_history=get_chat_history,
        return_source_documents=True,
    )
    answer = qa(
        {"question": question, "chat_history": chat_history, "vectordbkwargs": vectordbkwargs},
        return_only_outputs=True,
    )
    chat_history.append((question, answer["answer"]))
    # answer = qa({"question": question})
    print(answer)  # debug: full output, including source documents
    return get_chat_history(chat_history)


with gr.Blocks() as demo:
    gr.Markdown(
        """
        # Interrogateur de PDF
        par Nicolas et Alex
        """
    )
    with gr.Row():
        with gr.Column():
            input_file = gr.File(label="Charger un document")
            encode_btn = gr.Button("Encoder le document")
            output_words = gr.Textbox(label="Encodage")
            encode_btn.click(fn=load_document, inputs=input_file, outputs=output_words)
        with gr.Column():
            text = gr.Textbox(label="Question")
            ask_btn = gr.Button("Poser une question")
            answer = gr.Textbox(label="Réponse", lines=8)
            ask_btn.click(fn=question_document, inputs=text, outputs=answer)

demo.launch()
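
# Usage sketch, under stated assumptions: the dependencies are installed
# (gradio, langchain, chromadb, pypdf, openai — this script targets the
# pre-0.1 langchain API) and the file is saved as app.py (hypothetical name):
#
#   export OpenaiKey="sk-..."   # the env var name this script reads
#   python app.py
#
# Then open the local Gradio URL, upload a PDF, click "Encoder le document",
# and ask questions in the right-hand column.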