import os

import gradio as gr

from langchain.document_loaders import PyPDFLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.chains import ConversationalRetrievalChain
from langchain.chat_models import ChatOpenAI
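

# Build the index: load the PDF, embed each page with OpenAI embeddings, and
# store the vectors in Chroma. The OpenAI key is read from the "OpenaiKey"
# environment variable; state is shared between the Gradio callbacks via globals.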
def load_document(document):
    # Reject missing files and anything that is not a PDF.
    if not document:
        return "Please provide a PDF document"
    if not document.name.endswith('.pdf'):
        return "Please provide a PDF document"

    # PyPDFLoader yields one Document per page.
    loader = PyPDFLoader(document.name)
    docs = loader.load()

    # Keep the page count so the retriever can later return every page.
    global k
    k = len(docs)

    embeddings = OpenAIEmbeddings(openai_api_key=os.environ['OpenaiKey'])

    # Embed the pages into a Chroma vector store, with one id per page.
    global docsearch
    docsearch = Chroma.from_documents(
        docs,
        embeddings,
        ids=["page" + str(d.metadata["page"]) for d in docs],
    )

    # Start a fresh conversation for the newly encoded document.
    global chat_history
    chat_history = []

    return "Encoding created"
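

# Serialize past (question, answer) turns into the plain-text transcript that
# ConversationalRetrievalChain feeds back to the LLM.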
def get_chat_history(inputs) -> str:
    res = []
    for human, ai in inputs:
        res.append(f"Question: {human}\nAnswer: {ai}")
    return "\n".join(res)
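

# Answer a question against the indexed document, keeping the running chat
# history so follow-up questions have context.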
def question_document(question):
    # The document must be encoded before it can be queried.
    if "docsearch" not in globals():
        return "Please encode a PDF document first"

    turbo = ChatOpenAI(
        model_name="gpt-3.5-turbo",
        temperature=0,
        openai_api_key=os.environ['OpenaiKey'],
    )
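
    # Retrieval settings: fetch all k pages so the map_reduce chain sees the
    # whole document.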
    vectordbkwargs = {"search_distance": 10}
    search_kwargs = {"k": k}

    qa = ConversationalRetrievalChain.from_llm(
        llm=turbo,
        chain_type="map_reduce",
        retriever=docsearch.as_retriever(search_kwargs=search_kwargs),
        get_chat_history=get_chat_history,
        return_source_documents=True,
    )
    answer = qa(
        {"question": question,
         "chat_history": chat_history,
         "vectordbkwargs": vectordbkwargs},
        return_only_outputs=True,
    )
    chat_history.append((question, answer["answer"]))

    # Log the full chain output (including source documents) to the console.
    print(answer)
    return get_chat_history(chat_history)
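

# Gradio UI: the left column encodes a PDF, the right column asks questions
# about it.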
with gr.Blocks() as demo:
    gr.Markdown(
        """
        # PDF Interrogator
        by Nicolas and Alex
        """)

    with gr.Row():
        with gr.Column():
            input_file = gr.File(label="Upload a document")
            encode_btn = gr.Button("Encode the document")
            output_words = gr.Textbox(label="Encoding")
            encode_btn.click(fn=load_document, inputs=input_file, outputs=output_words)

        with gr.Column():
            text = gr.Textbox(label="Question")
            ask_btn = gr.Button("Ask a question")
            answer_box = gr.Textbox(label="Answer", lines=8)
            ask_btn.click(fn=question_document, inputs=text, outputs=answer_box)

demo.launch()