import streamlit as st
from dotenv import load_dotenv
from langchain import hub
from langchain_core.messages import AIMessage, HumanMessage
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_community.vectorstores import FAISS
from langchain_community.document_loaders import PyPDFLoader
from langchain_experimental.text_splitter import SemanticChunker
# langchain_openai hosts the maintained OpenAI integrations; the
# langchain_community versions of these classes are deprecated.
from langchain_openai import ChatOpenAI, OpenAIEmbeddings

load_dotenv()

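# Load a PDF from disk and return it as a list of LangChain Documents.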
def get_docs_from_pdf(file):
    loader = PyPDFLoader(file)
    docs = loader.load_and_split()
    return docs

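# Re-split the Documents on semantic boundaries, using embedding similarity
# between sentences to decide where one chunk ends and the next begins.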
def get_doc_chunks(docs):
    text_splitter = SemanticChunker(OpenAIEmbeddings())
    chunks = text_splitter.split_documents(docs)
    return chunks

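# Embed the chunks with OpenAI embeddings and index them in an in-memory FAISS store.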
def get_vectorstore_from_docs(doc_chunks):
    embedding = OpenAIEmbeddings()
    vectorstore = FAISS.from_documents(documents=doc_chunks, embedding=embedding)
    return vectorstore

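# Assemble the RAG pipeline: retriever -> prompt -> LLM -> string parser (LCEL).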
def get_conversation_chain(vectorstore):
    llm = ChatOpenAI(model="gpt-4o", temperature=0.5, max_tokens=2048)
    retriever = vectorstore.as_retriever()

    # Standard RAG prompt from the LangChain hub; it expects "context" and "question".
    prompt = hub.pull("rlm/rag-prompt")

    def format_docs(docs):
        # Join the retrieved Documents into one context string instead of
        # passing the raw Document list (with its metadata) into the prompt.
        return "\n\n".join(doc.page_content for doc in docs)

    # Chain: retrieve context for the question, fill the prompt, call the LLM,
    # and parse the result to a plain string.
    rag_chain = (
        {"context": retriever | format_docs, "question": RunnablePassthrough()}
        | prompt
        | llm
        | StrOutputParser()
    )
    return rag_chain

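# Full ingestion pipeline: PDF -> Documents -> semantic chunks -> FAISS vectorstore.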
def create_db(file):
    docs = get_docs_from_pdf(file)
    doc_chunks = get_doc_chunks(docs)
    vectorstore = get_vectorstore_from_docs(doc_chunks)
    return vectorstore

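# Combine the user's question with the chat history and stream the chain's answer.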
def get_response(chain, user_query, chat_history):
    # The RAG chain takes a single text input, so fold the running chat
    # history into the question string before sending it through.
    template = """
    Chat history: {chat_history}
    User question: {user_question}
    """

    question = ChatPromptTemplate.from_template(template)
    question = question.format(chat_history=chat_history, user_question=user_query)

    # Return a generator of tokens, suitable for st.write_stream.
    return chain.stream(question)

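# Streamlit page: a chat UI over the PDF, with the history and the RAG chain
# cached in session state so they survive Streamlit's script reruns.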
def display_chat_te():
    # app config
    st.title("Chatbot")

    # session state
    if "chat_history_te" not in st.session_state:
        st.session_state.chat_history_te = [
            AIMessage(content="Salut, posez-moi vos question sur la transistion ecologique."),
        ]
    if "chain" not in st.session_state:
        db=create_db("DATA_bziiit/op.pdf")
        chain = get_conversation_chain(db)
        st.session_state.chain = chain
        
    # conversation
    for message in st.session_state.chat_history_te:
        if isinstance(message, AIMessage):
            with st.chat_message("AI"):
                st.write(message.content)
        elif isinstance(message, HumanMessage):
            with st.chat_message("Moi"):
                st.write(message.content)

    # user input
    user_query = st.chat_input("Type your question here...")
    if user_query:
        st.session_state.chat_history_te.append(HumanMessage(content=user_query))

        with st.chat_message("Human"):
            st.markdown(user_query)

        with st.chat_message("AI"):
            # st.write_stream renders the tokens as they arrive and returns the full text.
            response = st.write_stream(get_response(st.session_state.chain, user_query, st.session_state.chat_history_te))

        st.session_state.chat_history_te.append(AIMessage(content=response))
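
# Minimal entry point, assuming the surrounding app imports display_chat_te
# from elsewhere; with this guard the module can also be launched on its own
# via `streamlit run <this_file>`.
if __name__ == "__main__":
    display_chat_te()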