# (Web-page banner "Spaces: Sleeping" captured together with this listing; it is not part of the program.)
from PyPDF2 import PdfReader | |
from langchain.embeddings.openai import OpenAIEmbeddings | |
from langchain.text_splitter import CharacterTextSplitter | |
from langchain.vectorstores import FAISS | |
from langchain.chains.question_answering import load_qa_chain | |
from langchain.llms import OpenAI | |
import streamlit as st | |
from streamlit_chat import message | |
import extra_streamlit_components as stx | |
import os | |
import datetime | |
import openai | |
import random | |
# An OpenAI account is required to obtain an API key; see
# https://platform.openai.com/account/billing/overview
# The key kept in the Streamlit secrets store is exposed to the rest of the
# process through the OPENAI_API_KEY environment variable.
os.environ["OPENAI_API_KEY"] = st.secrets["OPENAI_API_KEY"]
def get_manager(key):
    """Build an ``stx.CookieManager`` created with the component key *key*."""
    manager = stx.CookieManager(key=key)
    return manager
# A single cookie manager serves every cookie read and write on this page.
cookie_manager = get_manager(key=0)
cookies = cookie_manager.get_all()

# Value of the "actchat" cookie, or None when no cookies came back or the
# cookie is absent.
user_limit_cookie = (
    cookies["actchat"] if cookies and "actchat" in cookies else None
)
def read_data(pdf_path="The-AI-Act.pdf"):
    """Return the text of every page of a PDF as a single string.

    Args:
        pdf_path: Location of the PDF to read. Defaults to the file this app
            has always read, so existing ``read_data()`` calls are unchanged.

    Returns:
        The page texts concatenated in page order with no separator. Pages
        for which ``extract_text()`` returns nothing are skipped.
    """
    reader = PdfReader(pdf_path)
    # Join once instead of growing a string page by page.
    return "".join(page.extract_text() or "" for page in reader.pages)
# Split the extracted text into smaller chunks so that information retrieval
# does not hit the token-size limits.
def split_document(raw_text):
    """Split *raw_text* into a list of chunks.

    The splitter breaks on newlines and is configured with ``chunk_size=1000``
    and ``chunk_overlap=200``, both measured with ``len`` (characters).
    """
    splitter_options = {
        "separator": "\n",
        "chunk_size": 1000,
        "chunk_overlap": 200,
        "length_function": len,
    }
    return CharacterTextSplitter(**splitter_options).split_text(raw_text)
# Create the OpenAI embeddings object used to vectorize the text chunks
def load_openai_embeddings():
    """Return a new ``OpenAIEmbeddings`` instance built with default settings."""
    return OpenAIEmbeddings()
def init_docsearch(texts, _embeddings):
    """Build a FAISS vector store from *texts* using *_embeddings*.

    NOTE(review): the leading underscore on ``_embeddings`` resembles the
    Streamlit caching convention for arguments excluded from hashing, but no
    cache decorator is applied here - confirm whether caching was intended.
    """
    return FAISS.from_texts(texts, _embeddings)
def init_qa_chain():
    """Return a "stuff" question-answering chain backed by an ``OpenAI`` LLM.

    NOTE(review): the LLM is created with ``temperature=1``; confirm that this
    much sampling randomness is wanted for answers about a legal text.
    """
    llm = OpenAI(temperature=1)
    return load_qa_chain(llm, chain_type="stuff")
# Build the retrieval pipeline: LLM chain, embeddings, then the vector index
# over the chunked PDF text.
# NOTE(review): these are plain top-level statements with no caching in sight,
# so every execution of the script re-reads the PDF and rebuilds the index -
# confirm that is acceptable.
chain = init_qa_chain()
embeddings = load_openai_embeddings()
raw_text = read_data()
texts = split_document(raw_text)
docsearch = init_docsearch(texts, embeddings)
# Avatar style names offered for the user's chat bubble.
avatars = [
    "avataaars",
    "big-ears",
    "big-ears-neutral",
    "big-smile",
    "identicon",
    "initials",
    "lorelei",
    "lorelei-neutral",
    "micah",
    "miniavs",
    "open-peeps",
    "personas",
    "pixel-art",
    "pixel-art-neutral",
    "shapes",
    "thumbs",
]
# Candidate style for this run; random.choice replaces manual index arithmetic.
user_avatar = random.choice(avatars)
# Page header followed by three introductory paragraphs.
st.title("EU AI ACT GPT🤖")

_intro_text = """The AI Act is a proposed European law on artificial intelligence (AI) –
the first law on AI by a major regulator anywhere."""
st.write(_intro_text)

_about_text = """The EU AI Act is expected to be voted during the 12-15 June session of
the EU Parliament. We at [NannyML](https://github.com/NannyML/nannyml)
finetuned GPT-4 with all the **107 pages** in the document so you can ask
all the necessary questions and be informed about it.
"""
st.markdown(_about_text)

_blog_text = """If you are a data scientist and are interested in learning how the EU AI Act might affect the field.
Check out [Understanding the EU AI Act as a Data Scientist](https://www.nannyml.com/blog/eu-ai-act-guide-data-science).
"""
st.markdown(_blog_text)
# Seed st.session_state with a default for every key that is still missing.
_session_defaults = {
    "text_input": "",
    "generated": [],
    "messages": [],
    "openaikey": [],
    "disabled": False,
}
for _state_key, _default_value in _session_defaults.items():
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _default_value

# The inputs are locked exactly when the limit cookie is present and the
# visitor has not supplied a key of their own; this is recomputed on each run.
_has_own_key = len(st.session_state["openaikey"]) != 0
st.session_state["disabled"] = user_limit_cookie == "01234" and not _has_own_key
if _has_own_key:
    openai.api_key = st.session_state["openaikey"]

# Keep the first avatar picked for this session.
if "avatar" not in st.session_state:
    st.session_state["avatar"] = user_avatar
def disable():
    """Set the ``disabled`` session flag that the text area and Send button read."""
    st.session_state.disabled = True
# Running conversation transcript; it starts empty for a new session.
if "history" not in st.session_state:
    st.session_state.history = ""
def submit():
    """Move the text area's content into ``text_input`` and clear the text area."""
    state = st.session_state
    # The right-hand side is evaluated first, so the old text is captured
    # before the widget value is blanked.
    state["text_input"], state["text_area"] = state["text_area"], ""
# Template prompt to establish the behaviour and the persona of the chatbot | |
def template(history, query):
    """Return the full prompt sent to the model for one user question.

    Args:
        history: Earlier conversation text, inserted verbatim under the
            history heading.
        query: The user's current question, inserted verbatim.

    Returns:
        The persona/instruction text followed by the history, the question
        and a trailing ``Assistant :`` cue. The text begins with a newline.
    """
    # The values are interpolated as-is, so braces inside them stay literal.
    return f"""
You are an assistant and expert in the EU AI Act. Based on your expertise,
you need to assist and provide the answer to the business questions about the EU AI Act.
Your answer has to be clear and easy to understand for the user.
Your answer has to be detailed and fact-checked informations based on the act.
Don't hesitate, if necessary create very detailed answer which exceeds 300 words.
Be sure to ask any additional information you may need, to provide an accurate answer.
Refer to the coverstation history if necessary.
Be friendly and polite to the user.
Coversation history :
{history}
User question : {query}
Assistant :"""
def _trim_history(history, limit=6000):
    """Return about the last *limit* characters of *history*, cut at the start of a turn."""
    if len(history) <= limit:
        return history
    tail = history[len(history) - limit:]
    # Look for the turn marker inside the tail itself so the cut lands on the
    # beginning of a recorded question rather than in the middle of a message.
    turn_start = tail.find("User question : ")
    # No marker means a single turn alone exceeds the limit; keep the raw tail.
    return tail[turn_start:] if turn_start != -1 else tail


def generate_response(question, user_query=None):
    """Answer one question and record the exchange in the session state.

    Args:
        question: The complete prompt handed to the QA chain.
        user_query: The raw text the user typed. It is used for the
            similarity search and for the history entry. When omitted,
            *question* is used for both, as before.

    Side effects:
        Appends the answer to ``st.session_state["generated"]`` and appends
        the question/answer pair to ``st.session_state["history"]``.
    """
    if user_query is None:
        user_query = question
    docs = docsearch.similarity_search(user_query)
    response = chain.run(input_documents=docs, question=question)
    st.session_state["generated"].append({"role": "assistant", "content": response})
    # Store only the raw question: the full prompt already embeds the previous
    # history, so recording it would nest the history inside itself each turn.
    st.session_state["history"] += (
        f"User question : {user_query}/Assistant : {response}/"
    )


response_container = st.container()
# The widget's value reaches the script through the submit() callback, which
# copies it into st.session_state["text_input"].
st.text_area(
    "Enter your question here about the EU AI Act",
    disabled=st.session_state["disabled"],
    key="text_area",
    on_change=submit,
)
prompt = st.session_state["text_input"]
send_button = st.button("Send", disabled=st.session_state["disabled"])
if send_button and prompt:
    # Only about the last 6000 characters (~1200 words) of history go into the prompt.
    history = _trim_history(st.session_state["history"])
    question = template(history, prompt)
    with st.spinner("Generating response..."):
        generate_response(question, user_query=prompt)
    # Record the user message only once an answer exists, so "messages" and
    # "generated" stay index-aligned even if the call above raised.
    st.session_state["messages"].append({"role": "user", "content": prompt})
    # NOTE(review): errors from the OpenAI call (bad key, exhausted quota) are
    # not caught here and will surface as an unhandled exception.
# Replay the conversation: each generated answer is shown beneath the user
# message stored at the same index.
if st.session_state["messages"]:
    with response_container:
        user_messages = st.session_state["messages"]
        for i, answer in enumerate(st.session_state["generated"]):
            message(
                user_messages[i]["content"],
                is_user=True,
                key=f"{i}_user",
                avatar_style=st.session_state["avatar"],
            )
            message(answer["content"], key=str(i))

# After more than four user messages, mark this browser as having reached the
# free limit. The expiry is relative to now: a fixed calendar date would, once
# passed, make the cookie expire as soon as it is written.
if len(st.session_state["messages"]) > 4:
    cookie_manager.set(
        "actchat",
        val="01234",
        expires_at=datetime.datetime.now() + datetime.timedelta(days=365),
    )
# Once the limit cookie is set and no personal key is stored, ask the visitor
# for their own OpenAI API key.
if user_limit_cookie == "01234" and len(st.session_state["openaikey"]) == 0:
    st.markdown("##### Provide your own OpenAI API Key")
    st.write(
        """
Due to limitations in api request calls per user to continoue the
converstation, please provide your personal OpenAI API key.
For more info on how to get and API Key visit
[OpenAI docs](https://platform.openai.com/account/api-keys)
about it."""
    )
    # The key is a secret: mask it while typing and drop stray whitespace
    # picked up when pasting.
    openaikey = st.text_input("OPENAI_API_KEY:", type="password").strip()
    api_button = st.button("Add")
    if api_button:
        st.session_state["disabled"] = False
        st.session_state["openaikey"] = openaikey
        # NOTE(review): only the openai module's global key is updated here;
        # confirm that the LangChain objects built earlier actually use it.
        openai.api_key = openaikey
else:
    st.session_state["disabled"] = False
# Example questions, a blank spacer line, then the credits.
_sample_questions = """##### Sample questions to ask it
* What are the objectives of the EU AI Act?
* What are the potential fines that a company may face for failing to comply with the EU AI Act?
* Explain in simple words the different risk levels in the EU AI Act.
"""
st.markdown(_sample_questions)

st.text("")

_credits = """`Created by` [santiviquez](https://twitter.com/santiviquez) and
[maciejbalawejder](https://www.linkedin.com/in/maciej-balawejder-rt8015/)
from [NannyML](https://github.com/NannyML/nannyml) —
The open-source library to estimate model performance in production
*without ground truth*."""
st.markdown(_credits)