danicafisher committed
Commit 036f779
1 Parent(s): 412d04a

Updates requirements

Files changed (3):
  1. app.py +21 -9
  2. requirements.txt +2 -1
  3. synthetic_data_generation.ipynb +4 -12
app.py CHANGED
@@ -14,34 +14,46 @@ import chainlit as cl
 import asyncio
 import nest_asyncio
 nest_asyncio.apply()
-
+import langchain_community
 from langchain_community.document_loaders import PyMuPDFLoader
+import langchain
+from langchain.prompts import ChatPromptTemplate
 
 filepath_NIST = "data/NIST.AI.600-1.pdf"
 filepath_Blueprint = "data/Blueprint-for-an-AI-Bill-of-Rights.pdf"
 
 documents_NIST = PyMuPDFLoader(filepath_NIST).load()
 documents_Blueprint = PyMuPDFLoader(filepath_Blueprint).load()
+documents = documents_NIST + documents_Blueprint
 
 # pdf_loader_NIST = PDFFileLoader("data/NIST.AI.600-1.pdf")
 # pdf_loader_Blueprint = PDFFileLoader("data/Blueprint-for-an-AI-Bill-of-Rights.pdf")
 # documents_NIST = pdf_loader_NIST.load_documents()
 # documents_Blueprint = pdf_loader_Blueprint.load_documents()
 
-text_splitter = CharacterTextSplitter()
-split_documents_NIST = text_splitter.split_texts(documents_NIST)
-split_documents_Blueprint = text_splitter.split_texts(documents_Blueprint)
+# text_splitter = CharacterTextSplitter()
+# split_documents_NIST = text_splitter.split_texts(documents_NIST)
+# split_documents_Blueprint = text_splitter.split_texts(documents_Blueprint)
+
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+
+text_splitter = RecursiveCharacterTextSplitter(
+    chunk_size = 500,
+    chunk_overlap = 50
+)
 
+rag_documents = text_splitter.split_documents(documents)
 
-RAG_PROMPT_TEMPLATE = """ \
-Use the provided context to answer the user's query.
+RAG_PROMPT = """\
+Given a provided context and question, you must answer the question based only on context.
 
-You may not answer the user's query unless there is specific context in the following text.
+If you cannot answer the question based on the context - you must say "I don't know".
 
-If you do not know the answer, or cannot answer, please respond with "I don't know".
+Context: {context}
+Question: {question}
 """
 
-rag_prompt = SystemRolePrompt(RAG_PROMPT_TEMPLATE)
+rag_prompt = ChatPromptTemplate.from_template(RAG_PROMPT)
 
 USER_PROMPT_TEMPLATE = """ \
 Context:
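
Note: a minimal standalone sketch of the flow app.py now implements, for reviewers who want to run the new pieces in isolation. It assumes the two PDFs exist at the paths used in the diff and that langchain and langchain_community are installed; the sample question at the end is a made-up example, not from the commit.

# Sketch only: mirrors the new load -> split -> prompt flow in app.py.
from langchain_community.document_loaders import PyMuPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.prompts import ChatPromptTemplate

documents = (
    PyMuPDFLoader("data/NIST.AI.600-1.pdf").load()
    + PyMuPDFLoader("data/Blueprint-for-an-AI-Bill-of-Rights.pdf").load()
)

# 500-character chunks with 50 characters of overlap, as configured in the diff.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
rag_documents = text_splitter.split_documents(documents)

rag_prompt = ChatPromptTemplate.from_template(
    "Given a provided context and question, you must answer the question "
    "based only on context.\n\n"
    "If you cannot answer the question based on the context - you must say "
    "\"I don't know\".\n\n"
    "Context: {context}\nQuestion: {question}"
)

# format_messages fills the {context} and {question} placeholders;
# the question below is a hypothetical example.
messages = rag_prompt.format_messages(
    context=rag_documents[0].page_content,
    question="What risks does the NIST framework address?",
)
print(messages[0].content)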
requirements.txt CHANGED
@@ -2,4 +2,5 @@ numpy
 chainlit==0.7.700
 openai
 PyPDF2
-langchain_community
+langchain_community
+langchain
synthetic_data_generation.ipynb CHANGED
@@ -865,7 +865,7 @@
 "\n",
 "generator_llm = ChatOpenAI(model=\"gpt-3.5-turbo\")\n",
 "critic_llm = ChatOpenAI(model=\"gpt-4o-mini\", tags=[\"base_llm\"]) \n",
-"embeddings = OpenAIEmbeddings()\n",
+"embeddings = OpenAIEmbeddings(model=\"text-embedding-3-small\")\n",
 "\n",
 "generator = TestsetGenerator.from_langchain(\n",
 " generator_llm,\n",
@@ -982,21 +982,13 @@
 " | rag_prompt | llm | StrOutputParser()\n",
 ")\n",
 "\n",
-"retrieval_augmented_qa_chain = (\n",
-" # INVOKE CHAIN WITH: {\"question\" : \"<<SOME USER QUESTION>>\"}\n",
-" # \"question\" : populated by getting the value of the \"question\" key\n",
-" # \"context\" : populated by getting the value of the \"question\" key and chaining it into the base_retriever\n",
+"rag_qa_chain = (\n",
 " {\"context\": itemgetter(\"question\") | retriever, \"question\": itemgetter(\"question\")}\n",
-" # \"context\" : is assigned to a RunnablePassthrough object (will not be called or considered in the next step)\n",
-" # by getting the value of the \"context\" key from the previous step\n",
 " | RunnablePassthrough.assign(context=itemgetter(\"context\"))\n",
-" # \"response\" : the \"context\" and \"question\" values are used to format our prompt object and then piped\n",
-" # into the LLM and stored in a key called \"response\"\n",
-" # \"context\" : populated by getting the value of the \"context\" key from the previous step\n",
 " | {\"response\": rag_prompt | llm, \"context\": itemgetter(\"context\")}\n",
 ")\n",
 "\n",
-"result = retrieval_augmented_qa_chain.invoke({\"question\" : \"Is AI a threat to humanity?\"})\n",
+"result = rag_qa_chain.invoke({\"question\" : \"Is AI a threat to humanity?\"})\n",
 "print(result)"
 ]
 },
@@ -1010,7 +1002,7 @@
 "contexts = []\n",
 "\n",
 "for question in test_questions:\n",
-" response = retrieval_augmented_qa_chain.invoke({\"question\" : question})\n",
+" response = rag_qa_chain.invoke({\"question\" : question})\n",
 " answers.append(response[\"response\"].content)\n",
 " contexts.append([context.page_content for context in response[\"context\"]])"
 ]
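
Note: for reviewers, a minimal sketch of the renamed rag_qa_chain as the notebook assembles it. The retriever, rag_prompt, and llm objects are assumed to come from earlier cells (a vector-store retriever and a ChatOpenAI instance), and the RunnablePassthrough import path may differ by LangChain version.

# Sketch only: the LCEL chain from the notebook, with `retriever`,
# `rag_prompt`, and `llm` assumed to be defined in earlier cells.
from operator import itemgetter
from langchain_core.runnables import RunnablePassthrough

rag_qa_chain = (
    # Fan the input question out into retrieved context plus the question itself.
    {"context": itemgetter("question") | retriever, "question": itemgetter("question")}
    # Keep the retrieved context alongside the downstream answer.
    | RunnablePassthrough.assign(context=itemgetter("context"))
    # Generate the answer while still returning the source documents.
    | {"response": rag_prompt | llm, "context": itemgetter("context")}
)

result = rag_qa_chain.invoke({"question": "Is AI a threat to humanity?"})
print(result["response"].content)   # the model's answer
print(len(result["context"]))       # number of retrieved source documents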