danicafisher committed
Commit 036f779 • 1 Parent(s): 412d04a
Updates requirements
Browse files
- app.py +21 -9
- requirements.txt +2 -1
- synthetic_data_generation.ipynb +4 -12
app.py
CHANGED
@@ -14,34 +14,46 @@ import chainlit as cl
 import asyncio
 import nest_asyncio
 nest_asyncio.apply()
-
+import langchain_community
 from langchain_community.document_loaders import PyMuPDFLoader
+import langchain
+from langchain.prompts import ChatPromptTemplate
 
 filepath_NIST = "data/NIST.AI.600-1.pdf"
 filepath_Blueprint = "data/Blueprint-for-an-AI-Bill-of-Rights.pdf"
 
 documents_NIST = PyMuPDFLoader(filepath_NIST).load()
 documents_Blueprint = PyMuPDFLoader(filepath_Blueprint).load()
+documents = documents_NIST + documents_Blueprint
 
 # pdf_loader_NIST = PDFFileLoader("data/NIST.AI.600-1.pdf")
 # pdf_loader_Blueprint = PDFFileLoader("data/Blueprint-for-an-AI-Bill-of-Rights.pdf")
 # documents_NIST = pdf_loader_NIST.load_documents()
 # documents_Blueprint = pdf_loader_Blueprint.load_documents()
 
-text_splitter = CharacterTextSplitter()
-split_documents_NIST = text_splitter.split_texts(documents_NIST)
-split_documents_Blueprint = text_splitter.split_texts(documents_Blueprint)
+# text_splitter = CharacterTextSplitter()
+# split_documents_NIST = text_splitter.split_texts(documents_NIST)
+# split_documents_Blueprint = text_splitter.split_texts(documents_Blueprint)
+
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+
+text_splitter = RecursiveCharacterTextSplitter(
+    chunk_size = 500,
+    chunk_overlap = 50
+)
 
+rag_documents = text_splitter.split_documents(documents)
 
-
-
+RAG_PROMPT = """\
+Given a provided context and question, you must answer the question based only on context.
 
-
+If you cannot answer the question based on the context - you must say "I don't know".
 
-
+Context: {context}
+Question: {question}
 """
 
-rag_prompt =
+rag_prompt = ChatPromptTemplate.from_template(RAG_PROMPT)
 
 USER_PROMPT_TEMPLATE = """ \
 Context:
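The app.py change swaps the old CharacterTextSplitter for a RecursiveCharacterTextSplitter (500-character chunks with 50-character overlap) and builds rag_prompt from a ChatPromptTemplate, but this hunk does not show how rag_documents and rag_prompt are consumed. A minimal sketch of the likely downstream wiring, assuming a FAISS vector store and OpenAI models (neither appears in this diff, so both are assumptions):

# Sketch only; the retriever and LLM choices are assumptions, not part of this commit.
# Requires faiss-cpu in addition to the packages in requirements.txt.
from operator import itemgetter

from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import OpenAIEmbeddings
from langchain_community.chat_models import ChatOpenAI
from langchain_core.runnables import RunnablePassthrough

# Embed the 500-character chunks produced above and expose them as a retriever.
vectorstore = FAISS.from_documents(rag_documents, OpenAIEmbeddings())
retriever = vectorstore.as_retriever()

llm = ChatOpenAI(model="gpt-3.5-turbo")  # hypothetical model choice

# Same three-step chain shape used in synthetic_data_generation.ipynb below:
# fetch context for the question, carry it forward, then prompt the LLM.
rag_qa_chain = (
    {"context": itemgetter("question") | retriever, "question": itemgetter("question")}
    | RunnablePassthrough.assign(context=itemgetter("context"))
    | {"response": rag_prompt | llm, "context": itemgetter("context")}
)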
requirements.txt
CHANGED
@@ -2,4 +2,5 @@ numpy
 chainlit==0.7.700
 openai
 PyPDF2
-langchain_community
+langchain_community
+langchain
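requirements.txt now lists both langchain_community (already imported by app.py's document loader) and langchain (needed for the new ChatPromptTemplate import). A quick sanity check that the environment resolves, as a sketch rather than repo code:

# Verify the two unpinned dependencies import cleanly in the Space's environment.
import langchain
import langchain_community
from langchain.prompts import ChatPromptTemplate  # the import app.py now relies on

print(langchain.__version__, langchain_community.__version__)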
synthetic_data_generation.ipynb
CHANGED
@@ -865,7 +865,7 @@
 "\n",
 "generator_llm = ChatOpenAI(model=\"gpt-3.5-turbo\")\n",
 "critic_llm = ChatOpenAI(model=\"gpt-4o-mini\", tags=[\"base_llm\"]) \n",
-"embeddings = OpenAIEmbeddings()\n",
+"embeddings = OpenAIEmbeddings(model=\"text-embedding-3-small\")\n",
 "\n",
 "generator = TestsetGenerator.from_langchain(\n",
 " generator_llm,\n",
@@ -982,21 +982,13 @@
 " | rag_prompt | llm | StrOutputParser()\n",
 ")\n",
 "\n",
-"
-" # INVOKE CHAIN WITH: {\"question\" : \"<<SOME USER QUESTION>>\"}\n",
-" # \"question\" : populated by getting the value of the \"question\" key\n",
-" # \"context\" : populated by getting the value of the \"question\" key and chaining it into the base_retriever\n",
+"rag_qa_chain = (\n",
 " {\"context\": itemgetter(\"question\") | retriever, \"question\": itemgetter(\"question\")}\n",
-" # \"context\" : is assigned to a RunnablePassthrough object (will not be called or considered in the next step)\n",
-" # by getting the value of the \"context\" key from the previous step\n",
 " | RunnablePassthrough.assign(context=itemgetter(\"context\"))\n",
-" # \"response\" : the \"context\" and \"question\" values are used to format our prompt object and then piped\n",
-" # into the LLM and stored in a key called \"response\"\n",
-" # \"context\" : populated by getting the value of the \"context\" key from the previous step\n",
 " | {\"response\": rag_prompt | llm, \"context\": itemgetter(\"context\")}\n",
 ")\n",
 "\n",
-"result =
+"result = rag_qa_chain.invoke({\"question\" : \"Is AI a threat to humanity?\"})\n",
 "print(result)"
 ]
 },
@@ -1010,7 +1002,7 @@
 "contexts = []\n",
 "\n",
 "for question in test_questions:\n",
-" response =
+" response = rag_qa_chain.invoke({\"question\" : question})\n",
 " answers.append(response[\"response\"].content)\n",
 " contexts.append([context.page_content for context in response[\"context\"]])"
 ]
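With this change the notebook invokes rag_qa_chain once per synthetic test question, collecting the model answers and the retrieved contexts. A sketch of how those lists could then be scored with ragas; the metric choice and the test_groundtruths column are assumptions about notebook cells this diff does not show:

# Sketch: assemble the collected RAG outputs for ragas scoring.
from datasets import Dataset
from ragas import evaluate
from ragas.metrics import faithfulness, answer_relevancy

eval_dataset = Dataset.from_dict({
    "question": test_questions,         # generated by TestsetGenerator above
    "answer": answers,                  # filled in by the loop in this diff
    "contexts": contexts,               # retrieved page_content per question
    "ground_truth": test_groundtruths,  # hypothetical reference answers
})

results = evaluate(eval_dataset, metrics=[faithfulness, answer_relevancy])
print(results)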