Jawachan committed on
Commit a1ece96 · verified · 1 Parent(s): 1d87e65

Adding the local app.py code to the repo

Files changed (1)
  1. app.py +258 -46
app.py CHANGED
@@ -1,63 +1,275 @@
  import gradio as gr
- from huggingface_hub import InferenceClient

- """
- For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
- """
- client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")


- def respond(
-     message,
-     history: list[tuple[str, str]],
-     system_message,
-     max_tokens,
-     temperature,
-     top_p,
- ):
-     messages = [{"role": "system", "content": system_message}]

-     for val in history:
-         if val[0]:
-             messages.append({"role": "user", "content": val[0]})
-         if val[1]:
-             messages.append({"role": "assistant", "content": val[1]})

-     messages.append({"role": "user", "content": message})

-     response = ""

-     for message in client.chat_completion(
-         messages,
-         max_tokens=max_tokens,
-         stream=True,
          temperature=temperature,
          top_p=top_p,
-     ):
-         token = message.choices[0].delta.content

-         response += token
-         yield response

  """
- For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
- """
- demo = gr.ChatInterface(
-     respond,
-     additional_inputs=[
-         gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
-         gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
-         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-         gr.Slider(
-             minimum=0.1,
-             maximum=1.0,
-             value=0.95,
-             step=0.05,
-             label="Top-p (nucleus sampling)",
-         ),
-     ],
  )


  if __name__ == "__main__":
-     demo.launch()
+ from langchain_community.document_loaders import DirectoryLoader, PyPDFLoader, Docx2txtLoader
+ from pathlib import Path
+ from langchain_community.embeddings import HuggingFaceInferenceAPIEmbeddings
+ from langchain_community.vectorstores import Chroma
+ from itertools import combinations
+ import numpy as np
+ from langchain.memory import ConversationBufferMemory
+ from langchain.prompts import PromptTemplate
+ from langchain.chains import RetrievalQA
+
+ from langchain_community.llms import HuggingFaceEndpoint
  import gradio as gr

+ import os
+ from dotenv import load_dotenv
+ # from llama.api import HuggingFaceEndpoint
+ load_dotenv()
+
+
+ LOCAL_VECTOR_STORE_DIR = Path('./data')
+
+
+ def langchain_document_loader(TMP_DIR):
+     """
+     Load documents from the temporary directory (TMP_DIR).
+     Files can be in txt, pdf, CSV or docx format.
+     """
+
+     documents = []
+
+     # txt_loader = DirectoryLoader(
+     #     TMP_DIR.as_posix(), glob="**/*.txt", loader_cls=TextLoader, show_progress=True
+     # )
+     # documents.extend(txt_loader.load())
+
+     pdf_loader = DirectoryLoader(
+         TMP_DIR.as_posix(), glob="**/*.pdf", loader_cls=PyPDFLoader, show_progress=True
+     )
+     documents.extend(pdf_loader.load())
+
+     # csv_loader = DirectoryLoader(
+     #     TMP_DIR.as_posix(), glob="**/*.csv", loader_cls=CSVLoader, show_progress=True,
+     #     loader_kwargs={"encoding":"utf8"}
+     # )
+     # documents.extend(csv_loader.load())
+
+     doc_loader = DirectoryLoader(
+         TMP_DIR.as_posix(),
+         glob="**/*.docx",
+         loader_cls=Docx2txtLoader,
+         show_progress=True,
+     )
+     documents.extend(doc_loader.load())
+     return documents
+

+ directory_path = 'course reviews'
+ TMP_DIR = Path(directory_path)
+ documents = langchain_document_loader(TMP_DIR)

+ HUGGING_FACE_API_KEY = os.getenv("HUGGING_FACE_API_KEY") # Using our secret API key from the .env file
+ def select_embedding_model():
+     # embedding = OllamaEmbeddings(model='nomic-embed-text')
+     embedding = HuggingFaceInferenceAPIEmbeddings(
+         api_key=HUGGING_FACE_API_KEY,
+         model_name="sentence-transformers/all-MiniLM-L6-v2" # This is the embedding model
+     )
+     return embedding

+ embeddings = select_embedding_model() # Calling the function to select the model


+ def create_vectorstore(embeddings,documents,vectorstore_name):
+     """Create a Chroma vector database."""
+     persist_directory = (LOCAL_VECTOR_STORE_DIR.as_posix() + "/" + vectorstore_name)
+     vector_store = Chroma.from_documents(
+         documents=documents,
+         embedding=embeddings,
+         persist_directory=persist_directory
+     )
+     return vector_store

+
+ create_vectorstores = True # change to True to create vectorstores
+
+ if create_vectorstores:
+     vector_store = create_vectorstore(embeddings,documents,"vector_store")
+     print("Vector store created")
+     print("")
+
+
+
+ vector_store = Chroma(persist_directory = LOCAL_VECTOR_STORE_DIR.as_posix() + "/vector_store",
+                       embedding_function=embeddings)
+ print("vector_store:",vector_store._collection.count(),"chunks.")
+
+
+ def Vectorstore_backed_retriever(vectorstore,search_type="mmr",k=6,score_threshold=None):
+     """Create a vectorstore-backed retriever.
+     Parameters:
+         search_type: Defines the type of search that the Retriever should perform.
+             Can be "similarity" (default), "mmr", or "similarity_score_threshold"
+         k: number of documents to return (default: 6)
+         score_threshold: Minimum relevance threshold for similarity_score_threshold (default=None)
+     """
+     search_kwargs={}
+     if k is not None:
+         search_kwargs['k'] = k
+     if score_threshold is not None:
+         search_kwargs['score_threshold'] = score_threshold
+
+     retriever = vectorstore.as_retriever(
+         search_type=search_type,
+         search_kwargs=search_kwargs
+     )
+     return retriever
+
+
+ # Similarity search
+ retriever = Vectorstore_backed_retriever(vector_store,search_type="similarity",k=4)
+
+
+
+ def instantiate_LLM(api_key,temperature=0.5,top_p=0.95,model_name=None):
+     """Instantiate LLM in Langchain.
+     Parameters:
+         LLM_provider (str): the LLM provider; in ["OpenAI","Google","HuggingFace"]
+         model_name (str): in ["gpt-3.5-turbo", "gpt-3.5-turbo-0125", "gpt-4-turbo-preview",
+             "gemini-pro", "mistralai/Mistral-7B-Instruct-v0.2"].
+         api_key (str): google_api_key or openai_api_key or huggingfacehub_api_token
+         temperature (float): Range: 0.0 - 1.0; default = 0.5
+         top_p (float): Range: 0.0 - 1.0; default = 0.95
+     """
+
+
+     llm = HuggingFaceEndpoint(
+         # repo_id = "openai-community/gpt2-large",
+         # repo_id = "google/gemma-2b-it",
+         repo_id="mistralai/Mistral-7B-Instruct-v0.2", # working
+         # repo_id = "NexaAIDev/Octopus-v4",
+         # repo_id="Snowflake/snowflake-arctic-instruct",
+         # repo_id="apple/OpenELM-3B-Instruct", # error: requires trust_remote_code
+         # repo_id="meta-llama/Meta-Llama-3-8B-Instruct", # Takes too long
+         # repo_id="mistralai/Mixtral-8x22B-Instruct-v0.1", # RAM insufficient
+         # repo_id=model_name,
+         huggingfacehub_api_token=api_key,
+         # model_kwargs={
+         #     "temperature":temperature,
+         #     "top_p": top_p,
+         #     "do_sample": True,
+         #     "max_new_tokens":1024
+         # },
+         # model_kwargs={stop: "Human:", "stop_sequence": "Human:"},
+
+         stop_sequences = ["Human:"],
          temperature=temperature,
          top_p=top_p,
+         do_sample=True,
+         max_new_tokens=1024,
+         trust_remote_code=True
+     )
+     return llm

+ # get the API key from .env file
+ llm = instantiate_LLM(api_key=HUGGING_FACE_API_KEY)

+
+
+ def create_memory():
+     """Creates a ConversationBufferMemory for our model
+     to hold the chat history that is passed to the QA chain."""
+
+     memory = ConversationBufferMemory(
+         memory_key="history",
+         input_key="question",
+         return_messages=True,
+         k=3
+     )
+
+     return memory
+
+ memory = create_memory()
+
+
+ memory.save_context(
+     {"question": "What can you do?"},
+     {"output": "I can answer queries based on the past reviews and course outlines of various courses offered at LUMS."}
+ )
+
+ context_qa = """
+ You are a professional chatbot assistant for helping students at LUMS regarding course selection.
+
+ Please follow these rules:
+
+ 1. Answer the question in your own words from the context given to you.
+ 2. If you don't know the answer, don't try to make up an answer.
+ 3. If you don't have a course's review or outline, just say that you do not know about this course.
+ 4. If a user enters a course code (e.g. ECON100 or CS370), match it with reviews with that course code. If the user enters a course name (e.g. Introduction to Economics or Database Systems), match it with reviews with that course name.
+ 5. If you do not have information about a course, do not make up a course or suggest courses from universities other than LUMS.
+
+ Context: {context}
+
+ You are having a conversation with a student at LUMS.
+
+ Chat History: {history}
+
+ Human: {question}
+
+ Assistant:
  """
+
+ prompt = PromptTemplate(
+     input_variables=["history", "context", "question"],
+     template=context_qa
+ )
+
+
+ qa = RetrievalQA.from_chain_type(
+     llm=llm,
+     retriever=retriever,
+     verbose=False,
+     return_source_documents=False,
+     chain_type_kwargs={
+         "prompt": prompt,
+         "memory": memory
+     },
  )


+ # Global list to store chat history
+ chat_history = []
+
+ def print_documents(docs,search_with_score=False):
+     """Helper function to print documents."""
+     if search_with_score:
+         # used for similarity_search_with_score
+         print(
+             f"\n{'-' * 100}\n".join(
+                 [f"Document {i+1}:\n\n" + doc[0].page_content + "\n\nscore:" + str(round(doc[-1],3)) + "\n"
+                  for i, doc in enumerate(docs)]
+             )
+         )
+     else:
+         # used for similarity_search or max_marginal_relevance_search
+         print(
+             f"\n{'-' * 100}\n".join(
+                 [f"Document {i+1}:\n\n" + doc.page_content
+                  for i, doc in enumerate(docs)]
+             )
+         )
+
+ def rag_model(query):
+     # Run the RetrievalQA chain on the query
+     result = qa({'query': query})
+
+     relevant_docs = retriever.get_relevant_documents(query)
+     print_documents(relevant_docs)
+     # Extract the answer from the result
+     answer = result['result']
+     # print(result)
+
+
+     # Append the query and answer to the chat history
+     chat_history.append(f'User: {query}\nAssistant: {answer}\n')
+
+     # Join the chat history into a string
+     chat_string = '\n'.join(chat_history)
+
+     return chat_string
+
+ # This is for the Gradio interface
+ gradio_app = gr.Interface(fn=rag_model, inputs="text", outputs="text", title="RAGs to Riches", theme=gr.themes.Soft(), description="This is a RAG model that can answer queries based on the past reviews and course outlines of various courses offered at LUMS.")
+
  if __name__ == "__main__":
+     gradio_app.launch()
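
For reference, a minimal local smoke test of the committed app.py could look like the sketch below. This is an illustrative sketch, not part of the commit: it assumes a .env file next to app.py that defines HUGGING_FACE_API_KEY, and a 'course reviews' directory holding the PDF/DOCX review files, exactly as the code above expects; the file name and the query string are only examples.

# smoke_test.py (illustrative sketch, not part of this commit)
# Importing app runs the module top level: it loads the documents, builds the
# Chroma vector store, and wires up the RetrievalQA chain, since only
# gradio_app.launch() is guarded by the __name__ == "__main__" check.
from app import rag_model

# Ask about a course code; the answer depends on whichever reviews are
# actually present in the 'course reviews' directory.
print(rag_model("What do students say about CS370?"))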