rgenius committed
Commit c6dc868
1 Parent(s): f7e8599

Upload app_vGPU.py

Files changed (1): app_vGPU.py (+371, -0)
app_vGPU.py ADDED
import gradio as gr

from uuid import uuid4
from huggingface_hub import snapshot_download
from langchain.document_loaders import (
    CSVLoader,
    EverNoteLoader,
    PDFMinerLoader,
    TextLoader,
    UnstructuredEmailLoader,
    UnstructuredEPubLoader,
    UnstructuredHTMLLoader,
    UnstructuredMarkdownLoader,
    UnstructuredODTLoader,
    UnstructuredPowerPointLoader,
    UnstructuredWordDocumentLoader,
)
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.docstore.document import Document
from chromadb.config import Settings
from llama_cpp import Llama
from langchain.llms import LlamaCpp
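# NOTE: langchain's LlamaCpp wrapper is imported but never used below;
# inference goes through llama_cpp.Llama directly.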


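# UI branding plus the (Russian) system prompt, which reads roughly:
# "You are Lisum, a Russian-speaking automatic assistant. You talk to
# people and help them."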
TITLE = 'ЛисумGPT'
FAVICON_PATH = 'https://space-course.ru/wp-content/uploads/2023/06/Fox_logo_512-2.png'
SYSTEM_PROMPT = "Ты — Лисум, русскоязычный автоматический ассистент. Ты разговариваешь с людьми и помогаешь им."
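# Special-token ids of the Saiga prompt format; these appear to be the ids
# of "system", "user", "bot" and "\n" in the underlying LLaMA vocabulary.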
SYSTEM_TOKEN = 1788
USER_TOKEN = 1404
BOT_TOKEN = 9225
LINEBREAK_TOKEN = 13

ROLE_TOKENS = {
    "user": USER_TOKEN,
    "bot": BOT_TOKEN,
    "system": SYSTEM_TOKEN,
}

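# Maps file extensions to (loader class, loader kwargs) for indexing uploads.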
LOADER_MAPPING = {
    ".csv": (CSVLoader, {}),
    ".doc": (UnstructuredWordDocumentLoader, {}),
    ".docx": (UnstructuredWordDocumentLoader, {}),
    ".enex": (EverNoteLoader, {}),
    ".epub": (UnstructuredEPubLoader, {}),
    ".html": (UnstructuredHTMLLoader, {}),
    ".md": (UnstructuredMarkdownLoader, {}),
    ".odt": (UnstructuredODTLoader, {}),
    ".pdf": (PDFMinerLoader, {}),
    ".ppt": (UnstructuredPowerPointLoader, {}),
    ".pptx": (UnstructuredPowerPointLoader, {}),
    ".txt": (TextLoader, {"encoding": "utf8"}),
}


repo_name = "IlyaGusev/saiga_13b_lora_llamacpp"
# model_name = "ggml-model-q4_1.bin"
model_name = "ggml-model-q8_0.bin"
embedder_name = "sentence-transformers/paraphrase-multilingual-mpnet-base-v2"

snapshot_download(repo_id=repo_name, local_dir=".", allow_patterns=model_name)

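# llama.cpp GPU offload settings: n_gpu_layers is how many transformer layers
# to keep on the GPU (tune down if VRAM runs out); n_batch is the prompt
# evaluation batch size. Offloading only takes effect if llama-cpp-python was
# built with GPU support (e.g. CMAKE_ARGS="-DLLAMA_CUBLAS=on" at install time).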
n_gpu_layers = 25
n_batch = 512

model = Llama(
    model_path=model_name,
    n_ctx=2000,
    n_parts=1,
    n_gpu_layers=n_gpu_layers,
    n_batch=n_batch,
    verbose=True,
)


max_new_tokens = 1500
embeddings = HuggingFaceEmbeddings(model_name=embedder_name)

def get_uuid():
    return str(uuid4())
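

# Load a single file using the loader registered for its extension.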
def load_single_document(file_path: str) -> Document:
    ext = "." + file_path.rsplit(".", 1)[-1]
    assert ext in LOADER_MAPPING
    loader_class, loader_args = LOADER_MAPPING[ext]
    loader = loader_class(file_path, **loader_args)
    return loader.load()[0]
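

# Build one message in the Saiga chat format: model.tokenize() prepends BOS
# by default, then the role token and a linebreak are spliced in after it,
# and EOS closes the message.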
def get_message_tokens(model, role, content):
    message_tokens = model.tokenize(content.encode("utf-8"))
    message_tokens.insert(1, ROLE_TOKENS[role])
    message_tokens.insert(2, LINEBREAK_TOKEN)
    message_tokens.append(model.token_eos())
    return message_tokens


def get_system_tokens(model):
    system_message = {"role": "system", "content": SYSTEM_PROMPT}
    return get_message_tokens(model, **system_message)


def upload_files(files, file_paths):
    file_paths = [f.name for f in files]
    return file_paths
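

# Drop near-empty lines and discard fragments shorter than 10 characters.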
def process_text(text):
    lines = text.split("\n")
    lines = [line for line in lines if len(line.strip()) > 2]
    text = "\n".join(lines).strip()
    if len(text) < 10:
        return None
    return text
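

# Split the uploaded documents into overlapping chunks, clean them, and embed
# everything into an in-memory Chroma index.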
def build_index(file_paths, db, chunk_size, chunk_overlap, file_warning):
    documents = [load_single_document(path) for path in file_paths]
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
    documents = text_splitter.split_documents(documents)
    fixed_documents = []
    for doc in documents:
        doc.page_content = process_text(doc.page_content)
        if not doc.page_content:
            continue
        fixed_documents.append(doc)

    db = Chroma.from_documents(
        fixed_documents,
        embeddings,
        client_settings=Settings(
            anonymized_telemetry=False
        )
    )
    file_warning = f"Загружено {len(fixed_documents)} фрагментов! Можно задавать вопросы."
    return db, file_warning


def user(message, history, system_prompt):
    new_history = history + [[message, None]]
    return "", new_history
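

# Fetch the k most relevant chunks for the latest user message, if an index exists.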
def retrieve(history, db, retrieved_docs, k_documents):
    if db:
        last_user_message = history[-1][0]
        retriever = db.as_retriever(search_kwargs={"k": k_documents})
        docs = retriever.get_relevant_documents(last_user_message)
        retrieved_docs = "\n\n".join([doc.page_content for doc in docs])
    return retrieved_docs
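

# Streaming generation: replay the whole dialogue as Saiga-format tokens,
# prepend retrieved context to the last user turn (the Russian template reads
# "Context: ...\n\nUsing the most relevant context fragments, answer the
# question: ..."), then yield the partial reply token by token.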
def bot(history, system_prompt, conversation_id, retrieved_docs, top_p, top_k, temp):
    if not history:
        return

    tokens = get_system_tokens(model)[:]
    tokens.append(LINEBREAK_TOKEN)

    for user_message, bot_message in history[:-1]:
        message_tokens = get_message_tokens(model=model, role="user", content=user_message)
        tokens.extend(message_tokens)
        if bot_message:
            message_tokens = get_message_tokens(model=model, role="bot", content=bot_message)
            tokens.extend(message_tokens)

    last_user_message = history[-1][0]
    if retrieved_docs:
        last_user_message = f"Контекст: {retrieved_docs}\n\nИспользуя наиболее подходящие фрагменты контекста, ответь на вопрос: {last_user_message}"
    message_tokens = get_message_tokens(model=model, role="user", content=last_user_message)
    tokens.extend(message_tokens)

    role_tokens = [model.token_bos(), BOT_TOKEN, LINEBREAK_TOKEN]
    tokens.extend(role_tokens)
    generator = model.generate(
        tokens,
        top_k=top_k,
        top_p=top_p,
        temp=temp
    )

    partial_text = ""
    for i, token in enumerate(generator):
        if token == model.token_eos() or (max_new_tokens is not None and i >= max_new_tokens):
            break
        partial_text += model.detokenize([token]).decode("utf-8", "ignore")
        history[-1][1] = partial_text
        yield history


with gr.Blocks(theme=gr.themes.Soft(), title=TITLE, css="footer {visibility: hidden}") as demo:
    db = gr.State(None)
    conversation_id = gr.State(get_uuid)
    favicon = f'<img src="{FAVICON_PATH}" width="48px" style="display: inline">'
    gr.Markdown(
        f"""<h1><center>{favicon} Я Лисум, текстовый ассистент на основе GPT</center></h1>
        <p>Я быстро учусь новому. Просто загрузи свои файлы и задавай любые вопросы.</p>
        """
    )

    with gr.Row():
        with gr.Column(scale=5):
            file_output = gr.File(file_count="multiple", label="Загрузка файлов")
            file_paths = gr.State([])
            file_warning = gr.Markdown("Фрагменты ещё не загружены!")

        with gr.Column(visible=False, min_width=200, scale=3):
            with gr.Tab(label="Параметры нарезки"):
                chunk_size = gr.Slider(
                    minimum=50,
                    maximum=2000,
                    value=1000,
                    step=50,
                    interactive=True,
                    label="Размер фрагментов",
                )
                chunk_overlap = gr.Slider(
                    minimum=0,
                    maximum=500,
                    value=100,
                    step=10,
                    interactive=True,
                    label="Пересечение"
                )


    with gr.Row(visible=False):
        k_documents = gr.Slider(
            minimum=1,
            maximum=10,
            value=3,
            step=1,
            interactive=True,
            label="Кол-во фрагментов для контекста"
        )
    with gr.Row():
        retrieved_docs = gr.Textbox(
            lines=6,
            label="Извлеченные фрагменты",
            placeholder="Появятся после загрузки файлов и отправки сообщений",
            interactive=False
        )
    with gr.Row():
        with gr.Column(scale=5):
            system_prompt = gr.Textbox(visible=False, label="Системный промпт", placeholder="", value=SYSTEM_PROMPT, interactive=False)
            chatbot = gr.Chatbot(label="Диалог").style(height=400)
        with gr.Column(visible=False, min_width=80, scale=1):
            with gr.Tab(label="Параметры генерации"):
                top_p = gr.Slider(
                    minimum=0.0,
                    maximum=1.0,
                    value=0.9,
                    step=0.05,
                    interactive=True,
                    label="Top-p",
                )
                top_k = gr.Slider(
                    minimum=10,
                    maximum=100,
                    value=30,
                    step=5,
                    interactive=True,
                    label="Top-k",
                )
                temp = gr.Slider(
                    minimum=0.0,
                    maximum=2.0,
                    value=0.1,
                    step=0.1,
                    interactive=True,
                    label="Temp"
                )

    with gr.Row():
        with gr.Column():
            msg = gr.Textbox(
                label="Отправить сообщение",
                placeholder="Отправить сообщение",
                show_label=False,
            ).style(container=False)
        with gr.Column():
            with gr.Row():
                submit = gr.Button("Отправить")
                stop = gr.Button("Остановить")
                clear = gr.Button("Очистить")

    # Upload files
    upload_event = file_output.change(
        fn=upload_files,
        inputs=[file_output, file_paths],
        outputs=[file_paths],
        queue=True,
    ).success(
        fn=build_index,
        inputs=[file_paths, db, chunk_size, chunk_overlap, file_warning],
        outputs=[db, file_warning],
        queue=True
    )

    # Pressing Enter
    submit_event = msg.submit(
        fn=user,
        inputs=[msg, chatbot, system_prompt],
        outputs=[msg, chatbot],
        queue=False,
    ).success(
        fn=retrieve,
        inputs=[chatbot, db, retrieved_docs, k_documents],
        outputs=[retrieved_docs],
        queue=True,
    ).success(
        fn=bot,
        inputs=[
            chatbot,
            system_prompt,
            conversation_id,
            retrieved_docs,
            top_p,
            top_k,
            temp
        ],
        outputs=chatbot,
        queue=True,
    )

    # Pressing the button
    submit_click_event = submit.click(
        fn=user,
        inputs=[msg, chatbot, system_prompt],
        outputs=[msg, chatbot],
        queue=False,
    ).success(
        fn=retrieve,
        inputs=[chatbot, db, retrieved_docs, k_documents],
        outputs=[retrieved_docs],
        queue=True,
    ).success(
        fn=bot,
        inputs=[
            chatbot,
            system_prompt,
            conversation_id,
            retrieved_docs,
            top_p,
            top_k,
            temp
        ],
        outputs=chatbot,
        queue=True,
    )

    # Stop generation
    stop.click(
        fn=None,
        inputs=None,
        outputs=None,
        cancels=[submit_event, submit_click_event],
        queue=False,
    )

    # Clear history
    clear.click(lambda: None, None, chatbot, queue=False)

demo.queue(max_size=128, concurrency_count=1)
demo.launch(
    server_name="0.0.0.0",
    # ssl_verify=False,
)