AdarshHF3115 committed
Commit 36e9c57 · verified · 1 Parent(s): 9b1aae8
Files changed (6)
  1. .gitattributes +1 -0
  2. 1.png +3 -0
  3. 2.png +0 -0
  4. 3.png +0 -0
  5. app.py +326 -0
  6. requirements.txt +16 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ 1.png filter=lfs diff=lfs merge=lfs -text
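(The new rule tells Git LFS to track 1.png: the repository stores only a small text pointer, 131 bytes here, while the 206 kB image itself lives in LFS storage, as the details below show.)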
1.png ADDED

Git LFS Details

  • SHA256: 7b904161bcf028fe9f2f534ca897b5ab4b4a1d9aa1505d82d716d14d8bf7ef64
  • Pointer size: 131 Bytes
  • Size of remote file: 206 kB
2.png ADDED
3.png ADDED
app.py ADDED
@@ -0,0 +1,326 @@
+ import gradio as gr
+ import os
+ import time
+ from typing import List, Tuple, Optional
+ from pathlib import Path
+ from langchain_community.vectorstores import FAISS
+ from langchain_community.document_loaders import PyPDFLoader, TextLoader
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
+ from langchain.chains import ConversationalRetrievalChain
+ from langchain_community.embeddings import HuggingFaceEmbeddings
+ from langchain_community.llms import HuggingFacePipeline
+ from langchain.memory import ConversationBufferMemory
+ from langchain.docstore.document import Document
+ from transformers import (
+     AutoModelForCausalLM,
+     AutoTokenizer,
+     pipeline,
+     BitsAndBytesConfig,
+     StoppingCriteria,
+     StoppingCriteriaList,
+ )
+ import torch
+
+ EMBEDDING_MODEL = "BAAI/bge-m3"
+ MODEL_NAME = "agentica-org/DeepScaleR-1.5B-Preview"
+ DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
+ MAX_CONTEXT_LENGTH = 8192
+
+ # 4-bit NF4 quantization (with double quantization) keeps the 1.5B model small
+ # enough for consumer GPUs; on CPU, quantization is skipped entirely.
+ bnb_config = (
+     BitsAndBytesConfig(
+         load_in_4bit=True,
+         bnb_4bit_use_double_quant=True,
+         bnb_4bit_quant_type="nf4",
+         bnb_4bit_compute_dtype=torch.float16,
+     )
+     if DEVICE == "cuda"
+     else None
+ )
+
+
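+ # transformers invokes each StoppingCriteria after every generated token; this
+ # one halts generation once the newest token id is 0, which is assumed here to
+ # be the model's end-of-sequence/pad token.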
+ class StopOnTokens(StoppingCriteria):
+     def __call__(
+         self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs
+     ) -> bool:
+         stop_ids = [0]
+         return input_ids[0][-1] in stop_ids
+
+
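+ # Ingestion helpers: keep only readable .pdf/.txt paths, load them, then split
+ # into 1024-character chunks with 128 overlap so retrieved passages carry
+ # enough surrounding context.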
+ def validate_file_paths(file_paths: List[str]) -> List[str]:
+     valid_paths = []
+     for path in file_paths:
+         try:
+             if Path(path).exists() and Path(path).suffix.lower() in [".pdf", ".txt"]:
+                 valid_paths.append(path)
+         except (OSError, PermissionError) as e:
+             print(f"File validation error: {str(e)}")
+     return valid_paths
+
+
+ def load_documents(file_paths: List[str]) -> List[Document]:
+     documents = []
+     valid_paths = validate_file_paths(file_paths)
+
+     if not valid_paths:
+         raise ValueError("No valid PDF/TXT files found!")
+
+     for path in valid_paths:
+         try:
+             if path.endswith(".pdf"):
+                 loader = PyPDFLoader(path)
+             elif path.endswith(".txt"):
+                 loader = TextLoader(path)
+             docs = loader.load()
+             if docs:
+                 documents.extend(docs)
+         except Exception as e:
+             print(f"Error loading {Path(path).name}: {str(e)}")
+
+     if not documents:
+         raise ValueError("All documents failed to load.")
+
+     text_splitter = RecursiveCharacterTextSplitter(
+         chunk_size=1024,
+         chunk_overlap=128,
+         length_function=len,
+         add_start_index=True,
+         separators=["\n\n", "\n", "。", " ", ""],
+     )
+     return text_splitter.split_documents(documents)
+
+
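+ # encode_kwargs normalizes the bge-m3 vectors to unit length, so FAISS's
+ # default L2 distance ranking orders results the same way cosine similarity
+ # would.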
+ def create_vector_store(documents: List[Document]) -> FAISS:
+     if not documents:
+         raise ValueError("No documents to index.")
+
+     embeddings = HuggingFaceEmbeddings(
+         model_name=EMBEDDING_MODEL,
+         model_kwargs={"device": DEVICE},
+         encode_kwargs={"normalize_embeddings": True},
+     )
+
+     return FAISS.from_documents(documents, embeddings)
+
+
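+ # Builds the full RAG chain: the quantized DeepScaleR pipeline behind an MMR
+ # retriever (fetch 10 candidates, keep the 5 most diverse), "stuff"-style
+ # context packing, and buffer memory for multi-turn follow-ups.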
+ def initialize_deepseek_model(
+     vector_store: FAISS,
+     temperature: float = 0.7,
+     max_new_tokens: int = 1024,
+     top_k: int = 50,
+ ) -> ConversationalRetrievalChain:
+     try:
+         tokenizer = AutoTokenizer.from_pretrained(
+             MODEL_NAME, use_fast=True, trust_remote_code=True
+         )
+
+         torch_dtype = torch.float16 if DEVICE == "cuda" else torch.float32
+
+         model = AutoModelForCausalLM.from_pretrained(
+             MODEL_NAME,
+             quantization_config=bnb_config,
+             device_map="auto" if DEVICE == "cuda" else None,
+             torch_dtype=torch_dtype,
+             trust_remote_code=True,
+         )
+
+         text_pipeline = pipeline(
+             "text-generation",
+             model=model,
+             tokenizer=tokenizer,
+             do_sample=True,  # temperature/top_k only take effect with sampling on
+             temperature=temperature,
+             max_new_tokens=max_new_tokens,
+             top_k=top_k,
+             repetition_penalty=1.1,
+             stopping_criteria=StoppingCriteriaList([StopOnTokens()]),
+             batch_size=1,
+             return_full_text=False,
+         )
+
+         llm = HuggingFacePipeline(pipeline=text_pipeline)
+
+         memory = ConversationBufferMemory(
+             memory_key="chat_history",
+             return_messages=True,
+             output_key="answer",
+             input_key="question",
+         )
+
+         return ConversationalRetrievalChain.from_llm(
+             llm=llm,
+             retriever=vector_store.as_retriever(
+                 search_type="mmr", search_kwargs={"k": 5, "fetch_k": 10}
+             ),
+             memory=memory,
+             chain_type="stuff",
+             return_source_documents=True,
+             verbose=False,
+             max_tokens_limit=MAX_CONTEXT_LENGTH,
+         )
+
+     except Exception as e:
+         raise RuntimeError(f"Model initialization failed: {str(e)}") from e
+
+
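+ # The UI exposes exactly three reference slots, so the retrieved sources are
+ # truncated or padded to three (snippet, page) pairs.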
+ def format_sources(source_docs: List[Document]) -> List[Tuple[str, int]]:
+     sources = []
+     try:
+         for doc in source_docs[:3]:
+             content = doc.page_content.strip()[:500] + "..."
+             page = doc.metadata.get("page", 0) + 1
+             sources.append((content, page))
+         while len(sources) < 3:
+             sources.append(("No source found", 0))
+     except Exception:
+         return [("Source processing error", 0)] * 3
+     return sources
+
+
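+ # Returns one value per wired Gradio output: chain state, cleared input box,
+ # updated history, then six flattened (snippet, page) values for the three
+ # reference rows.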
+ def handle_conversation(
+     qa_chain: Optional[ConversationalRetrievalChain],
+     message: str,
+     history: List[Tuple[str, str]],
+ ) -> Tuple:
+     start_time = time.time()
+
+     if not qa_chain:
+         # Flatten the placeholder pairs so each output component gets one value.
+         return (None, "", history, *(["System Error", 0] * 3))
+
+     try:
+         response = qa_chain.invoke({"question": message, "chat_history": history})
+         answer = response["answer"].strip()
+         sources = format_sources(response.get("source_documents", []))
+
+         new_history = history + [(message, answer)]
+         elapsed = f"{(time.time() - start_time):.2f}s"
+         print(f"Response generated in {elapsed}")
+
+         return (
+             qa_chain,
+             "",
+             new_history,
+             *[item for sublist in sources for item in sublist],
+         )
+     except Exception as e:
+         error_msg = f"⚠️ Error: {str(e)}"
+         return (qa_chain, "", history + [(message, error_msg)], *(["Error", 0] * 3))
+
+
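+ # Two-column layout: document upload and model configuration on the left,
+ # chat plus source references on the right.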
+ def create_interface() -> gr.Blocks:
+     with gr.Blocks(theme=gr.themes.Default()) as interface:
+         qa_chain = gr.State()
+         vector_store = gr.State()
+
+         gr.Markdown(
+             """
+             <h1 style="text-align:center; color: #00ffff;">
+                 DeepScale R1
+             </h1>
+             <p style="text-align:center; color: #008080;">
+                 A Safe and Strong Local RAG System by Adarsh Pandey!
+             </p>
+             """,
+             elem_id="header-section",
+         )
+
+         with gr.Row():
+             with gr.Column(scale=1, min_width=300):
+                 gr.Markdown("### Step 1: Document Processing")
+                 file_input = gr.Files(
+                     file_types=[".pdf", ".txt"], file_count="multiple"
+                 )
+                 process_btn = gr.Button("Process Documents", variant="primary")
+                 process_status = gr.Textbox(label="Status", interactive=False)
+
+                 gr.Markdown("### Step 2: Model Configuration")
+                 with gr.Accordion("Advanced Parameters", open=False):
+                     temp_slider = gr.Slider(
+                         minimum=0.1,
+                         maximum=1.0,
+                         value=0.7,
+                         step=0.1,
+                         label="Temperature",
+                     )
+                     token_slider = gr.Slider(
+                         minimum=256,
+                         maximum=4096,
+                         value=1024,
+                         step=128,
+                         label="Response Length",
+                     )
+                     topk_slider = gr.Slider(
+                         minimum=1, maximum=100, value=50, step=5, label="Top-K Sampling"
+                     )
+                 init_btn = gr.Button("Initialize Model", variant="primary")
+                 model_status = gr.Textbox(label="Model Status", interactive=False)
+
+             with gr.Column(scale=1, min_width=500):
+                 chatbot = gr.Chatbot(
+                     label="Conversation History",
+                     height=450,
+                     avatar_images=["2.png", "3.png"],
+                 )
+                 msg_input = gr.Textbox(
+                     label="Your Query",
+                     placeholder="Ask a question about your documents...",
+                 )
+                 with gr.Row():
+                     submit_btn = gr.Button("Submit", variant="primary")
+                     clear_btn = gr.ClearButton([msg_input, chatbot], value="Clear Chat")
+
+                 # Collect the reference components so the chat handlers can
+                 # write into them; creating fresh components inside `outputs`
+                 # would leave these visible boxes permanently empty.
+                 source_refs = []
+                 with gr.Accordion("Source References", open=True):
+                     for i in range(3):
+                         with gr.Row():
+                             ref_text = gr.Textbox(
+                                 label=f"Reference {i+1}", max_lines=4, interactive=False
+                             )
+                             ref_page = gr.Number(label="Page", value=0, interactive=False)
+                             source_refs.extend([ref_text, ref_page])
+
+         process_btn.click(
+             fn=lambda files: (
+                 create_vector_store(load_documents([f.name for f in files])),
+                 "Documents processed successfully.",
+             ),
+             inputs=file_input,
+             outputs=[vector_store, process_status],
+             api_name="process_docs",
+         )
+
+         init_btn.click(
+             fn=lambda vs, temp, tokens, k: (
+                 initialize_deepseek_model(vs, temp, tokens, k),
+                 "Model initialized successfully.",
+             ),
+             inputs=[vector_store, temp_slider, token_slider, topk_slider],
+             outputs=[qa_chain, model_status],
+             api_name="init_model",
+         )
+
+         msg_input.submit(
+             fn=handle_conversation,
+             inputs=[qa_chain, msg_input, chatbot],
+             outputs=[qa_chain, msg_input, chatbot, *source_refs],
+             api_name="chat",
+         )
+
+         submit_btn.click(
+             fn=handle_conversation,
+             inputs=[qa_chain, msg_input, chatbot],
+             outputs=[qa_chain, msg_input, chatbot, *source_refs],
+             api_name="chat_submit",  # distinct name; Gradio renames duplicates
+         )
+
+     return interface
+
+
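+ # share=True publishes a temporary public gradio.live URL in addition to the
+ # local server; setting the DOCKER env var switches the bind address from
+ # localhost to 0.0.0.0 so the app is reachable from outside a container.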
+ if __name__ == "__main__":
+     app = create_interface()
+     app.launch(
+         server_name="0.0.0.0" if os.getenv("DOCKER") else "localhost",
+         server_port=7860,
+         show_error=True,
+         share=True,
+         favicon_path="1.png",
+     )
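
For reference, a minimal headless sketch of how the pieces above compose (not part of the commit; assumes a hypothetical sample.pdf sitting next to app.py):

    # sketch only: exercises the commit's functions without launching the Gradio UI
    from app import load_documents, create_vector_store, initialize_deepseek_model

    chunks = load_documents(["sample.pdf"])     # hypothetical input file
    store = create_vector_store(chunks)         # bge-m3 embeddings indexed in FAISS
    chain = initialize_deepseek_model(store)    # quantized DeepScaleR RAG chain
    result = chain.invoke({"question": "What is this document about?", "chat_history": []})
    print(result["answer"])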
requirements.txt ADDED
@@ -0,0 +1,16 @@
+ gradio
+ torch  # ⚠️ default build; GPU users: see the pip command at the bottom
+ torchvision  # ⚠️ see note on the torch line above
+ torchaudio  # ⚠️ see note on the torch line above
+ transformers
+ accelerate
+ faiss-cpu
+ pypdf
+ tqdm
+ sentence-transformers
+ langchain
+ langchain-community
+ langchain-text-splitters
+ bitsandbytes
+ # 🔹 GPU users: comment out the three torch lines above and instead run:
+ # pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu126
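
Note: bitsandbytes only takes effect when CUDA is available. On CPU, app.py skips quantization (bnb_config is None) and loads the model in float32, so expect a noticeably larger memory footprint.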