hackergeek committed
Commit 98b95ab · verified · 1 Parent(s): d25669e

Update app.py

Files changed (1):
  1. app.py +43 -26
app.py CHANGED
@@ -1,4 +1,14 @@
 import os
+
+# ======================================================
+# OPTION A: Use ephemeral /tmp cache to avoid 50 GB quota
+# ======================================================
+os.environ["TRANSFORMERS_CACHE"] = "/tmp/hf_cache"
+os.environ["HF_HOME"] = "/tmp/hf_home"
+os.environ["HF_DATASETS_CACHE"] = "/tmp/hf_datasets"
+os.environ["HF_MODULES_CACHE"] = "/tmp/hf_modules"
+# ======================================================
+
 import torch
 import gradio as gr
 import faiss
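These assignments must run before any Hugging Face library is imported, which is why the commit places them above `import torch`. Recent `transformers` releases also deprecate `TRANSFORMERS_CACHE` in favor of `HF_HOME`, so setting both, as this commit does, covers old and new versions. A minimal sketch (assuming a recent `huggingface_hub`) to confirm the redirect took effect:

    # Sketch: verify the cache redirect; env vars must be set before the import.
    import os
    os.environ["HF_HOME"] = "/tmp/hf_home"

    from huggingface_hub import constants
    print(constants.HF_HOME)  # expected: /tmp/hf_home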
@@ -6,32 +16,34 @@ import numpy as np
 from transformers import AutoTokenizer, AutoModelForCausalLM
 from sentence_transformers import SentenceTransformer
 
-# -------------------------------
-# 1. Model setup
-# -------------------------------
-GEN_MODEL = "hackergeek/qwen3-harrison-rag"  # main generation model
-EMB_MODEL = "sentence-transformers/all-MiniLM-L6-v2"  # embedding model
+# ------------------------------------------------------
+# 1️⃣ Model setup
+# ------------------------------------------------------
+GEN_MODEL = "hackergeek/qwen3-harrison-rag"  # main generation model
+EMB_MODEL = "sentence-transformers/all-MiniLM-L6-v2"  # embedding model (lightweight)
 
 tokenizer = AutoTokenizer.from_pretrained(GEN_MODEL)
 model = AutoModelForCausalLM.from_pretrained(
     GEN_MODEL,
+    cache_dir="/tmp/hf_cache",
     torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
-    device_map="auto"
+    device_map="auto",
+    low_cpu_mem_usage=True
 )
 
-embedder = SentenceTransformer(EMB_MODEL)
+embedder = SentenceTransformer(EMB_MODEL, cache_folder="/tmp/hf_cache")
 
-# -------------------------------
-# 2. Load and index docs
-# -------------------------------
+# ------------------------------------------------------
+# 2️⃣ Load and index documents
+# ------------------------------------------------------
 DOCS_DIR = "docs"
 os.makedirs(DOCS_DIR, exist_ok=True)
 
-# if no docs exist, create a small demo
+# create a small demo doc if none exists
 if not os.listdir(DOCS_DIR):
     with open(os.path.join(DOCS_DIR, "example.txt"), "w") as f:
         f.write(
-            "HackerGeek Qwen3 Harrison RAG is a retrieval-augmented language model combining Qwen3 with document search."
+            "Qwen3-Harrison-RAG combines the Qwen3 language model with retrieval-augmented generation for context-aware responses."
         )
 
 docs = []
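With `device_map="auto"`, `accelerate` decides where each submodule lives, and `low_cpu_mem_usage=True` streams weights in shard by shard instead of first materializing a full copy in RAM. A quick way to inspect the result (a sketch; `hf_device_map` is only set when a device map was actually used):

    # Sketch: check placement and dtype after loading.
    print(getattr(model, "hf_device_map", "single device"))
    print(next(model.parameters()).dtype)  # torch.float16 on GPU, float32 on CPU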
@@ -39,19 +51,19 @@ for fn in os.listdir(DOCS_DIR):
     with open(os.path.join(DOCS_DIR, fn), encoding="utf-8") as f:
         docs.append(f.read())
 
-# naive chunking
+# simple fixed-size chunking
 chunks = []
 for doc in docs:
     for i in range(0, len(doc), 500):
         chunks.append(doc[i:i+500])
 
-embs = embedder.encode(chunks, convert_to_numpy=True)
+embs = embedder.encode(chunks, convert_to_numpy=True, show_progress_bar=False)
 index = faiss.IndexFlatL2(embs.shape[1])
 index.add(embs)
 
-# -------------------------------
-# 3. Chat logic
-# -------------------------------
+# ------------------------------------------------------
+# 3️⃣ Retrieval + generation logic
+# ------------------------------------------------------
 def retrieve_context(query, k=5):
     q_emb = embedder.encode([query], convert_to_numpy=True)
     D, I = index.search(q_emb, k)
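One caveat worth knowing: `retrieve_context` asks for `k=5` neighbors, but the demo corpus can contain a single 500-character chunk; when `k` exceeds the number of indexed vectors, FAISS fills the missing slots with id `-1`, which a naive `chunks[i]` lookup silently wraps around. A self-contained sketch of the exact-search core built above (384 is all-MiniLM-L6-v2's embedding width):

    # Standalone illustration of the IndexFlatL2 retrieval used above.
    import faiss
    import numpy as np

    rng = np.random.default_rng(0)
    embs = rng.standard_normal((8, 384)).astype(np.float32)  # 8 fake chunks

    index = faiss.IndexFlatL2(embs.shape[1])  # exact L2 search, no training step
    index.add(embs)

    q = rng.standard_normal((1, 384)).astype(np.float32)
    D, I = index.search(q, 3)  # distances and ids of the 3 nearest chunks
    print(I[0])                # e.g. [5 2 7], indices into the chunk list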
@@ -60,7 +72,7 @@ def retrieve_context(query, k=5):
 def generate_response(query, history):
     context = retrieve_context(query)
     system_prompt = (
-        "You are a helpful assistant that answers based on the retrieved context.\n\n"
+        "You are a helpful assistant that uses the retrieved context to answer questions.\n\n"
         f"Context:\n{context}\n\n"
         f"User: {query}\nAssistant:"
     )
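The prompt here is assembled by hand and ends with `Assistant:`, plain completion-style prompting. Qwen-family chat checkpoints usually ship a chat template, so an alternative build (a sketch, assuming this fine-tune kept its base model's template) would be:

    # Sketch: build the prompt via the tokenizer's chat template instead.
    messages = [
        {"role": "system", "content": f"Answer using this context:\n{context}"},
        {"role": "user", "content": query},
    ]
    prompt = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )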
@@ -74,11 +86,16 @@ def chat_fn(user_message, history):
     history = history + [(user_message, response)]
     return history, history
 
-# -------------------------------
-# 4. Gradio UI
-# -------------------------------
-with gr.Blocks(title="Qwen3 Harrison RAG Chatbot") as demo:
-    gr.Markdown("# 🤖 Qwen3 Harrison RAG Chatbot\nAsk me anything based on retrieved context!")
+# ------------------------------------------------------
+# 4️⃣ Gradio UI
+# ------------------------------------------------------
+with gr.Blocks(title="Qwen3-Harrison-RAG Chatbot") as demo:
+    gr.Markdown(
+        """
+        # 🤖 Qwen3-Harrison-RAG Chatbot
+        Ask me anything — I’ll retrieve relevant context and answer!
+        """
+    )
     chatbot = gr.Chatbot(height=400)
     with gr.Row():
         msg = gr.Textbox(placeholder="Type your message here...", scale=4)
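The pair-tuple history format used by `chat_fn` matches `gr.Chatbot`'s classic default; newer Gradio releases deprecate it in favor of message dicts. If the Space's Gradio version warns about tuples, a possible adjustment (a sketch, version-dependent) is:

    # Sketch: message-style history instead of (user, bot) tuples.
    chatbot = gr.Chatbot(height=400, type="messages")
    # chat_fn would then append
    #   {"role": "user", "content": user_message} and
    #   {"role": "assistant", "content": response}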
@@ -87,8 +104,8 @@ with gr.Blocks(title="Qwen3 Harrison RAG Chatbot") as demo:
     msg.submit(chat_fn, [msg, chatbot], [chatbot, chatbot])
     clear.click(lambda: None, None, chatbot, queue=False)
 
-# -------------------------------
-# 5. Launch for HF Spaces
-# -------------------------------
+# ------------------------------------------------------
+# 5️⃣ Launch for Hugging Face Spaces
+# ------------------------------------------------------
 if __name__ == "__main__":
     demo.launch()
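On Spaces the bare `demo.launch()` is all that is needed; the platform supplies host and port. Running the same file elsewhere (say, in a container) typically requires binding explicitly, for example (a sketch; 7860 is Gradio's conventional port, not something this commit sets):

    # Sketch: explicit bind for non-Spaces deployments.
    demo.launch(server_name="0.0.0.0", server_port=7860)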