hackergeek committed
Commit 98b95ab · verified · 1 Parent(s): d25669e

Update app.py

Files changed (1):
  1. app.py +43 -26
app.py CHANGED
@@ -1,4 +1,14 @@
 import os
+
+# ======================================================
+# OPTION A: Use ephemeral /tmp cache to avoid 50 GB quota
+# ======================================================
+os.environ["TRANSFORMERS_CACHE"] = "/tmp/hf_cache"
+os.environ["HF_HOME"] = "/tmp/hf_home"
+os.environ["HF_DATASETS_CACHE"] = "/tmp/hf_datasets"
+os.environ["HF_MODULES_CACHE"] = "/tmp/hf_modules"
+# ======================================================
+
 import torch
 import gradio as gr
 import faiss
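These assignments must run before any Hugging Face library is imported, which is why the commit places them above `import torch`. Recent `transformers` releases also deprecate `TRANSFORMERS_CACHE` in favor of `HF_HOME`, so setting both, as this commit does, covers old and new versions. A minimal sketch (assuming a recent `huggingface_hub`) to confirm the redirect took effect:

    # Sketch: verify the cache redirect; env vars must be set before the import.
    import os
    os.environ["HF_HOME"] = "/tmp/hf_home"

    from huggingface_hub import constants
    print(constants.HF_HOME)  # expected: /tmp/hf_home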
@@ -6,32 +16,34 @@ import numpy as np
 from transformers import AutoTokenizer, AutoModelForCausalLM
 from sentence_transformers import SentenceTransformer
 
-# -------------------------------
-# 1. Model setup
-# -------------------------------
-GEN_MODEL = "hackergeek/qwen3-harrison-rag"  # main generation model
-EMB_MODEL = "sentence-transformers/all-MiniLM-L6-v2"  # embedding model
+# ------------------------------------------------------
+# 1️⃣ Model setup
+# ------------------------------------------------------
+GEN_MODEL = "hackergeek/qwen3-harrison-rag"  # main generation model
+EMB_MODEL = "sentence-transformers/all-MiniLM-L6-v2"  # embedding model (lightweight)
 
 tokenizer = AutoTokenizer.from_pretrained(GEN_MODEL)
 model = AutoModelForCausalLM.from_pretrained(
     GEN_MODEL,
+    cache_dir="/tmp/hf_cache",
     torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
-    device_map="auto"
+    device_map="auto",
+    low_cpu_mem_usage=True
 )
 
-embedder = SentenceTransformer(EMB_MODEL)
+embedder = SentenceTransformer(EMB_MODEL, cache_folder="/tmp/hf_cache")
 
-# -------------------------------
-# 2. Load and index docs
-# -------------------------------
+# ------------------------------------------------------
+# 2️⃣ Load and index documents
+# ------------------------------------------------------
 DOCS_DIR = "docs"
 os.makedirs(DOCS_DIR, exist_ok=True)
 
-# if no docs exist, create a small demo
+# create a small demo doc if none exists
 if not os.listdir(DOCS_DIR):
     with open(os.path.join(DOCS_DIR, "example.txt"), "w") as f:
         f.write(
-            "HackerGeek Qwen3 Harrison RAG is a retrieval-augmented language model combining Qwen3 with document search."
+            "Qwen3-Harrison-RAG combines the Qwen3 language model with retrieval-augmented generation for context-aware responses."
         )
 
 docs = []
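With `device_map="auto"`, `accelerate` decides where each submodule lives, and `low_cpu_mem_usage=True` streams weights in shard by shard instead of first materializing a full copy in RAM. A quick way to inspect the result (a sketch; `hf_device_map` is only set when a device map was actually used):

    # Sketch: check placement and dtype after loading.
    print(getattr(model, "hf_device_map", "single device"))
    print(next(model.parameters()).dtype)  # torch.float16 on GPU, float32 on CPU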
@@ -39,19 +51,19 @@ for fn in os.listdir(DOCS_DIR):
     with open(os.path.join(DOCS_DIR, fn), encoding="utf-8") as f:
         docs.append(f.read())
 
-# naive chunking
+# simple fixed-size chunking
 chunks = []
 for doc in docs:
     for i in range(0, len(doc), 500):
         chunks.append(doc[i:i+500])
 
-embs = embedder.encode(chunks, convert_to_numpy=True)
+embs = embedder.encode(chunks, convert_to_numpy=True, show_progress_bar=False)
 index = faiss.IndexFlatL2(embs.shape[1])
 index.add(embs)
 
-# -------------------------------
-# 3. Chat logic
-# -------------------------------
+# ------------------------------------------------------
+# 3️⃣ Retrieval + generation logic
+# ------------------------------------------------------
 def retrieve_context(query, k=5):
     q_emb = embedder.encode([query], convert_to_numpy=True)
     D, I = index.search(q_emb, k)
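One caveat worth knowing: `retrieve_context` asks for `k=5` neighbors, but the demo corpus can contain a single 500-character chunk; when `k` exceeds the number of indexed vectors, FAISS fills the missing slots with id `-1`, which a naive `chunks[i]` lookup silently wraps around. A self-contained sketch of the exact-search core built above (384 is all-MiniLM-L6-v2's embedding width):

    # Standalone illustration of the IndexFlatL2 retrieval used above.
    import faiss
    import numpy as np

    rng = np.random.default_rng(0)
    embs = rng.standard_normal((8, 384)).astype(np.float32)  # 8 fake chunks

    index = faiss.IndexFlatL2(embs.shape[1])  # exact L2 search, no training step
    index.add(embs)

    q = rng.standard_normal((1, 384)).astype(np.float32)
    D, I = index.search(q, 3)  # distances and ids of the 3 nearest chunks
    print(I[0])                # e.g. [5 2 7], indices into the chunk list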
@@ -60,7 +72,7 @@ def retrieve_context(query, k=5):
 def generate_response(query, history):
     context = retrieve_context(query)
     system_prompt = (
-        "You are a helpful assistant that answers based on the retrieved context.\n\n"
+        "You are a helpful assistant that uses the retrieved context to answer questions.\n\n"
         f"Context:\n{context}\n\n"
         f"User: {query}\nAssistant:"
     )
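The prompt here is assembled by hand and ends with `Assistant:`, plain completion-style prompting. Qwen-family chat checkpoints usually ship a chat template, so an alternative build (a sketch, assuming this fine-tune kept its base model's template) would be:

    # Sketch: build the prompt via the tokenizer's chat template instead.
    messages = [
        {"role": "system", "content": f"Answer using this context:\n{context}"},
        {"role": "user", "content": query},
    ]
    prompt = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )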
@@ -74,11 +86,16 @@ def chat_fn(user_message, history):
     history = history + [(user_message, response)]
     return history, history
 
-# -------------------------------
-# 4. Gradio UI
-# -------------------------------
-with gr.Blocks(title="Qwen3 Harrison RAG Chatbot") as demo:
-    gr.Markdown("# 🤖 Qwen3 Harrison RAG Chatbot\nAsk me anything based on retrieved context!")
+# ------------------------------------------------------
+# 4️⃣ Gradio UI
+# ------------------------------------------------------
+with gr.Blocks(title="Qwen3-Harrison-RAG Chatbot") as demo:
+    gr.Markdown(
+        """
+        # 🤖 Qwen3-Harrison-RAG Chatbot
+        Ask me anything — I’ll retrieve relevant context and answer!
+        """
+    )
     chatbot = gr.Chatbot(height=400)
     with gr.Row():
         msg = gr.Textbox(placeholder="Type your message here...", scale=4)
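The pair-tuple history format used by `chat_fn` matches `gr.Chatbot`'s classic default; newer Gradio releases deprecate it in favor of message dicts. If the Space's Gradio version warns about tuples, a possible adjustment (a sketch, version-dependent) is:

    # Sketch: message-style history instead of (user, bot) tuples.
    chatbot = gr.Chatbot(height=400, type="messages")
    # chat_fn would then append
    #   {"role": "user", "content": user_message} and
    #   {"role": "assistant", "content": response}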
@@ -87,8 +104,8 @@ with gr.Blocks(title="Qwen3 Harrison RAG Chatbot") as demo:
     msg.submit(chat_fn, [msg, chatbot], [chatbot, chatbot])
     clear.click(lambda: None, None, chatbot, queue=False)
 
-# -------------------------------
-# 5. Launch for HF Spaces
-# -------------------------------
+# ------------------------------------------------------
+# 5️⃣ Launch for Hugging Face Spaces
+# ------------------------------------------------------
 if __name__ == "__main__":
     demo.launch()
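On Spaces the bare `demo.launch()` is all that is needed; the platform supplies host and port. Running the same file elsewhere (say, in a container) typically requires binding explicitly, for example (a sketch; 7860 is Gradio's conventional port, not something this commit sets):

    # Sketch: explicit bind for non-Spaces deployments.
    demo.launch(server_name="0.0.0.0", server_port=7860)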