# HARRISON_GPT / app.py
import os
# ======================================================
# OPTION A: Use ephemeral /tmp cache to avoid 50 GB quota
# ======================================================
os.environ["TRANSFORMERS_CACHE"] = "/tmp/hf_cache"
os.environ["HF_HOME"] = "/tmp/hf_home"
os.environ["HF_DATASETS_CACHE"] = "/tmp/hf_datasets"
os.environ["HF_MODULES_CACHE"] = "/tmp/hf_modules"
# ======================================================
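# NOTE: these variables are set before importing transformers / sentence_transformers
# so that both libraries resolve their cache directories under /tmp.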
import torch
import gradio as gr
import faiss
import numpy as np
from transformers import AutoTokenizer, AutoModelForCausalLM
from sentence_transformers import SentenceTransformer
# ------------------------------------------------------
# 1️⃣ Model setup
# ------------------------------------------------------
GEN_MODEL = "hackergeek/qwen3-harrison-rag" # main generation model
EMB_MODEL = "sentence-transformers/all-MiniLM-L6-v2" # embedding model (lightweight)
tokenizer = AutoTokenizer.from_pretrained(GEN_MODEL)
model = AutoModelForCausalLM.from_pretrained(
    GEN_MODEL,
    cache_dir="/tmp/hf_cache",
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
    device_map="auto",
    low_cpu_mem_usage=True,
)
embedder = SentenceTransformer(EMB_MODEL, cache_folder="/tmp/hf_cache")
# ------------------------------------------------------
# 2️⃣ Load and index documents
# ------------------------------------------------------
DOCS_DIR = "docs"
os.makedirs(DOCS_DIR, exist_ok=True)
# create a small demo doc if none exists
if not os.listdir(DOCS_DIR):
    with open(os.path.join(DOCS_DIR, "example.txt"), "w") as f:
        f.write(
            "Qwen3-Harrison-RAG combines the Qwen3 language model with retrieval-augmented generation for context-aware responses."
        )
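# Read every file in docs/ into memory; the corpus is assumed to be small
# enough to hold as plain strings.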
docs = []
for fn in os.listdir(DOCS_DIR):
    with open(os.path.join(DOCS_DIR, fn), encoding="utf-8") as f:
        docs.append(f.read())
# simple fixed-size chunking
chunks = []
for doc in docs:
    for i in range(0, len(doc), 500):
        chunks.append(doc[i:i+500])
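# Fixed-size 500-character windows with no overlap; a fact that straddles a
# chunk boundary ends up split across two chunks.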
embs = embedder.encode(chunks, convert_to_numpy=True, show_progress_bar=False)
index = faiss.IndexFlatL2(embs.shape[1])
index.add(embs)
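# IndexFlatL2 performs exact (brute-force) L2 search; the index lives in memory
# and is rebuilt from scratch every time the Space restarts.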
# ------------------------------------------------------
# 3️⃣ Retrieval + generation logic
# ------------------------------------------------------
def retrieve_context(query, k=5):
    q_emb = embedder.encode([query], convert_to_numpy=True)
    D, I = index.search(q_emb, k)
    return "\n\n".join([chunks[i] for i in I[0]])
def generate_response(query, history):
    context = retrieve_context(query)
    system_prompt = (
        "You are a helpful assistant that uses the retrieved context to answer questions.\n\n"
        f"Context:\n{context}\n\n"
        f"User: {query}\nAssistant:"
    )
    inputs = tokenizer(system_prompt, return_tensors="pt").to(model.device)
    output_ids = model.generate(**inputs, max_new_tokens=300)
    output = tokenizer.decode(output_ids[0], skip_special_tokens=True)
    return output.split("Assistant:")[-1].strip()
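# Gradio calls chat_fn with the textbox value and the current Chatbot value,
# a list of (user, assistant) tuples as built below.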
def chat_fn(user_message, history):
    response = generate_response(user_message, history)
    history = history + [(user_message, response)]
    # return the updated history for the Chatbot and an empty string to clear the textbox
    return history, ""
# ------------------------------------------------------
# 4️⃣ Gradio UI
# ------------------------------------------------------
with gr.Blocks(title="Qwen3-Harrison-RAG Chatbot") as demo:
    gr.Markdown(
        """
        # 🤖 Qwen3-Harrison-RAG Chatbot
        Ask me anything — I’ll retrieve relevant context and answer!
        """
    )
    chatbot = gr.Chatbot(height=400)
    with gr.Row():
        msg = gr.Textbox(placeholder="Type your message here...", scale=4)
        clear = gr.Button("Clear", scale=1)
    msg.submit(chat_fn, [msg, chatbot], [chatbot, msg])
    clear.click(lambda: None, None, chatbot, queue=False)
# ------------------------------------------------------
# 5️⃣ Launch for Hugging Face Spaces
# ------------------------------------------------------
if __name__ == "__main__":
    demo.launch()