Files changed (1)
  1. app.py +13 -3
app.py CHANGED
@@ -1,5 +1,6 @@
 import os
 import zipfile
+import torch  # ✅ Import torch so empty_cache works
 import gradio as gr
 
 from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
@@ -17,12 +18,18 @@ if not os.path.exists("faiss_index") and os.path.exists("faiss_index.zip"):
 
 # --- Step 2: Load embedding and vectorstore ---
 embedding_model = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')
-vectordb = FAISS.load_local("faiss_index", embedding_model,allow_dangerous_deserialization=True)
+vectordb = FAISS.load_local("faiss_index", embedding_model, allow_dangerous_deserialization=True)
 
 # --- Step 3: Load the LLM ---
 model_id = "tiiuae/falcon-7b-instruct"
 tokenizer = AutoTokenizer.from_pretrained(model_id)
-model = AutoModelForCausalLM.from_pretrained(model_id)
+
+# ✅ Use device_map + float16 to save memory
+model = AutoModelForCausalLM.from_pretrained(
+    model_id,
+    device_map="auto",
+    torch_dtype=torch.float16
+)
 
 pipe = pipeline(
     "text-generation",
@@ -57,15 +64,18 @@ qa_chain = ConversationalRetrievalChain.from_llm(
 )
 
 UH_LOGO = "images/UH.png"
+
 # --- Step 5: Define chatbot logic ---
 def chat(message, history):
     result = qa_chain.invoke({"question": message})
     response = result.get("answer", "")
     response = response.split("Answer:")[-1].replace("<|assistant|>", "").strip()
+
+    # ✅ Actually clear unused GPU memory
     if torch.cuda.is_available():
         torch.cuda.empty_cache()
+
     return response
-
 
 # --- Step 6: UI ---
 sample_questions = [
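
On the `torch.cuda.empty_cache()` call that the new import finally makes work: it hands cached allocator blocks back to the driver but does not free live tensors, so the per-request gain is modest. One way to observe its effect, as a sketch assuming a CUDA device is present:

```python
import torch

if torch.cuda.is_available():
    before = torch.cuda.memory_reserved()  # bytes held by PyTorch's caching allocator
    torch.cuda.empty_cache()               # return unused cached blocks to the driver
    after = torch.cuda.memory_reserved()
    print(f"released {(before - after) / 1e6:.0f} MB back to the driver")
    # memory_allocated() (live tensors) is unchanged by empty_cache()
```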
 
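
The diff cuts off before the Step 6 body, but a `chat(message, history)` function with this exact signature is what `gr.ChatInterface` expects, so the wiring presumably looks something like the following. This is an illustrative sketch continuing app.py's names (`chat`, `sample_questions`); the title string is hypothetical, not from the PR:

```python
import gradio as gr

demo = gr.ChatInterface(
    fn=chat,                      # chat(message, history) -> str, defined in Step 5
    examples=sample_questions,    # the list Step 6 begins defining
    title="UH Course Assistant",  # hypothetical title, not shown in the diff
)
demo.launch()
```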