kajibuku3

Sleeping

App Files Files Community

Bofandra committed on Aug 6

Commit

b7e000c

verified ·

1 Parent(s): 9030e25

Update app.py

Browse files

Files changed (1) hide show

app.py +27 -2

app.py CHANGED Viewed

@@ -18,12 +18,27 @@ device = "cuda" if torch.cuda.is_available() else "cpu"
 model = model.to(device)
 model.eval()
-def generate_answer(prompt):
     inputs = tokenizer(prompt, return_tensors="pt").to(device)
     with torch.no_grad():
         outputs = model.generate(**inputs, max_new_tokens=512)
     return tokenizer.decode(outputs[0], skip_special_tokens=True)
 # Sentence embeddings
 embedder = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
@@ -89,7 +104,17 @@ def ask_question(message, history, selected_titles):
                 f"(Page {page_numbers[i]}): {chunks[i]}" for i in I[0]
             ])
-            prompt = f"""Answer the question using only the context below.\n\nContext:\n{context}\n\nQuestion: {message}"""
             response = generate_answer(prompt)
             combined_answer += f"**{title}**:\n{response.strip()}\n\n"

 model = model.to(device)
 model.eval()
+"""def generate_answer(prompt):
     inputs = tokenizer(prompt, return_tensors="pt").to(device)
     with torch.no_grad():
         outputs = model.generate(**inputs, max_new_tokens=512)
+    return tokenizer.decode(outputs[0], skip_special_tokens=True)"""
+def generate_answer(prompt):  # Tokenize `prompt`, sample one generation from `model`, and return it as decoded text
+    inputs = tokenizer(prompt, return_tensors="pt").to(device)  # PyTorch tensors, moved to the same `device` as the model
+    with torch.no_grad():  # Inference only: no gradient tracking
+        outputs = model.generate(
+            **inputs,
+            max_new_tokens=512,         # Upper bound on newly generated tokens
+            temperature=0.9,            # Higher = more creative
+            repetition_penalty=1.1,     # Penalize repeating the same phrases
+            do_sample=True,             # Needed for temperature/top_k/top_p to take effect
+            top_k=50,                   # Sample from top 50 tokens
+            top_p=0.95                  # Nucleus sampling
+        )
     return tokenizer.decode(outputs[0], skip_special_tokens=True)  # Only the first sequence is decoded. NOTE(review): if the model is decoder-only, this text presumably still begins with the prompt; confirm against the model loaded above
 # Sentence embeddings
 embedder = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
                 f"(Page {page_numbers[i]}): {chunks[i]}" for i in I[0]
             ])
+            #prompt = f"""Answer the question using only the context below.\n\nContext:\n{context}\n\nQuestion: {message}"""
+            prompt = f"""You are a helpful assistant. Provide a thorough and detailed answer to the following question using only the context.
+                Context:
+                {context}
+                Question: {message}
+                Answer in detail:
+                """
             response = generate_answer(prompt)
             combined_answer += f"**{title}**:\n{response.strip()}\n\n"