Spaces:

teapotai
/

teapotllm_discord_bot

Sleeping

App Files Files Community

zakerytclarke committed on Mar 26

Commit

bddccb9

verified ·

1 Parent(s): d97238d

Update app.py

Browse files

Files changed (1) hide show

app.py +32 -25

app.py CHANGED Viewed

@@ -53,43 +53,43 @@ async def brave_search(query, count=1):
                 print(f"Error: {response.status}, {await response.text()}")
                 return []
-# @traceable
-# @log_time
-# def query_teapot(prompt, context, user_input):
-#     input_text = prompt + "\n" + context + "\n" + user_input
-#     start_time = time.time()
-#     inputs = tokenizer(input_text, return_tensors="pt")
-#     input_length = inputs["input_ids"].shape[1]
-#     output = model.generate(**inputs, max_new_tokens=512)
-#     output_text = tokenizer.decode(output[0], skip_special_tokens=True)
-#     total_length = output.shape[1]  # Includes both input and output tokens
-#     output_length = total_length - input_length  # Extract output token count
-#     end_time = time.time()
-#     elapsed_time = end_time - start_time
-#     tokens_per_second = total_length / elapsed_time if elapsed_time > 0 else float("inf")
-#     return output_text
-pipeline_lock = asyncio.Lock()  # removed side of the diff: lock used below to serialize calls to model.generate
-@traceable
-@log_time
-async def query_teapot(prompt, context, user_input):  # removed side of the diff: async version, matching the `await query_teapot(...)` call in handle_chat
-    input_text = prompt + "\n" + context + "\n" + user_input  # prompt, context and user_input joined with newlines into one model input
-    inputs = tokenizer(input_text, return_tensors="pt")
-    async with pipeline_lock:  # Ensure only one call runs at a time
-        output = await asyncio.to_thread(model.generate, **inputs, max_new_tokens=512)  # generation is handed to a worker thread and awaited
-    output_text = tokenizer.decode(output[0], skip_special_tokens=True)  # decodes the first generated sequence only
-    return output_text
 @log_time
@@ -104,6 +104,10 @@ async def handle_chat(user_input):
     prompt = """You are Teapot, an open-source AI assistant optimized for low-end devices, providing short, accurate responses without hallucinating while excelling at information extraction and text summarization."""
     generation_start_time = time.time()
     response = await query_teapot(prompt, context, user_input)
     generation_end_time = time.time()
     debug_info = f"""
@@ -113,6 +117,9 @@ Prompt:
 Context:
 {context}
 Search time: {search_end_time - search_start_time:.2f} seconds
 Generation time: {generation_end_time - generation_start_time:.2f} seconds
 Response: {response}

                 print(f"Error: {response.status}, {await response.text()}")
                 return []
+@traceable
+@log_time
+def query_teapot(prompt, context, user_input):  # NOTE(review): now a plain (non-async) def, but the handle_chat hunk still does `await query_teapot(...)` — confirm the decorators return an awaitable, otherwise that await fails
+    input_text = prompt + "\n" + context + "\n" + user_input  # prompt, context and user_input joined with newlines into one model input
+    start_time = time.time()  # NOTE(review): only feeds elapsed_time below, which is never logged or returned
+    inputs = tokenizer(input_text, return_tensors="pt")
+    input_length = inputs["input_ids"].shape[1]
+    output = model.generate(**inputs, max_new_tokens=512)  # NOTE(review): called directly here (the async variant used asyncio.to_thread) — presumably blocks the event loop when reached from async handle_chat; confirm
+    output_text = tokenizer.decode(output[0], skip_special_tokens=True)  # decodes the first generated sequence only
+    total_length = output.shape[1]  # Includes both input and output tokens
+    output_length = total_length - input_length  # Extract output token count
+    end_time = time.time()
+    elapsed_time = end_time - start_time
+    tokens_per_second = total_length / elapsed_time if elapsed_time > 0 else float("inf")  # NOTE(review): unused — neither logged nor returned; also divides total_length rather than output_length, confirm intent
+    return output_text  # only the decoded text is returned; the timing/token metrics above are discarded
+# pipeline_lock = asyncio.Lock()
+# @traceable
+# @log_time
+# async def query_teapot(prompt, context, user_input):
+#     input_text = prompt + "\n" + context + "\n" + user_input
+#     inputs = tokenizer(input_text, return_tensors="pt")
+#     async with pipeline_lock:  # Ensure only one call runs at a time
+#         output = await asyncio.to_thread(model.generate, **inputs, max_new_tokens=512)
+#     output_text = tokenizer.decode(output[0], skip_special_tokens=True)
+#     return output_text
 @log_time
     prompt = """You are Teapot, an open-source AI assistant optimized for low-end devices, providing short, accurate responses without hallucinating while excelling at information extraction and text summarization."""
     generation_start_time = time.time()
     response = await query_teapot(prompt, context, user_input)
+    if len(results)==0:
+        response = "I'm sorry but I don't have any information on that."
     generation_end_time = time.time()
     debug_info = f"""
 Context:
 {context}
+Query:
+{user_input}
 Search time: {search_end_time - search_start_time:.2f} seconds
 Generation time: {generation_end_time - generation_start_time:.2f} seconds
 Response: {response}