Spaces:

teapotai
/

teapotllm_discord_bot

Sleeping

zakerytclarke committed on Mar 26

Commit

d97238d

verified ·

1 Parent(s): d6c6437

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -53,29 +53,45 @@ async def brave_search(query, count=1):
                 print(f"Error: {response.status}, {await response.text()}")
                 return []
-@traceable
-@log_time
-def query_teapot(prompt, context, user_input):
-    input_text = prompt + "\n" + context + "\n" + user_input
-    start_time = time.time()
-    inputs = tokenizer(input_text, return_tensors="pt")
-    input_length = inputs["input_ids"].shape[1]
-    output = model.generate(**inputs, max_new_tokens=512)
-    output_text = tokenizer.decode(output[0], skip_special_tokens=True)
-    total_length = output.shape[1]  # Includes both input and output tokens
-    output_length = total_length - input_length  # Extract output token count
-    end_time = time.time()
-    elapsed_time = end_time - start_time
-    tokens_per_second = total_length / elapsed_time if elapsed_time > 0 else float("inf")
     return output_text
 @log_time
 async def handle_chat(user_input):
     search_start_time = time.time()

                 print(f"Error: {response.status}, {await response.text()}")
                 return []
+# @traceable
+# @log_time
+# def query_teapot(prompt, context, user_input):
+#     input_text = prompt + "\n" + context + "\n" + user_input
+#     start_time = time.time()
+#     inputs = tokenizer(input_text, return_tensors="pt")
+#     input_length = inputs["input_ids"].shape[1]
+#     output = model.generate(**inputs, max_new_tokens=512)
+#     output_text = tokenizer.decode(output[0], skip_special_tokens=True)
+#     total_length = output.shape[1]  # Includes both input and output tokens
+#     output_length = total_length - input_length  # Extract output token count
+#     end_time = time.time()
+#     elapsed_time = end_time - start_time
+#     tokens_per_second = total_length / elapsed_time if elapsed_time > 0 else float("inf")
+#     return output_text
+pipeline_lock = asyncio.Lock()
+@traceable
+@log_time
+async def query_teapot(prompt, context, user_input):
+    input_text = prompt + "\n" + context + "\n" + user_input
+    inputs = tokenizer(input_text, return_tensors="pt")
+    async with pipeline_lock:  # Ensure only one call runs at a time
+        output = await asyncio.to_thread(model.generate, **inputs, max_new_tokens=512)
+    output_text = tokenizer.decode(output[0], skip_special_tokens=True)
     return output_text
 @log_time
 async def handle_chat(user_input):
     search_start_time = time.time()