zakerytclarke committed
Commit bddccb9 · verified · 1 Parent(s): d97238d

Update app.py

Files changed (1):
  1. app.py +32 -25
app.py CHANGED
@@ -53,43 +53,43 @@ async def brave_search(query, count=1):
         print(f"Error: {response.status}, {await response.text()}")
         return []
 
-# @traceable
-# @log_time
-# def query_teapot(prompt, context, user_input):
-#     input_text = prompt + "\n" + context + "\n" + user_input
+@traceable
+@log_time
+def query_teapot(prompt, context, user_input):
+    input_text = prompt + "\n" + context + "\n" + user_input
 
-#     start_time = time.time()
+    start_time = time.time()
 
-#     inputs = tokenizer(input_text, return_tensors="pt")
-#     input_length = inputs["input_ids"].shape[1]
+    inputs = tokenizer(input_text, return_tensors="pt")
+    input_length = inputs["input_ids"].shape[1]
 
-#     output = model.generate(**inputs, max_new_tokens=512)
+    output = model.generate(**inputs, max_new_tokens=512)
 
-#     output_text = tokenizer.decode(output[0], skip_special_tokens=True)
-#     total_length = output.shape[1]  # Includes both input and output tokens
-#     output_length = total_length - input_length  # Extract output token count
+    output_text = tokenizer.decode(output[0], skip_special_tokens=True)
+    total_length = output.shape[1]  # Includes both input and output tokens
+    output_length = total_length - input_length  # Extract output token count
 
-#     end_time = time.time()
+    end_time = time.time()
 
-#     elapsed_time = end_time - start_time
-#     tokens_per_second = total_length / elapsed_time if elapsed_time > 0 else float("inf")
+    elapsed_time = end_time - start_time
+    tokens_per_second = total_length / elapsed_time if elapsed_time > 0 else float("inf")
 
-#     return output_text
+    return output_text
 
 
-pipeline_lock = asyncio.Lock()
+# pipeline_lock = asyncio.Lock()
 
-@traceable
-@log_time
-async def query_teapot(prompt, context, user_input):
-    input_text = prompt + "\n" + context + "\n" + user_input
-    inputs = tokenizer(input_text, return_tensors="pt")
+# @traceable
+# @log_time
+# async def query_teapot(prompt, context, user_input):
+#     input_text = prompt + "\n" + context + "\n" + user_input
+#     inputs = tokenizer(input_text, return_tensors="pt")
 
-    async with pipeline_lock:  # Ensure only one call runs at a time
-        output = await asyncio.to_thread(model.generate, **inputs, max_new_tokens=512)
+#     async with pipeline_lock:  # Ensure only one call runs at a time
+#         output = await asyncio.to_thread(model.generate, **inputs, max_new_tokens=512)
 
-    output_text = tokenizer.decode(output[0], skip_special_tokens=True)
-    return output_text
+#     output_text = tokenizer.decode(output[0], skip_special_tokens=True)
+#     return output_text
 
 
 @log_time
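
The async variant being commented out in this hunk follows a common pattern for calling a blocking transformers generate step from async code: off-load it to a worker thread with asyncio.to_thread and serialize access behind an asyncio.Lock, so concurrent requests queue up instead of contending for the model. A minimal runnable sketch of that pattern, with a hypothetical slow_generate standing in for the real model.generate call:

import asyncio
import time

pipeline_lock = asyncio.Lock()

def slow_generate(prompt):
    # stand-in for the blocking model.generate(**inputs, max_new_tokens=512)
    time.sleep(1.0)
    return f"response to: {prompt}"

async def query(prompt):
    async with pipeline_lock:  # ensure only one generation runs at a time
        # to_thread keeps the event loop responsive while generation blocks
        return await asyncio.to_thread(slow_generate, prompt)

async def main():
    # two concurrent requests are accepted immediately but generate one after the other
    print(await asyncio.gather(query("first"), query("second")))

asyncio.run(main())
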
@@ -104,6 +104,10 @@ async def handle_chat(user_input):
     prompt = """You are Teapot, an open-source AI assistant optimized for low-end devices, providing short, accurate responses without hallucinating while excelling at information extraction and text summarization."""
     generation_start_time = time.time()
     response = await query_teapot(prompt, context, user_input)
+
+    if len(results)==0:
+        response = "I'm sorry but I don't have any information on that."
+
     generation_end_time = time.time()
 
     debug_info = f"""
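
The new fallback overrides the generated answer whenever the earlier brave_search call returned no results. One possible arrangement is to perform that check before generation, so the model call is skipped when there is no context to ground on; a self-contained sketch with stubbed-out search and generation (the stub bodies are assumptions, not app.py's code):

import asyncio

async def brave_search(query, count=1):
    return []  # stub: pretend the search found nothing

async def query_teapot(prompt, context, user_input):
    return "model answer"  # stub for the real generation step

async def handle_chat(user_input):
    results = await brave_search(user_input)
    if len(results) == 0:
        # bail out before generating rather than overwriting the output afterwards
        return "I'm sorry but I don't have any information on that."
    context = "\n".join(str(r) for r in results)
    prompt = "You are Teapot, an open-source AI assistant..."  # truncated here
    return await query_teapot(prompt, context, user_input)

print(asyncio.run(handle_chat("what is a teapot?")))
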
@@ -113,6 +117,9 @@ Prompt:
 Context:
 {context}
 
+Query:
+{user_input}
+
 Search time: {search_end_time - search_start_time:.2f} seconds
 Generation time: {generation_end_time - generation_start_time:.2f} seconds
 Response: {response}
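
For reference, the timing bookkeeping re-enabled in query_teapot in the first hunk derives throughput from the full output tensor, which includes the prompt tokens. A standalone sketch of that arithmetic with dummy token counts (no model required):

import time

start_time = time.time()
time.sleep(0.5)  # stand-in for model.generate(...)
end_time = time.time()

input_length = 50    # e.g. inputs["input_ids"].shape[1]
total_length = 150   # e.g. output.shape[1]: prompt + generated tokens
output_length = total_length - input_length  # generated tokens only

elapsed_time = end_time - start_time
# app.py divides total_length by the elapsed time; dividing output_length
# instead would measure generation throughput alone
tokens_per_second = total_length / elapsed_time if elapsed_time > 0 else float("inf")
print(f"{tokens_per_second:.1f} tokens/sec")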
 