zakerytclarke committed on
Commit
207d7c6
·
verified ·
1 Parent(s): 506ca16

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -13
app.py CHANGED
@@ -70,10 +70,11 @@ def extract_first_url(query):
70
  return query, first_url
71
  return query, None
72
 
73
- def extract_text_from_html(url, max_words=250, max_chars=2000):
74
- # Fetch the HTML content from the URL
75
- response = urllib.request.urlopen(url)
76
- html_content = response.read().decode('utf-8')
 
77
 
78
  # Find all text within <p> tags using regular expression
79
  p_tag_content = re.findall(r'<p>(.*?)</p>', html_content, re.DOTALL)
@@ -87,15 +88,13 @@ def extract_text_from_html(url, max_words=250, max_chars=2000):
87
  # Join all paragraphs into one large string
88
  full_text = ' '.join(decoded_text)
89
 
90
- # Split the text into words and get the first 250 words
91
  words = full_text.split()
92
  first_words = ' '.join(words[:max_words])
93
 
94
- # Ensure the text does not exceed 1000 characters
95
- if len(first_words) > max_chars:
96
- first_words = first_words[:max_chars]
97
 
98
- return first_words
99
 
100
 
101
 
@@ -125,7 +124,7 @@ async def handle_chat(user_input):
125
 
126
  # If there's a URL, fetch the context
127
  if url:
128
- context = extract_text_from_html(url)
129
  user_input = processed_query
130
  else:
131
  # Custom prompt shims
@@ -137,7 +136,9 @@ async def handle_chat(user_input):
137
  search_start_time = time.time()
138
  results = await brave_search(user_input)
139
  search_end_time = time.time()
140
-
 
 
141
  documents = [desc.replace('<strong>', '').replace('</strong>', '') for _, desc, _ in results]
142
 
143
  context = "\n".join(documents)
@@ -145,8 +146,7 @@ async def handle_chat(user_input):
145
  generation_start_time = time.time()
146
  response = await query_teapot(prompt, context, user_input)
147
 
148
- if len(results)==0:
149
- response = "I'm sorry but I don't have any information on that."
150
 
151
  generation_end_time = time.time()
152
 
 
70
  return query, first_url
71
  return query, None
72
 
73
+ async def extract_text_from_html(url, max_words=250, max_chars=2000):
74
+ # Fetch the HTML content asynchronously
75
+ async with aiohttp.ClientSession() as session:
76
+ async with session.get(url) as response:
77
+ html_content = await response.text()
78
 
79
  # Find all text within <p> tags using regular expression
80
  p_tag_content = re.findall(r'<p>(.*?)</p>', html_content, re.DOTALL)
 
88
  # Join all paragraphs into one large string
89
  full_text = ' '.join(decoded_text)
90
 
91
+ # Split the text into words and get the first `max_words` words
92
  words = full_text.split()
93
  first_words = ' '.join(words[:max_words])
94
 
95
+ # Ensure the text does not exceed `max_chars` characters
96
+ return first_words[:max_chars]
 
97
 
 
98
 
99
 
100
 
 
124
 
125
  # If there's a URL, fetch the context
126
  if url:
127
+ context = await extract_text_from_html(url)
128
  user_input = processed_query
129
  else:
130
  # Custom prompt shims
 
136
  search_start_time = time.time()
137
  results = await brave_search(user_input)
138
  search_end_time = time.time()
139
+
140
+ if len(results)==0:
141
+ return "I'm sorry but I don't have any information on that.", ""
142
  documents = [desc.replace('<strong>', '').replace('</strong>', '') for _, desc, _ in results]
143
 
144
  context = "\n".join(documents)
 
146
  generation_start_time = time.time()
147
  response = await query_teapot(prompt, context, user_input)
148
 
149
+
 
150
 
151
  generation_end_time = time.time()
152