Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -70,10 +70,11 @@ def extract_first_url(query):
|
|
70 |
return query, first_url
|
71 |
return query, None
|
72 |
|
73 |
-
def extract_text_from_html(url, max_words=250, max_chars=2000):
|
74 |
-
# Fetch the HTML content
|
75 |
-
|
76 |
-
|
|
|
77 |
|
78 |
# Find all text within <p> tags using regular expression
|
79 |
p_tag_content = re.findall(r'<p>(.*?)</p>', html_content, re.DOTALL)
|
@@ -87,15 +88,13 @@ def extract_text_from_html(url, max_words=250, max_chars=2000):
|
|
87 |
# Join all paragraphs into one large string
|
88 |
full_text = ' '.join(decoded_text)
|
89 |
|
90 |
-
# Split the text into words and get the first
|
91 |
words = full_text.split()
|
92 |
first_words = ' '.join(words[:max_words])
|
93 |
|
94 |
-
# Ensure the text does not exceed
|
95 |
-
|
96 |
-
first_words = first_words[:max_chars]
|
97 |
|
98 |
-
return first_words
|
99 |
|
100 |
|
101 |
|
@@ -125,7 +124,7 @@ async def handle_chat(user_input):
|
|
125 |
|
126 |
# If there's a URL, fetch the context
|
127 |
if url:
|
128 |
-
context = extract_text_from_html(url)
|
129 |
user_input = processed_query
|
130 |
else:
|
131 |
# Custom prompt shims
|
@@ -137,7 +136,9 @@ async def handle_chat(user_input):
|
|
137 |
search_start_time = time.time()
|
138 |
results = await brave_search(user_input)
|
139 |
search_end_time = time.time()
|
140 |
-
|
|
|
|
|
141 |
documents = [desc.replace('<strong>', '').replace('</strong>', '') for _, desc, _ in results]
|
142 |
|
143 |
context = "\n".join(documents)
|
@@ -145,8 +146,7 @@ async def handle_chat(user_input):
|
|
145 |
generation_start_time = time.time()
|
146 |
response = await query_teapot(prompt, context, user_input)
|
147 |
|
148 |
-
|
149 |
-
response = "I'm sorry but I don't have any information on that."
|
150 |
|
151 |
generation_end_time = time.time()
|
152 |
|
|
|
70 |
return query, first_url
|
71 |
return query, None
|
72 |
|
73 |
+
async def extract_text_from_html(url, max_words=250, max_chars=2000):
|
74 |
+
# Fetch the HTML content asynchronously
|
75 |
+
async with aiohttp.ClientSession() as session:
|
76 |
+
async with session.get(url) as response:
|
77 |
+
html_content = await response.text()
|
78 |
|
79 |
# Find all text within <p> tags using regular expression
|
80 |
p_tag_content = re.findall(r'<p>(.*?)</p>', html_content, re.DOTALL)
|
|
|
88 |
# Join all paragraphs into one large string
|
89 |
full_text = ' '.join(decoded_text)
|
90 |
|
91 |
+
# Split the text into words and get the first `max_words` words
|
92 |
words = full_text.split()
|
93 |
first_words = ' '.join(words[:max_words])
|
94 |
|
95 |
+
# Ensure the text does not exceed `max_chars` characters
|
96 |
+
return first_words[:max_chars]
|
|
|
97 |
|
|
|
98 |
|
99 |
|
100 |
|
|
|
124 |
|
125 |
# If there's a URL, fetch the context
|
126 |
if url:
|
127 |
+
context = await extract_text_from_html(url)
|
128 |
user_input = processed_query
|
129 |
else:
|
130 |
# Custom prompt shims
|
|
|
136 |
search_start_time = time.time()
|
137 |
results = await brave_search(user_input)
|
138 |
search_end_time = time.time()
|
139 |
+
|
140 |
+
if len(results)==0:
|
141 |
+
return "I'm sorry but I don't have any information on that.", ""
|
142 |
documents = [desc.replace('<strong>', '').replace('</strong>', '') for _, desc, _ in results]
|
143 |
|
144 |
context = "\n".join(documents)
|
|
|
146 |
generation_start_time = time.time()
|
147 |
response = await query_teapot(prompt, context, user_input)
|
148 |
|
149 |
+
|
|
|
150 |
|
151 |
generation_end_time = time.time()
|
152 |
|