Spaces:

teapotai
/

teapotllm_discord_bot

Running

App Files Community

zakerytclarke committed on 28 days ago

Commit

207d7c6

verified ·

1 Parent(s): 506ca16

Update app.py

Browse files

Files changed (1) hide show

app.py +13 -13

app.py CHANGED Viewed

@@ -70,10 +70,11 @@ def extract_first_url(query):
         return query, first_url
     return query, None
-def extract_text_from_html(url, max_words=250, max_chars=2000):
-    # Fetch the HTML content from the URL
-    response = urllib.request.urlopen(url)
-    html_content = response.read().decode('utf-8')
     # Find all text within <p> tags using regular expression
     p_tag_content = re.findall(r'<p>(.*?)</p>', html_content, re.DOTALL)
@@ -87,15 +88,13 @@ def extract_text_from_html(url, max_words=250, max_chars=2000):
     # Join all paragraphs into one large string
     full_text = ' '.join(decoded_text)
-    # Split the text into words and get the first 250 words
     words = full_text.split()
     first_words = ' '.join(words[:max_words])
-    # Ensure the text does not exceed 1000 characters
-    if len(first_words) > max_chars:
-        first_words = first_words[:max_chars]
-    return first_words
@@ -125,7 +124,7 @@ async def handle_chat(user_input):
     # If there's a URL, fetch the context
     if url:
-        context = extract_text_from_html(url)
         user_input = processed_query
     else:
         # Custom prompt shims
@@ -137,7 +136,9 @@ async def handle_chat(user_input):
             search_start_time = time.time()
             results = await brave_search(user_input)
             search_end_time = time.time()
             documents = [desc.replace('<strong>', '').replace('</strong>', '') for _, desc, _ in results]
             context = "\n".join(documents)
@@ -145,8 +146,7 @@ async def handle_chat(user_input):
     generation_start_time = time.time()
     response = await query_teapot(prompt, context, user_input)
-    if len(results)==0:
-        response = "I'm sorry but I don't have any information on that."
     generation_end_time = time.time()

         return query, first_url
     return query, None
+async def extract_text_from_html(url, max_words=250, max_chars=2000):
+    # Fetch the HTML content asynchronously
+    async with aiohttp.ClientSession() as session:
+        async with session.get(url) as response:
+            html_content = await response.text()
     # Find all text within <p> tags using regular expression
     p_tag_content = re.findall(r'<p>(.*?)</p>', html_content, re.DOTALL)
     # Join all paragraphs into one large string
     full_text = ' '.join(decoded_text)
+    # Split the text into words and get the first `max_words` words
     words = full_text.split()
     first_words = ' '.join(words[:max_words])
+    # Ensure the text does not exceed `max_chars` characters
+    return first_words[:max_chars]
     # If there's a URL, fetch the context
     if url:
+        context = await extract_text_from_html(url)
         user_input = processed_query
     else:
         # Custom prompt shims
             search_start_time = time.time()
             results = await brave_search(user_input)
             search_end_time = time.time()
+            if len(results)==0:
+            return "I'm sorry but I don't have any information on that.", ""
             documents = [desc.replace('<strong>', '').replace('</strong>', '') for _, desc, _ in results]
             context = "\n".join(documents)
     generation_start_time = time.time()
     response = await query_teapot(prompt, context, user_input)
     generation_end_time = time.time()