# AutoReadmeAgent / app.py
import re

import gradio as gr
import requests
from markdownify import markdownify
from requests.exceptions import RequestException

from utils.preprocessor import Preprocessor
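# Preprocessor lives in utils/preprocessor.py in this repo. Based on how it is
# called below, it is assumed to expose two methods (an inference, not a
# documented contract):
#   Preprocessor.extract_section(markdown)     -> str with the section of interest
#   Preprocessor.extract_dirs_from_text(text)  -> (directory paths, file paths)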
def visit_webpage(url, max_output_length=40000):
    """
    Fetch the webpage at `url`, convert it to Markdown, and extract the
    relevant section plus directory and file paths using Preprocessor.
    """
    try:
        response = requests.get(url, timeout=20)
        response.raise_for_status()

        # Convert the HTML to Markdown and collapse runs of blank lines.
        markdown_content = markdownify(response.text).strip()
        markdown_content = re.sub(r"\n{3,}", "\n\n", markdown_content)

        # Truncate overly long content, keeping the first and last halves.
        if len(markdown_content) > max_output_length:
            markdown_content = (
                markdown_content[: max_output_length // 2]
                + f"\n..._This content has been truncated to stay below {max_output_length} characters_...\n"
                + markdown_content[-max_output_length // 2 :]
            )

        # Use Preprocessor class methods to pull out the section and its paths.
        section = Preprocessor.extract_section(markdown_content)
        dir_paths, files = Preprocessor.extract_dirs_from_text(section)

        # Format the result
        result = (
            f"paths: {dir_paths}\n\n"
            f"files: {files}"
        )
        return result

    except requests.exceptions.Timeout:
        return "The request timed out. Please try again later or check the URL."
    except RequestException as e:
        return f"Error fetching the webpage: {str(e)}"
    except Exception as e:
        return f"An unexpected error occurred: {str(e)}"
demo = gr.Interface(
    fn=visit_webpage,
    inputs=gr.Textbox(label="Website URL"),
    outputs=gr.Textbox(label="Extracted Section, Directory Paths, and File Paths"),
    title="Webpage Section and Path Extractor",
)
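# demo.launch() starts a local Gradio server; passing share=True (a standard
# launch() option) would additionally create a temporary public link. The
# default local launch is kept here.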
if __name__ == "__main__":
    demo.launch()