cyberosa committed on
Commit
e4f0a1d
·
1 Parent(s): d109a2d
Files changed (1) hide show
  1. visit_webpage.py +46 -0
visit_webpage.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Any, Optional
2
+ from smolagents.tools import Tool
3
+ import re
4
+
5
+
6
class VisitWebpageTool(Tool):
    """Tool that downloads a webpage and returns its content as Markdown text.

    Failures while fetching or converting the page are reported as plain-text
    messages in the return value of ``forward`` rather than being raised; only
    a missing optional dependency raises (``ImportError``).
    """

    name = "visit_webpage"
    description = "Visits a webpage at the given url and reads its content as a markdown string. Use this to browse webpages."
    inputs = {
        "url": {"type": "string", "description": "The url of the webpage to visit."}
    }
    output_type = "string"

    def __init__(self, *args, **kwargs):
        """Flag the instance as not yet initialized.

        Positional and keyword arguments are accepted but not used here, and
        the base-class initializer is not invoked.
        """
        self.is_initialized = False

    def forward(self, url: str) -> str:
        """Fetch ``url`` and return the page rendered as Markdown.

        Args:
            url: Address of the webpage to download.

        Returns:
            The Markdown text of the page after it has been passed through
            ``truncate_content`` with a limit of 10000 (presumably a cap on
            the output length; confirm against smolagents), or a
            human-readable error message when the request times out, fails,
            or any other exception occurs.

        Raises:
            ImportError: If ``requests``, ``markdownify`` or
                ``smolagents.utils`` cannot be imported.
        """
        # Dependencies are imported lazily so that a missing package surfaces
        # as an actionable installation hint when the tool is actually used.
        try:
            import requests
            from markdownify import markdownify
            from requests.exceptions import RequestException

            from smolagents.utils import truncate_content
        except ImportError as import_error:
            raise ImportError(
                "You must install packages `markdownify` and `requests` to run this tool: for instance run `pip install markdownify requests`."
            ) from import_error

        try:
            # GET the page, waiting at most 20 seconds; HTTP error statuses
            # are turned into exceptions by raise_for_status().
            resp = requests.get(url, timeout=20)
            resp.raise_for_status()

            # HTML -> Markdown, trimmed, with runs of three or more newlines
            # collapsed down to a single blank line.
            page_markdown = markdownify(resp.text)
            collapsed = re.sub(r"\n{3,}", "\n\n", page_markdown.strip())

            return truncate_content(collapsed, 10000)
        except requests.exceptions.Timeout:
            # Must precede the RequestException handler, which would
            # otherwise also match timeouts.
            return "The request timed out. Please try again later or check the URL."
        except RequestException as request_error:
            return f"Error fetching the webpage: {str(request_error)}"
        except Exception as unexpected_error:
            # Last-resort boundary: the caller always receives a string.
            return f"An unexpected error occurred: {str(unexpected_error)}"