Spaces:
Running
Running
import requests | |
from bs4 import BeautifulSoup | |
def get_url_content(url: str, timeout: float = 10.0) -> str:
    """
    Retrieve the text content of a URL.

    The page is fetched with a browser-like User-Agent header, parsed as HTML,
    and reduced to its text. This is a best-effort helper: any failure is
    reported with ``print`` and yields an empty string instead of an exception.

    :param url: The URL to retrieve content from.
    :param timeout: Seconds to wait for the server (connect / read) before
        giving up. Defaults to 10 seconds.
    :return: The stripped text of the page, or "" on a non-200 status or on
        any error.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"
    }
    try:
        # Without an explicit timeout, requests waits indefinitely on a
        # stalled server and the caller never gets control back.
        response = requests.get(url, headers=headers, timeout=timeout)
        if response.status_code != 200:
            print(f"Failed to retrieve content from {url}. Status code: {response.status_code} - {response.reason}")
            return ""
        # Parse the HTML content using BeautifulSoup.
        soup = BeautifulSoup(response.text, "html.parser")
        # get_text() walks the whole tree, so call it once; stripping an
        # empty result still gives "".
        return soup.get_text().strip()
    except Exception as e:
        # Deliberately broad: callers rely on "" for every failure mode
        # (network errors, timeouts, malformed URLs, parse problems).
        print(f"An error occurred while retrieving content from {url}: {e}")
        return ""