Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
|
@@ -15,12 +15,14 @@ def link_find(url):
|
|
| 15 |
rawt=soup.text
|
| 16 |
#out.append(rawp)
|
| 17 |
#out.append("HTML fragments: ")
|
|
|
|
|
|
|
| 18 |
q=("a","p","span","content","article")
|
| 19 |
for p in soup.find_all("a"):
|
| 20 |
-
|
| 21 |
else:
|
| 22 |
pass
|
| 23 |
-
return
|
| 24 |
#https://huggingface.co/spaces/Omnibus/crawl
|
| 25 |
|
| 26 |
def sitemap(url,level):
|
|
|
|
| 15 |
rawt=soup.text
|
| 16 |
#out.append(rawp)
|
| 17 |
#out.append("HTML fragments: ")
|
| 18 |
+
node1 = ({"URL":url,"TITLE":soup.title,"STRING":soup.description,"TEXT":rawt,"TREE":[]})
|
| 19 |
+
|
| 20 |
q=("a","p","span","content","article")
|
| 21 |
for p in soup.find_all("a"):
|
| 22 |
+
node1['TREE'].append({"URL":p.get('href'),"TITLE":p.get('title'),"STRING":p.string,"TEXT":"","TREE":[]})
|
| 23 |
else:
|
| 24 |
pass
|
| 25 |
+
return node1
|
| 26 |
#https://huggingface.co/spaces/Omnibus/crawl
|
| 27 |
|
| 28 |
def sitemap(url,level):
|