Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
|
@@ -16,13 +16,16 @@ def link_find(url):
|
|
| 16 |
rawt=soup.text
|
| 17 |
#out.append(rawp)
|
| 18 |
#out.append("HTML fragments: ")
|
| 19 |
-
node1 = {"URL":url,"TITLE":soup.title,"STRING":soup.description,"TEXT":rawt,"TREE":[]}
|
| 20 |
-
node2 = {"URL":url,"TREE":[]}
|
| 21 |
|
| 22 |
q=("a","p","span","content","article")
|
| 23 |
for p in soup.find_all("a"):
|
| 24 |
-
node1['
|
| 25 |
-
|
|
|
|
|
|
|
|
|
|
| 26 |
#out.append({"URL":p.get('href'),"TITLE":p.get('title'),"STRING":p.string,"TEXT":"","TREE":[]})
|
| 27 |
|
| 28 |
else:
|
|
|
|
| 16 |
rawt=soup.text
|
| 17 |
#out.append(rawp)
|
| 18 |
#out.append("HTML fragments: ")
|
| 19 |
+
node1 = {"URL":url,"TITLE":soup.title,"STRING":soup.description,"TEXT":rawt,"LINKS":[],"TREE":[]}
|
| 20 |
+
node2 = {"URL":url,"LINKS":[],"TREE":[]}
|
| 21 |
|
| 22 |
q=("a","p","span","content","article")
|
| 23 |
for p in soup.find_all("a"):
|
| 24 |
+
node1['LINKS'].append(p.get('href'))
|
| 25 |
+
node1['TREE'].append({"URL":p.get('href'),"TITLE":p.get('title'),"STRING":p.string,"TEXT":"","LINKS":[],"TREE":[]})
|
| 26 |
+
node2['TREE'].append({"URL":p.get('href'),"LINKS":[],"TREE":[]})
|
| 27 |
+
node2['LINKS'].append(p.get('href'))
|
| 28 |
+
|
| 29 |
#out.append({"URL":p.get('href'),"TITLE":p.get('title'),"STRING":p.string,"TEXT":"","TREE":[]})
|
| 30 |
|
| 31 |
else:
|