liuhua
liuhua
committed on
Commit
·
311da71
1
Parent(s):
1635b00
Fix potential SSRF attack vulnerability (#4334)
Browse files
### What problem does this PR solve?
Fix potential SSRF attack vulnerability
### Type of change
- [x] Bug Fix (non-breaking change which fixes an issue)
Co-authored-by: liuhua <[email protected]>
- agent/component/crawler.py +1 -1
- api/utils/web_utils.py +24 -1
agent/component/crawler.py
CHANGED
@@ -41,7 +41,7 @@ class Crawler(ComponentBase, ABC):
|
|
41 |
ans = self.get_input()
|
42 |
ans = " - ".join(ans["content"]) if "content" in ans else ""
|
43 |
if not is_valid_url(ans):
|
44 |
-
return Crawler.be_output("")
|
45 |
try:
|
46 |
result = asyncio.run(self.get_web(ans))
|
47 |
|
|
|
41 |
ans = self.get_input()
|
42 |
ans = " - ".join(ans["content"]) if "content" in ans else ""
|
43 |
if not is_valid_url(ans):
|
44 |
+
return Crawler.be_output("URL not valid")
|
45 |
try:
|
46 |
result = asyncio.run(self.get_web(ans))
|
47 |
|
api/utils/web_utils.py
CHANGED
@@ -1,4 +1,7 @@
|
|
1 |
import re
|
|
|
|
|
|
|
2 |
import json
|
3 |
import base64
|
4 |
|
@@ -76,5 +79,25 @@ def __get_pdf_from_html(
|
|
76 |
return base64.b64decode(result["data"])
|
77 |
|
78 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
79 |
def is_valid_url(url: str) -> bool:
|
80 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import re
|
2 |
+
import socket
|
3 |
+
from urllib.parse import urlparse
|
4 |
+
import ipaddress
|
5 |
import json
|
6 |
import base64
|
7 |
|
|
|
79 |
return base64.b64decode(result["data"])
|
80 |
|
81 |
|
82 |
+
def is_private_ip(ip: str) -> bool:
    """Return True when *ip* is an address a server-side fetch must not reach.

    Besides the ranges ``ipaddress`` classifies as private, this also flags
    loopback, link-local, multicast, reserved and unspecified addresses, and
    anything that is not globally reachable. The last check matters because
    ``is_private`` is False for the 100.64.0.0/10 shared address space, which
    would otherwise slip past an SSRF filter.

    Args:
        ip: Textual IPv4 or IPv6 address.

    Returns:
        True if the address is non-public; False if it is globally routable
        or if *ip* cannot be parsed as an IP address at all.
    """
    try:
        ip_obj = ipaddress.ip_address(ip)
    except ValueError:
        # Not an IP literal: keep the historical "not private" answer so
        # existing callers see no change for unparseable input.
        return False

    return (
        ip_obj.is_private
        or ip_obj.is_loopback
        or ip_obj.is_link_local
        or ip_obj.is_multicast
        or ip_obj.is_reserved
        or ip_obj.is_unspecified
        or not ip_obj.is_global
    )
|
88 |
+
|
89 |
def is_valid_url(url: str) -> bool:
    """Return True if *url* is an http(s) URL whose host resolves only to public addresses.

    The check is a guard against server-side request forgery: the URL must
    look like an http/https URL, must carry a hostname, and every address the
    hostname resolves to (IPv4 and IPv6) must pass ``is_private_ip``. Any
    parsing or resolution failure is treated as "not valid".

    NOTE(review): the host is resolved here and presumably again by whatever
    performs the actual request, so a DNS answer that changes in between is
    not covered by this check.

    Args:
        url: The URL to validate.

    Returns:
        True when the URL is well-formed and none of its resolved addresses
        is private/internal; False otherwise.
    """
    # re.match only anchors at the start, so this validates the URL prefix.
    if not re.match(r"(https?)://[-A-Za-z0-9+&@#/%?=~_|!:,.;]+[-A-Za-z0-9+&@#/%=~_|]", url):
        return False

    try:
        hostname = urlparse(url).hostname
    except ValueError:
        # urlparse rejects malformed netlocs such as an unbalanced "[".
        return False
    if not hostname:
        return False

    try:
        # getaddrinfo returns every A/AAAA record; gethostbyname would only
        # return a single IPv4 address and let the other records go unchecked.
        addr_infos = socket.getaddrinfo(hostname, None, type=socket.SOCK_STREAM)
    except (OSError, UnicodeError):
        # OSError covers socket.gaierror; UnicodeError is raised for host
        # names with empty or over-long labels during IDNA encoding.
        return False
    if not addr_infos:
        return False

    for addr_info in addr_infos:
        # sockaddr[0] is the textual address; drop any IPv6 "%scope" suffix
        # so the address can be parsed.
        address = addr_info[4][0].split("%", 1)[0]
        if is_private_ip(address):
            return False
    return True
|