Update RepoPipeline.py
Browse files
- RepoPipeline.py +3 -3
RepoPipeline.py
CHANGED
@@ -113,7 +113,7 @@ def extract_information(repos, headers=None):
|
|
113 |
with tarfile.open(fileobj=response.raw, mode="r|gz") as tar:
|
114 |
for member in tar:
|
115 |
# 2. Extracting codes and docs.
|
116 |
-
if
|
117 |
try:
|
118 |
file_content = tar.extractfile(member).read().decode("utf-8")
|
119 |
# extract_code_and_docs
|
@@ -127,7 +127,7 @@ def extract_information(repos, headers=None):
|
|
127 |
except SyntaxError as e:
|
128 |
tqdm.write(f"[-] SyntaxError in {member.name}, skipping: \n{e}")
|
129 |
# 3. Extracting readme.
|
130 |
-
elif (
|
131 |
try:
|
132 |
file_content = tar.extractfile(member).read().decode("utf-8")
|
133 |
# extract readme
|
@@ -141,7 +141,7 @@ def extract_information(repos, headers=None):
|
|
141 |
except SyntaxError as e:
|
142 |
tqdm.write(f"[-] SyntaxError in {member.name}, skipping: \n{e}")
|
143 |
# 4. Extracting requirements.
|
144 |
-
elif
|
145 |
try:
|
146 |
lines = tar.extractfile(member).readlines().decode("utf-8")
|
147 |
# extract readme
|
|
|
113 |
with tarfile.open(fileobj=response.raw, mode="r|gz") as tar:
|
114 |
for member in tar:
|
115 |
# 2. Extracting codes and docs.
|
116 |
+
if member.name.endswith(".py") and member.isfile():
|
117 |
try:
|
118 |
file_content = tar.extractfile(member).read().decode("utf-8")
|
119 |
# extract_code_and_docs
|
|
|
127 |
except SyntaxError as e:
|
128 |
tqdm.write(f"[-] SyntaxError in {member.name}, skipping: \n{e}")
|
129 |
# 3. Extracting readme.
|
130 |
+
elif (member.name == "README.md" or member.name == "README.rst") and member.isfile():
|
131 |
try:
|
132 |
file_content = tar.extractfile(member).read().decode("utf-8")
|
133 |
# extract readme
|
|
|
141 |
except SyntaxError as e:
|
142 |
tqdm.write(f"[-] SyntaxError in {member.name}, skipping: \n{e}")
|
143 |
# 4. Extracting requirements.
|
144 |
+
elif member.name == "requirements.txt" and member.isfile():
|
145 |
try:
|
146 |
lines = tar.extractfile(member).readlines().decode("utf-8")
|
147 |
# extract readme
|