"""Preview the first few records of every JSON / JSON Lines dump file.

Each ``.json`` / ``.jsonl`` file directly inside ``DUMP_PATH`` is read line by
line (one JSON object per line is expected) and, for the leading lines, the
record's ``title`` and the length of its ``text`` are printed.
"""

import json
import os

DUMP_PATH = "/media/daniel/HDD/output"

# Index of the last line inspected in each file: lines 0..5, i.e. six lines.
_LAST_LINE_INDEX = 5


def _preview_file(path, last_index=_LAST_LINE_INDEX):
    """Print title and text length for the leading records of one file.

    Args:
        path: Path of a file holding one JSON object per line.
        last_index: Zero-based index of the last line to inspect.

    Raises:
        json.JSONDecodeError: If a non-blank inspected line is not valid JSON.
    """
    with open(path, "r", encoding="utf-8") as f:
        for i, line in enumerate(f):
            # Blank lines (e.g. a trailing newline at the end of the dump) are
            # not records; json.loads would raise on them, so skip parsing.
            if line.strip():
                data = json.loads(line)
                # "text" may be absent or explicitly null; both count as empty.
                text = data.get("text") or ""
                print(i, data.get("title"), "Text length:", len(text))
            # Checked for every line, blank or not, so the inspected window
            # is always the first ``last_index + 1`` physical lines.
            if i >= last_index:
                break


def main(dump_path=DUMP_PATH):
    """Preview every ``.json`` / ``.jsonl`` file found in *dump_path*.

    Args:
        dump_path: Directory to scan (non-recursive). Defaults to DUMP_PATH.
    """
    for file in os.listdir(dump_path):
        # str.endswith accepts a tuple of suffixes: one call instead of an
        # ``or`` chain.
        if file.endswith((".json", ".jsonl")):
            _preview_file(os.path.join(dump_path, file))


if __name__ == "__main__":
    main()