English
SyvaAI-Bv1 / test.py
danielgrims's picture
Upload folder using huggingface_hub
1026698 verified
import json
import os
DUMP_PATH = "/media/daniel/HDD/output"

# The preview covers line indices 0..5 of every file, i.e. the first six lines.
PREVIEW_LINES = 6


def preview_file(path, max_lines=PREVIEW_LINES):
    """Print the title and text length of the first records in one file.

    The file is read line by line and each line is parsed as a standalone
    JSON document (presumably the dump is line-delimited JSON -- a
    pretty-printed multi-line ``.json`` file will be reported as invalid
    lines rather than parsed).

    Args:
        path: Path of the file to preview.
        max_lines: Number of leading lines to inspect. Blank and invalid
            lines count towards this limit, so the printed index is always
            the zero-based line index within the file.
    """
    with open(path, "r", encoding="utf-8") as f:
        for i, line in enumerate(f):
            if i >= max_lines:
                break

            line = line.strip()
            if not line:
                # Blank lines (e.g. a trailing newline) are not records.
                continue

            try:
                data = json.loads(line)
            except json.JSONDecodeError as err:
                # Report and keep going so one bad line does not abort
                # the whole scan.
                print(i, "Invalid JSON:", err)
                continue

            if not isinstance(data, dict):
                # Only JSON objects have "title"/"text" fields.
                print(i, "Skipping non-object record of type", type(data).__name__)
                continue

            # `or ""` also covers an explicit null "text" value.
            print(i, data.get("title"), "Text length:", len(data.get("text") or ""))


def preview_dump(dump_path=DUMP_PATH, max_lines=PREVIEW_LINES):
    """Preview every ``.json`` / ``.jsonl`` file directly inside a directory.

    Args:
        dump_path: Directory to scan (not recursive).
        max_lines: Number of leading lines to inspect per file.

    Raises:
        FileNotFoundError: If ``dump_path`` does not exist.
    """
    # Sorted so the output order is reproducible; os.listdir order is arbitrary.
    for name in sorted(os.listdir(dump_path)):
        if not name.endswith((".json", ".jsonl")):
            continue
        path = os.path.join(dump_path, name)
        if not os.path.isfile(path):
            # A directory may carry a matching suffix; it cannot be opened.
            continue
        preview_file(path, max_lines)


if __name__ == "__main__":
    preview_dump()