"""Convert a dataset's ``metadata.json`` into a pipe-delimited ``metadata.csv``.

Each JSON entry becomes one row of the form ``audio_path|text|speaker``.
"""

import csv
import json
import os

# Dataset directory holding metadata.json; metadata.csv is written next to it.
root = "/tmp/dataset/voxpopuli_es_500"

# Leading directory component stripped from every audio path, when present.
_DATASET_PREFIX = "voxpopuli_es_500/"


def _strip_dataset_prefix(path, prefix=_DATASET_PREFIX):
    """Return *path* without a leading *prefix*; other paths are unchanged."""
    if path.startswith(prefix):
        return path[len(prefix):]
    return path


def _single_line(text):
    """Return *text* with line breaks turned into spaces and ends trimmed.

    "\\r\\n" is handled before the single characters so a Windows line break
    becomes one space, and no raw carriage return can end up inside a row.
    """
    flattened = text.replace("\r\n", " ").replace("\r", " ").replace("\n", " ")
    return flattened.strip()


def convert_metadata(root):
    """Write ``<root>/metadata.csv`` from ``<root>/metadata.json``.

    The JSON file must contain a sequence of objects with the keys
    ``audio_filepath``, ``text`` and ``speaker``.

    Args:
        root: Directory that contains ``metadata.json``.

    Returns:
        The number of rows written.

    Raises:
        OSError: If either file cannot be opened.
        KeyError: If an entry lacks one of the required keys.
    """
    source_path = os.path.join(root, "metadata.json")
    target_path = os.path.join(root, "metadata.csv")

    with open(source_path, "r", encoding="utf-8") as f:
        data = json.load(f)

    # newline="" is required by the csv module: without it, platforms that
    # translate "\n" would turn the writer's "\r\n" terminator into "\r\r\n".
    with open(target_path, "w", encoding="utf-8", newline="") as f:
        # QUOTE_NONE + escapechar: fields are never quoted; a "|" inside a
        # field is written as "\|" instead.
        writer = csv.writer(
            f, delimiter="|", quoting=csv.QUOTE_NONE, escapechar="\\"
        )
        rows_written = 0
        for entry in data:
            writer.writerow(
                [
                    # Drop the "voxpopuli_es_500/" prefix if it is present.
                    _strip_dataset_prefix(entry["audio_filepath"]),
                    _single_line(entry["text"]),
                    entry["speaker"],
                ]
            )
            rows_written += 1

    return rows_written


if __name__ == "__main__":
    convert_metadata(root)