import json
from collections.abc import Mapping
from itertools import islice

from datasets import load_dataset
# Peek at the first few streamed examples of the corpus and print their fields.
DATASET_NAME = "asierhv/composite_corpus_eu_v2.1"
NUM_EXAMPLES = 3

# Raw byte buffers are summarised instead of being dumped to the terminal.
_BINARY_TYPES = (bytes, bytearray, memoryview)


def take_examples(dataset, count):
    """Return the first *count* items of the iterable *dataset* as a list.

    ``islice`` stops as soon as *count* items have been produced, so the
    stream is never asked for an extra item that would only be discarded
    (an ``enumerate`` + ``break`` loop pulls one item too many).
    """
    return list(islice(dataset, count))


def describe_example(example):
    """Yield the printable lines describing one example mapping.

    Every ``key: value`` pair becomes one line. The ``"audio"`` entry is
    expanded field by field when it is a mapping, with byte buffers replaced
    by a ``<binary data>`` placeholder.

    NOTE(review): an ``"audio"`` value that is not a mapping is printed like
    any other field instead of failing on ``.keys()``; confirm which shape the
    installed ``datasets`` version actually returns for decoded audio.
    """
    for key, value in example.items():
        if key == "audio" and isinstance(value, Mapping):
            yield f"audio keys: {value.keys()}"
            for audio_key, audio_value in value.items():
                if isinstance(audio_value, _BINARY_TYPES):
                    yield f" {audio_key}: <binary data>"
                else:
                    yield f" {audio_key}: {audio_value}"
        else:
            yield f"{key}: {value}"


def main():
    """Stream the training split and print the first ``NUM_EXAMPLES`` examples."""
    # streaming=True iterates the split lazily rather than materialising it.
    dataset = load_dataset(DATASET_NAME, split="train", streaming=True)
    examples = take_examples(dataset, NUM_EXAMPLES)

    for index, example in enumerate(examples, start=1):
        print(f"\nExample {index}:")
        for line in describe_example(example):
            print(line)


if __name__ == "__main__":
    main()