"""Export a random subset of a Hugging Face speech dataset as WAV files.

The script loads the training split of the dataset, draws a random sample
of row indices without replacement, and writes each selected clip to
``<OUTPUT_DIR>/<n>.wav``, where ``n`` is the clip's position in the sample
(0-based), not its index in the dataset.
"""

import os
from random import sample

import soundfile as sf
from datasets import load_dataset

# Alternative source that was used previously: "keithito/lj_speech".
DATASET_NAME = "parler-tts/mls_eng"

# NOTE(review): the output directory name suggests 10k clips, but the sample
# size is 100000 -- confirm which one is intended.
NUM_SAMPLES = 100000
OUTPUT_DIR = "mls10keng"

dataset = load_dataset(DATASET_NAME, split="train")
print(dataset)

# sample() accepts a range directly, so there is no need to materialise a
# list of every index. Clamp the sample size so a dataset smaller than
# NUM_SAMPLES does not make sample() raise ValueError.
indices = sample(range(len(dataset)), k=min(NUM_SAMPLES, len(dataset)))

# sf.write() does not create missing parent directories.
os.makedirs(OUTPUT_DIR, exist_ok=True)

for i, index in enumerate(indices):
    audio = dataset[index]["audio"]
    wav, sr = audio["array"], audio["sampling_rate"]
    sf.write(f"{OUTPUT_DIR}/{i}.wav", wav, sr)