Spaces:
Running
on
Zero
Running
on
Zero
# Export a random subset of GigaSpeech utterances as individual WAV files.
#
# The script loads the "xl" configuration of speechcolab/gigaspeech (train
# split), draws up to NUM_SAMPLES distinct row indices at random, and writes
# each selected clip to OUTPUT_DIR/<row index>.wav at the sampling rate
# reported by the dataset.

import os
from random import sample

import soundfile as sf
from datasets import load_dataset

# Directory that receives the exported clips (relative to the working dir).
OUTPUT_DIR = "gigaspeech"
# Upper bound on the number of clips to export.
NUM_SAMPLES = 100000

# Alternative corpora that were used with this script previously:
# dataset = load_dataset("keithito/lj_speech", split="train")
# dataset = load_dataset("parler-tts/mls_eng", split="train")
# NOTE(review): token=True presumably makes `datasets` use the locally stored
# Hugging Face credentials (needed if the dataset is gated) -- confirm.
dataset = load_dataset("speechcolab/gigaspeech", "xl", split="train", token=True)
print(dataset)

# Sample directly from a range: no need to materialise every index in a list.
# Clamp the sample size so a dataset smaller than NUM_SAMPLES does not make
# random.sample raise ValueError.
indices = sample(range(len(dataset)), k=min(NUM_SAMPLES, len(dataset)))

# soundfile does not create missing parent directories, so make sure the
# target directory exists before the first write.
os.makedirs(OUTPUT_DIR, exist_ok=True)

for idx in indices:
    audio = dataset[idx]["audio"]
    wav, sr = audio["array"], audio["sampling_rate"]
    # Files are named after the dataset row index so they can be traced back.
    sf.write(f"{OUTPUT_DIR}/{idx}.wav", wav, sr)