Audio-Text-to-Text
Transformers
Safetensors
qwen2_audio
text2text-generation
Inference Endpoints
franken committed on
Commit
8805704
·
verified ·
1 Parent(s): c1773c1

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +3 -2
README.md CHANGED
@@ -53,8 +53,9 @@ model = Qwen2AudioForConditionalGeneration.from_pretrained(model_name, torch_dty
53
  # Load example audio
54
  wav_path = "test-mini-audios/3fe64f3d-282c-4bc8-a753-68f8f6c35652.wav" # from MMAU dataset
55
  waveform, sampling_rate = torchaudio.load(wav_path)
56
- assert sampling_rate == 16000
57
- audios = [waveform.numpy()]
 
58
 
59
  # Make prompt text
60
  question = "Based on the given audio, identify the source of the speaking voice."
 
53
  # Load example audio
54
  wav_path = "test-mini-audios/3fe64f3d-282c-4bc8-a753-68f8f6c35652.wav" # from MMAU dataset
55
  waveform, sampling_rate = torchaudio.load(wav_path)
56
+ if sampling_rate != 16000:
57
+ waveform = torchaudio.transforms.Resample(orig_freq=sampling_rate, new_freq=16000)(waveform)
58
+ audios = [waveform[0].numpy()]
59
 
60
  # Make prompt text
61
  question = "Based on the given audio, identify the source of the speaking voice."