psynote123 committed
Commit e7d22c8 · verified · 1 Parent(s): 5e02d06

Update README.md

Files changed (1): README.md (+6 −6)
README.md CHANGED
@@ -49,6 +49,7 @@ To infer our MusicGen models, you primarily use the `elastic_models.transformers`
```python
import torch
import scipy.io.wavfile
+
from transformers import AutoProcessor
from elastic_models.transformers import MusicgenForConditionalGeneration

@@ -57,7 +58,7 @@ elastic_mode = "S"

prompt = "A groovy funk bassline with a tight drum beat"
output_wav_path = "generated_audio_elastic_S.wav"
- hf_token = "YOUR_HF_TOKEN"
+ hf_token = "YOUR_TOKEN"
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

processor = AutoProcessor.from_pretrained(model_name_hf, token=hf_token)
@@ -65,7 +66,7 @@ processor = AutoProcessor.from_pretrained(model_name_hf, token=hf_token)
model = MusicgenForConditionalGeneration.from_pretrained(
    model_name_hf,
    token=hf_token,
-     torch_dtype=torch.float16,
+     torch_dtype=torch.float16,  # Or float32, matching compilation
    mode=elastic_mode,
    device=device,
    __full_patch=True,
@@ -83,17 +84,16 @@ print(f"Generating audio for: {prompt}...")
generate_kwargs = {"do_sample": True, "guidance_scale": 3.0, "max_new_tokens": 256, "cache_implementation": "paged"}

audio_values = model.generate(**inputs, **generate_kwargs)
- audio_values_np = audio_values.cpu().numpy().squeeze()
+ audio_values_np = audio_values.to(torch.float32).cpu().numpy().squeeze()

sampling_rate = model.config.audio_encoder.sampling_rate
scipy.io.wavfile.write(output_wav_path, rate=sampling_rate, data=audio_values_np)
print(f"Audio saved to {output_wav_path}")
-
```

__System requirements:__
- * GPUs: NVIDIA H100, L40S (recommended for compiled models).
- * CPU: AMD, Intel (for running processor/tokenizer, inference on CPU is slow for MusicGen)
+ * GPUs: NVIDIA H100, L40S.
+ * CPU: AMD, Intel
* Python: 3.8-3.11 (check dependencies for specific versions)

To work with our elastic models and compilation tools, you'll need to install `elastic_models` and `qlip` libraries from TheStage:
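The `.to(torch.float32)` cast added in this commit is what keeps the `scipy.io.wavfile.write` call working when the model runs in half precision: WAV files store 32-bit (or 64-bit) IEEE float samples, so a float16 array has to be upcast before it is written. Below is a minimal standalone sketch of that step, not taken from the README; it uses a synthetic tone in place of real MusicGen output, and the 32 kHz rate mirrors MusicGen's audio encoder but is otherwise just an assumption for the example.

```python
import numpy as np
import scipy.io.wavfile

# One second of a 440 Hz sine, stored in half precision to mimic a float16 model output.
sampling_rate = 32000
t = np.arange(sampling_rate) / sampling_rate
tone_fp16 = np.sin(2 * np.pi * 440 * t).astype(np.float16)

# Upcast before writing: 32-bit float is a standard WAV sample format, 16-bit float is not,
# so passing the half-precision array directly would give an error or an unreadable file.
scipy.io.wavfile.write("tone_float32.wav", rate=sampling_rate, data=tone_fp16.astype(np.float32))
```

The same reasoning applies to the README snippet itself: `audio_values.float().cpu().numpy()` would be an equivalent way to express the cast on the PyTorch side.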