WeixuanYuan committed on
Commit
c0c17c4
1 Parent(s): ae1bdf7

Upload build_instrument.py

webUI/natural_language_guided/build_instrument.py CHANGED
@@ -4,13 +4,42 @@ import torch
 import gradio as gr
 import mido
 from io import BytesIO
-import pyrubberband as pyrb
 
 from model.DiffSynthSampler import DiffSynthSampler
 from tools import adsr_envelope, adjust_audio_length
 from webUI.natural_language_guided.track_maker import DiffSynth
 from webUI.natural_language_guided.utils import encodeBatch2GradioOutput_STFT, phase_to_Gradio_image, \
     spectrogram_to_Gradio_image
+import torchaudio.transforms as transforms
+
+
+def time_stretch_audio(waveform, sample_rate, stretch_factor):
+    # If the input is a numpy array, convert it to a torch.Tensor
+    if isinstance(waveform, np.ndarray):
+        waveform = torch.from_numpy(waveform)
+
+    # Make sure the waveform is torch.float32
+    waveform = waveform.to(torch.float32)
+
+    # Set up the STFT parameters
+    n_fft = 2048  # STFT window size
+    hop_length = n_fft // 4  # hop length set to a quarter of n_fft
+
+    # Compute the short-time Fourier transform (STFT)
+    stft = torch.stft(waveform, n_fft=n_fft, hop_length=hop_length, return_complex=True)
+
+    # Create the TimeStretch transform
+    time_stretch = transforms.TimeStretch(hop_length=hop_length, n_freq=1025, fixed_rate=False)
+
+    print(stft.shape)
+    # Apply the time stretch
+    stretched_stft = time_stretch(stft, stretch_factor)
+
+    # Convert the stretched STFT back to a time-domain waveform
+    stretched_waveform = torch.istft(stretched_stft, n_fft=n_fft, hop_length=hop_length)
+
+    # Return the processed waveform as a numpy array
+    return stretched_waveform.detach().numpy()
 
 
 def get_build_instrument_module(gradioWebUI, virtual_instruments_state):
@@ -154,7 +183,8 @@ def get_build_instrument_module(gradioWebUI, virtual_instruments_state):
    sample_rate, signal = virtual_instrument["signal"]
 
    s = 3 / duration
-   applied_signal = pyrb.time_stretch(signal, sample_rate, s)
+   # applied_signal = pyrb.time_stretch(signal, sample_rate, s)
+   applied_signal = time_stretch_audio(signal, sample_rate, s)
    applied_signal = adjust_audio_length(applied_signal, int((duration+1) * sample_rate), sample_rate, sample_rate)
 
    D = librosa.stft(applied_signal, n_fft=1024, hop_length=256, win_length=1024)[1:, :]
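A minimal standalone sketch of the same STFT -> TimeStretch -> iSTFT round trip that the new time_stretch_audio performs; the 16 kHz sine input and the 1.5 stretch rate are illustrative values only, not part of the commit:

import numpy as np
import torch
import torchaudio.transforms as transforms

# Illustrative input: one second of a 440 Hz sine at 16 kHz.
sample_rate = 16000
t = np.linspace(0, 1.0, sample_rate, endpoint=False)
waveform = torch.from_numpy(0.5 * np.sin(2 * np.pi * 440 * t)).to(torch.float32)

# Same pipeline as time_stretch_audio above.
n_fft = 2048
hop_length = n_fft // 4
stft = torch.stft(waveform, n_fft=n_fft, hop_length=hop_length, return_complex=True)

stretcher = transforms.TimeStretch(hop_length=hop_length, n_freq=n_fft // 2 + 1)
# A rate > 1 shortens the audio, a rate < 1 lengthens it.
stretched_stft = stretcher(stft, 1.5)

stretched = torch.istft(stretched_stft, n_fft=n_fft, hop_length=hop_length)
print(waveform.shape, stretched.shape)  # output is roughly 1/1.5 of the original length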