ORI-Muchim committed on
Commit
1f3f80b
1 Parent(s): e9d239b

Update mel_processing.py

Browse files
Files changed (1) hide show
  1. mel_processing.py +31 -4
mel_processing.py CHANGED
@@ -1,5 +1,17 @@
 
 
 
 
1
  import torch
 
 
2
  import torch.utils.data
 
 
 
 
 
 
3
  from librosa.filters import mel as librosa_mel_fn
4
 
5
  MAX_WAV_VALUE = 32768.0
@@ -52,9 +64,13 @@ def spectrogram_torch(y, n_fft, sampling_rate, hop_size, win_size, center=False)
52
  y = torch.nn.functional.pad(y.unsqueeze(1), (int((n_fft-hop_size)/2), int((n_fft-hop_size)/2)), mode='reflect')
53
  y = y.squeeze(1)
54
 
55
- spec = torch.stft(y, n_fft, hop_length=hop_size, win_length=win_size, window=hann_window[wnsize_dtype_device],
56
- center=center, pad_mode='reflect', normalized=False, onesided=True, return_complex=False)
57
-
 
 
 
 
58
  spec = torch.sqrt(spec.pow(2).sum(-1) + 1e-6)
59
  return spec
60
 
@@ -90,8 +106,19 @@ def mel_spectrogram_torch(y, n_fft, num_mels, sampling_rate, hop_size, win_size,
90
  y = torch.nn.functional.pad(y.unsqueeze(1), (int((n_fft-hop_size)/2), int((n_fft-hop_size)/2)), mode='reflect')
91
  y = y.squeeze(1)
92
 
93
- spec = torch.stft(y, n_fft, hop_length=hop_size, win_length=win_size, window=hann_window[wnsize_dtype_device],
 
 
 
 
94
  center=center, pad_mode='reflect', normalized=False, onesided=True)
 
 
 
 
 
 
 
95
 
96
  spec = torch.sqrt(spec.pow(2).sum(-1) + 1e-6)
97
 
 
1
+ import math
2
+ import os
3
+ from packaging import version
4
+ import random
5
  import torch
6
+ from torch import nn
7
+ import torch.nn.functional as F
8
  import torch.utils.data
9
+ import numpy as np
10
+ import librosa
11
+ import librosa.util as librosa_util
12
+ from librosa.util import normalize, pad_center, tiny
13
+ from scipy.signal import get_window
14
+ from scipy.io.wavfile import read
15
  from librosa.filters import mel as librosa_mel_fn
16
 
17
  MAX_WAV_VALUE = 32768.0
 
64
  y = torch.nn.functional.pad(y.unsqueeze(1), (int((n_fft-hop_size)/2), int((n_fft-hop_size)/2)), mode='reflect')
65
  y = y.squeeze(1)
66
 
67
+ if version.parse(torch.__version__) >= version.parse("2"):
68
+ spec = torch.stft(y, n_fft, hop_length=hop_size, win_length=win_size, window=hann_window[wnsize_dtype_device],
69
+ center=center, pad_mode='reflect', normalized=False, onesided=True, return_complex=False)
70
+ else:
71
+ spec = torch.stft(y, n_fft, hop_length=hop_size, win_length=win_size, window=hann_window[wnsize_dtype_device],
72
+ center=center, pad_mode='reflect', normalized=False, onesided=True)
73
+
74
  spec = torch.sqrt(spec.pow(2).sum(-1) + 1e-6)
75
  return spec
76
 
 
106
  y = torch.nn.functional.pad(y.unsqueeze(1), (int((n_fft-hop_size)/2), int((n_fft-hop_size)/2)), mode='reflect')
107
  y = y.squeeze(1)
108
 
109
+ if version.parse(torch.__version__) >= version.parse("2"):
110
+ spec = torch.stft(y, n_fft, hop_length=hop_size, win_length=win_size, window=hann_window[wnsize_dtype_device],
111
+ center=center, pad_mode='reflect', normalized=False, onesided=True, return_complex=False)
112
+ else:
113
+ spec = torch.stft(y, n_fft, hop_length=hop_size, win_length=win_size, window=hann_window[wnsize_dtype_device],
114
  center=center, pad_mode='reflect', normalized=False, onesided=True)
115
+ '''
116
+ #- reserve : from https://github.com/jaywalnut310/vits/issues/15#issuecomment-1084148441
117
+ with autocast(enabled=False):
118
+ y = y.float()
119
+ spec = torch.stft(y, n_fft, hop_length=hop_size, win_length=win_size, window=hann_window[wnsize_dtype_device],
120
+ center=center, pad_mode='reflect', normalized=False, onesided=True)
121
+ '''
122
 
123
  spec = torch.sqrt(spec.pow(2).sum(-1) + 1e-6)
124