Spaces:
Running
Running
admin
committed on
Commit
·
b027a5c
1
Parent(s):
ee49bef
sync ms
Browse files
- app.py +57 -54
- requirements.txt +5 -3
- utils.py +50 -10
app.py
CHANGED
@@ -9,18 +9,16 @@ import librosa.display
|
|
9 |
import matplotlib.pyplot as plt
|
10 |
from collections import Counter
|
11 |
from model import EvalNet
|
12 |
-
from utils import
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
TEMP_DIR = "./__pycache__/tmp"
|
23 |
-
SAMPLE_RATE = 22050
|
24 |
|
25 |
|
26 |
def wav2mel(audio_path: str, width=1.6, topdb=40):
|
@@ -99,34 +97,38 @@ def most_common_element(input_list: list):
|
|
99 |
|
100 |
|
101 |
def infer(wav_path: str, log_name: str, folder_path=TEMP_DIR):
|
102 |
-
|
103 |
-
|
|
|
|
|
|
|
104 |
|
105 |
-
|
106 |
-
|
107 |
|
108 |
-
|
109 |
-
|
110 |
-
try:
|
111 |
model = EvalNet(log_name, len(TRANSLATE)).model
|
112 |
eval("wav2%s" % spec)(wav_path)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
113 |
|
114 |
except Exception as e:
|
115 |
-
|
116 |
-
|
117 |
-
outputs = []
|
118 |
-
all_files = os.listdir(folder_path)
|
119 |
-
for file_name in all_files:
|
120 |
-
if file_name.lower().endswith(".jpg"):
|
121 |
-
file_path = os.path.join(folder_path, file_name)
|
122 |
-
input = embed_img(file_path)
|
123 |
-
output: torch.Tensor = model(input)
|
124 |
-
pred_id = torch.max(output.data, 1)[1]
|
125 |
-
outputs.append(int(pred_id))
|
126 |
|
127 |
-
|
128 |
-
shutil.rmtree(folder_path)
|
129 |
-
return os.path.basename(wav_path), TRANSLATE[CLASSES[max_count_item]]
|
130 |
|
131 |
|
132 |
if __name__ == "__main__":
|
@@ -141,36 +143,37 @@ if __name__ == "__main__":
|
|
141 |
gr.Interface(
|
142 |
fn=infer,
|
143 |
inputs=[
|
144 |
-
gr.Audio(label="
|
145 |
-
gr.Dropdown(choices=models, label="
|
146 |
],
|
147 |
outputs=[
|
148 |
-
gr.Textbox(label="
|
149 |
-
gr.Textbox(label="
|
|
|
150 |
],
|
151 |
examples=examples,
|
152 |
cache_examples=False,
|
153 |
allow_flagging="never",
|
154 |
-
title="
|
155 |
)
|
156 |
|
157 |
gr.Markdown(
|
158 |
-
""
|
159 |
-
|
160 |
-
```bibtex
|
161 |
-
@article{Zhou-2025,
|
162 |
-
|
163 |
-
|
164 |
-
|
165 |
-
|
166 |
-
|
167 |
-
|
168 |
-
|
169 |
-
|
170 |
-
|
171 |
-
|
172 |
-
}
|
173 |
-
```"""
|
174 |
)
|
175 |
|
176 |
demo.launch()
|
|
|
9 |
import matplotlib.pyplot as plt
|
10 |
from collections import Counter
|
11 |
from model import EvalNet
|
12 |
+
from utils import (
|
13 |
+
get_modelist,
|
14 |
+
find_wav_files,
|
15 |
+
embed_img,
|
16 |
+
_L,
|
17 |
+
SAMPLE_RATE,
|
18 |
+
TEMP_DIR,
|
19 |
+
TRANSLATE,
|
20 |
+
CLASSES,
|
21 |
+
)
|
|
|
|
|
22 |
|
23 |
|
24 |
def wav2mel(audio_path: str, width=1.6, topdb=40):
|
|
|
97 |
|
98 |
|
99 |
def infer(wav_path: str, log_name: str, folder_path=TEMP_DIR):
|
100 |
+
status = "Success"
|
101 |
+
filename = result = None
|
102 |
+
try:
|
103 |
+
if os.path.exists(folder_path):
|
104 |
+
shutil.rmtree(folder_path)
|
105 |
|
106 |
+
if not wav_path:
|
107 |
+
raise ValueError("请输入音频!")
|
108 |
|
109 |
+
spec = log_name.split("_")[-3]
|
110 |
+
os.makedirs(folder_path, exist_ok=True)
|
|
|
111 |
model = EvalNet(log_name, len(TRANSLATE)).model
|
112 |
eval("wav2%s" % spec)(wav_path)
|
113 |
+
outputs = []
|
114 |
+
all_files = os.listdir(folder_path)
|
115 |
+
for file_name in all_files:
|
116 |
+
if file_name.lower().endswith(".jpg"):
|
117 |
+
file_path = os.path.join(folder_path, file_name)
|
118 |
+
input = embed_img(file_path)
|
119 |
+
output: torch.Tensor = model(input)
|
120 |
+
pred_id = torch.max(output.data, 1)[1]
|
121 |
+
outputs.append(int(pred_id))
|
122 |
+
|
123 |
+
max_count_item = most_common_element(outputs)
|
124 |
+
shutil.rmtree(folder_path)
|
125 |
+
filename = os.path.basename(wav_path)
|
126 |
+
result = TRANSLATE[CLASSES[max_count_item]]
|
127 |
|
128 |
except Exception as e:
|
129 |
+
status = f"{e}"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
130 |
|
131 |
+
return status, filename, result
|
|
|
|
|
132 |
|
133 |
|
134 |
if __name__ == "__main__":
|
|
|
143 |
gr.Interface(
|
144 |
fn=infer,
|
145 |
inputs=[
|
146 |
+
gr.Audio(label=_L("上传录音 (>40dB)"), type="filepath"),
|
147 |
+
gr.Dropdown(choices=models, label=_L("选择模型"), value=models[0]),
|
148 |
],
|
149 |
outputs=[
|
150 |
+
gr.Textbox(label=_L("状态栏"), show_copy_button=True),
|
151 |
+
gr.Textbox(label=_L("音频文件名"), show_copy_button=True),
|
152 |
+
gr.Textbox(label=_L("唱法识别"), show_copy_button=True),
|
153 |
],
|
154 |
examples=examples,
|
155 |
cache_examples=False,
|
156 |
allow_flagging="never",
|
157 |
+
title=_L("建议录音时长保持在 5s 左右, 过长会影响识别效率"),
|
158 |
)
|
159 |
|
160 |
gr.Markdown(
|
161 |
+
f"# {_L('引用')}"
|
162 |
+
+ """
|
163 |
+
```bibtex
|
164 |
+
@article{Zhou-2025,
|
165 |
+
author = {Monan Zhou and Shenyang Xu and Zhaorui Liu and Zhaowen Wang and Feng Yu and Wei Li and Baoqiang Han},
|
166 |
+
title = {CCMusic: An Open and Diverse Database for Chinese Music Information Retrieval Research},
|
167 |
+
journal = {Transactions of the International Society for Music Information Retrieval},
|
168 |
+
volume = {8},
|
169 |
+
number = {1},
|
170 |
+
pages = {22--38},
|
171 |
+
month = {Mar},
|
172 |
+
year = {2025},
|
173 |
+
url = {https://doi.org/10.5334/tismir.194},
|
174 |
+
doi = {10.5334/tismir.194}
|
175 |
+
}
|
176 |
+
```"""
|
177 |
)
|
178 |
|
179 |
demo.launch()
|
requirements.txt
CHANGED
@@ -1,5 +1,7 @@
|
|
1 |
-
torch
|
2 |
-
|
|
|
|
|
3 |
librosa
|
4 |
matplotlib
|
5 |
-
|
|
|
1 |
+
torch==2.6.0+cu118
|
2 |
+
-f https://download.pytorch.org/whl/torch
|
3 |
+
torchvision==0.21.0+cu118
|
4 |
+
-f https://download.pytorch.org/whl/torchvision
|
5 |
librosa
|
6 |
matplotlib
|
7 |
+
modelscope[framework]==1.21.0
|
utils.py
CHANGED
@@ -1,10 +1,53 @@
|
|
1 |
import os
|
2 |
import torch
|
3 |
import torchvision.transforms as transforms
|
4 |
-
|
|
|
5 |
from PIL import Image
|
6 |
|
7 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8 |
|
9 |
|
10 |
def toCUDA(x):
|
@@ -27,19 +70,16 @@ def find_wav_files(folder_path=f"{MODEL_DIR}/examples"):
|
|
27 |
|
28 |
|
29 |
def get_modelist(model_dir=MODEL_DIR, assign_model=""):
|
30 |
-
try:
|
31 |
-
entries = os.listdir(model_dir)
|
32 |
-
except OSError as e:
|
33 |
-
print(f"Cannot access {model_dir}: {e}")
|
34 |
-
return
|
35 |
-
|
36 |
output = []
|
37 |
-
for entry in
|
|
|
38 |
full_path = os.path.join(model_dir, entry)
|
|
|
39 |
if entry == ".git" or entry == "examples":
|
40 |
-
print(f"
|
41 |
continue
|
42 |
|
|
|
43 |
if os.path.isdir(full_path):
|
44 |
model = os.path.basename(full_path)
|
45 |
if assign_model and assign_model.lower() in model:
|
|
|
1 |
import os
|
2 |
import torch
|
3 |
import torchvision.transforms as transforms
|
4 |
+
import huggingface_hub
|
5 |
+
import modelscope
|
6 |
from PIL import Image
|
7 |
|
8 |
+
|
9 |
+
EN_US = os.getenv("LANG") != "zh_CN.UTF-8"
|
10 |
+
|
11 |
+
ZH2EN = {
|
12 |
+
"上传录音 (>40dB)": "Upload a recording (>40dB)",
|
13 |
+
"选择模型": "Select a model",
|
14 |
+
"状态栏": "Status",
|
15 |
+
"音频文件名": "Audio filename",
|
16 |
+
"唱法识别": "Singing method recognition",
|
17 |
+
"建议录音时长保持在 5s 左右, 过长会影响识别效率": "It is recommended to keep the recording length around 5s, too long will affect the recognition efficiency.",
|
18 |
+
"引用": "Cite",
|
19 |
+
"男声 & 美声唱法": "Bel Canto, Male",
|
20 |
+
"女声 & 美声唱法": "Bel Canto, Female",
|
21 |
+
"男声 & 民族唱法": "Folk Singing, Male",
|
22 |
+
"女声 & 民族唱法": "Folk Singing, Female",
|
23 |
+
}
|
24 |
+
|
25 |
+
MODEL_DIR = (
|
26 |
+
huggingface_hub.snapshot_download(
|
27 |
+
"ccmusic-database/bel_canto",
|
28 |
+
cache_dir="./__pycache__",
|
29 |
+
)
|
30 |
+
if EN_US
|
31 |
+
else modelscope.snapshot_download(
|
32 |
+
"ccmusic-database/bel_canto",
|
33 |
+
cache_dir="./__pycache__",
|
34 |
+
)
|
35 |
+
)
|
36 |
+
|
37 |
+
|
38 |
+
def _L(zh_txt: str):
|
39 |
+
return ZH2EN[zh_txt] if EN_US else zh_txt
|
40 |
+
|
41 |
+
|
42 |
+
TRANSLATE = {
|
43 |
+
"m_bel": _L("男声 & 美声唱法"),
|
44 |
+
"f_bel": _L("女声 & 美声唱法"),
|
45 |
+
"m_folk": _L("男声 & 民族唱法"),
|
46 |
+
"f_folk": _L("女声 & 民族唱法"),
|
47 |
+
}
|
48 |
+
CLASSES = list(TRANSLATE.keys())
|
49 |
+
TEMP_DIR = "./__pycache__/tmp"
|
50 |
+
SAMPLE_RATE = 22050
|
51 |
|
52 |
|
53 |
def toCUDA(x):
|
|
|
70 |
|
71 |
|
72 |
def get_modelist(model_dir=MODEL_DIR, assign_model=""):
|
|
|
|
|
|
|
|
|
|
|
|
|
73 |
output = []
|
74 |
+
for entry in os.listdir(model_dir):
|
75 |
+
# 获取完整路径
|
76 |
full_path = os.path.join(model_dir, entry)
|
77 |
+
# 跳过'.git'文件夹
|
78 |
if entry == ".git" or entry == "examples":
|
79 |
+
print(f"跳过 .git 或 examples 文件夹: {full_path}")
|
80 |
continue
|
81 |
|
82 |
+
# 检查条目是文件还是目录
|
83 |
if os.path.isdir(full_path):
|
84 |
model = os.path.basename(full_path)
|
85 |
if assign_model and assign_model.lower() in model:
|