cchaun committed
Commit a2148ac · 1 Parent(s): b63864d

improve model accuracy

.gitignore CHANGED
@@ -1,2 +1,3 @@
 venv
-__pycache__
+__pycache__
+flagged
app.py CHANGED
@@ -1,3 +1,4 @@
+# -*- coding: UTF-8 -*-
 import gradio as gr
 import torch, torchaudio
 from timeit import default_timer as timer
@@ -8,16 +9,21 @@ device = "cuda" if torch.cuda.is_available() else "cpu"
 SAMPLE_RATE = 44100
 AUDIO_LEN = 2.90
 
-model = torch.load("torch_efficientnet_b0_fold4.pth", map_location=torch.device('cpu'))
+model = torch.load("models/torch_efficientnet_fold2_CNN.pth", map_location=torch.device('cpu'))
 
 CHINESE_LABELS = [
     "大提琴", "單簧管", "長笛", "民謠吉他", "電吉他", "風琴", "鋼琴", "薩克斯風", "喇叭", "小提琴", "人聲"
 ]
 
+LABELS = [
+    "Cello", "Clarinet", "Flute", "Acoustic Guitar", "Electric Guitar", "Organ", "Piano", "Saxophone", "Trumpet", "Violin", "Voice"
+]
+
 example_list = [
     "samples/guitar_acoustic.wav",
     "samples/piano.wav",
-    "samples/guitar_electric.wav"
+    "samples/violin.wav",
+    "samples/flute.wav"
 ]
 
 def predict(audio_path):
@@ -31,7 +37,6 @@ def predict(audio_path):
         return
     # input Preprocessing
     img = audio_preprocess(wav, SAMPLE_RATE).unsqueeze(0)
-    print(img.shape)
     model.eval()
     with torch.inference_mode():
         pred_probs = torch.softmax(model(img), dim=1)
@@ -40,8 +45,8 @@ def predict(audio_path):
     return pred_labels_and_probs, pred_time
 
 
-title = "Musical Instrument Classification 🎺🎸🎹🎻"
-description = "An EfficientNetB0 feature extractor model to classify 11 different musical instruments"
+title = "樂器辨識🎺🎸🎹🎻"
+description = "使用IRMAS資料集訓練的深度學習模型,可辨識11種不同樂器,包含「大提琴, 單簧管, 長笛, 民謠吉他, 電吉他, 風琴, 鋼琴, 薩克斯風, 喇叭, 小提琴, 人聲」"
 article = ""
 
 demo = gr.Interface(fn=predict,
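
Note: the hunks above show only fragments of app.py. Below is a minimal sketch of how the changed pieces plausibly fit together after this commit. The audio_preprocess stand-in, the resampling step, and the Gradio component choices (gr.Audio(type="filepath"), gr.Label, gr.Number) are illustrative assumptions, not code confirmed by the diff.

    # -*- coding: UTF-8 -*-
    import gradio as gr
    import torch, torchaudio
    from timeit import default_timer as timer

    SAMPLE_RATE = 44100
    AUDIO_LEN = 2.90

    # As in the diff: the checkpoint is a fully pickled model, loaded on CPU.
    model = torch.load("models/torch_efficientnet_fold2_CNN.pth",
                       map_location=torch.device('cpu'))

    CHINESE_LABELS = ["大提琴", "單簧管", "長笛", "民謠吉他", "電吉他",
                      "風琴", "鋼琴", "薩克斯風", "喇叭", "小提琴", "人聲"]
    example_list = ["samples/guitar_acoustic.wav", "samples/piano.wav",
                    "samples/violin.wav", "samples/flute.wav"]

    def audio_preprocess(wav, sr):
        # Hypothetical stand-in for the repo's helper: trim/pad to AUDIO_LEN
        # seconds, then produce a spectrogram "image" for the CNN. The real
        # helper presumably matches the exact input shape the model expects.
        num_samples = int(AUDIO_LEN * sr)
        wav = torch.nn.functional.pad(wav, (0, max(0, num_samples - wav.shape[-1])))
        wav = wav[..., :num_samples]
        return torchaudio.transforms.MelSpectrogram(sample_rate=sr)(wav)

    def predict(audio_path):
        start_time = timer()
        wav, sr = torchaudio.load(audio_path)
        if sr != SAMPLE_RATE:  # assumed resampling step
            wav = torchaudio.transforms.Resample(sr, SAMPLE_RATE)(wav)
        img = audio_preprocess(wav, SAMPLE_RATE).unsqueeze(0)
        model.eval()
        with torch.inference_mode():
            pred_probs = torch.softmax(model(img), dim=1)
        # Map each class probability to its Chinese label (assumed dict shape,
        # matching what gr.Label consumes).
        pred_labels_and_probs = {CHINESE_LABELS[i]: float(pred_probs[0][i])
                                 for i in range(len(CHINESE_LABELS))}
        pred_time = round(timer() - start_time, 5)
        return pred_labels_and_probs, pred_time

    demo = gr.Interface(fn=predict,
                        inputs=gr.Audio(type="filepath"),
                        outputs=[gr.Label(num_top_classes=3),
                                 gr.Number(label="Prediction time (s)")],
                        examples=example_list,
                        title="樂器辨識🎺🎸🎹🎻")
    demo.launch()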
torch_efficientnet_b0_fold4.pth → models/torch_efficientnet_b0_fold4.pth RENAMED
File without changes
models/torch_efficientnet_fold2_CNN.pth ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1a55dbb25c9a1678bd3b5d2968695923931564e5b4e04839c5836a3ee5421c1a
+size 16419953
samples/flute.wav ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2aaa6c5640106826a4db1d7932f9edc3b0fbb0c68cbd4e7d7d544d2fdc28af17
+size 3528044
samples/violin.wav ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:690365b52ee8ca9f7b0147247270e375d70be31512c3ae591e52bf55605d3ece
+size 19105034
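
Note: the three ADDED entries above are Git LFS pointer files. The repository stores only the spec version, a sha256 oid, and the byte size; the actual blobs live in LFS storage and are fetched with git lfs pull. A small sketch of checking a fetched file against its pointer fields (the function name is hypothetical):

    import hashlib, os

    def verify_lfs_pointer(path, expected_oid, expected_size):
        # Compare a local file against the oid/size fields of its LFS pointer.
        h = hashlib.sha256()
        with open(path, "rb") as f:
            for chunk in iter(lambda: f.read(1 << 20), b""):
                h.update(chunk)
        return (h.hexdigest() == expected_oid
                and os.path.getsize(path) == expected_size)

    # e.g., for the checkpoint added in this commit:
    print(verify_lfs_pointer(
        "models/torch_efficientnet_fold2_CNN.pth",
        "1a55dbb25c9a1678bd3b5d2968695923931564e5b4e04839c5836a3ee5421c1a",
        16419953))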