add model
Browse files
- README.md +30 -0
- _best/dev.yaml +9 -0
- _test/test_holistic.yaml +8 -0
- file_handler.yaml +2 -0
- inference_transform.yaml +12 -0
- model.pt +3 -0
- model.yaml +7 -0
- optimizer.pt +3 -0
- preprocess_file_handler.yaml +2 -0
- preprocess_pipeline.yaml +2 -0
- target_transform.yaml +6 -0
README.md
ADDED
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
license: cc-by-4.0
|
3 |
+
metrics:
|
4 |
+
- accuracy
|
5 |
+
- f1
|
6 |
+
- uar
|
7 |
+
pipeline_tag: audio-classification
|
8 |
+
tags:
|
9 |
+
- audio
|
10 |
+
- audio-classification
|
11 |
+
- speech-emotion-recognition
|
12 |
+
- autrainer
|
13 |
+
library_name: autrainer
|
14 |
+
model-index:
|
15 |
+
- name: dcase-2020-t1a-cnn14-32k-t
|
16 |
+
results:
|
17 |
+
- task:
|
18 |
+
type: audio-classification
|
19 |
+
name: Speech Emotion Recognition
|
20 |
+
metrics:
|
21 |
+
- type: accuracy
|
22 |
+
name: Accuracy
|
23 |
+
value: 0.6166793457588436
|
24 |
+
- type: f1
|
25 |
+
name: F1
|
26 |
+
value: 0.5716599171523286
|
27 |
+
- type: uar
|
28 |
+
name: Unweighted Average Recall
|
29 |
+
value: 0.6499883154795764
|
30 |
+
---
|
_best/dev.yaml
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
accuracy:
|
2 |
+
all: 0.5984346298218992
|
3 |
+
uar:
|
4 |
+
all: 0.630735177103743
|
5 |
+
f1:
|
6 |
+
all: 0.5791954276271887
|
7 |
+
dev_loss:
|
8 |
+
all: 0.9837130961151764
|
9 |
+
iteration: 4
|
_test/test_holistic.yaml
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
accuracy:
|
2 |
+
all: 0.6166793457588436
|
3 |
+
uar:
|
4 |
+
all: 0.6499883154795764
|
5 |
+
f1:
|
6 |
+
all: 0.5716599171523286
|
7 |
+
loss:
|
8 |
+
all: 0.914642338334034
|
file_handler.yaml
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
$autrainer.datasets.utils.file_handlers.AudioFileHandler==0.4.0:
|
2 |
+
target_sample_rate: null
|
inference_transform.yaml
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
$autrainer.transforms.smart_compose.SmartCompose==0.4.0:
|
2 |
+
transforms:
|
3 |
+
- $autrainer.transforms.specific_transforms.Expand==0.4.0:
|
4 |
+
size: 48000
|
5 |
+
method: pad
|
6 |
+
axis: -1
|
7 |
+
order: -85
|
8 |
+
- $autrainer.transforms.specific_transforms.FeatureExtractor==0.4.0:
|
9 |
+
fe_type: W2V2
|
10 |
+
fe_transfer: audeering/wav2vec2-large-robust-12-ft-emotion-msp-dim
|
11 |
+
sampling_rate: 16000
|
12 |
+
order: -80
|
model.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ae40f66ab797c5840326e268c5145af234e22cf37582cc364cde4a06f04ca5f9
|
3 |
+
size 659338874
|
model.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
$autrainer.models.w2v2.W2V2FFNN==0.4.0:
|
2 |
+
output_dim: 4
|
3 |
+
model_name: audeering/wav2vec2-large-robust-12-ft-emotion-msp-dim
|
4 |
+
freeze_extractor: true
|
5 |
+
hidden_size: 512
|
6 |
+
num_layers: 2
|
7 |
+
dropout: 0.5
|
optimizer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f41960597a39d7adf831a04678b998bf4c60f170c4b53cd9f418fab918273eab
|
3 |
+
size 3448
|
preprocess_file_handler.yaml
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
$autrainer.datasets.utils.file_handlers.AudioFileHandler==0.4.0:
|
2 |
+
target_sample_rate: null
|
preprocess_pipeline.yaml
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
$autrainer.transforms.smart_compose.SmartCompose==0.4.0:
|
2 |
+
transforms: []
|
target_transform.yaml
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
$autrainer.datasets.utils.target_transforms.label_encoder.LabelEncoder==0.4.0:
|
2 |
+
labels:
|
3 |
+
- A
|
4 |
+
- H
|
5 |
+
- N
|
6 |
+
- S
|