Upload folder using huggingface_hub
- CKPT.yaml +11 -0
- README.md +66 -0
- attention_pooling.ckpt +3 -0
- brain.ckpt +3 -0
- dataloader-TRAIN.ckpt +3 -0
- dialect_encoder.txt +22 -0
- hyperparams.yaml +49 -0
- optimizer.ckpt +3 -0
- output_mlp.ckpt +3 -0
- whisper.ckpt +3 -0
- whisper_opt.ckpt +3 -0
CKPT.yaml
ADDED
@@ -0,0 +1,11 @@
+# yamllint disable
+end-of-epoch: true
+error: 2.839878559112549
+loss: 0.18992407526573798
+macro_f1: 0.9538202964889487
+macro_precision: 0.952679604174255
+macro_recall: 0.9565894020982324
+unixtime: 1737431086.8832679
+weighted_f1: 0.9599932477445305
+weighted_precision: 0.9608126922866167
+weighted_recall: 0.9601927882898965
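CKPT.yaml records SpeechBrain end-of-epoch checkpoint metadata: validation error and loss plus macro- and weighted-averaged F1, precision, and recall. As a side note (not part of the upload), the macro/weighted distinction is only the averaging mode over per-class scores; a minimal sketch with scikit-learn on toy labels:

```python
# Toy illustration of macro vs. weighted averaging (made-up labels, not the ADI-20 results).
from sklearn.metrics import f1_score

y_true = ["EGY", "EGY", "MOR", "TUN"]
y_pred = ["EGY", "MOR", "MOR", "TUN"]

print(f1_score(y_true, y_pred, average="macro"))     # unweighted mean of per-class F1
print(f1_score(y_true, y_pred, average="weighted"))  # per-class F1 weighted by class support
```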
README.md
ADDED
@@ -0,0 +1,66 @@
+---
+language:
+- ar
+pipeline_tag: audio-classification
+library_name: speechbrain
+tags:
+- DialectID
+- ADI
+- ADI-20
+- speechbrain
+- Identification
+- pytorch
+- embeddings
+datasets:
+- ADI-20
+metrics:
+- f1
+- precision
+- recall
+- accuracy
+---
+
+## Install Requirements
+
+### SpeechBrain
+First, install SpeechBrain with the following command:
+
+```bash
+pip install git+https://github.com/speechbrain/speechbrain.git@develop
+```
+
+### Clone the ADI GitHub repository
+```bash
+git clone https://github.com/elyadata/ADI-20
+cd ADI-20
+pip install -r requirements.txt
+```
+
+
+### Perform Arabic Dialect Identification
+```python
+from inference.classifier_attention_pooling import WhisperDialectClassifier
+
+dialect_id = WhisperDialectClassifier.from_hparams(
+    source="",
+    hparams_file="hyperparams.yaml",
+    savedir="pretrained_DID/tmp").to("cuda")
+
+dialect_id.device = "cuda"
+
+dialect_id.classify_file("filename.wav")
+```
+
+### Citation
+If you use this work, please cite:
+```
+@inproceedings{elleuch2025adi20,
+  author    = {Haroun Elleuch and Salima Mdhaffar and Yannick Estève and Fethi Bougares},
+  title     = {ADI-20: Arabic Dialect Identification Dataset and Models},
+  booktitle = {Proceedings of the Annual Conference of the International Speech Communication Association (Interspeech)},
+  year      = {2025},
+  address   = {Rotterdam Ahoy Convention Centre, Rotterdam, The Netherlands},
+  month     = {August},
+  days      = {17-21}
+}
+```
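The `source` argument in the README snippet is left empty. One way (an assumption, not prescribed by the model card) to obtain the uploaded folder locally is `huggingface_hub.snapshot_download`, using the repository id declared as `pretrained_path` in hyperparams.yaml, and then pointing `from_hparams` at the downloaded directory:

```python
# Hypothetical download step: fetches the repository (hyperparams.yaml and the *.ckpt
# files listed in this commit) into a local folder that from_hparams can read.
from huggingface_hub import snapshot_download

local_dir = snapshot_download(repo_id="Elyadata/ADI-whisper-ADI20")
print(local_dir)  # pass this path as `source=` in WhisperDialectClassifier.from_hparams
```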
attention_pooling.ckpt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0e015a4ed868bc4dfcec47af51a95b622037fc13becb702cc8171a223dfddfe8
+size 6740
brain.ckpt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3888629ac8efb67b3b056f3fe0d026702b046af2a15e965378332f7d63c5ca8f
+size 50
dataloader-TRAIN.ckpt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6a21369bcca05a0d5c2a7eb0ba00bd5dd34c28915c8c3da30553ee4043b3d5a6
+size 5
dialect_encoder.txt
ADDED
@@ -0,0 +1,22 @@
+'ALG' => 0
+'EGY' => 1
+'IRA' => 2
+'JOR' => 3
+'KSA' => 4
+'KUW' => 5
+'LEB' => 6
+'LIB' => 7
+'MAU' => 8
+'MOR' => 9
+'OMA' => 10
+'PAL' => 11
+'QAT' => 12
+'SUD' => 13
+'SYR' => 14
+'UAE' => 15
+'YEM' => 16
+'BAH' => 17
+'MSA' => 18
+'TUN' => 19
+================
+'starting_index' => 0
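dialect_encoder.txt is the saved SpeechBrain `CategoricalEncoder` mapping the 20 dialect labels to class indices. A minimal sketch (assuming the file sits in the working directory) of loading it and turning a predicted index back into a dialect code:

```python
# Load the saved label encoder and convert between class indices and dialect labels.
from speechbrain.dataio.encoder import CategoricalEncoder

encoder = CategoricalEncoder()
encoder.load("dialect_encoder.txt")

print(encoder.decode_ndim(9))        # -> 'MOR', per the mapping above
print(encoder.encode_label("TUN"))   # -> 19
```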
hyperparams.yaml
ADDED
@@ -0,0 +1,49 @@
+# ##########################################################################################
+# Model: Whisper-large-v3 Encoder + Attention pooling for Arabic Dialect Identification
+#
+# Author: Haroun Elleuch
+############################################################################################
+
+
+pretrained_path: Elyadata/ADI-whisper-ADI20
+whisper_hub: openai/whisper-large-v3
+
+n_languages: 20
+features_dim: 1280
+
+whisper: !new:speechbrain.lobes.models.huggingface_transformers.whisper.Whisper
+    source: !ref <whisper_hub>
+    encoder_only: True
+    freeze_encoder: False
+    save_path: !ref <whisper_hub>
+
+attention_pooling: !new:speechbrain.nnet.pooling.AttentionPooling
+    input_dim: !ref <features_dim>
+
+output_mlp: !new:speechbrain.nnet.linear.Linear
+    input_size: !ref <features_dim>
+    n_neurons: !ref <n_languages>
+    bias: False
+
+
+modules:
+    whisper: !ref <whisper>
+    attention_pooling: !ref <attention_pooling>
+    output_mlp: !ref <output_mlp>
+
+log_softmax: !new:speechbrain.nnet.activations.Softmax
+    apply_log: True
+
+label_encoder: !new:speechbrain.dataio.encoder.CategoricalEncoder
+
+pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
+    loadables:
+        whisper: !ref <whisper>
+        attention_pooling: !ref <attention_pooling>
+        output_mlp: !ref <output_mlp>
+        label_encoder: !ref <label_encoder>
+    paths:
+        whisper: !ref <pretrained_path>/whisper.ckpt
+        attention_pooling: !ref <pretrained_path>/attention_pooling.ckpt
+        output_mlp: !ref <pretrained_path>/output_mlp.ckpt
+        label_encoder: !ref <pretrained_path>/dialect_encoder.txt
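For reference, a rough sketch (not part of the upload; assumptions noted in comments) of how such a HyperPyYAML file is typically consumed: load it with `load_hyperpyyaml`, let the `pretrainer` fetch and load the listed checkpoints, then chain the declared encoder, attention pooling, MLP, and log-softmax modules. Exact tensor shapes and call signatures of the pooling/classifier modules follow the ADI-20 codebase, so treat this as an outline rather than the official inference path (`WhisperDialectClassifier` in the README).

```python
# Outline only: drives the `pretrainer` section above and chains the declared modules.
import torch
import torchaudio
from hyperpyyaml import load_hyperpyyaml

with open("hyperparams.yaml") as f:
    hparams = load_hyperpyyaml(f)

# Fetch and load whisper.ckpt, attention_pooling.ckpt, output_mlp.ckpt,
# and dialect_encoder.txt from `pretrained_path`.
hparams["pretrainer"].collect_files()
hparams["pretrainer"].load_collected()

signal, sr = torchaudio.load("filename.wav")  # assumed 16 kHz mono input
with torch.no_grad():
    feats = hparams["whisper"](signal)               # encoder-only Whisper features
    pooled = hparams["attention_pooling"](feats)     # utterance-level embedding (shape per ADI-20 code)
    log_probs = hparams["log_softmax"](hparams["output_mlp"](pooled))

index = int(log_probs.argmax(dim=-1).squeeze())
print(hparams["label_encoder"].decode_ndim(index))   # dialect code, e.g. 'EGY'
```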
optimizer.ckpt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cdd58ef92828f25761d1f03453a16225327b46a9e13fb978c72e966a17cbf617
+size 218582
output_mlp.ckpt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a9d99ccddfc47f7160b7a630ef475327c769eaa4b0e1fa302c7e152e377dad5c
+size 103723
whisper.ckpt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5721aa93158f312d0f694a573b72ed736dce9e33217c9f01d06e8d2cb149cc17
+size 2548162402
whisper_opt.ckpt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:68e246d6bf1425e5e864514f09a6c2dcd5f342939f5178923578edd00493445b
+size 5080804356