HarounElleuch committed
Commit bbbc9ba · verified · 1 Parent(s): ae80b12

Upload folder using huggingface_hub

CKPT.yaml ADDED
@@ -0,0 +1,11 @@
+ # yamllint disable
+ end-of-epoch: true
+ error: 2.839878559112549
+ loss: 0.18992407526573798
+ macro_f1: 0.9538202964889487
+ macro_precision: 0.952679604174255
+ macro_recall: 0.9565894020982324
+ unixtime: 1737431086.8832679
+ weighted_f1: 0.9599932477445305
+ weighted_precision: 0.9608126922866167
+ weighted_recall: 0.9601927882898965
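CKPT.yaml stores the metrics recorded with this SpeechBrain checkpoint at the end of an epoch. A minimal sketch for reading them back, assuming PyYAML is installed and the file has been downloaded locally:

```python
# Sketch: read the metrics stored alongside the checkpoint in CKPT.yaml.
# Assumes PyYAML is available and CKPT.yaml sits in the working directory.
import yaml

with open("CKPT.yaml") as f:
    ckpt_meta = yaml.safe_load(f)

# Keys mirror the file contents: loss, error, macro/weighted F1, precision, recall.
print(f"macro F1:    {ckpt_meta['macro_f1']:.4f}")
print(f"weighted F1: {ckpt_meta['weighted_f1']:.4f}")
print(f"loss:        {ckpt_meta['loss']:.4f}")
```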
README.md ADDED
@@ -0,0 +1,66 @@
+ ---
+ language:
+ - ar
+ pipeline_tag: audio-classification
+ library_name: speechbrain
+ tags:
+ - DialectID
+ - ADI
+ - ADI-20
+ - speechbrain
+ - Identification
+ - pytorch
+ - embeddings
+ datasets:
+ - ADI-20
+ metrics:
+ - f1
+ - precision
+ - recall
+ - accuracy
+ ---
+
+ ## Install Requirements
+
+ ### SpeechBrain
+ First, install SpeechBrain with the following command:
+
+ ```bash
+ pip install git+https://github.com/speechbrain/speechbrain.git@develop
+ ```
+
+ ### Clone the ADI-20 GitHub repository
+ ```bash
+ git clone https://github.com/elyadata/ADI-20
+ cd ADI-20
+ pip install -r requirements.txt
+ ```
+
+
+ ### Perform Arabic Dialect Identification
+ ```python
+ from inference.classifier_attention_pooling import WhisperDialectClassifier
+
+ dialect_id = WhisperDialectClassifier.from_hparams(
+     source="",
+     hparams_file="hyperparams.yaml",
+     savedir="pretrained_DID/tmp").to("cuda")
+
+ dialect_id.device = "cuda"
+
+ dialect_id.classify_file("filename.wav")
+ ```
+
+ ### Citation
+ If you use this work, please cite:
+ ```
+ @inproceedings{elleuch2025adi20,
+ author = {Haroun Elleuch and Salima Mdhaffar and Yannick Estève and Fethi Bougares},
+ title = {ADI-20: Arabic Dialect Identification Dataset and Models},
+ booktitle = {Proceedings of the Annual Conference of the International Speech Communication Association (Interspeech)},
+ year = {2025},
+ address = {Rotterdam Ahoy Convention Centre, Rotterdam, The Netherlands},
+ month = {August},
+ days = {17-21}
+ }
+ ```
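A note on the README's usage example: the `source` argument is left empty there. Based on `hyperparams.yaml` in this upload (which sets `pretrained_path: Elyadata/ADI-whisper-ADI20`), it would presumably point at this model repository or at a local directory containing the uploaded files; this is an assumption about intended usage rather than something the README states.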
attention_pooling.ckpt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0e015a4ed868bc4dfcec47af51a95b622037fc13becb702cc8171a223dfddfe8
+ size 6740
brain.ckpt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3888629ac8efb67b3b056f3fe0d026702b046af2a15e965378332f7d63c5ca8f
+ size 50
dataloader-TRAIN.ckpt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6a21369bcca05a0d5c2a7eb0ba00bd5dd34c28915c8c3da30553ee4043b3d5a6
+ size 5
dialect_encoder.txt ADDED
@@ -0,0 +1,22 @@
+ 'ALG' => 0
+ 'EGY' => 1
+ 'IRA' => 2
+ 'JOR' => 3
+ 'KSA' => 4
+ 'KUW' => 5
+ 'LEB' => 6
+ 'LIB' => 7
+ 'MAU' => 8
+ 'MOR' => 9
+ 'OMA' => 10
+ 'PAL' => 11
+ 'QAT' => 12
+ 'SUD' => 13
+ 'SYR' => 14
+ 'UAE' => 15
+ 'YEM' => 16
+ 'BAH' => 17
+ 'MSA' => 18
+ 'TUN' => 19
+ ================
+ 'starting_index' => 0
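dialect_encoder.txt is the saved SpeechBrain `CategoricalEncoder` that maps the 20 dialect codes to class indices (it is loaded by the `label_encoder` entry in `hyperparams.yaml`). A minimal sketch, assuming the file has been downloaded locally, for turning a predicted class index back into a dialect code:

```python
# Sketch: map model output indices back to dialect codes using the saved encoder.
# Assumes SpeechBrain is installed and dialect_encoder.txt is available locally.
from speechbrain.dataio.encoder import CategoricalEncoder

label_encoder = CategoricalEncoder()
label_encoder.load("dialect_encoder.txt")

# Per the mapping above, index 19 corresponds to 'TUN'.
predicted_index = 19
print(label_encoder.decode_ndim(predicted_index))
```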
hyperparams.yaml ADDED
@@ -0,0 +1,49 @@
+ # ##########################################################################################
+ # Model: Whisper-large-v3 Encoder + Attention pooling for Arabic Dialect Identification
+ #
+ # Author: Haroun Elleuch
+ ############################################################################################
+
+
+ pretrained_path: Elyadata/ADI-whisper-ADI20
+ whisper_hub: openai/whisper-large-v3
+
+ n_languages: 20
+ features_dim: 1280
+
+ whisper: !new:speechbrain.lobes.models.huggingface_transformers.whisper.Whisper
+     source: !ref <whisper_hub>
+     encoder_only: True
+     freeze_encoder: False
+     save_path: !ref <whisper_hub>
+
+ attention_pooling: !new:speechbrain.nnet.pooling.AttentionPooling
+     input_dim: !ref <features_dim>
+
+ output_mlp: !new:speechbrain.nnet.linear.Linear
+     input_size: !ref <features_dim>
+     n_neurons: !ref <n_languages>
+     bias: False
+
+
+ modules:
+     whisper: !ref <whisper>
+     attention_pooling: !ref <attention_pooling>
+     output_mlp: !ref <output_mlp>
+
+ log_softmax: !new:speechbrain.nnet.activations.Softmax
+     apply_log: True
+
+ label_encoder: !new:speechbrain.dataio.encoder.CategoricalEncoder
+
+ pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
+     loadables:
+         whisper: !ref <whisper>
+         attention_pooling: !ref <attention_pooling>
+         output_mlp: !ref <output_mlp>
+         label_encoder: !ref <label_encoder>
+     paths:
+         whisper: !ref <pretrained_path>/whisper.ckpt
+         attention_pooling: !ref <pretrained_path>/attention_pooling.ckpt
+         output_mlp: !ref <pretrained_path>/output_mlp.ckpt
+         label_encoder: !ref <pretrained_path>/dialect_encoder.txt
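hyperparams.yaml wires the Whisper-large-v3 encoder, attention pooling, and output MLP together and declares a Pretrainer that fetches the uploaded checkpoints from `Elyadata/ADI-whisper-ADI20`. A minimal sketch of loading this file directly with HyperPyYAML and running the Pretrainer, as an alternative to the README's `WhisperDialectClassifier.from_hparams` helper (the helper lives in the ADI-20 repository; this sketch is an assumption about how the file can be used, not the project's documented entry point):

```python
# Sketch: instantiate the modules declared in hyperparams.yaml and load the
# pretrained weights through the Pretrainer defined in that file.
# Assumes speechbrain and hyperpyyaml are installed and hyperparams.yaml is local.
from hyperpyyaml import load_hyperpyyaml

with open("hyperparams.yaml") as f:
    hparams = load_hyperpyyaml(f)

# Collect the checkpoints listed under `paths:` (downloading them if needed)
# and load them into the corresponding modules.
pretrainer = hparams["pretrainer"]
pretrainer.collect_files()
pretrainer.load_collected()

whisper = hparams["whisper"].eval()
attention_pooling = hparams["attention_pooling"].eval()
output_mlp = hparams["output_mlp"].eval()
```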
optimizer.ckpt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:cdd58ef92828f25761d1f03453a16225327b46a9e13fb978c72e966a17cbf617
+ size 218582
output_mlp.ckpt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a9d99ccddfc47f7160b7a630ef475327c769eaa4b0e1fa302c7e152e377dad5c
+ size 103723
whisper.ckpt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5721aa93158f312d0f694a573b72ed736dce9e33217c9f01d06e8d2cb149cc17
+ size 2548162402
whisper_opt.ckpt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:68e246d6bf1425e5e864514f09a6c2dcd5f342939f5178923578edd00493445b
+ size 5080804356