mihaimasala committed (verified)
Commit 232c059 · Parent(s): e711307

Update README.md

Files changed (1): README.md (+487 -4)
README.md CHANGED
@@ -4,6 +4,488 @@ language:
 - ro
 base_model:
 - google/gemma-7b
+model-index:
+- name: OpenLLM-Ro/RoGemma-7b-Instruct
+  results:
+  - task:
+      type: text-generation
+    dataset:
+      name: RoMT-Bench
+      type: RoMT-Bench
+    metrics:
+    - name: Score
+      type: Score
+      value: 5.26
+  - task:
+      type: text-generation
+    dataset:
+      name: RoCulturaBench
+      type: RoCulturaBench
+    metrics:
+    - name: Score
+      type: Score
+      value: 3.26
+  - task:
+      type: text-generation
+    dataset:
+      name: Romanian_Academic_Benchmarks
+      type: Romanian_Academic_Benchmarks
+    metrics:
+    - name: Average accuracy
+      type: accuracy
+      value: 53.41
+  - task:
+      type: text-generation
+    dataset:
+      name: OpenLLM-Ro/ro_arc_challenge
+      type: OpenLLM-Ro/ro_arc_challenge
+    metrics:
+    - name: Average accuracy
+      type: accuracy
+      value: 52.44
+  - task:
+      type: text-generation
+    dataset:
+      name: OpenLLM-Ro/ro_mmlu
+      type: OpenLLM-Ro/ro_mmlu
+    metrics:
+    - name: Average accuracy
+      type: accuracy
+      value: 54.44
+  - task:
+      type: text-generation
+    dataset:
+      name: OpenLLM-Ro/ro_winogrande
+      type: OpenLLM-Ro/ro_winogrande
+    metrics:
+    - name: Average accuracy
+      type: accuracy
+      value: 69.36
+  - task:
+      type: text-generation
+    dataset:
+      name: OpenLLM-Ro/ro_hellaswag
+      type: OpenLLM-Ro/ro_hellaswag
+    metrics:
+    - name: Average accuracy
+      type: accuracy
+      value: 61.96
+  - task:
+      type: text-generation
+    dataset:
+      name: OpenLLM-Ro/ro_gsm8k
+      type: OpenLLM-Ro/ro_gsm8k
+    metrics:
+    - name: Average accuracy
+      type: accuracy
+      value: 31.06
+  - task:
+      type: text-generation
+    dataset:
+      name: OpenLLM-Ro/ro_truthfulqa
+      type: OpenLLM-Ro/ro_truthfulqa
+    metrics:
+    - name: Average accuracy
+      type: accuracy
+      value: 51.23
+  - task:
+      type: text-generation
+    dataset:
+      name: LaRoSeDa_binary
+      type: LaRoSeDa_binary
+    metrics:
+    - name: Average macro-f1
+      type: macro-f1
+      value: 97.86
+  - task:
+      type: text-generation
+    dataset:
+      name: LaRoSeDa_multiclass
+      type: LaRoSeDa_multiclass
+    metrics:
+    - name: Average macro-f1
+      type: macro-f1
+      value: 65.70
+  - task:
+      type: text-generation
+    dataset:
+      name: LaRoSeDa_binary_finetuned
+      type: LaRoSeDa_binary_finetuned
+    metrics:
+    - name: Average macro-f1
+      type: macro-f1
+      value: 98.43
+  - task:
+      type: text-generation
+    dataset:
+      name: LaRoSeDa_multiclass_finetuned
+      type: LaRoSeDa_multiclass_finetuned
+    metrics:
+    - name: Average macro-f1
+      type: macro-f1
+      value: 87.17
+  - task:
+      type: text-generation
+    dataset:
+      name: WMT_EN-RO
+      type: WMT_EN-RO
+    metrics:
+    - name: Average bleu
+      type: bleu
+      value: 27.91
+  - task:
+      type: text-generation
+    dataset:
+      name: WMT_RO-EN
+      type: WMT_RO-EN
+    metrics:
+    - name: Average bleu
+      type: bleu
+      value: 23.08
+  - task:
+      type: text-generation
+    dataset:
+      name: WMT_EN-RO_finetuned
+      type: WMT_EN-RO_finetuned
+    metrics:
+    - name: Average bleu
+      type: bleu
+      value: 27.99
+  - task:
+      type: text-generation
+    dataset:
+      name: WMT_RO-EN_finetuned
+      type: WMT_RO-EN_finetuned
+    metrics:
+    - name: Average bleu
+      type: bleu
+      value: 39.51
+  - task:
+      type: text-generation
+    dataset:
+      name: XQuAD
+      type: XQuAD
+    metrics:
+    - name: Average exact_match
+      type: exact_match
+      value: 17.75
+  - task:
+      type: text-generation
+    dataset:
+      name: XQuAD
+      type: XQuAD
+    metrics:
+    - name: Average f1
+      type: f1
+      value: 28.11
+  - task:
+      type: text-generation
+    dataset:
+      name: XQuAD_finetuned
+      type: XQuAD_finetuned
+    metrics:
+    - name: Average exact_match
+      type: exact_match
+      value: 52.02
+  - task:
+      type: text-generation
+    dataset:
+      name: XQuAD_finetuned
+      type: XQuAD_finetuned
+    metrics:
+    - name: Average f1
+      type: f1
+      value: 68.43
+  - task:
+      type: text-generation
+    dataset:
+      name: STS
+      type: STS
+    metrics:
+    - name: Average spearman
+      type: spearman
+      value: 73.96
+  - task:
+      type: text-generation
+    dataset:
+      name: STS
+      type: STS
+    metrics:
+    - name: Average pearson
+      type: pearson
+      value: 75.16
+  - task:
+      type: text-generation
+    dataset:
+      name: STS_finetuned
+      type: STS_finetuned
+    metrics:
+    - name: Average spearman
+      type: spearman
+      value: 86.45
+  - task:
+      type: text-generation
+    dataset:
+      name: STS_finetuned
+      type: STS_finetuned
+    metrics:
+    - name: Average pearson
+      type: pearson
+      value: 86.31
+  - task:
+      type: text-generation
+    dataset:
+      name: RoMT-Bench
+      type: RoMT-Bench
+    metrics:
+    - name: First turn
+      type: Score
+      value: 5.92
+    - name: Second turn
+      type: Score
+      value: 4.60
+  - task:
+      type: text-generation
+    dataset:
+      name: OpenLLM-Ro/ro_arc_challenge
+      type: OpenLLM-Ro/ro_arc_challenge
+    metrics:
+    - name: 0-shot
+      type: accuracy
+      value: 50.30
+    - name: 1-shot
+      type: accuracy
+      value: 50.90
+    - name: 3-shot
+      type: accuracy
+      value: 52.53
+    - name: 5-shot
+      type: accuracy
+      value: 53.30
+    - name: 10-shot
+      type: accuracy
+      value: 54.33
+    - name: 25-shot
+      type: accuracy
+      value: 53.30
+  - task:
+      type: text-generation
+    dataset:
+      name: OpenLLM-Ro/ro_mmlu
+      type: OpenLLM-Ro/ro_mmlu
+    metrics:
+    - name: 0-shot
+      type: accuracy
+      value: 54.95
+    - name: 1-shot
+      type: accuracy
+      value: 54.01
+    - name: 3-shot
+      type: accuracy
+      value: 54.03
+    - name: 5-shot
+      type: accuracy
+      value: 54.76
+  - task:
+      type: text-generation
+    dataset:
+      name: OpenLLM-Ro/ro_winogrande
+      type: OpenLLM-Ro/ro_winogrande
+    metrics:
+    - name: 0-shot
+      type: accuracy
+      value: 68.67
+    - name: 1-shot
+      type: accuracy
+      value: 69.46
+    - name: 3-shot
+      type: accuracy
+      value: 68.43
+    - name: 5-shot
+      type: accuracy
+      value: 70.88
+  - task:
+      type: text-generation
+    dataset:
+      name: OpenLLM-Ro/ro_hellaswag
+      type: OpenLLM-Ro/ro_hellaswag
+    metrics:
+    - name: 0-shot
+      type: accuracy
+      value: 61.54
+    - name: 1-shot
+      type: accuracy
+      value: 61.54
+    - name: 3-shot
+      type: accuracy
+      value: 62.08
+    - name: 5-shot
+      type: accuracy
+      value: 62.12
+    - name: 10-shot
+      type: accuracy
+      value: 62.51
+  - task:
+      type: text-generation
+    dataset:
+      name: OpenLLM-Ro/ro_gsm8k
+      type: OpenLLM-Ro/ro_gsm8k
+    metrics:
+    - name: 0-shot
+      type: accuracy
+      value: 24.79
+    - name: 1-shot
+      type: accuracy
+      value: 34.50
+    - name: 3-shot
+      type: accuracy
+      value: 33.89
+  - task:
+      type: text-generation
+    dataset:
+      name: LaRoSeDa_binary
+      type: LaRoSeDa_binary
+    metrics:
+    - name: 0-shot
+      type: macro-f1
+      value: 97.60
+    - name: 1-shot
+      type: macro-f1
+      value: 97.23
+    - name: 3-shot
+      type: macro-f1
+      value: 98.13
+    - name: 5-shot
+      type: macro-f1
+      value: 98.50
+  - task:
+      type: text-generation
+    dataset:
+      name: LaRoSeDa_multiclass
+      type: LaRoSeDa_multiclass
+    metrics:
+    - name: 0-shot
+      type: macro-f1
+      value: 68.53
+    - name: 1-shot
+      type: macro-f1
+      value: 64.84
+    - name: 3-shot
+      type: macro-f1
+      value: 63.62
+    - name: 5-shot
+      type: macro-f1
+      value: 65.83
+  - task:
+      type: text-generation
+    dataset:
+      name: WMT_EN-RO
+      type: WMT_EN-RO
+    metrics:
+    - name: 0-shot
+      type: bleu
+      value: 25.04
+    - name: 1-shot
+      type: bleu
+      value: 28.43
+    - name: 3-shot
+      type: bleu
+      value: 28.87
+    - name: 5-shot
+      type: bleu
+      value: 29.28
+  - task:
+      type: text-generation
+    dataset:
+      name: WMT_RO-EN
+      type: WMT_RO-EN
+    metrics:
+    - name: 0-shot
+      type: bleu
+      value: 4.94
+    - name: 1-shot
+      type: bleu
+      value: 25.33
+    - name: 3-shot
+      type: bleu
+      value: 30.87
+    - name: 5-shot
+      type: bleu
+      value: 31.19
+  - task:
+      type: text-generation
+    dataset:
+      name: XQuAD_EM
+      type: XQuAD_EM
+    metrics:
+    - name: 0-shot
+      type: exact_match
+      value: 36.47
+    - name: 1-shot
+      type: exact_match
+      value: 26.22
+    - name: 3-shot
+      type: exact_match
+      value: 3.19
+    - name: 5-shot
+      type: exact_match
+      value: 5.13
+  - task:
+      type: text-generation
+    dataset:
+      name: XQuAD_F1
+      type: XQuAD_F1
+    metrics:
+    - name: 0-shot
+      type: f1
+      value: 56.83
+    - name: 1-shot
+      type: f1
+      value: 38.53
+    - name: 3-shot
+      type: f1
+      value: 6.88
+    - name: 5-shot
+      type: f1
+      value: 10.19
+  - task:
+      type: text-generation
+    dataset:
+      name: STS
+      type: STS
+    metrics:
+    - name: 0-shot
+      type: spearman
+      value: 70.61
+    - name: 1-shot
+      type: spearman
+      value: 73.53
+    - name: 3-shot
+      type: spearman
+      value: 77.73
+  - task:
+      type: text-generation
+    dataset:
+      name: STS
+      type: STS
+    metrics:
+    - name: 0-shot
+      type: pearson
+      value: 72.28
+    - name: 1-shot
+      type: pearson
+      value: 74.46
+    - name: 3-shot
+      type: pearson
+      value: 78.75
+datasets:
+- OpenLLM-Ro/ro_sft_alpaca
+- OpenLLM-Ro/ro_sft_alpaca_gpt4
+- OpenLLM-Ro/ro_sft_dolly
+- OpenLLM-Ro/ro_sft_selfinstruct_gpt4
+- OpenLLM-Ro/ro_sft_norobots
+- OpenLLM-Ro/ro_sft_orca
+- OpenLLM-Ro/ro_sft_camel
 ---
 
 # Model Card for Model ID
@@ -90,7 +572,7 @@ print(tokenizer.decode(outputs[0]))
 <td>gemma-1.1-7b-it</td><td><center>41.44</center></td><td><center>40.32</center></td><td><center>47.22</center></td><td><center>55.01</center></td><td><center>47.03</center></td><td><center>9.50</center></td><td><center>49.58</center></td>
 </tr>
 <tr>
-<td><em>RoGemma-7b-Instruct</em></td><td><center><em><strong>53.42</strong></em></center></td><td><center><em><strong>52.44</strong></em></center></td><td><center><em><strong>54.44</strong></em></center></td><td><center><em><strong>69.36</strong></em></center></td><td><center><em><strong>61.96</strong></em></center></td><td><center><em><strong>31.06</strong></em></center></td><td><center><em><strong>51.23</strong></em></center></td>
+<td><em>RoGemma-7b-Instruct</em></td><td><center><em><strong>53.41</strong></em></center></td><td><center><em><strong>52.44</strong></em></center></td><td><center><em><strong>54.44</strong></em></center></td><td><center><em><strong>69.36</strong></em></center></td><td><center><em><strong>61.96</strong></em></center></td><td><center><em><strong>31.06</strong></em></center></td><td><center><em><strong>51.23</strong></em></center></td>
 </tr>
 </tbody>
 </table>
@@ -123,15 +605,16 @@ print(tokenizer.decode(outputs[0]))
 <td><center><strong>RO-EN<br>(Bleu)</strong></center>
 </tr>
 <tr>
-<td>gemma-1.1-7b-it</td><td><center>87.54</center></td><td><center>51.49</center></td><td><center>83.87</center></td><td><center>85.61</center></td><td><center>17.96</center></td><td><center><strong>27.74</strong></center></td><td><center>25.48</center></td><td><center>36.11</center></td>
+<td>gemma-1.1-7b-it</td><td><center>87.54</center></td><td><center>51.48</center></td><td><center>83.87</center></td><td><center>85.61</center></td><td><center>17.96</center></td><td><center><strong>27.74</strong></center></td><td><center>25.48</center></td><td><center>36.11</center></td>
 </tr>
 <tr>
-<td><em>RoGemma-7b-Instruct</em></td><td><center><em><strong>97.87</strong></em></center></td><td><center><em><strong>65.71</strong></em></center></td><td><center><em><strong>98.43</strong></em></center></td><td><center><em><strong>87.18</strong></em></center></td><td><center><em><strong>27.91</strong></em></center></td><td><center><em>23.08</em></center></td><td><center><em><strong>27.99</strong></em></center></td><td><center><em><strong>39.51</strong></em></center></td>
+<td><em>RoGemma-7b-Instruct</em></td><td><center><em><strong>97.86</strong></em></center></td><td><center><em><strong>65.70</strong></em></center></td><td><center><em><strong>98.43</strong></em></center></td><td><center><em><strong>87.17</strong></em></center></td><td><center><em><strong>27.91</strong></em></center></td><td><center><em>23.08</em></center></td><td><center><em><strong>27.99</strong></em></center></td><td><center><em><strong>39.51</strong></em></center></td>
 </tr>
 </tbody>
 </table>
 
 
+
 <table>
 <tbody>
 <tr>
@@ -158,7 +641,7 @@ print(tokenizer.decode(outputs[0]))
 <td><center><strong>(Pearson)</strong></center></td>
 </tr>
 <tr>
-<td>gemma-1.1-7b-it</td><td><center><strong>42.10</strong></center></td><td><center><strong>62.30</strong></center></td><td><center><strong>60.34</strong></center></td><td><center><strong>77.40</strong></center></td><td><center>49.10</center></td><td><center>50.23</center></td><td><center>83.43</center></td><td><center>83.65</center></td>
+<td>gemma-1.1-7b-it</td><td><center><strong>42.10</strong></center></td><td><center><strong>62.30</strong></center></td><td><center><strong>60.34</strong></center></td><td><center><strong>77.40</strong></center></td><td><center>49.10</center></td><td><center>50.23</center></td><td><center>83.43</center></td><td><center>83.64</center></td>
 </tr>
 <tr>
 <td><em>RoGemma-7b-Instruct</em></td><td><center><em>17.75</em></center></td><td><center><em>28.11</em></center></td><td><center><em>52.02</em></center></td><td><center><em>68.43</em></center></td><td><center><em><strong>73.96</strong></em></center></td><td><center><em><strong>75.16</strong></em></center></td><td><center><em><strong>86.45</strong></em></center></td><td><center><em><strong>86.31</strong></em></center></td>
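The bulk of this commit is the `model-index` front matter added in the first hunk. For readers who want to consume that metadata programmatically, here is a minimal sketch that flattens it into per-dataset metric rows; it assumes a local copy of the updated README.md and PyYAML, and is illustrative only (not part of the commit):

```python
# Minimal sketch (not part of this commit): flatten the model-index front matter
# added above into (dataset, metric, value) rows. Assumes a local copy of the
# updated README.md in the working directory and PyYAML installed.
import yaml

with open("README.md", encoding="utf-8") as f:
    text = f.read()

# The metadata block is the YAML front matter between the first two "---" markers.
front_matter = text.split("---")[1]
meta = yaml.safe_load(front_matter)

for entry in meta.get("model-index", []):
    print(entry["name"])
    for result in entry.get("results", []):
        dataset = result["dataset"]["name"]
        for metric in result["metrics"]:
            print(f'  {dataset}: {metric["name"]} ({metric["type"]}) = {metric["value"]}')
```

Parsing the front matter directly keeps the check independent of any Hub tooling; the same dictionary could equally be obtained from the raw YAML header shown on the model page.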