Spaces:
Running
Running
Upload folder using huggingface_hub
Browse files- results/cross_lingual/zero_shot/cross_logiqa.csv +1 -0
- results/cross_lingual/zero_shot/cross_logiqa_no_prompt.csv +3 -0
- results/cross_lingual/zero_shot/cross_mmlu.csv +1 -0
- results/cross_lingual/zero_shot/cross_mmlu_no_prompt.csv +3 -0
- results/cross_lingual/zero_shot/cross_xquad.csv +1 -0
- results/cross_lingual/zero_shot/cross_xquad_no_prompt.csv +3 -0
- results/cultural_reasoning/zero_shot/cn_eval.csv +3 -0
- results/cultural_reasoning/zero_shot/ph_eval.csv +3 -0
- results/cultural_reasoning/zero_shot/sg_eval_v2_mcq_no_prompt.csv +3 -0
- results/cultural_reasoning/zero_shot/sg_eval_v2_open.csv +2 -0
- results/cultural_reasoning/zero_shot/us_eval.csv +3 -0
- results/dialogue/zero_shot/dialogsum.csv +2 -0
- results/dialogue/zero_shot/dream.csv +2 -0
- results/dialogue/zero_shot/samsum.csv +3 -0
- results/emotion/zero_shot/ind_emotion.csv +3 -0
- results/emotion/zero_shot/sst2.csv +3 -0
- results/flores_translation/zero_shot/ind2eng.csv +2 -0
- results/flores_translation/zero_shot/vie2eng.csv +2 -0
- results/flores_translation/zero_shot/zho2eng.csv +2 -0
- results/flores_translation/zero_shot/zsm2eng.csv +2 -0
- results/fundamental_nlp_tasks/zero_shot/c3.csv +2 -0
- results/fundamental_nlp_tasks/zero_shot/cola.csv +3 -0
- results/fundamental_nlp_tasks/zero_shot/mnli.csv +2 -0
- results/fundamental_nlp_tasks/zero_shot/mrpc.csv +2 -0
- results/fundamental_nlp_tasks/zero_shot/ocnli.csv +2 -0
- results/fundamental_nlp_tasks/zero_shot/qnli.csv +2 -0
- results/fundamental_nlp_tasks/zero_shot/qqp.csv +2 -0
- results/fundamental_nlp_tasks/zero_shot/rte.csv +3 -0
- results/fundamental_nlp_tasks/zero_shot/wnli.csv +3 -0
- results/general_reasoning/zero_shot/c_eval.csv +2 -0
- results/general_reasoning/zero_shot/cmmlu_no_prompt.csv +7 -0
- results/general_reasoning/zero_shot/indommlu_no_prompt.csv +6 -0
- results/general_reasoning/zero_shot/mmlu_no_prompt.csv +7 -0
- results/general_reasoning/zero_shot/zbench.csv +3 -0
results/cross_lingual/zero_shot/cross_logiqa.csv
CHANGED
@@ -19,5 +19,6 @@ Qwen2_5_14B_Instruct,0.6436688311688312,0.5938311688311688,0.6177464473895627,0.
|
|
19 |
gemma2-9b-cpt-sea-lionv3-instruct,0.6055194805194805,0.6220779220779219,0.6136870270197391,0.6590909090909091,0.6590909090909091,0.5625,0.6193181818181818,0.5681818181818182,0.5852272727272727,0.5852272727272727
|
20 |
gemma-2-2b-it,0.4780844155844156,0.4448051948051948,0.46084478401384643,0.5568181818181818,0.5,0.5,0.48863636363636365,0.4375,0.4602272727272727,0.4034090909090909
|
21 |
llama3-8b-cpt-sea-lionv2-instruct,0.48538961038961037,0.4472402597402597,0.46553468284769084,0.5284090909090909,0.5113636363636364,0.5227272727272727,0.5227272727272727,0.48863636363636365,0.44886363636363635,0.375
|
|
|
22 |
Qwen2_5_0_5B_Instruct,0.3538961038961039,0.1978896103896103,0.25383898238962527,0.45454545454545453,0.39204545454545453,0.3465909090909091,0.375,0.3409090909090909,0.30113636363636365,0.26704545454545453
|
23 |
GPT4o_0513,0.7159090909090909,0.6941558441558444,0.7048646724637749,0.7613636363636364,0.7670454545454546,0.6988636363636364,0.6988636363636364,0.7045454545454546,0.6761363636363636,0.7045454545454546
|
|
|
19 |
gemma2-9b-cpt-sea-lionv3-instruct,0.6055194805194805,0.6220779220779219,0.6136870270197391,0.6590909090909091,0.6590909090909091,0.5625,0.6193181818181818,0.5681818181818182,0.5852272727272727,0.5852272727272727
|
20 |
gemma-2-2b-it,0.4780844155844156,0.4448051948051948,0.46084478401384643,0.5568181818181818,0.5,0.5,0.48863636363636365,0.4375,0.4602272727272727,0.4034090909090909
|
21 |
llama3-8b-cpt-sea-lionv2-instruct,0.48538961038961037,0.4472402597402597,0.46553468284769084,0.5284090909090909,0.5113636363636364,0.5227272727272727,0.5227272727272727,0.48863636363636365,0.44886363636363635,0.375
|
22 |
+
cross_openhermes_llama3_8b_12288_inst,0.48701298701298695,0.46964285714285725,0.4781702261263516,0.5511363636363636,0.5227272727272727,0.4715909090909091,0.4772727272727273,0.4943181818181818,0.5056818181818182,0.38636363636363635
|
23 |
Qwen2_5_0_5B_Instruct,0.3538961038961039,0.1978896103896103,0.25383898238962527,0.45454545454545453,0.39204545454545453,0.3465909090909091,0.375,0.3409090909090909,0.30113636363636365,0.26704545454545453
|
24 |
GPT4o_0513,0.7159090909090909,0.6941558441558444,0.7048646724637749,0.7613636363636364,0.7670454545454546,0.6988636363636364,0.6988636363636364,0.7045454545454546,0.6761363636363636,0.7045454545454546
|
results/cross_lingual/zero_shot/cross_logiqa_no_prompt.csv
CHANGED
@@ -6,7 +6,9 @@ Qwen2_5_7B_Instruct,0.6047077922077922,0.47938311688311697,0.5348014705675028,0.
|
|
6 |
Qwen2_5_1_5B_Instruct,0.44642857142857134,0.27094155844155837,0.33722076741815865,0.48295454545454547,0.5454545454545454,0.44886363636363635,0.4659090909090909,0.4772727272727273,0.36363636363636365,0.3409090909090909
|
7 |
Sailor2-8B-Chat,0.5503246753246753,0.5363636363636365,0.5432544747850031,0.6136363636363636,0.625,0.5056818181818182,0.5625,0.5113636363636364,0.5511363636363636,0.48295454545454547
|
8 |
Meta-Llama-3-8B-Instruct,0.5,0.4426948051948052,0.46960564830561785,0.6022727272727273,0.5227272727272727,0.5454545454545454,0.5056818181818182,0.4375,0.48295454545454547,0.4034090909090909
|
|
|
9 |
Qwen2_5_3B_Instruct,0.5097402597402597,0.3623376623376624,0.42358163053231446,0.6363636363636364,0.6136363636363636,0.4659090909090909,0.4602272727272727,0.5056818181818182,0.48863636363636365,0.3977272727272727
|
|
|
10 |
SeaLLMs-v3-7B-Chat,0.5324675324675324,0.41266233766233773,0.46497164802104307,0.5681818181818182,0.5852272727272727,0.5738636363636364,0.5568181818181818,0.4943181818181818,0.5170454545454546,0.4318181818181818
|
11 |
meralion-merged-llama3-8b-sg-inst-avg-diff,0.5105519480519481,0.4558441558441559,0.48164954476113636,0.5909090909090909,0.5284090909090909,0.5454545454545454,0.5,0.4943181818181818,0.48863636363636365,0.42613636363636365
|
12 |
gemma-2-9b-it,0.6006493506493508,0.5753246753246755,0.587714328691409,0.6590909090909091,0.6363636363636364,0.5511363636363636,0.6022727272727273,0.5852272727272727,0.6022727272727273,0.5681818181818182
|
@@ -14,4 +16,5 @@ Qwen2_5_14B_Instruct,0.6420454545454545,0.5673701298701299,0.6024023794498856,0.
|
|
14 |
gemma2-9b-cpt-sea-lionv3-instruct,0.5844155844155844,0.605844155844156,0.5949369778657175,0.6363636363636364,0.6420454545454546,0.5625,0.5681818181818182,0.5568181818181818,0.5511363636363636,0.5738636363636364
|
15 |
gemma-2-2b-it,0.48295454545454547,0.46590909090909094,0.4742787152466955,0.5170454545454546,0.5284090909090909,0.5170454545454546,0.4602272727272727,0.45454545454545453,0.48863636363636365,0.4147727272727273
|
16 |
llama3-8b-cpt-sea-lionv2-instruct,0.45373376623376627,0.37159090909090914,0.408574583313631,0.5397727272727273,0.4943181818181818,0.5340909090909091,0.45454545454545453,0.3977272727272727,0.4318181818181818,0.32386363636363635
|
|
|
17 |
Qwen2_5_0_5B_Instruct,0.3319805194805195,0.16087662337662337,0.216727730394231,0.35795454545454547,0.3806818181818182,0.32386363636363635,0.3125,0.3352272727272727,0.3068181818181818,0.3068181818181818
|
|
|
6 |
Qwen2_5_1_5B_Instruct,0.44642857142857134,0.27094155844155837,0.33722076741815865,0.48295454545454547,0.5454545454545454,0.44886363636363635,0.4659090909090909,0.4772727272727273,0.36363636363636365,0.3409090909090909
|
7 |
Sailor2-8B-Chat,0.5503246753246753,0.5363636363636365,0.5432544747850031,0.6136363636363636,0.625,0.5056818181818182,0.5625,0.5113636363636364,0.5511363636363636,0.48295454545454547
|
8 |
Meta-Llama-3-8B-Instruct,0.5,0.4426948051948052,0.46960564830561785,0.6022727272727273,0.5227272727272727,0.5454545454545454,0.5056818181818182,0.4375,0.48295454545454547,0.4034090909090909
|
9 |
+
Meta-Llama-3.1-70B-Instruct,0.6574675324675324,0.5948051948051948,0.6245685866654581,0.6988636363636364,0.7102272727272727,0.6875,0.6590909090909091,0.6477272727272727,0.6534090909090909,0.5454545454545454
|
10 |
Qwen2_5_3B_Instruct,0.5097402597402597,0.3623376623376624,0.42358163053231446,0.6363636363636364,0.6136363636363636,0.4659090909090909,0.4602272727272727,0.5056818181818182,0.48863636363636365,0.3977272727272727
|
11 |
+
llama3.1-70b-cpt-sea-lionv3-instruct,0.6793831168831169,0.6006493506493507,0.6375948084391003,0.7784090909090909,0.7045454545454546,0.6875,0.6534090909090909,0.6647727272727273,0.6590909090909091,0.6079545454545454
|
12 |
SeaLLMs-v3-7B-Chat,0.5324675324675324,0.41266233766233773,0.46497164802104307,0.5681818181818182,0.5852272727272727,0.5738636363636364,0.5568181818181818,0.4943181818181818,0.5170454545454546,0.4318181818181818
|
13 |
meralion-merged-llama3-8b-sg-inst-avg-diff,0.5105519480519481,0.4558441558441559,0.48164954476113636,0.5909090909090909,0.5284090909090909,0.5454545454545454,0.5,0.4943181818181818,0.48863636363636365,0.42613636363636365
|
14 |
gemma-2-9b-it,0.6006493506493508,0.5753246753246755,0.587714328691409,0.6590909090909091,0.6363636363636364,0.5511363636363636,0.6022727272727273,0.5852272727272727,0.6022727272727273,0.5681818181818182
|
|
|
16 |
gemma2-9b-cpt-sea-lionv3-instruct,0.5844155844155844,0.605844155844156,0.5949369778657175,0.6363636363636364,0.6420454545454546,0.5625,0.5681818181818182,0.5568181818181818,0.5511363636363636,0.5738636363636364
|
17 |
gemma-2-2b-it,0.48295454545454547,0.46590909090909094,0.4742787152466955,0.5170454545454546,0.5284090909090909,0.5170454545454546,0.4602272727272727,0.45454545454545453,0.48863636363636365,0.4147727272727273
|
18 |
llama3-8b-cpt-sea-lionv2-instruct,0.45373376623376627,0.37159090909090914,0.408574583313631,0.5397727272727273,0.4943181818181818,0.5340909090909091,0.45454545454545453,0.3977272727272727,0.4318181818181818,0.32386363636363635
|
19 |
+
cross_openhermes_llama3_8b_12288_inst,0.47970779220779214,0.4741883116883117,0.476932083353324,0.5625,0.5227272727272727,0.5,0.4375,0.4659090909090909,0.5,0.3693181818181818
|
20 |
Qwen2_5_0_5B_Instruct,0.3319805194805195,0.16087662337662337,0.216727730394231,0.35795454545454547,0.3806818181818182,0.32386363636363635,0.3125,0.3352272727272727,0.3068181818181818,0.3068181818181818
|
results/cross_lingual/zero_shot/cross_mmlu.csv
CHANGED
@@ -19,5 +19,6 @@ Qwen2_5_14B_Instruct,0.7266666666666666,0.680952380952381,0.7030672078887086,0.7
|
|
19 |
gemma2-9b-cpt-sea-lionv3-instruct,0.7180952380952382,0.7485714285714284,0.7330166975381478,0.78,0.7133333333333334,0.7133333333333334,0.6866666666666666,0.7266666666666667,0.7,0.7066666666666667
|
20 |
gemma-2-2b-it,0.5780952380952381,0.5480000000000002,0.5626454667971265,0.7,0.5866666666666667,0.5866666666666667,0.5333333333333333,0.5666666666666667,0.5333333333333333,0.54
|
21 |
llama3-8b-cpt-sea-lionv2-instruct,0.6104761904761905,0.5685714285714286,0.5887791368067445,0.72,0.6,0.6133333333333333,0.58,0.6333333333333333,0.5933333333333334,0.5333333333333333
|
|
|
22 |
Qwen2_5_0_5B_Instruct,0.4228571428571429,0.2436190476190476,0.3091364879297727,0.6133333333333333,0.5,0.4266666666666667,0.4066666666666667,0.3933333333333333,0.3333333333333333,0.2866666666666667
|
23 |
GPT4o_0513,0.8038095238095239,0.8506666666666668,0.8265745643832277,0.8266666666666667,0.7933333333333333,0.8,0.7666666666666667,0.7933333333333333,0.8266666666666667,0.82
|
|
|
19 |
gemma2-9b-cpt-sea-lionv3-instruct,0.7180952380952382,0.7485714285714284,0.7330166975381478,0.78,0.7133333333333334,0.7133333333333334,0.6866666666666666,0.7266666666666667,0.7,0.7066666666666667
|
20 |
gemma-2-2b-it,0.5780952380952381,0.5480000000000002,0.5626454667971265,0.7,0.5866666666666667,0.5866666666666667,0.5333333333333333,0.5666666666666667,0.5333333333333333,0.54
|
21 |
llama3-8b-cpt-sea-lionv2-instruct,0.6104761904761905,0.5685714285714286,0.5887791368067445,0.72,0.6,0.6133333333333333,0.58,0.6333333333333333,0.5933333333333334,0.5333333333333333
|
22 |
+
cross_openhermes_llama3_8b_12288_inst,0.5733333333333333,0.5337142857142858,0.5528148657449711,0.6666666666666666,0.6,0.5466666666666666,0.5333333333333333,0.5666666666666667,0.5933333333333334,0.5066666666666667
|
23 |
Qwen2_5_0_5B_Instruct,0.4228571428571429,0.2436190476190476,0.3091364879297727,0.6133333333333333,0.5,0.4266666666666667,0.4066666666666667,0.3933333333333333,0.3333333333333333,0.2866666666666667
|
24 |
GPT4o_0513,0.8038095238095239,0.8506666666666668,0.8265745643832277,0.8266666666666667,0.7933333333333333,0.8,0.7666666666666667,0.7933333333333333,0.8266666666666667,0.82
|
results/cross_lingual/zero_shot/cross_mmlu_no_prompt.csv
CHANGED
@@ -6,7 +6,9 @@ Qwen2_5_7B_Instruct,0.7742857142857141,0.6222857142857142,0.6900140284752591,0.8
|
|
6 |
Qwen2_5_1_5B_Instruct,0.5285714285714286,0.32666666666666666,0.403786191489535,0.7,0.6333333333333333,0.5333333333333333,0.4666666666666667,0.5,0.44,0.4266666666666667
|
7 |
Sailor2-8B-Chat,0.6923809523809524,0.6592380952380954,0.6754031781322388,0.7266666666666667,0.7066666666666667,0.7133333333333334,0.6733333333333333,0.6733333333333333,0.6466666666666666,0.7066666666666667
|
8 |
Meta-Llama-3-8B-Instruct,0.6647619047619048,0.5036190476190476,0.5730780815259353,0.7733333333333333,0.66,0.6666666666666666,0.66,0.6266666666666667,0.64,0.6266666666666667
|
|
|
9 |
Qwen2_5_3B_Instruct,0.6676190476190477,0.45619047619047626,0.5420161420018103,0.8333333333333334,0.74,0.7,0.62,0.68,0.62,0.48
|
|
|
10 |
SeaLLMs-v3-7B-Chat,0.7342857142857142,0.5765714285714287,0.6459409639562039,0.8333333333333334,0.7266666666666667,0.7866666666666666,0.7133333333333334,0.74,0.6866666666666666,0.6533333333333333
|
11 |
meralion-merged-llama3-8b-sg-inst-avg-diff,0.6980952380952381,0.5891428571428572,0.6390081595918414,0.8466666666666667,0.6933333333333334,0.6933333333333334,0.6933333333333334,0.7133333333333334,0.6133333333333333,0.6333333333333333
|
12 |
gemma-2-9b-it,0.781904761904762,0.747047619047619,0.7640788528690432,0.84,0.7933333333333333,0.7866666666666666,0.7466666666666667,0.78,0.7466666666666667,0.78
|
@@ -14,5 +16,6 @@ Qwen2_5_14B_Instruct,0.8142857142857143,0.7396190476190475,0.7751584771679209,0.
|
|
14 |
gemma2-9b-cpt-sea-lionv3-instruct,0.7809523809523808,0.7506666666666667,0.7655100940510849,0.8466666666666667,0.7866666666666666,0.7733333333333333,0.78,0.7933333333333333,0.7333333333333333,0.7533333333333333
|
15 |
gemma-2-2b-it,0.6514285714285714,0.5255238095238095,0.5817418444469077,0.76,0.6533333333333333,0.7,0.6066666666666667,0.64,0.5866666666666667,0.6133333333333333
|
16 |
llama3-8b-cpt-sea-lionv2-instruct,0.6685714285714285,0.5620952380952383,0.6107272204160255,0.76,0.66,0.6733333333333333,0.6533333333333333,0.6866666666666666,0.64,0.6066666666666667
|
|
|
17 |
Qwen2_5_0_5B_Instruct,0.41904761904761906,0.17276190476190473,0.24465799189698598,0.5666666666666667,0.4666666666666667,0.4066666666666667,0.4066666666666667,0.37333333333333335,0.34,0.37333333333333335
|
18 |
GPT4o_0513,0.8819047619047619,0.8609523809523807,0.8713026281050943,0.9266666666666666,0.8866666666666667,0.9066666666666666,0.7933333333333333,0.88,0.9066666666666666,0.8733333333333333
|
|
|
6 |
Qwen2_5_1_5B_Instruct,0.5285714285714286,0.32666666666666666,0.403786191489535,0.7,0.6333333333333333,0.5333333333333333,0.4666666666666667,0.5,0.44,0.4266666666666667
|
7 |
Sailor2-8B-Chat,0.6923809523809524,0.6592380952380954,0.6754031781322388,0.7266666666666667,0.7066666666666667,0.7133333333333334,0.6733333333333333,0.6733333333333333,0.6466666666666666,0.7066666666666667
|
8 |
Meta-Llama-3-8B-Instruct,0.6647619047619048,0.5036190476190476,0.5730780815259353,0.7733333333333333,0.66,0.6666666666666666,0.66,0.6266666666666667,0.64,0.6266666666666667
|
9 |
+
Meta-Llama-3.1-70B-Instruct,0.8504761904761905,0.8062857142857143,0.8277916105009011,0.9066666666666666,0.8066666666666666,0.8733333333333333,0.84,0.8533333333333334,0.8466666666666667,0.8266666666666667
|
10 |
Qwen2_5_3B_Instruct,0.6676190476190477,0.45619047619047626,0.5420161420018103,0.8333333333333334,0.74,0.7,0.62,0.68,0.62,0.48
|
11 |
+
llama3.1-70b-cpt-sea-lionv3-instruct,0.8657142857142858,0.8251428571428571,0.84494182392531,0.9266666666666666,0.8266666666666667,0.8866666666666667,0.8666666666666667,0.8466666666666667,0.86,0.8466666666666667
|
12 |
SeaLLMs-v3-7B-Chat,0.7342857142857142,0.5765714285714287,0.6459409639562039,0.8333333333333334,0.7266666666666667,0.7866666666666666,0.7133333333333334,0.74,0.6866666666666666,0.6533333333333333
|
13 |
meralion-merged-llama3-8b-sg-inst-avg-diff,0.6980952380952381,0.5891428571428572,0.6390081595918414,0.8466666666666667,0.6933333333333334,0.6933333333333334,0.6933333333333334,0.7133333333333334,0.6133333333333333,0.6333333333333333
|
14 |
gemma-2-9b-it,0.781904761904762,0.747047619047619,0.7640788528690432,0.84,0.7933333333333333,0.7866666666666666,0.7466666666666667,0.78,0.7466666666666667,0.78
|
|
|
16 |
gemma2-9b-cpt-sea-lionv3-instruct,0.7809523809523808,0.7506666666666667,0.7655100940510849,0.8466666666666667,0.7866666666666666,0.7733333333333333,0.78,0.7933333333333333,0.7333333333333333,0.7533333333333333
|
17 |
gemma-2-2b-it,0.6514285714285714,0.5255238095238095,0.5817418444469077,0.76,0.6533333333333333,0.7,0.6066666666666667,0.64,0.5866666666666667,0.6133333333333333
|
18 |
llama3-8b-cpt-sea-lionv2-instruct,0.6685714285714285,0.5620952380952383,0.6107272204160255,0.76,0.66,0.6733333333333333,0.6533333333333333,0.6866666666666666,0.64,0.6066666666666667
|
19 |
+
cross_openhermes_llama3_8b_12288_inst,0.6066666666666667,0.4874285714285715,0.54055013922636,0.7266666666666667,0.6,0.6,0.5866666666666667,0.58,0.5733333333333334,0.58
|
20 |
Qwen2_5_0_5B_Instruct,0.41904761904761906,0.17276190476190473,0.24465799189698598,0.5666666666666667,0.4666666666666667,0.4066666666666667,0.4066666666666667,0.37333333333333335,0.34,0.37333333333333335
|
21 |
GPT4o_0513,0.8819047619047619,0.8609523809523807,0.8713026281050943,0.9266666666666666,0.8866666666666667,0.9066666666666666,0.7933333333333333,0.88,0.9066666666666666,0.8733333333333333
|
results/cross_lingual/zero_shot/cross_xquad.csv
CHANGED
@@ -17,5 +17,6 @@ Qwen2_5_14B_Instruct,0.9581932773109244,0.9474789915966386,0.9528060148705768,0.
|
|
17 |
gemma2-9b-cpt-sea-lionv3-instruct,0.9573529411764706,0.9365546218487395,0.9468395810403457,0.9638655462184874,0.9428571428571428,0.9605042016806723,0.9621848739495799,,,
|
18 |
gemma-2-2b-it,0.917016806722689,0.8665966386554622,0.8910940700869288,0.934453781512605,0.9025210084033614,0.9193277310924369,0.9117647058823529,,,
|
19 |
llama3-8b-cpt-sea-lionv2-instruct,0.9365546218487395,0.9086134453781513,0.9223724784871395,0.9420168067226891,0.926890756302521,0.9436974789915966,0.9336134453781513,,,
|
|
|
20 |
Qwen2_5_0_5B_Instruct,0.6584033613445378,0.48172268907563026,0.5563732844778362,0.692436974789916,0.673109243697479,0.653781512605042,0.6142857142857143,,,
|
21 |
GPT4o_0513,0.9605042016806723,0.951890756302521,0.9561780814209724,0.965546218487395,0.9537815126050421,0.9630252100840336,0.9596638655462185,,,
|
|
|
17 |
gemma2-9b-cpt-sea-lionv3-instruct,0.9573529411764706,0.9365546218487395,0.9468395810403457,0.9638655462184874,0.9428571428571428,0.9605042016806723,0.9621848739495799,,,
|
18 |
gemma-2-2b-it,0.917016806722689,0.8665966386554622,0.8910940700869288,0.934453781512605,0.9025210084033614,0.9193277310924369,0.9117647058823529,,,
|
19 |
llama3-8b-cpt-sea-lionv2-instruct,0.9365546218487395,0.9086134453781513,0.9223724784871395,0.9420168067226891,0.926890756302521,0.9436974789915966,0.9336134453781513,,,
|
20 |
+
cross_openhermes_llama3_8b_12288_inst,0.9273109243697479,0.8850840336134453,0.9057055579353634,0.9394957983193277,0.9252100840336135,0.9218487394957983,0.9226890756302522,,,
|
21 |
Qwen2_5_0_5B_Instruct,0.6584033613445378,0.48172268907563026,0.5563732844778362,0.692436974789916,0.673109243697479,0.653781512605042,0.6142857142857143,,,
|
22 |
GPT4o_0513,0.9605042016806723,0.951890756302521,0.9561780814209724,0.965546218487395,0.9537815126050421,0.9630252100840336,0.9596638655462185,,,
|
results/cross_lingual/zero_shot/cross_xquad_no_prompt.csv
CHANGED
@@ -6,7 +6,9 @@ Qwen2_5_7B_Instruct,0.9069327731092437,0.8264705882352941,0.8648342089942876,0.9
|
|
6 |
Qwen2_5_1_5B_Instruct,0.8439075630252101,0.6844537815126049,0.7558627739261137,0.8890756302521008,0.8369747899159664,0.8588235294117647,0.7907563025210084,,,
|
7 |
Sailor2-8B-Chat,0.9086134453781513,0.8378151260504201,0.8717792421413649,0.9252100840336135,0.8949579831932774,0.9117647058823529,0.9025210084033614,,,
|
8 |
Meta-Llama-3-8B-Instruct,0.9117647058823529,0.8266806722689075,0.8671405721911006,0.9310924369747899,0.8848739495798319,0.9277310924369748,0.9033613445378151,,,
|
|
|
9 |
Qwen2_5_3B_Instruct,0.8859243697478991,0.7831932773109245,0.8313973694706849,0.9058823529411765,0.8739495798319328,0.9008403361344538,0.8630252100840337,,,
|
|
|
10 |
SeaLLMs-v3-7B-Chat,0.8943277310924369,0.7991596638655463,0.8440696412045011,0.9210084033613445,0.8773109243697479,0.9,0.8789915966386554,,,
|
11 |
meralion-merged-llama3-8b-sg-inst-avg-diff,0.9117647058823529,0.8266806722689075,0.8671405721911006,0.9302521008403362,0.8899159663865546,0.9210084033613445,0.9058823529411765,,,
|
12 |
gemma-2-9b-it,0.8668067226890757,0.7012605042016806,0.7752949732453414,0.8773109243697479,0.8529411764705882,0.8714285714285714,0.865546218487395,,,
|
@@ -14,5 +16,6 @@ Qwen2_5_14B_Instruct,0.9084033613445378,0.8453781512605042,0.8757582956018183,0.
|
|
14 |
gemma2-9b-cpt-sea-lionv3-instruct,0.9315126050420168,0.8716386554621849,0.9005815677746684,0.9453781512605042,0.9142857142857143,0.9369747899159664,0.9294117647058824,,,
|
15 |
gemma-2-2b-it,0.8873949579831932,0.7871848739495798,0.8342915336505994,0.9184873949579831,0.8638655462184874,0.9016806722689076,0.865546218487395,,,
|
16 |
llama3-8b-cpt-sea-lionv2-instruct,0.9296218487394958,0.8630252100840337,0.8950865005059928,0.9445378151260504,0.9058823529411765,0.9411764705882353,0.926890756302521,,,
|
|
|
17 |
Qwen2_5_0_5B_Instruct,0.7186974789915966,0.4804621848739496,0.5759149034045948,0.7815126050420168,0.7142857142857143,0.7478991596638656,0.6310924369747899,,,
|
18 |
GPT4o_0513,0.8941176470588236,0.8014705882352942,0.8452629967360276,0.9302521008403362,0.8857142857142857,0.9168067226890756,0.8436974789915966,,,
|
|
|
6 |
Qwen2_5_1_5B_Instruct,0.8439075630252101,0.6844537815126049,0.7558627739261137,0.8890756302521008,0.8369747899159664,0.8588235294117647,0.7907563025210084,,,
|
7 |
Sailor2-8B-Chat,0.9086134453781513,0.8378151260504201,0.8717792421413649,0.9252100840336135,0.8949579831932774,0.9117647058823529,0.9025210084033614,,,
|
8 |
Meta-Llama-3-8B-Instruct,0.9117647058823529,0.8266806722689075,0.8671405721911006,0.9310924369747899,0.8848739495798319,0.9277310924369748,0.9033613445378151,,,
|
9 |
+
Meta-Llama-3.1-70B-Instruct,0.9394957983193277,0.8817226890756302,0.9096928977574603,0.9537815126050421,0.9310924369747899,0.946218487394958,0.926890756302521,,,
|
10 |
Qwen2_5_3B_Instruct,0.8859243697478991,0.7831932773109245,0.8313973694706849,0.9058823529411765,0.8739495798319328,0.9008403361344538,0.8630252100840337,,,
|
11 |
+
llama3.1-70b-cpt-sea-lionv3-instruct,0.9365546218487395,0.8758403361344538,0.9051805304907758,0.9495798319327731,0.9277310924369748,0.9403361344537815,0.9285714285714286,,,
|
12 |
SeaLLMs-v3-7B-Chat,0.8943277310924369,0.7991596638655463,0.8440696412045011,0.9210084033613445,0.8773109243697479,0.9,0.8789915966386554,,,
|
13 |
meralion-merged-llama3-8b-sg-inst-avg-diff,0.9117647058823529,0.8266806722689075,0.8671405721911006,0.9302521008403362,0.8899159663865546,0.9210084033613445,0.9058823529411765,,,
|
14 |
gemma-2-9b-it,0.8668067226890757,0.7012605042016806,0.7752949732453414,0.8773109243697479,0.8529411764705882,0.8714285714285714,0.865546218487395,,,
|
|
|
16 |
gemma2-9b-cpt-sea-lionv3-instruct,0.9315126050420168,0.8716386554621849,0.9005815677746684,0.9453781512605042,0.9142857142857143,0.9369747899159664,0.9294117647058824,,,
|
17 |
gemma-2-2b-it,0.8873949579831932,0.7871848739495798,0.8342915336505994,0.9184873949579831,0.8638655462184874,0.9016806722689076,0.865546218487395,,,
|
18 |
llama3-8b-cpt-sea-lionv2-instruct,0.9296218487394958,0.8630252100840337,0.8950865005059928,0.9445378151260504,0.9058823529411765,0.9411764705882353,0.926890756302521,,,
|
19 |
+
cross_openhermes_llama3_8b_12288_inst,0.9054621848739496,0.8298319327731092,0.8659989418997561,0.9285714285714286,0.892436974789916,0.9134453781512605,0.8873949579831932,,,
|
20 |
Qwen2_5_0_5B_Instruct,0.7186974789915966,0.4804621848739496,0.5759149034045948,0.7815126050420168,0.7142857142857143,0.7478991596638656,0.6310924369747899,,,
|
21 |
GPT4o_0513,0.8941176470588236,0.8014705882352942,0.8452629967360276,0.9302521008403362,0.8857142857142857,0.9168067226890756,0.8436974789915966,,,
|
results/cultural_reasoning/zero_shot/cn_eval.csv
CHANGED
@@ -10,14 +10,17 @@ Sailor2-8B-Chat,0.7142857142857143
|
|
10 |
Meta-Llama-3-8B-Instruct,0.4666666666666667
|
11 |
Meta-Llama-3.1-70B-Instruct,0.5428571428571428
|
12 |
Qwen2_5_3B_Instruct,0.7142857142857143
|
|
|
13 |
SeaLLMs-v3-7B-Chat,0.819047619047619
|
14 |
Qwen2_5_72B_Instruct,0.8761904761904762
|
15 |
meralion-merged-llama3-8b-sg-inst-avg-diff,0.5142857142857142
|
16 |
gemma-2-9b-it,0.580952380952381
|
|
|
17 |
Meta-Llama-3-70B-Instruct,0.5333333333333333
|
18 |
Qwen2_5_14B_Instruct,0.8285714285714286
|
19 |
gemma2-9b-cpt-sea-lionv3-instruct,0.5904761904761905
|
20 |
gemma-2-2b-it,0.3619047619047619
|
21 |
llama3-8b-cpt-sea-lionv2-instruct,0.49523809523809526
|
|
|
22 |
Qwen2_5_0_5B_Instruct,0.3619047619047619
|
23 |
GPT4o_0513,0.8095238095238095
|
|
|
10 |
Meta-Llama-3-8B-Instruct,0.4666666666666667
|
11 |
Meta-Llama-3.1-70B-Instruct,0.5428571428571428
|
12 |
Qwen2_5_3B_Instruct,0.7142857142857143
|
13 |
+
llama3.1-70b-cpt-sea-lionv3-instruct,0.5523809523809524
|
14 |
SeaLLMs-v3-7B-Chat,0.819047619047619
|
15 |
Qwen2_5_72B_Instruct,0.8761904761904762
|
16 |
meralion-merged-llama3-8b-sg-inst-avg-diff,0.5142857142857142
|
17 |
gemma-2-9b-it,0.580952380952381
|
18 |
+
llama3.1-8b-cpt-sea-lionv3-instruct,0.5142857142857142
|
19 |
Meta-Llama-3-70B-Instruct,0.5333333333333333
|
20 |
Qwen2_5_14B_Instruct,0.8285714285714286
|
21 |
gemma2-9b-cpt-sea-lionv3-instruct,0.5904761904761905
|
22 |
gemma-2-2b-it,0.3619047619047619
|
23 |
llama3-8b-cpt-sea-lionv2-instruct,0.49523809523809526
|
24 |
+
cross_openhermes_llama3_8b_12288_inst,0.5523809523809524
|
25 |
Qwen2_5_0_5B_Instruct,0.3619047619047619
|
26 |
GPT4o_0513,0.8095238095238095
|
results/cultural_reasoning/zero_shot/ph_eval.csv
CHANGED
@@ -10,14 +10,17 @@ Sailor2-8B-Chat,0.53
|
|
10 |
Meta-Llama-3-8B-Instruct,0.58
|
11 |
Meta-Llama-3.1-70B-Instruct,0.68
|
12 |
Qwen2_5_3B_Instruct,0.4
|
|
|
13 |
SeaLLMs-v3-7B-Chat,0.47
|
14 |
Qwen2_5_72B_Instruct,0.72
|
15 |
meralion-merged-llama3-8b-sg-inst-avg-diff,0.54
|
16 |
gemma-2-9b-it,0.58
|
|
|
17 |
Meta-Llama-3-70B-Instruct,0.63
|
18 |
Qwen2_5_14B_Instruct,0.6
|
19 |
gemma2-9b-cpt-sea-lionv3-instruct,0.59
|
20 |
gemma-2-2b-it,0.4
|
21 |
llama3-8b-cpt-sea-lionv2-instruct,0.56
|
|
|
22 |
Qwen2_5_0_5B_Instruct,0.27
|
23 |
GPT4o_0513,0.77
|
|
|
10 |
Meta-Llama-3-8B-Instruct,0.58
|
11 |
Meta-Llama-3.1-70B-Instruct,0.68
|
12 |
Qwen2_5_3B_Instruct,0.4
|
13 |
+
llama3.1-70b-cpt-sea-lionv3-instruct,0.69
|
14 |
SeaLLMs-v3-7B-Chat,0.47
|
15 |
Qwen2_5_72B_Instruct,0.72
|
16 |
meralion-merged-llama3-8b-sg-inst-avg-diff,0.54
|
17 |
gemma-2-9b-it,0.58
|
18 |
+
llama3.1-8b-cpt-sea-lionv3-instruct,0.6
|
19 |
Meta-Llama-3-70B-Instruct,0.63
|
20 |
Qwen2_5_14B_Instruct,0.6
|
21 |
gemma2-9b-cpt-sea-lionv3-instruct,0.59
|
22 |
gemma-2-2b-it,0.4
|
23 |
llama3-8b-cpt-sea-lionv2-instruct,0.56
|
24 |
+
cross_openhermes_llama3_8b_12288_inst,0.52
|
25 |
Qwen2_5_0_5B_Instruct,0.27
|
26 |
GPT4o_0513,0.77
|
results/cultural_reasoning/zero_shot/sg_eval_v2_mcq_no_prompt.csv
CHANGED
@@ -6,7 +6,9 @@ Qwen2_5_7B_Instruct,0.7654545454545455
|
|
6 |
Qwen2_5_1_5B_Instruct,0.6927272727272727
|
7 |
Sailor2-8B-Chat,0.7145454545454546
|
8 |
Meta-Llama-3-8B-Instruct,0.8290909090909091
|
|
|
9 |
Qwen2_5_3B_Instruct,0.7072727272727273
|
|
|
10 |
SeaLLMs-v3-7B-Chat,0.7581818181818182
|
11 |
meralion-merged-llama3-8b-sg-inst-avg-diff,0.7854545454545454
|
12 |
gemma-2-9b-it,0.7618181818181818
|
@@ -14,5 +16,6 @@ Qwen2_5_14B_Instruct,0.8236363636363636
|
|
14 |
gemma2-9b-cpt-sea-lionv3-instruct,0.7818181818181819
|
15 |
gemma-2-2b-it,0.6927272727272727
|
16 |
llama3-8b-cpt-sea-lionv2-instruct,0.7981818181818182
|
|
|
17 |
Qwen2_5_0_5B_Instruct,0.5490909090909091
|
18 |
GPT4o_0513,0.9072727272727272
|
|
|
6 |
Qwen2_5_1_5B_Instruct,0.6927272727272727
|
7 |
Sailor2-8B-Chat,0.7145454545454546
|
8 |
Meta-Llama-3-8B-Instruct,0.8290909090909091
|
9 |
+
Meta-Llama-3.1-70B-Instruct,0.8854545454545455
|
10 |
Qwen2_5_3B_Instruct,0.7072727272727273
|
11 |
+
llama3.1-70b-cpt-sea-lionv3-instruct,0.8854545454545455
|
12 |
SeaLLMs-v3-7B-Chat,0.7581818181818182
|
13 |
meralion-merged-llama3-8b-sg-inst-avg-diff,0.7854545454545454
|
14 |
gemma-2-9b-it,0.7618181818181818
|
|
|
16 |
gemma2-9b-cpt-sea-lionv3-instruct,0.7818181818181819
|
17 |
gemma-2-2b-it,0.6927272727272727
|
18 |
llama3-8b-cpt-sea-lionv2-instruct,0.7981818181818182
|
19 |
+
cross_openhermes_llama3_8b_12288_inst,0.7945454545454546
|
20 |
Qwen2_5_0_5B_Instruct,0.5490909090909091
|
21 |
GPT4o_0513,0.9072727272727272
|
results/cultural_reasoning/zero_shot/sg_eval_v2_open.csv
CHANGED
@@ -10,6 +10,7 @@ Sailor2-8B-Chat,54.36
|
|
10 |
Meta-Llama-3-8B-Instruct,51.120000000000005
|
11 |
Meta-Llama-3.1-70B-Instruct,51.31999999999999
|
12 |
Qwen2_5_3B_Instruct,47.24
|
|
|
13 |
SeaLLMs-v3-7B-Chat,55.0
|
14 |
Qwen2_5_72B_Instruct,53.32
|
15 |
meralion-merged-llama3-8b-sg-inst-avg-diff,49.2
|
@@ -19,5 +20,6 @@ Qwen2_5_14B_Instruct,53.2
|
|
19 |
gemma2-9b-cpt-sea-lionv3-instruct,55.0
|
20 |
gemma-2-2b-it,52.08
|
21 |
llama3-8b-cpt-sea-lionv2-instruct,50.03999999999999
|
|
|
22 |
Qwen2_5_0_5B_Instruct,35.28
|
23 |
GPT4o_0513,57.28
|
|
|
10 |
Meta-Llama-3-8B-Instruct,51.120000000000005
|
11 |
Meta-Llama-3.1-70B-Instruct,51.31999999999999
|
12 |
Qwen2_5_3B_Instruct,47.24
|
13 |
+
llama3.1-70b-cpt-sea-lionv3-instruct,51.92
|
14 |
SeaLLMs-v3-7B-Chat,55.0
|
15 |
Qwen2_5_72B_Instruct,53.32
|
16 |
meralion-merged-llama3-8b-sg-inst-avg-diff,49.2
|
|
|
20 |
gemma2-9b-cpt-sea-lionv3-instruct,55.0
|
21 |
gemma-2-2b-it,52.08
|
22 |
llama3-8b-cpt-sea-lionv2-instruct,50.03999999999999
|
23 |
+
cross_openhermes_llama3_8b_12288_inst,52.480000000000004
|
24 |
Qwen2_5_0_5B_Instruct,35.28
|
25 |
GPT4o_0513,57.28
|
results/cultural_reasoning/zero_shot/us_eval.csv
CHANGED
@@ -10,14 +10,17 @@ Sailor2-8B-Chat,0.7009345794392523
|
|
10 |
Meta-Llama-3-8B-Instruct,0.7009345794392523
|
11 |
Meta-Llama-3.1-70B-Instruct,0.8411214953271028
|
12 |
Qwen2_5_3B_Instruct,0.6728971962616822
|
|
|
13 |
SeaLLMs-v3-7B-Chat,0.6915887850467289
|
14 |
Qwen2_5_72B_Instruct,0.8598130841121495
|
15 |
meralion-merged-llama3-8b-sg-inst-avg-diff,0.7383177570093458
|
16 |
gemma-2-9b-it,0.8130841121495327
|
|
|
17 |
Meta-Llama-3-70B-Instruct,0.8691588785046729
|
18 |
Qwen2_5_14B_Instruct,0.822429906542056
|
19 |
gemma2-9b-cpt-sea-lionv3-instruct,0.8037383177570093
|
20 |
gemma-2-2b-it,0.6915887850467289
|
21 |
llama3-8b-cpt-sea-lionv2-instruct,0.7009345794392523
|
|
|
22 |
Qwen2_5_0_5B_Instruct,0.37383177570093457
|
23 |
GPT4o_0513,0.8691588785046729
|
|
|
10 |
Meta-Llama-3-8B-Instruct,0.7009345794392523
|
11 |
Meta-Llama-3.1-70B-Instruct,0.8411214953271028
|
12 |
Qwen2_5_3B_Instruct,0.6728971962616822
|
13 |
+
llama3.1-70b-cpt-sea-lionv3-instruct,0.8691588785046729
|
14 |
SeaLLMs-v3-7B-Chat,0.6915887850467289
|
15 |
Qwen2_5_72B_Instruct,0.8598130841121495
|
16 |
meralion-merged-llama3-8b-sg-inst-avg-diff,0.7383177570093458
|
17 |
gemma-2-9b-it,0.8130841121495327
|
18 |
+
llama3.1-8b-cpt-sea-lionv3-instruct,0.7570093457943925
|
19 |
Meta-Llama-3-70B-Instruct,0.8691588785046729
|
20 |
Qwen2_5_14B_Instruct,0.822429906542056
|
21 |
gemma2-9b-cpt-sea-lionv3-instruct,0.8037383177570093
|
22 |
gemma-2-2b-it,0.6915887850467289
|
23 |
llama3-8b-cpt-sea-lionv2-instruct,0.7009345794392523
|
24 |
+
cross_openhermes_llama3_8b_12288_inst,0.7663551401869159
|
25 |
Qwen2_5_0_5B_Instruct,0.37383177570093457
|
26 |
GPT4o_0513,0.8691588785046729
|
results/dialogue/zero_shot/dialogsum.csv
CHANGED
@@ -10,6 +10,7 @@ Sailor2-8B-Chat,0.19777087324327317,0.2970393044008424,0.07701994204737679,0.219
|
|
10 |
Meta-Llama-3-8B-Instruct,0.23978455271183616,0.33971099717559883,0.1203340311564728,0.2593086298034369
|
11 |
Meta-Llama-3.1-70B-Instruct,0.2526239717396146,0.35714386898604744,0.1258832921736473,0.27484475405914904
|
12 |
Qwen2_5_3B_Instruct,0.22107390172674926,0.32206286484028823,0.10065030710901035,0.24050853323094928
|
|
|
13 |
SeaLLMs-v3-7B-Chat,0.24891094210680076,0.35393482223136147,0.12172072639345373,0.27107727769558715
|
14 |
Qwen2_5_72B_Instruct,0.23460549655507293,0.3373580017785426,0.10893746645433498,0.25752102143234123
|
15 |
meralion-merged-llama3-8b-sg-inst-avg-diff,0.25236243090492,0.3573462392196718,0.125506438977953,0.27423461451713527
|
@@ -19,5 +20,6 @@ Qwen2_5_14B_Instruct,0.2343478938479703,0.3386251381162625,0.10742381514017992,0
|
|
19 |
gemma2-9b-cpt-sea-lionv3-instruct,0.23790909190042164,0.34294544984076464,0.1078722585042388,0.26290956735626153
|
20 |
gemma-2-2b-it,0.2597323674875989,0.36848124762381895,0.12622684440269072,0.2844890104362872
|
21 |
llama3-8b-cpt-sea-lionv2-instruct,0.25777587511641403,0.35911990072292727,0.13269121463917308,0.2815165099871418
|
|
|
22 |
Qwen2_5_0_5B_Instruct,0.19408176276624156,0.28989753303423227,0.07842728643649079,0.21392046882800164
|
23 |
GPT4o_0513,0.2375730297294346,0.3364674648846549,0.11718194476069822,0.25906967954295057
|
|
|
10 |
Meta-Llama-3-8B-Instruct,0.23978455271183616,0.33971099717559883,0.1203340311564728,0.2593086298034369
|
11 |
Meta-Llama-3.1-70B-Instruct,0.2526239717396146,0.35714386898604744,0.1258832921736473,0.27484475405914904
|
12 |
Qwen2_5_3B_Instruct,0.22107390172674926,0.32206286484028823,0.10065030710901035,0.24050853323094928
|
13 |
+
llama3.1-70b-cpt-sea-lionv3-instruct,0.24811540013390557,0.3522697826489573,0.1222020386386602,0.2698743791140993
|
14 |
SeaLLMs-v3-7B-Chat,0.24891094210680076,0.35393482223136147,0.12172072639345373,0.27107727769558715
|
15 |
Qwen2_5_72B_Instruct,0.23460549655507293,0.3373580017785426,0.10893746645433498,0.25752102143234123
|
16 |
meralion-merged-llama3-8b-sg-inst-avg-diff,0.25236243090492,0.3573462392196718,0.125506438977953,0.27423461451713527
|
|
|
20 |
gemma2-9b-cpt-sea-lionv3-instruct,0.23790909190042164,0.34294544984076464,0.1078722585042388,0.26290956735626153
|
21 |
gemma-2-2b-it,0.2597323674875989,0.36848124762381895,0.12622684440269072,0.2844890104362872
|
22 |
llama3-8b-cpt-sea-lionv2-instruct,0.25777587511641403,0.35911990072292727,0.13269121463917308,0.2815165099871418
|
23 |
+
cross_openhermes_llama3_8b_12288_inst,0.27081377092899106,0.3746700335717668,0.1422316280821482,0.2955396511330582
|
24 |
Qwen2_5_0_5B_Instruct,0.19408176276624156,0.28989753303423227,0.07842728643649079,0.21392046882800164
|
25 |
GPT4o_0513,0.2375730297294346,0.3364674648846549,0.11718194476069822,0.25906967954295057
|
results/dialogue/zero_shot/dream.csv
CHANGED
@@ -10,6 +10,7 @@ Sailor2-8B-Chat,0.9054385105340519
|
|
10 |
Meta-Llama-3-8B-Instruct,0.8946594806467418
|
11 |
Meta-Llama-3.1-70B-Instruct,0.9559039686428221
|
12 |
Qwen2_5_3B_Instruct,0.9029887310142087
|
|
|
13 |
SeaLLMs-v3-7B-Chat,0.9265066144047036
|
14 |
Qwen2_5_72B_Instruct,0.9627633512983832
|
15 |
meralion-merged-llama3-8b-sg-inst-avg-diff,0.9103380695737384
|
@@ -19,5 +20,6 @@ Qwen2_5_14B_Instruct,0.9461048505634493
|
|
19 |
gemma2-9b-cpt-sea-lionv3-instruct,0.9407153356197943
|
20 |
gemma-2-2b-it,0.8510534051935326
|
21 |
llama3-8b-cpt-sea-lionv2-instruct,0.8858402743753062
|
|
|
22 |
Qwen2_5_0_5B_Instruct,0.6526212640862322
|
23 |
GPT4o_0513,0.9583537481626654
|
|
|
10 |
Meta-Llama-3-8B-Instruct,0.8946594806467418
|
11 |
Meta-Llama-3.1-70B-Instruct,0.9559039686428221
|
12 |
Qwen2_5_3B_Instruct,0.9029887310142087
|
13 |
+
llama3.1-70b-cpt-sea-lionv3-instruct,0.9563939245467908
|
14 |
SeaLLMs-v3-7B-Chat,0.9265066144047036
|
15 |
Qwen2_5_72B_Instruct,0.9627633512983832
|
16 |
meralion-merged-llama3-8b-sg-inst-avg-diff,0.9103380695737384
|
|
|
20 |
gemma2-9b-cpt-sea-lionv3-instruct,0.9407153356197943
|
21 |
gemma-2-2b-it,0.8510534051935326
|
22 |
llama3-8b-cpt-sea-lionv2-instruct,0.8858402743753062
|
23 |
+
cross_openhermes_llama3_8b_12288_inst,0.8829005389514943
|
24 |
Qwen2_5_0_5B_Instruct,0.6526212640862322
|
25 |
GPT4o_0513,0.9583537481626654
|
results/dialogue/zero_shot/samsum.csv
CHANGED
@@ -10,14 +10,17 @@ Sailor2-8B-Chat,0.23525560304744508,0.34567892481583223,0.10170204161284628,0.25
|
|
10 |
Meta-Llama-3-8B-Instruct,0.2846315092346869,0.39397110152251813,0.154320846916639,0.30560257926490364
|
11 |
Meta-Llama-3.1-70B-Instruct,0.28934874612070227,0.4036295731242805,0.15211190810296196,0.31230475713486433
|
12 |
Qwen2_5_3B_Instruct,0.26935624341081515,0.380865832002109,0.13872106416227833,0.28848183406805816
|
|
|
13 |
SeaLLMs-v3-7B-Chat,0.2959981719045788,0.4078820748825196,0.16338306782652476,0.316729373004692
|
14 |
Qwen2_5_72B_Instruct,0.28852247889830335,0.3996215000271418,0.15494490129237035,0.31100103537539775
|
15 |
meralion-merged-llama3-8b-sg-inst-avg-diff,0.2827552959388026,0.3953429193664384,0.14797005050571224,0.30495291794425716
|
16 |
gemma-2-9b-it,0.3100514077180449,0.4289412957792292,0.16727050182456474,0.3339424255503407
|
|
|
17 |
Meta-Llama-3-70B-Instruct,0.2893525314227379,0.4030746211134018,0.15236139065578,0.3126215824990321
|
18 |
Qwen2_5_14B_Instruct,0.2713801253928723,0.3836253496005304,0.13683087953788298,0.2936841470402035
|
19 |
gemma2-9b-cpt-sea-lionv3-instruct,0.29509358497900623,0.4129497614150914,0.1502573048316353,0.32207368869029196
|
20 |
gemma-2-2b-it,0.31118787136959813,0.4324251755711466,0.16441328335793207,0.33672515517971563
|
21 |
llama3-8b-cpt-sea-lionv2-instruct,0.306997595680581,0.4214048099551701,0.1709790451938523,0.3286089318927205
|
|
|
22 |
Qwen2_5_0_5B_Instruct,0.20766179544894214,0.3105872033328297,0.08726222085933319,0.22513596215466355
|
23 |
GPT4o_0513,0.27736679291505306,0.386750207633093,0.14889081847621596,0.2964593526358502
|
|
|
10 |
Meta-Llama-3-8B-Instruct,0.2846315092346869,0.39397110152251813,0.154320846916639,0.30560257926490364
|
11 |
Meta-Llama-3.1-70B-Instruct,0.28934874612070227,0.4036295731242805,0.15211190810296196,0.31230475713486433
|
12 |
Qwen2_5_3B_Instruct,0.26935624341081515,0.380865832002109,0.13872106416227833,0.28848183406805816
|
13 |
+
llama3.1-70b-cpt-sea-lionv3-instruct,0.2787591738477117,0.38934731111245685,0.1449671143713316,0.3019630960593467
|
14 |
SeaLLMs-v3-7B-Chat,0.2959981719045788,0.4078820748825196,0.16338306782652476,0.316729373004692
|
15 |
Qwen2_5_72B_Instruct,0.28852247889830335,0.3996215000271418,0.15494490129237035,0.31100103537539775
|
16 |
meralion-merged-llama3-8b-sg-inst-avg-diff,0.2827552959388026,0.3953429193664384,0.14797005050571224,0.30495291794425716
|
17 |
gemma-2-9b-it,0.3100514077180449,0.4289412957792292,0.16727050182456474,0.3339424255503407
|
18 |
+
llama3.1-8b-cpt-sea-lionv3-instruct,0.2807372487518108,0.39766299519223614,0.13960542386322855,0.30494332719996775
|
19 |
Meta-Llama-3-70B-Instruct,0.2893525314227379,0.4030746211134018,0.15236139065578,0.3126215824990321
|
20 |
Qwen2_5_14B_Instruct,0.2713801253928723,0.3836253496005304,0.13683087953788298,0.2936841470402035
|
21 |
gemma2-9b-cpt-sea-lionv3-instruct,0.29509358497900623,0.4129497614150914,0.1502573048316353,0.32207368869029196
|
22 |
gemma-2-2b-it,0.31118787136959813,0.4324251755711466,0.16441328335793207,0.33672515517971563
|
23 |
llama3-8b-cpt-sea-lionv2-instruct,0.306997595680581,0.4214048099551701,0.1709790451938523,0.3286089318927205
|
24 |
+
cross_openhermes_llama3_8b_12288_inst,0.30043920936284546,0.41309659421156786,0.16636483587009585,0.3218561980068726
|
25 |
Qwen2_5_0_5B_Instruct,0.20766179544894214,0.3105872033328297,0.08726222085933319,0.22513596215466355
|
26 |
GPT4o_0513,0.27736679291505306,0.386750207633093,0.14889081847621596,0.2964593526358502
|
results/emotion/zero_shot/ind_emotion.csv
CHANGED
@@ -10,14 +10,17 @@ Sailor2-8B-Chat,0.7363636363636363
|
|
10 |
Meta-Llama-3-8B-Instruct,0.6522727272727272
|
11 |
Meta-Llama-3.1-70B-Instruct,0.7159090909090909
|
12 |
Qwen2_5_3B_Instruct,0.5522727272727272
|
|
|
13 |
SeaLLMs-v3-7B-Chat,0.6454545454545455
|
14 |
Qwen2_5_72B_Instruct,0.7068181818181818
|
15 |
meralion-merged-llama3-8b-sg-inst-avg-diff,0.7
|
16 |
gemma-2-9b-it,0.7477272727272727
|
|
|
17 |
Meta-Llama-3-70B-Instruct,0.6909090909090909
|
18 |
Qwen2_5_14B_Instruct,0.6954545454545454
|
19 |
gemma2-9b-cpt-sea-lionv3-instruct,0.7340909090909091
|
20 |
gemma-2-2b-it,0.6636363636363637
|
21 |
llama3-8b-cpt-sea-lionv2-instruct,0.6613636363636364
|
|
|
22 |
Qwen2_5_0_5B_Instruct,0.37727272727272726
|
23 |
GPT4o_0513,0.7068181818181818
|
|
|
10 |
Meta-Llama-3-8B-Instruct,0.6522727272727272
|
11 |
Meta-Llama-3.1-70B-Instruct,0.7159090909090909
|
12 |
Qwen2_5_3B_Instruct,0.5522727272727272
|
13 |
+
llama3.1-70b-cpt-sea-lionv3-instruct,0.7386363636363636
|
14 |
SeaLLMs-v3-7B-Chat,0.6454545454545455
|
15 |
Qwen2_5_72B_Instruct,0.7068181818181818
|
16 |
meralion-merged-llama3-8b-sg-inst-avg-diff,0.7
|
17 |
gemma-2-9b-it,0.7477272727272727
|
18 |
+
llama3.1-8b-cpt-sea-lionv3-instruct,0.6772727272727272
|
19 |
Meta-Llama-3-70B-Instruct,0.6909090909090909
|
20 |
Qwen2_5_14B_Instruct,0.6954545454545454
|
21 |
gemma2-9b-cpt-sea-lionv3-instruct,0.7340909090909091
|
22 |
gemma-2-2b-it,0.6636363636363637
|
23 |
llama3-8b-cpt-sea-lionv2-instruct,0.6613636363636364
|
24 |
+
cross_openhermes_llama3_8b_12288_inst,0.7045454545454546
|
25 |
Qwen2_5_0_5B_Instruct,0.37727272727272726
|
26 |
GPT4o_0513,0.7068181818181818
|
results/emotion/zero_shot/sst2.csv
CHANGED
@@ -10,14 +10,17 @@ Sailor2-8B-Chat,0.9461009174311926
|
|
10 |
Meta-Llama-3-8B-Instruct,0.8784403669724771
|
11 |
Meta-Llama-3.1-70B-Instruct,0.9529816513761468
|
12 |
Qwen2_5_3B_Instruct,0.8245412844036697
|
|
|
13 |
SeaLLMs-v3-7B-Chat,0.9403669724770642
|
14 |
Qwen2_5_72B_Instruct,0.9334862385321101
|
15 |
meralion-merged-llama3-8b-sg-inst-avg-diff,0.8841743119266054
|
16 |
gemma-2-9b-it,0.9311926605504587
|
|
|
17 |
Meta-Llama-3-70B-Instruct,0.9495412844036697
|
18 |
Qwen2_5_14B_Instruct,0.9311926605504587
|
19 |
gemma2-9b-cpt-sea-lionv3-instruct,0.9311926605504587
|
20 |
gemma-2-2b-it,0.9243119266055045
|
21 |
llama3-8b-cpt-sea-lionv2-instruct,0.9128440366972477
|
|
|
22 |
Qwen2_5_0_5B_Instruct,0.7889908256880734
|
23 |
GPT4o_0513,0.9415137614678899
|
|
|
10 |
Meta-Llama-3-8B-Instruct,0.8784403669724771
|
11 |
Meta-Llama-3.1-70B-Instruct,0.9529816513761468
|
12 |
Qwen2_5_3B_Instruct,0.8245412844036697
|
13 |
+
llama3.1-70b-cpt-sea-lionv3-instruct,0.9529816513761468
|
14 |
SeaLLMs-v3-7B-Chat,0.9403669724770642
|
15 |
Qwen2_5_72B_Instruct,0.9334862385321101
|
16 |
meralion-merged-llama3-8b-sg-inst-avg-diff,0.8841743119266054
|
17 |
gemma-2-9b-it,0.9311926605504587
|
18 |
+
llama3.1-8b-cpt-sea-lionv3-instruct,0.9231651376146789
|
19 |
Meta-Llama-3-70B-Instruct,0.9495412844036697
|
20 |
Qwen2_5_14B_Instruct,0.9311926605504587
|
21 |
gemma2-9b-cpt-sea-lionv3-instruct,0.9311926605504587
|
22 |
gemma-2-2b-it,0.9243119266055045
|
23 |
llama3-8b-cpt-sea-lionv2-instruct,0.9128440366972477
|
24 |
+
cross_openhermes_llama3_8b_12288_inst,0.9288990825688074
|
25 |
Qwen2_5_0_5B_Instruct,0.7889908256880734
|
26 |
GPT4o_0513,0.9415137614678899
|
results/flores_translation/zero_shot/ind2eng.csv
CHANGED
@@ -10,6 +10,7 @@ Sailor2-8B-Chat,0.30613567466028746
|
|
10 |
Meta-Llama-3-8B-Instruct,0.33079891679041123
|
11 |
Meta-Llama-3.1-70B-Instruct,0.43366494500251235
|
12 |
Qwen2_5_3B_Instruct,0.3316936422167389
|
|
|
13 |
SeaLLMs-v3-7B-Chat,0.3594829412574955
|
14 |
Qwen2_5_72B_Instruct,0.4215612766585066
|
15 |
meralion-merged-llama3-8b-sg-inst-avg-diff,0.38376586000725804
|
@@ -19,5 +20,6 @@ Qwen2_5_14B_Instruct,0.3901044620348051
|
|
19 |
gemma2-9b-cpt-sea-lionv3-instruct,0.3922444585880475
|
20 |
gemma-2-2b-it,0.3482500758113138
|
21 |
llama3-8b-cpt-sea-lionv2-instruct,0.3916108972514423
|
|
|
22 |
Qwen2_5_0_5B_Instruct,0.15776662800152338
|
23 |
GPT4o_0513,0.42589589086974855
|
|
|
10 |
Meta-Llama-3-8B-Instruct,0.33079891679041123
|
11 |
Meta-Llama-3.1-70B-Instruct,0.43366494500251235
|
12 |
Qwen2_5_3B_Instruct,0.3316936422167389
|
13 |
+
llama3.1-70b-cpt-sea-lionv3-instruct,0.4178494331839418
|
14 |
SeaLLMs-v3-7B-Chat,0.3594829412574955
|
15 |
Qwen2_5_72B_Instruct,0.4215612766585066
|
16 |
meralion-merged-llama3-8b-sg-inst-avg-diff,0.38376586000725804
|
|
|
20 |
gemma2-9b-cpt-sea-lionv3-instruct,0.3922444585880475
|
21 |
gemma-2-2b-it,0.3482500758113138
|
22 |
llama3-8b-cpt-sea-lionv2-instruct,0.3916108972514423
|
23 |
+
cross_openhermes_llama3_8b_12288_inst,0.3900675406718024
|
24 |
Qwen2_5_0_5B_Instruct,0.15776662800152338
|
25 |
GPT4o_0513,0.42589589086974855
|
results/flores_translation/zero_shot/vie2eng.csv
CHANGED
@@ -10,6 +10,7 @@ Sailor2-8B-Chat,0.2508650753772058
|
|
10 |
Meta-Llama-3-8B-Instruct,0.2647448190950291
|
11 |
Meta-Llama-3.1-70B-Instruct,0.37244508311079816
|
12 |
Qwen2_5_3B_Instruct,0.27312609009801636
|
|
|
13 |
SeaLLMs-v3-7B-Chat,0.30981028289420137
|
14 |
Qwen2_5_72B_Instruct,0.35733464866179004
|
15 |
meralion-merged-llama3-8b-sg-inst-avg-diff,0.30900856944791294
|
@@ -19,5 +20,6 @@ Qwen2_5_14B_Instruct,0.32198218156960645
|
|
19 |
gemma2-9b-cpt-sea-lionv3-instruct,0.3176282835521885
|
20 |
gemma-2-2b-it,0.27518909199172303
|
21 |
llama3-8b-cpt-sea-lionv2-instruct,0.327781936019637
|
|
|
22 |
Qwen2_5_0_5B_Instruct,0.14677375445859656
|
23 |
GPT4o_0513,0.36219303373759176
|
|
|
10 |
Meta-Llama-3-8B-Instruct,0.2647448190950291
|
11 |
Meta-Llama-3.1-70B-Instruct,0.37244508311079816
|
12 |
Qwen2_5_3B_Instruct,0.27312609009801636
|
13 |
+
llama3.1-70b-cpt-sea-lionv3-instruct,0.3609253353112318
|
14 |
SeaLLMs-v3-7B-Chat,0.30981028289420137
|
15 |
Qwen2_5_72B_Instruct,0.35733464866179004
|
16 |
meralion-merged-llama3-8b-sg-inst-avg-diff,0.30900856944791294
|
|
|
20 |
gemma2-9b-cpt-sea-lionv3-instruct,0.3176282835521885
|
21 |
gemma-2-2b-it,0.27518909199172303
|
22 |
llama3-8b-cpt-sea-lionv2-instruct,0.327781936019637
|
23 |
+
cross_openhermes_llama3_8b_12288_inst,0.29952664743728336
|
24 |
Qwen2_5_0_5B_Instruct,0.14677375445859656
|
25 |
GPT4o_0513,0.36219303373759176
|
results/flores_translation/zero_shot/zho2eng.csv
CHANGED
@@ -10,6 +10,7 @@ Sailor2-8B-Chat,0.18385611872796095
|
|
10 |
Meta-Llama-3-8B-Instruct,0.199495011482748
|
11 |
Meta-Llama-3.1-70B-Instruct,0.2832594176173152
|
12 |
Qwen2_5_3B_Instruct,0.2245195134637718
|
|
|
13 |
SeaLLMs-v3-7B-Chat,0.2516593644617717
|
14 |
Qwen2_5_72B_Instruct,0.2843491241986514
|
15 |
meralion-merged-llama3-8b-sg-inst-avg-diff,0.24133164017585856
|
@@ -19,5 +20,6 @@ Qwen2_5_14B_Instruct,0.2627781200417998
|
|
19 |
gemma2-9b-cpt-sea-lionv3-instruct,0.25110750921976727
|
20 |
gemma-2-2b-it,0.21164036008441425
|
21 |
llama3-8b-cpt-sea-lionv2-instruct,0.2381535278220489
|
|
|
22 |
Qwen2_5_0_5B_Instruct,0.13846648470535672
|
23 |
GPT4o_0513,0.27722306559544163
|
|
|
10 |
Meta-Llama-3-8B-Instruct,0.199495011482748
|
11 |
Meta-Llama-3.1-70B-Instruct,0.2832594176173152
|
12 |
Qwen2_5_3B_Instruct,0.2245195134637718
|
13 |
+
llama3.1-70b-cpt-sea-lionv3-instruct,0.2786437349197164
|
14 |
SeaLLMs-v3-7B-Chat,0.2516593644617717
|
15 |
Qwen2_5_72B_Instruct,0.2843491241986514
|
16 |
meralion-merged-llama3-8b-sg-inst-avg-diff,0.24133164017585856
|
|
|
20 |
gemma2-9b-cpt-sea-lionv3-instruct,0.25110750921976727
|
21 |
gemma-2-2b-it,0.21164036008441425
|
22 |
llama3-8b-cpt-sea-lionv2-instruct,0.2381535278220489
|
23 |
+
cross_openhermes_llama3_8b_12288_inst,0.2437964546132799
|
24 |
Qwen2_5_0_5B_Instruct,0.13846648470535672
|
25 |
GPT4o_0513,0.27722306559544163
|
results/flores_translation/zero_shot/zsm2eng.csv
CHANGED
@@ -10,6 +10,7 @@ Sailor2-8B-Chat,0.3139160319283414
|
|
10 |
Meta-Llama-3-8B-Instruct,0.31625368345049
|
11 |
Meta-Llama-3.1-70B-Instruct,0.4462132282683508
|
12 |
Qwen2_5_3B_Instruct,0.31056841204320457
|
|
|
13 |
SeaLLMs-v3-7B-Chat,0.3484133510670942
|
14 |
Qwen2_5_72B_Instruct,0.4237666988692159
|
15 |
meralion-merged-llama3-8b-sg-inst-avg-diff,0.3729790018011108
|
@@ -19,5 +20,6 @@ Qwen2_5_14B_Instruct,0.3841042767934729
|
|
19 |
gemma2-9b-cpt-sea-lionv3-instruct,0.4059485540822735
|
20 |
gemma-2-2b-it,0.33737270487369614
|
21 |
llama3-8b-cpt-sea-lionv2-instruct,0.38799258214381604
|
|
|
22 |
Qwen2_5_0_5B_Instruct,0.1194369315142997
|
23 |
GPT4o_0513,0.451496635720668
|
|
|
10 |
Meta-Llama-3-8B-Instruct,0.31625368345049
|
11 |
Meta-Llama-3.1-70B-Instruct,0.4462132282683508
|
12 |
Qwen2_5_3B_Instruct,0.31056841204320457
|
13 |
+
llama3.1-70b-cpt-sea-lionv3-instruct,0.44351811676542874
|
14 |
SeaLLMs-v3-7B-Chat,0.3484133510670942
|
15 |
Qwen2_5_72B_Instruct,0.4237666988692159
|
16 |
meralion-merged-llama3-8b-sg-inst-avg-diff,0.3729790018011108
|
|
|
20 |
gemma2-9b-cpt-sea-lionv3-instruct,0.4059485540822735
|
21 |
gemma-2-2b-it,0.33737270487369614
|
22 |
llama3-8b-cpt-sea-lionv2-instruct,0.38799258214381604
|
23 |
+
cross_openhermes_llama3_8b_12288_inst,0.39589080400186966
|
24 |
Qwen2_5_0_5B_Instruct,0.1194369315142997
|
25 |
GPT4o_0513,0.451496635720668
|
results/fundamental_nlp_tasks/zero_shot/c3.csv
CHANGED
@@ -10,6 +10,7 @@ Sailor2-8B-Chat,0.8960359012715033
|
|
10 |
Meta-Llama-3-8B-Instruct,0.8515332834704562
|
11 |
Meta-Llama-3.1-70B-Instruct,0.9603590127150337
|
12 |
Qwen2_5_3B_Instruct,0.8668661181750187
|
|
|
13 |
SeaLLMs-v3-7B-Chat,0.9143605086013463
|
14 |
Qwen2_5_72B_Instruct,0.9596110695587136
|
15 |
meralion-merged-llama3-8b-sg-inst-avg-diff,0.8706058339566193
|
@@ -20,5 +21,6 @@ Meta-Llama-3.1-70B,0.7786088257292446
|
|
20 |
gemma2-9b-cpt-sea-lionv3-instruct,0.9255796559461481
|
21 |
gemma-2-2b-it,0.7700074794315632
|
22 |
llama3-8b-cpt-sea-lionv2-instruct,0.8672400897531788
|
|
|
23 |
Qwen2_5_0_5B_Instruct,0.612939416604338
|
24 |
GPT4o_0513,0.9648466716529543
|
|
|
10 |
Meta-Llama-3-8B-Instruct,0.8515332834704562
|
11 |
Meta-Llama-3.1-70B-Instruct,0.9603590127150337
|
12 |
Qwen2_5_3B_Instruct,0.8668661181750187
|
13 |
+
llama3.1-70b-cpt-sea-lionv3-instruct,0.9640987284966342
|
14 |
SeaLLMs-v3-7B-Chat,0.9143605086013463
|
15 |
Qwen2_5_72B_Instruct,0.9596110695587136
|
16 |
meralion-merged-llama3-8b-sg-inst-avg-diff,0.8706058339566193
|
|
|
21 |
gemma2-9b-cpt-sea-lionv3-instruct,0.9255796559461481
|
22 |
gemma-2-2b-it,0.7700074794315632
|
23 |
llama3-8b-cpt-sea-lionv2-instruct,0.8672400897531788
|
24 |
+
cross_openhermes_llama3_8b_12288_inst,0.8485415108451758
|
25 |
Qwen2_5_0_5B_Instruct,0.612939416604338
|
26 |
GPT4o_0513,0.9648466716529543
|
results/fundamental_nlp_tasks/zero_shot/cola.csv
CHANGED
@@ -10,14 +10,17 @@ Sailor2-8B-Chat,0.7900287631831256
|
|
10 |
Meta-Llama-3-8B-Instruct,0.6548418024928092
|
11 |
Meta-Llama-3.1-70B-Instruct,0.850431447746884
|
12 |
Qwen2_5_3B_Instruct,0.6644295302013423
|
|
|
13 |
SeaLLMs-v3-7B-Chat,0.785234899328859
|
14 |
Qwen2_5_72B_Instruct,0.8571428571428571
|
15 |
meralion-merged-llama3-8b-sg-inst-avg-diff,0.6174496644295302
|
16 |
gemma-2-9b-it,0.7938638542665388
|
|
|
17 |
Meta-Llama-3-70B-Instruct,0.835091083413231
|
18 |
Qwen2_5_14B_Instruct,0.8063279002876318
|
19 |
gemma2-9b-cpt-sea-lionv3-instruct,0.825503355704698
|
20 |
gemma-2-2b-it,0.6749760306807286
|
21 |
llama3-8b-cpt-sea-lionv2-instruct,0.6078619367209971
|
|
|
22 |
Qwen2_5_0_5B_Instruct,0.6116970278044104
|
23 |
GPT4o_0513,0.8398849472674976
|
|
|
10 |
Meta-Llama-3-8B-Instruct,0.6548418024928092
|
11 |
Meta-Llama-3.1-70B-Instruct,0.850431447746884
|
12 |
Qwen2_5_3B_Instruct,0.6644295302013423
|
13 |
+
llama3.1-70b-cpt-sea-lionv3-instruct,0.8485139022051774
|
14 |
SeaLLMs-v3-7B-Chat,0.785234899328859
|
15 |
Qwen2_5_72B_Instruct,0.8571428571428571
|
16 |
meralion-merged-llama3-8b-sg-inst-avg-diff,0.6174496644295302
|
17 |
gemma-2-9b-it,0.7938638542665388
|
18 |
+
llama3.1-8b-cpt-sea-lionv3-instruct,0.7698945349952061
|
19 |
Meta-Llama-3-70B-Instruct,0.835091083413231
|
20 |
Qwen2_5_14B_Instruct,0.8063279002876318
|
21 |
gemma2-9b-cpt-sea-lionv3-instruct,0.825503355704698
|
22 |
gemma-2-2b-it,0.6749760306807286
|
23 |
llama3-8b-cpt-sea-lionv2-instruct,0.6078619367209971
|
24 |
+
cross_openhermes_llama3_8b_12288_inst,0.8207094918504314
|
25 |
Qwen2_5_0_5B_Instruct,0.6116970278044104
|
26 |
GPT4o_0513,0.8398849472674976
|
results/fundamental_nlp_tasks/zero_shot/mnli.csv
CHANGED
@@ -10,6 +10,7 @@ Sailor2-8B-Chat,0.664
|
|
10 |
Meta-Llama-3-8B-Instruct,0.546
|
11 |
Meta-Llama-3.1-70B-Instruct,0.7015
|
12 |
Qwen2_5_3B_Instruct,0.7465
|
|
|
13 |
SeaLLMs-v3-7B-Chat,0.653
|
14 |
Qwen2_5_72B_Instruct,0.8445
|
15 |
meralion-merged-llama3-8b-sg-inst-avg-diff,0.5375
|
@@ -19,5 +20,6 @@ Qwen2_5_14B_Instruct,0.818
|
|
19 |
gemma2-9b-cpt-sea-lionv3-instruct,0.7445
|
20 |
gemma-2-2b-it,0.6185
|
21 |
llama3-8b-cpt-sea-lionv2-instruct,0.5765
|
|
|
22 |
Qwen2_5_0_5B_Instruct,0.5095
|
23 |
GPT4o_0513,0.8335
|
|
|
10 |
Meta-Llama-3-8B-Instruct,0.546
|
11 |
Meta-Llama-3.1-70B-Instruct,0.7015
|
12 |
Qwen2_5_3B_Instruct,0.7465
|
13 |
+
llama3.1-70b-cpt-sea-lionv3-instruct,0.736
|
14 |
SeaLLMs-v3-7B-Chat,0.653
|
15 |
Qwen2_5_72B_Instruct,0.8445
|
16 |
meralion-merged-llama3-8b-sg-inst-avg-diff,0.5375
|
|
|
20 |
gemma2-9b-cpt-sea-lionv3-instruct,0.7445
|
21 |
gemma-2-2b-it,0.6185
|
22 |
llama3-8b-cpt-sea-lionv2-instruct,0.5765
|
23 |
+
cross_openhermes_llama3_8b_12288_inst,0.6485
|
24 |
Qwen2_5_0_5B_Instruct,0.5095
|
25 |
GPT4o_0513,0.8335
|
results/fundamental_nlp_tasks/zero_shot/mrpc.csv
CHANGED
@@ -10,6 +10,7 @@ Sailor2-8B-Chat,0.7769607843137255
|
|
10 |
Meta-Llama-3-8B-Instruct,0.678921568627451
|
11 |
Meta-Llama-3.1-70B-Instruct,0.7696078431372549
|
12 |
Qwen2_5_3B_Instruct,0.5661764705882353
|
|
|
13 |
SeaLLMs-v3-7B-Chat,0.7475490196078431
|
14 |
Qwen2_5_72B_Instruct,0.8014705882352942
|
15 |
meralion-merged-llama3-8b-sg-inst-avg-diff,0.6274509803921569
|
@@ -19,5 +20,6 @@ Qwen2_5_14B_Instruct,0.7794117647058824
|
|
19 |
gemma2-9b-cpt-sea-lionv3-instruct,0.7794117647058824
|
20 |
gemma-2-2b-it,0.7083333333333334
|
21 |
llama3-8b-cpt-sea-lionv2-instruct,0.5833333333333334
|
|
|
22 |
Qwen2_5_0_5B_Instruct,0.5759803921568627
|
23 |
GPT4o_0513,0.7377450980392157
|
|
|
10 |
Meta-Llama-3-8B-Instruct,0.678921568627451
|
11 |
Meta-Llama-3.1-70B-Instruct,0.7696078431372549
|
12 |
Qwen2_5_3B_Instruct,0.5661764705882353
|
13 |
+
llama3.1-70b-cpt-sea-lionv3-instruct,0.7745098039215687
|
14 |
SeaLLMs-v3-7B-Chat,0.7475490196078431
|
15 |
Qwen2_5_72B_Instruct,0.8014705882352942
|
16 |
meralion-merged-llama3-8b-sg-inst-avg-diff,0.6274509803921569
|
|
|
20 |
gemma2-9b-cpt-sea-lionv3-instruct,0.7794117647058824
|
21 |
gemma-2-2b-it,0.7083333333333334
|
22 |
llama3-8b-cpt-sea-lionv2-instruct,0.5833333333333334
|
23 |
+
cross_openhermes_llama3_8b_12288_inst,0.6985294117647058
|
24 |
Qwen2_5_0_5B_Instruct,0.5759803921568627
|
25 |
GPT4o_0513,0.7377450980392157
|
results/fundamental_nlp_tasks/zero_shot/ocnli.csv
CHANGED
@@ -10,6 +10,7 @@ Sailor2-8B-Chat,0.5569491525423729
|
|
10 |
Meta-Llama-3-8B-Instruct,0.44033898305084745
|
11 |
Meta-Llama-3.1-70B-Instruct,0.6423728813559322
|
12 |
Qwen2_5_3B_Instruct,0.6145762711864406
|
|
|
13 |
SeaLLMs-v3-7B-Chat,0.5698305084745763
|
14 |
Qwen2_5_72B_Instruct,0.7684745762711864
|
15 |
meralion-merged-llama3-8b-sg-inst-avg-diff,0.4633898305084746
|
@@ -19,5 +20,6 @@ Qwen2_5_14B_Instruct,0.7538983050847458
|
|
19 |
gemma2-9b-cpt-sea-lionv3-instruct,0.6488135593220339
|
20 |
gemma-2-2b-it,0.43322033898305085
|
21 |
llama3-8b-cpt-sea-lionv2-instruct,0.45559322033898303
|
|
|
22 |
Qwen2_5_0_5B_Instruct,0.3847457627118644
|
23 |
GPT4o_0513,0.7308474576271187
|
|
|
10 |
Meta-Llama-3-8B-Instruct,0.44033898305084745
|
11 |
Meta-Llama-3.1-70B-Instruct,0.6423728813559322
|
12 |
Qwen2_5_3B_Instruct,0.6145762711864406
|
13 |
+
llama3.1-70b-cpt-sea-lionv3-instruct,0.6955932203389831
|
14 |
SeaLLMs-v3-7B-Chat,0.5698305084745763
|
15 |
Qwen2_5_72B_Instruct,0.7684745762711864
|
16 |
meralion-merged-llama3-8b-sg-inst-avg-diff,0.4633898305084746
|
|
|
20 |
gemma2-9b-cpt-sea-lionv3-instruct,0.6488135593220339
|
21 |
gemma-2-2b-it,0.43322033898305085
|
22 |
llama3-8b-cpt-sea-lionv2-instruct,0.45559322033898303
|
23 |
+
cross_openhermes_llama3_8b_12288_inst,0.5925423728813559
|
24 |
Qwen2_5_0_5B_Instruct,0.3847457627118644
|
25 |
GPT4o_0513,0.7308474576271187
|
results/fundamental_nlp_tasks/zero_shot/qnli.csv
CHANGED
@@ -10,6 +10,7 @@ Sailor2-8B-Chat,0.6822258832143511
|
|
10 |
Meta-Llama-3-8B-Instruct,0.6025993044114956
|
11 |
Meta-Llama-3.1-70B-Instruct,0.9026176093721399
|
12 |
Qwen2_5_3B_Instruct,0.7645982061138569
|
|
|
13 |
SeaLLMs-v3-7B-Chat,0.7159070107999268
|
14 |
Qwen2_5_72B_Instruct,0.9082921471718836
|
15 |
meralion-merged-llama3-8b-sg-inst-avg-diff,0.6522057477576423
|
@@ -19,5 +20,6 @@ Qwen2_5_14B_Instruct,0.9079260479589969
|
|
19 |
gemma2-9b-cpt-sea-lionv3-instruct,0.9055464030752334
|
20 |
gemma-2-2b-it,0.7792421746293245
|
21 |
llama3-8b-cpt-sea-lionv2-instruct,0.6101043382756727
|
|
|
22 |
Qwen2_5_0_5B_Instruct,0.5464030752333883
|
23 |
GPT4o_0513,0.9304411495515285
|
|
|
10 |
Meta-Llama-3-8B-Instruct,0.6025993044114956
|
11 |
Meta-Llama-3.1-70B-Instruct,0.9026176093721399
|
12 |
Qwen2_5_3B_Instruct,0.7645982061138569
|
13 |
+
llama3.1-70b-cpt-sea-lionv3-instruct,0.9200073219842577
|
14 |
SeaLLMs-v3-7B-Chat,0.7159070107999268
|
15 |
Qwen2_5_72B_Instruct,0.9082921471718836
|
16 |
meralion-merged-llama3-8b-sg-inst-avg-diff,0.6522057477576423
|
|
|
20 |
gemma2-9b-cpt-sea-lionv3-instruct,0.9055464030752334
|
21 |
gemma-2-2b-it,0.7792421746293245
|
22 |
llama3-8b-cpt-sea-lionv2-instruct,0.6101043382756727
|
23 |
+
cross_openhermes_llama3_8b_12288_inst,0.8282994691561413
|
24 |
Qwen2_5_0_5B_Instruct,0.5464030752333883
|
25 |
GPT4o_0513,0.9304411495515285
|
results/fundamental_nlp_tasks/zero_shot/qqp.csv
CHANGED
@@ -10,6 +10,7 @@ Sailor2-8B-Chat,0.8205
|
|
10 |
Meta-Llama-3-8B-Instruct,0.563
|
11 |
Meta-Llama-3.1-70B-Instruct,0.815
|
12 |
Qwen2_5_3B_Instruct,0.7415
|
|
|
13 |
SeaLLMs-v3-7B-Chat,0.7625
|
14 |
Qwen2_5_72B_Instruct,0.8315
|
15 |
meralion-merged-llama3-8b-sg-inst-avg-diff,0.597
|
@@ -19,5 +20,6 @@ Qwen2_5_14B_Instruct,0.8255
|
|
19 |
gemma2-9b-cpt-sea-lionv3-instruct,0.803
|
20 |
gemma-2-2b-it,0.761
|
21 |
llama3-8b-cpt-sea-lionv2-instruct,0.6225
|
|
|
22 |
Qwen2_5_0_5B_Instruct,0.619
|
23 |
GPT4o_0513,0.8085
|
|
|
10 |
Meta-Llama-3-8B-Instruct,0.563
|
11 |
Meta-Llama-3.1-70B-Instruct,0.815
|
12 |
Qwen2_5_3B_Instruct,0.7415
|
13 |
+
llama3.1-70b-cpt-sea-lionv3-instruct,0.8295
|
14 |
SeaLLMs-v3-7B-Chat,0.7625
|
15 |
Qwen2_5_72B_Instruct,0.8315
|
16 |
meralion-merged-llama3-8b-sg-inst-avg-diff,0.597
|
|
|
20 |
gemma2-9b-cpt-sea-lionv3-instruct,0.803
|
21 |
gemma-2-2b-it,0.761
|
22 |
llama3-8b-cpt-sea-lionv2-instruct,0.6225
|
23 |
+
cross_openhermes_llama3_8b_12288_inst,0.792
|
24 |
Qwen2_5_0_5B_Instruct,0.619
|
25 |
GPT4o_0513,0.8085
|
results/fundamental_nlp_tasks/zero_shot/rte.csv
CHANGED
@@ -10,14 +10,17 @@ Sailor2-8B-Chat,0.8122743682310469
|
|
10 |
Meta-Llama-3-8B-Instruct,0.6173285198555957
|
11 |
Meta-Llama-3.1-70B-Instruct,0.8483754512635379
|
12 |
Qwen2_5_3B_Instruct,0.779783393501805
|
|
|
13 |
SeaLLMs-v3-7B-Chat,0.7870036101083032
|
14 |
Qwen2_5_72B_Instruct,0.9025270758122743
|
15 |
meralion-merged-llama3-8b-sg-inst-avg-diff,0.6606498194945848
|
16 |
gemma-2-9b-it,0.7472924187725631
|
|
|
17 |
Meta-Llama-3-70B-Instruct,0.8086642599277978
|
18 |
Qwen2_5_14B_Instruct,0.8664259927797834
|
19 |
gemma2-9b-cpt-sea-lionv3-instruct,0.8483754512635379
|
20 |
gemma-2-2b-it,0.7292418772563177
|
21 |
llama3-8b-cpt-sea-lionv2-instruct,0.6859205776173285
|
|
|
22 |
Qwen2_5_0_5B_Instruct,0.5992779783393501
|
23 |
GPT4o_0513,0.8700361010830325
|
|
|
10 |
Meta-Llama-3-8B-Instruct,0.6173285198555957
|
11 |
Meta-Llama-3.1-70B-Instruct,0.8483754512635379
|
12 |
Qwen2_5_3B_Instruct,0.779783393501805
|
13 |
+
llama3.1-70b-cpt-sea-lionv3-instruct,0.8880866425992779
|
14 |
SeaLLMs-v3-7B-Chat,0.7870036101083032
|
15 |
Qwen2_5_72B_Instruct,0.9025270758122743
|
16 |
meralion-merged-llama3-8b-sg-inst-avg-diff,0.6606498194945848
|
17 |
gemma-2-9b-it,0.7472924187725631
|
18 |
+
llama3.1-8b-cpt-sea-lionv3-instruct,0.7833935018050542
|
19 |
Meta-Llama-3-70B-Instruct,0.8086642599277978
|
20 |
Qwen2_5_14B_Instruct,0.8664259927797834
|
21 |
gemma2-9b-cpt-sea-lionv3-instruct,0.8483754512635379
|
22 |
gemma-2-2b-it,0.7292418772563177
|
23 |
llama3-8b-cpt-sea-lionv2-instruct,0.6859205776173285
|
24 |
+
cross_openhermes_llama3_8b_12288_inst,0.8050541516245487
|
25 |
Qwen2_5_0_5B_Instruct,0.5992779783393501
|
26 |
GPT4o_0513,0.8700361010830325
|
results/fundamental_nlp_tasks/zero_shot/wnli.csv
CHANGED
@@ -10,14 +10,17 @@ Sailor2-8B-Chat,0.5492957746478874
|
|
10 |
Meta-Llama-3-8B-Instruct,0.4788732394366197
|
11 |
Meta-Llama-3.1-70B-Instruct,0.8450704225352113
|
12 |
Qwen2_5_3B_Instruct,0.647887323943662
|
|
|
13 |
SeaLLMs-v3-7B-Chat,0.5915492957746479
|
14 |
Qwen2_5_72B_Instruct,0.8169014084507042
|
15 |
meralion-merged-llama3-8b-sg-inst-avg-diff,0.5492957746478874
|
16 |
gemma-2-9b-it,0.7746478873239436
|
|
|
17 |
Meta-Llama-3-70B-Instruct,0.7887323943661971
|
18 |
Qwen2_5_14B_Instruct,0.8309859154929577
|
19 |
gemma2-9b-cpt-sea-lionv3-instruct,0.7605633802816901
|
20 |
gemma-2-2b-it,0.43661971830985913
|
21 |
llama3-8b-cpt-sea-lionv2-instruct,0.5774647887323944
|
|
|
22 |
Qwen2_5_0_5B_Instruct,0.43661971830985913
|
23 |
GPT4o_0513,0.9295774647887324
|
|
|
10 |
Meta-Llama-3-8B-Instruct,0.4788732394366197
|
11 |
Meta-Llama-3.1-70B-Instruct,0.8450704225352113
|
12 |
Qwen2_5_3B_Instruct,0.647887323943662
|
13 |
+
llama3.1-70b-cpt-sea-lionv3-instruct,0.8591549295774648
|
14 |
SeaLLMs-v3-7B-Chat,0.5915492957746479
|
15 |
Qwen2_5_72B_Instruct,0.8169014084507042
|
16 |
meralion-merged-llama3-8b-sg-inst-avg-diff,0.5492957746478874
|
17 |
gemma-2-9b-it,0.7746478873239436
|
18 |
+
llama3.1-8b-cpt-sea-lionv3-instruct,0.7323943661971831
|
19 |
Meta-Llama-3-70B-Instruct,0.7887323943661971
|
20 |
Qwen2_5_14B_Instruct,0.8309859154929577
|
21 |
gemma2-9b-cpt-sea-lionv3-instruct,0.7605633802816901
|
22 |
gemma-2-2b-it,0.43661971830985913
|
23 |
llama3-8b-cpt-sea-lionv2-instruct,0.5774647887323944
|
24 |
+
cross_openhermes_llama3_8b_12288_inst,0.5211267605633803
|
25 |
Qwen2_5_0_5B_Instruct,0.43661971830985913
|
26 |
GPT4o_0513,0.9295774647887324
|
results/general_reasoning/zero_shot/c_eval.csv
CHANGED
@@ -10,6 +10,7 @@ Sailor2-8B-Chat,0.5946450809464509
|
|
10 |
Meta-Llama-3-8B-Instruct,0.4775840597758406
|
11 |
Meta-Llama-3.1-70B-Instruct,0.6612702366127023
|
12 |
Qwen2_5_3B_Instruct,0.6537982565379825
|
|
|
13 |
SeaLLMs-v3-7B-Chat,0.7658779576587795
|
14 |
Qwen2_5_72B_Instruct,0.8325031133250311
|
15 |
meralion-merged-llama3-8b-sg-inst-avg-diff,0.5205479452054794
|
@@ -19,5 +20,6 @@ Qwen2_5_14B_Instruct,0.7839352428393525
|
|
19 |
gemma2-9b-cpt-sea-lionv3-instruct,0.5722291407222914
|
20 |
gemma-2-2b-it,0.4352428393524284
|
21 |
llama3-8b-cpt-sea-lionv2-instruct,0.49813200498132004
|
|
|
22 |
Qwen2_5_0_5B_Instruct,0.41718555417185554
|
23 |
GPT4o_0513,0.7073474470734745
|
|
|
10 |
Meta-Llama-3-8B-Instruct,0.4775840597758406
|
11 |
Meta-Llama-3.1-70B-Instruct,0.6612702366127023
|
12 |
Qwen2_5_3B_Instruct,0.6537982565379825
|
13 |
+
llama3.1-70b-cpt-sea-lionv3-instruct,0.676214196762142
|
14 |
SeaLLMs-v3-7B-Chat,0.7658779576587795
|
15 |
Qwen2_5_72B_Instruct,0.8325031133250311
|
16 |
meralion-merged-llama3-8b-sg-inst-avg-diff,0.5205479452054794
|
|
|
20 |
gemma2-9b-cpt-sea-lionv3-instruct,0.5722291407222914
|
21 |
gemma-2-2b-it,0.4352428393524284
|
22 |
llama3-8b-cpt-sea-lionv2-instruct,0.49813200498132004
|
23 |
+
cross_openhermes_llama3_8b_12288_inst,0.4863013698630137
|
24 |
Qwen2_5_0_5B_Instruct,0.41718555417185554
|
25 |
GPT4o_0513,0.7073474470734745
|
results/general_reasoning/zero_shot/cmmlu_no_prompt.csv
CHANGED
@@ -6,8 +6,15 @@ Qwen2_5_7B_Instruct,0.7684337765498187
|
|
6 |
Qwen2_5_1_5B_Instruct,0.6070626834743568
|
7 |
Sailor2-8B-Chat,0.652909687446037
|
8 |
Meta-Llama-3-8B-Instruct,0.5214125366948713
|
|
|
|
|
|
|
|
|
9 |
meralion-merged-llama3-8b-sg-inst-avg-diff,0.555776204455189
|
10 |
gemma-2-9b-it,0.588154032118805
|
11 |
Qwen2_5_14B_Instruct,0.79951649110689
|
12 |
gemma2-9b-cpt-sea-lionv3-instruct,0.5871179416335693
|
|
|
|
|
|
|
13 |
Qwen2_5_0_5B_Instruct,0.42730098428596097
|
|
|
6 |
Qwen2_5_1_5B_Instruct,0.6070626834743568
|
7 |
Sailor2-8B-Chat,0.652909687446037
|
8 |
Meta-Llama-3-8B-Instruct,0.5214125366948713
|
9 |
+
Meta-Llama-3.1-70B-Instruct,0.7076498014159903
|
10 |
+
Qwen2_5_3B_Instruct,0.6737178380245208
|
11 |
+
llama3.1-70b-cpt-sea-lionv3-instruct,0.7057503021930582
|
12 |
+
SeaLLMs-v3-7B-Chat,0.7256950440338457
|
13 |
meralion-merged-llama3-8b-sg-inst-avg-diff,0.555776204455189
|
14 |
gemma-2-9b-it,0.588154032118805
|
15 |
Qwen2_5_14B_Instruct,0.79951649110689
|
16 |
gemma2-9b-cpt-sea-lionv3-instruct,0.5871179416335693
|
17 |
+
gemma-2-2b-it,0.4499222932136073
|
18 |
+
llama3-8b-cpt-sea-lionv2-instruct,0.5088931099982732
|
19 |
+
cross_openhermes_llama3_8b_12288_inst,0.5006907269901572
|
20 |
Qwen2_5_0_5B_Instruct,0.42730098428596097
|
results/general_reasoning/zero_shot/indommlu_no_prompt.csv
CHANGED
@@ -6,10 +6,16 @@ Qwen2_5_7B_Instruct,0.581814540356499
|
|
6 |
Qwen2_5_1_5B_Instruct,0.41337873022231125
|
7 |
Sailor2-8B-Chat,0.6342212430736365
|
8 |
Meta-Llama-3-8B-Instruct,0.537686093864744
|
|
|
|
|
|
|
9 |
SeaLLMs-v3-7B-Chat,0.5406235396221376
|
10 |
meralion-merged-llama3-8b-sg-inst-avg-diff,0.575806128580012
|
11 |
gemma-2-9b-it,0.6210027371653648
|
12 |
Qwen2_5_14B_Instruct,0.6356899659523333
|
13 |
gemma2-9b-cpt-sea-lionv3-instruct,0.6258762267174044
|
|
|
|
|
|
|
14 |
Qwen2_5_0_5B_Instruct,0.3137058548634755
|
15 |
GPT4o_0513,0.7599305694639161
|
|
|
6 |
Qwen2_5_1_5B_Instruct,0.41337873022231125
|
7 |
Sailor2-8B-Chat,0.6342212430736365
|
8 |
Meta-Llama-3-8B-Instruct,0.537686093864744
|
9 |
+
Meta-Llama-3.1-70B-Instruct,0.6863609052673744
|
10 |
+
Qwen2_5_3B_Instruct,0.5119166833566994
|
11 |
+
llama3.1-70b-cpt-sea-lionv3-instruct,0.6925028373055612
|
12 |
SeaLLMs-v3-7B-Chat,0.5406235396221376
|
13 |
meralion-merged-llama3-8b-sg-inst-avg-diff,0.575806128580012
|
14 |
gemma-2-9b-it,0.6210027371653648
|
15 |
Qwen2_5_14B_Instruct,0.6356899659523333
|
16 |
gemma2-9b-cpt-sea-lionv3-instruct,0.6258762267174044
|
17 |
+
gemma-2-2b-it,0.4856799519327058
|
18 |
+
llama3-8b-cpt-sea-lionv2-instruct,0.560451298484545
|
19 |
+
cross_openhermes_llama3_8b_12288_inst,0.5279391147606649
|
20 |
Qwen2_5_0_5B_Instruct,0.3137058548634755
|
21 |
GPT4o_0513,0.7599305694639161
|
results/general_reasoning/zero_shot/mmlu_no_prompt.csv
CHANGED
@@ -3,12 +3,19 @@ Qwen2-7B-Instruct,0.7032534858777262
|
|
3 |
Meta-Llama-3.1-8B-Instruct,0.7056131569538792
|
4 |
llama3-8b-cpt-sea-lionv2.1-instruct,0.6454057919199142
|
5 |
Qwen2_5_7B_Instruct,0.73936360386128
|
|
|
6 |
Sailor2-8B-Chat,0.6406149445834823
|
7 |
Meta-Llama-3-8B-Instruct,0.6735788344654987
|
|
|
|
|
|
|
8 |
SeaLLMs-v3-7B-Chat,0.6913836253128351
|
9 |
meralion-merged-llama3-8b-sg-inst-avg-diff,0.6988916696460493
|
10 |
gemma-2-9b-it,0.740293171254916
|
11 |
Qwen2_5_14B_Instruct,0.7939220593493028
|
12 |
gemma2-9b-cpt-sea-lionv3-instruct,0.7372899535216303
|
|
|
|
|
|
|
13 |
Qwen2_5_0_5B_Instruct,0.4267429388630676
|
14 |
GPT4o_0513,0.871576689309975
|
|
|
3 |
Meta-Llama-3.1-8B-Instruct,0.7056131569538792
|
4 |
llama3-8b-cpt-sea-lionv2.1-instruct,0.6454057919199142
|
5 |
Qwen2_5_7B_Instruct,0.73936360386128
|
6 |
+
Qwen2_5_1_5B_Instruct,0.5869860564890954
|
7 |
Sailor2-8B-Chat,0.6406149445834823
|
8 |
Meta-Llama-3-8B-Instruct,0.6735788344654987
|
9 |
+
Meta-Llama-3.1-70B-Instruct,0.8463353593135502
|
10 |
+
Qwen2_5_3B_Instruct,0.665856274579907
|
11 |
+
llama3.1-70b-cpt-sea-lionv3-instruct,0.8539149088308903
|
12 |
SeaLLMs-v3-7B-Chat,0.6913836253128351
|
13 |
meralion-merged-llama3-8b-sg-inst-avg-diff,0.6988916696460493
|
14 |
gemma-2-9b-it,0.740293171254916
|
15 |
Qwen2_5_14B_Instruct,0.7939220593493028
|
16 |
gemma2-9b-cpt-sea-lionv3-instruct,0.7372899535216303
|
17 |
+
gemma-2-2b-it,0.6008580622095102
|
18 |
+
llama3-8b-cpt-sea-lionv2-instruct,0.6478369681801931
|
19 |
+
cross_openhermes_llama3_8b_12288_inst,0.6010010725777619
|
20 |
Qwen2_5_0_5B_Instruct,0.4267429388630676
|
21 |
GPT4o_0513,0.871576689309975
|
results/general_reasoning/zero_shot/zbench.csv
CHANGED
@@ -10,14 +10,17 @@ Sailor2-8B-Chat,0.5151515151515151
|
|
10 |
Meta-Llama-3-8B-Instruct,0.3333333333333333
|
11 |
Meta-Llama-3.1-70B-Instruct,0.48484848484848486
|
12 |
Qwen2_5_3B_Instruct,0.5757575757575758
|
|
|
13 |
SeaLLMs-v3-7B-Chat,0.5454545454545454
|
14 |
Qwen2_5_72B_Instruct,0.696969696969697
|
15 |
meralion-merged-llama3-8b-sg-inst-avg-diff,0.42424242424242425
|
16 |
gemma-2-9b-it,0.48484848484848486
|
|
|
17 |
Meta-Llama-3-70B-Instruct,0.5151515151515151
|
18 |
Qwen2_5_14B_Instruct,0.6666666666666666
|
19 |
gemma2-9b-cpt-sea-lionv3-instruct,0.42424242424242425
|
20 |
gemma-2-2b-it,0.24242424242424243
|
21 |
llama3-8b-cpt-sea-lionv2-instruct,0.30303030303030304
|
|
|
22 |
Qwen2_5_0_5B_Instruct,0.36363636363636365
|
23 |
GPT4o_0513,0.696969696969697
|
|
|
10 |
Meta-Llama-3-8B-Instruct,0.3333333333333333
|
11 |
Meta-Llama-3.1-70B-Instruct,0.48484848484848486
|
12 |
Qwen2_5_3B_Instruct,0.5757575757575758
|
13 |
+
llama3.1-70b-cpt-sea-lionv3-instruct,0.5757575757575758
|
14 |
SeaLLMs-v3-7B-Chat,0.5454545454545454
|
15 |
Qwen2_5_72B_Instruct,0.696969696969697
|
16 |
meralion-merged-llama3-8b-sg-inst-avg-diff,0.42424242424242425
|
17 |
gemma-2-9b-it,0.48484848484848486
|
18 |
+
llama3.1-8b-cpt-sea-lionv3-instruct,0.3939393939393939
|
19 |
Meta-Llama-3-70B-Instruct,0.5151515151515151
|
20 |
Qwen2_5_14B_Instruct,0.6666666666666666
|
21 |
gemma2-9b-cpt-sea-lionv3-instruct,0.42424242424242425
|
22 |
gemma-2-2b-it,0.24242424242424243
|
23 |
llama3-8b-cpt-sea-lionv2-instruct,0.30303030303030304
|
24 |
+
cross_openhermes_llama3_8b_12288_inst,0.42424242424242425
|
25 |
Qwen2_5_0_5B_Instruct,0.36363636363636365
|
26 |
GPT4o_0513,0.696969696969697
|