Upload Qwen-Qwen3-8B/metrics.eval.jsonl with huggingface_hub
Browse files
Qwen-Qwen3-8B/metrics.eval.jsonl
CHANGED
|
@@ -6,3 +6,5 @@
|
|
| 6 |
{"created_at": "2025-09-03T09:26:53.000488", "global_step": 60000, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.28242320819112626, "acc_stderr,none": 0.013155456884097224, "acc_norm,none": 0.30802047781569963, "acc_norm_stderr,none": 0.01349142951729204}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.4718013468013468, "acc_stderr,none": 0.010243454104071792, "acc_norm,none": 0.4802188552188552, "acc_norm_stderr,none": 0.010251751199542738}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_pes_Arab": {"alias": "belebele_pes_Arab", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884525, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884525}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.3942441744672376, "acc_stderr,none": 0.004876889983110828, "acc_norm,none": 0.5104560844453296, "acc_norm_stderr,none": 0.004988690229505669}, "include_base_44_chinese": {"acc,none": 0.24587155963302754, "acc_stderr,none": 0.018517539247711934, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.049938324531150324}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.25287356321839083, "acc_stderr,none": 0.04687049503854671}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.323943661971831, "acc_stderr,none": 0.05593416612923642}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.23943661971830985, "acc_stderr,none": 0.05100514327793383}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.2413793103448276, "acc_stderr,none": 0.04614377668264891}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.051995626882956975}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.19718309859154928, "acc_stderr,none": 0.04755476905953276}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.1875, "acc_stderr,none": 0.10077822185373188}, "include_base_44_italian": {"acc,none": 0.2572992700729927, "acc_stderr,none": 0.018668571387726075, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.17142857142857143, "acc_stderr,none": 0.06463490595976273}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.2994011976047904, "acc_stderr,none": 0.03554736535384932}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.25, "acc_stderr,none": 0.1305582419667734}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.2903225806451613, "acc_stderr,none": 0.03657720706540911}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.20359281437125748, "acc_stderr,none": 0.031253219622833416}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.25, "acc_stderr,none": 0.1305582419667734}, "include_base_44_turkish": {"acc,none": 0.29014598540145986, "acc_stderr,none": 0.01938786453909353, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.3192771084337349, "acc_stderr,none": 0.036293353299478595}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.25903614457831325, "acc_stderr,none": 0.034106466140718564}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.26506024096385544, "acc_stderr,none": 0.03436024037944966}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.38, "acc_stderr,none": 0.06934092056863767}, "piqa": {"alias": "piqa", "acc,none": 0.6996735582154516, "acc_stderr,none": 0.010695225308183134, "acc_norm,none": 0.6953210010881393, "acc_norm_stderr,none": 0.010738889044325161}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.5052208835341365, "acc_stderr,none": 0.01002152649653032}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.4751004016064257, "acc_stderr,none": 0.01000963798302251}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.3497991967871486, "acc_stderr,none": 0.00955918147477829}}
|
| 7 |
{"created_at": "2025-09-04T06:27:38.521394", "global_step": 70000, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.2986348122866894, "acc_stderr,none": 0.013374078615068742, "acc_norm,none": 0.3191126279863481, "acc_norm_stderr,none": 0.013621696119173311}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.4621212121212121, "acc_stderr,none": 0.010230299628864806, "acc_norm,none": 0.4936868686868687, "acc_norm_stderr,none": 0.010258965668044436}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_pes_Arab": {"alias": "belebele_pes_Arab", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884525, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884525}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.4063931487751444, "acc_stderr,none": 0.004901558132335526, "acc_norm,none": 0.527185819557857, "acc_norm_stderr,none": 0.004982400368939663}, "include_base_44_chinese": {"acc,none": 0.24587155963302754, "acc_stderr,none": 0.018517539247711934, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.049938324531150324}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.25287356321839083, "acc_stderr,none": 0.04687049503854671}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.323943661971831, "acc_stderr,none": 0.05593416612923642}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.23943661971830985, "acc_stderr,none": 0.05100514327793383}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.2413793103448276, "acc_stderr,none": 0.04614377668264891}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.051995626882956975}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.19718309859154928, "acc_stderr,none": 0.04755476905953276}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.1875, "acc_stderr,none": 0.10077822185373188}, "include_base_44_italian": {"acc,none": 0.2572992700729927, "acc_stderr,none": 0.018668571387726075, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.17142857142857143, "acc_stderr,none": 0.06463490595976273}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.2994011976047904, "acc_stderr,none": 0.03554736535384932}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.25, "acc_stderr,none": 0.1305582419667734}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.2903225806451613, "acc_stderr,none": 0.03657720706540911}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.20359281437125748, "acc_stderr,none": 0.031253219622833416}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.25, "acc_stderr,none": 0.1305582419667734}, "include_base_44_turkish": {"acc,none": 0.29014598540145986, "acc_stderr,none": 0.01938786453909353, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.3192771084337349, "acc_stderr,none": 0.036293353299478595}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.25903614457831325, "acc_stderr,none": 0.034106466140718564}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.26506024096385544, "acc_stderr,none": 0.03436024037944966}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.38, "acc_stderr,none": 0.06934092056863767}, "piqa": {"alias": "piqa", "acc,none": 0.7067464635473341, "acc_stderr,none": 0.010621818421101924, "acc_norm,none": 0.7132752992383025, "acc_norm_stderr,none": 0.01055131450310807}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.5345381526104418, "acc_stderr,none": 0.00999813393626117}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.4682730923694779, "acc_stderr,none": 0.010001876146466703}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.3582329317269076, "acc_stderr,none": 0.009610788482973918}}
|
| 8 |
{"created_at": "2025-09-04T12:57:53.627191", "global_step": 80000, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.2986348122866894, "acc_stderr,none": 0.01337407861506874, "acc_norm,none": 0.32593856655290104, "acc_norm_stderr,none": 0.013697432466693252}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.4730639730639731, "acc_stderr,none": 0.010244884740620078, "acc_norm,none": 0.49326599326599324, "acc_norm_stderr,none": 0.010258852980991825}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_pes_Arab": {"alias": "belebele_pes_Arab", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884525, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884525}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.4153555068711412, "acc_stderr,none": 0.004917761181740167, "acc_norm,none": 0.5440151364270066, "acc_norm_stderr,none": 0.004970410081009462}, "include_base_44_chinese": {"acc,none": 0.24587155963302754, "acc_stderr,none": 0.018517539247711934, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.049938324531150324}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.25287356321839083, "acc_stderr,none": 0.04687049503854671}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.323943661971831, "acc_stderr,none": 0.05593416612923642}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.23943661971830985, "acc_stderr,none": 0.05100514327793383}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.2413793103448276, "acc_stderr,none": 0.04614377668264891}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.051995626882956975}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.19718309859154928, "acc_stderr,none": 0.04755476905953276}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.1875, "acc_stderr,none": 0.10077822185373188}, "include_base_44_italian": {"acc,none": 0.2572992700729927, "acc_stderr,none": 0.018668571387726075, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.17142857142857143, "acc_stderr,none": 0.06463490595976273}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.2994011976047904, "acc_stderr,none": 0.03554736535384932}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.25, "acc_stderr,none": 0.1305582419667734}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.2903225806451613, "acc_stderr,none": 0.03657720706540911}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.20359281437125748, "acc_stderr,none": 0.031253219622833416}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.25, "acc_stderr,none": 0.1305582419667734}, "include_base_44_turkish": {"acc,none": 0.29014598540145986, "acc_stderr,none": 0.01938786453909353, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.3192771084337349, "acc_stderr,none": 0.036293353299478595}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.25903614457831325, "acc_stderr,none": 0.034106466140718564}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.26506024096385544, "acc_stderr,none": 0.03436024037944966}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.38, "acc_stderr,none": 0.06934092056863767}, "piqa": {"alias": "piqa", "acc,none": 0.719260065288357, "acc_stderr,none": 0.010484325438311827, "acc_norm,none": 0.7143634385201306, "acc_norm_stderr,none": 0.010539303948661927}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.5208835341365462, "acc_stderr,none": 0.010013327358568523}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.47630522088353416, "acc_stderr,none": 0.010010812905412067}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.3502008032128514, "acc_stderr,none": 0.009561713038161952}}
|
|
|
|
|
|
|
|
|
| 6 |
{"created_at": "2025-09-03T09:26:53.000488", "global_step": 60000, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.28242320819112626, "acc_stderr,none": 0.013155456884097224, "acc_norm,none": 0.30802047781569963, "acc_norm_stderr,none": 0.01349142951729204}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.4718013468013468, "acc_stderr,none": 0.010243454104071792, "acc_norm,none": 0.4802188552188552, "acc_norm_stderr,none": 0.010251751199542738}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_pes_Arab": {"alias": "belebele_pes_Arab", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884525, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884525}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.3942441744672376, "acc_stderr,none": 0.004876889983110828, "acc_norm,none": 0.5104560844453296, "acc_norm_stderr,none": 0.004988690229505669}, "include_base_44_chinese": {"acc,none": 0.24587155963302754, "acc_stderr,none": 0.018517539247711934, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.049938324531150324}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.25287356321839083, "acc_stderr,none": 0.04687049503854671}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.323943661971831, "acc_stderr,none": 0.05593416612923642}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.23943661971830985, "acc_stderr,none": 0.05100514327793383}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.2413793103448276, "acc_stderr,none": 0.04614377668264891}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.051995626882956975}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.19718309859154928, "acc_stderr,none": 0.04755476905953276}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.1875, "acc_stderr,none": 0.10077822185373188}, "include_base_44_italian": {"acc,none": 0.2572992700729927, "acc_stderr,none": 0.018668571387726075, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.17142857142857143, "acc_stderr,none": 0.06463490595976273}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.2994011976047904, "acc_stderr,none": 0.03554736535384932}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.25, "acc_stderr,none": 0.1305582419667734}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.2903225806451613, "acc_stderr,none": 0.03657720706540911}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.20359281437125748, "acc_stderr,none": 0.031253219622833416}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.25, "acc_stderr,none": 0.1305582419667734}, "include_base_44_turkish": {"acc,none": 0.29014598540145986, "acc_stderr,none": 0.01938786453909353, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.3192771084337349, "acc_stderr,none": 0.036293353299478595}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.25903614457831325, "acc_stderr,none": 0.034106466140718564}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.26506024096385544, "acc_stderr,none": 0.03436024037944966}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.38, "acc_stderr,none": 0.06934092056863767}, "piqa": {"alias": "piqa", "acc,none": 0.6996735582154516, "acc_stderr,none": 0.010695225308183134, "acc_norm,none": 0.6953210010881393, "acc_norm_stderr,none": 0.010738889044325161}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.5052208835341365, "acc_stderr,none": 0.01002152649653032}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.4751004016064257, "acc_stderr,none": 0.01000963798302251}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.3497991967871486, "acc_stderr,none": 0.00955918147477829}}
|
| 7 |
{"created_at": "2025-09-04T06:27:38.521394", "global_step": 70000, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.2986348122866894, "acc_stderr,none": 0.013374078615068742, "acc_norm,none": 0.3191126279863481, "acc_norm_stderr,none": 0.013621696119173311}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.4621212121212121, "acc_stderr,none": 0.010230299628864806, "acc_norm,none": 0.4936868686868687, "acc_norm_stderr,none": 0.010258965668044436}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_pes_Arab": {"alias": "belebele_pes_Arab", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884525, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884525}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.4063931487751444, "acc_stderr,none": 0.004901558132335526, "acc_norm,none": 0.527185819557857, "acc_norm_stderr,none": 0.004982400368939663}, "include_base_44_chinese": {"acc,none": 0.24587155963302754, "acc_stderr,none": 0.018517539247711934, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.049938324531150324}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.25287356321839083, "acc_stderr,none": 0.04687049503854671}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.323943661971831, "acc_stderr,none": 0.05593416612923642}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.23943661971830985, "acc_stderr,none": 0.05100514327793383}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.2413793103448276, "acc_stderr,none": 0.04614377668264891}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.051995626882956975}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.19718309859154928, "acc_stderr,none": 0.04755476905953276}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.1875, "acc_stderr,none": 0.10077822185373188}, "include_base_44_italian": {"acc,none": 0.2572992700729927, "acc_stderr,none": 0.018668571387726075, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.17142857142857143, "acc_stderr,none": 0.06463490595976273}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.2994011976047904, "acc_stderr,none": 0.03554736535384932}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.25, "acc_stderr,none": 0.1305582419667734}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.2903225806451613, "acc_stderr,none": 0.03657720706540911}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.20359281437125748, "acc_stderr,none": 0.031253219622833416}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.25, "acc_stderr,none": 0.1305582419667734}, "include_base_44_turkish": {"acc,none": 0.29014598540145986, "acc_stderr,none": 0.01938786453909353, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.3192771084337349, "acc_stderr,none": 0.036293353299478595}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.25903614457831325, "acc_stderr,none": 0.034106466140718564}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.26506024096385544, "acc_stderr,none": 0.03436024037944966}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.38, "acc_stderr,none": 0.06934092056863767}, "piqa": {"alias": "piqa", "acc,none": 0.7067464635473341, "acc_stderr,none": 0.010621818421101924, "acc_norm,none": 0.7132752992383025, "acc_norm_stderr,none": 0.01055131450310807}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.5345381526104418, "acc_stderr,none": 0.00999813393626117}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.4682730923694779, "acc_stderr,none": 0.010001876146466703}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.3582329317269076, "acc_stderr,none": 0.009610788482973918}}
|
| 8 |
{"created_at": "2025-09-04T12:57:53.627191", "global_step": 80000, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.2986348122866894, "acc_stderr,none": 0.01337407861506874, "acc_norm,none": 0.32593856655290104, "acc_norm_stderr,none": 0.013697432466693252}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.4730639730639731, "acc_stderr,none": 0.010244884740620078, "acc_norm,none": 0.49326599326599324, "acc_norm_stderr,none": 0.010258852980991825}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_pes_Arab": {"alias": "belebele_pes_Arab", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884525, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884525}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.4153555068711412, "acc_stderr,none": 0.004917761181740167, "acc_norm,none": 0.5440151364270066, "acc_norm_stderr,none": 0.004970410081009462}, "include_base_44_chinese": {"acc,none": 0.24587155963302754, "acc_stderr,none": 0.018517539247711934, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.049938324531150324}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.25287356321839083, "acc_stderr,none": 0.04687049503854671}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.323943661971831, "acc_stderr,none": 0.05593416612923642}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.23943661971830985, "acc_stderr,none": 0.05100514327793383}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.2413793103448276, "acc_stderr,none": 0.04614377668264891}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.051995626882956975}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.19718309859154928, "acc_stderr,none": 0.04755476905953276}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.1875, "acc_stderr,none": 0.10077822185373188}, "include_base_44_italian": {"acc,none": 0.2572992700729927, "acc_stderr,none": 0.018668571387726075, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.17142857142857143, "acc_stderr,none": 0.06463490595976273}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.2994011976047904, "acc_stderr,none": 0.03554736535384932}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.25, "acc_stderr,none": 0.1305582419667734}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.2903225806451613, "acc_stderr,none": 0.03657720706540911}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.20359281437125748, "acc_stderr,none": 0.031253219622833416}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.25, "acc_stderr,none": 0.1305582419667734}, "include_base_44_turkish": {"acc,none": 0.29014598540145986, "acc_stderr,none": 0.01938786453909353, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.3192771084337349, "acc_stderr,none": 0.036293353299478595}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.25903614457831325, "acc_stderr,none": 0.034106466140718564}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.26506024096385544, "acc_stderr,none": 0.03436024037944966}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.38, "acc_stderr,none": 0.06934092056863767}, "piqa": {"alias": "piqa", "acc,none": 0.719260065288357, "acc_stderr,none": 0.010484325438311827, "acc_norm,none": 0.7143634385201306, "acc_norm_stderr,none": 0.010539303948661927}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.5208835341365462, "acc_stderr,none": 0.010013327358568523}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.47630522088353416, "acc_stderr,none": 0.010010812905412067}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.3502008032128514, "acc_stderr,none": 0.009561713038161952}}
|
| 9 |
+
{"created_at": "2025-09-05T18:52:32.487885", "global_step": 90000, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.3054607508532423, "acc_stderr,none": 0.013460080478002507, "acc_norm,none": 0.32849829351535836, "acc_norm_stderr,none": 0.013724978465537371}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.48274410774410775, "acc_stderr,none": 0.010253671674754631, "acc_norm,none": 0.5037878787878788, "acc_norm_stderr,none": 0.010259489101351842}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_pes_Arab": {"alias": "belebele_pes_Arab", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884525, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884525}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.42093208524198367, "acc_stderr,none": 0.004926996830194231, "acc_norm,none": 0.5533758215494922, "acc_norm_stderr,none": 0.004961268387512967}, "include_base_44_chinese": {"acc,none": 0.24587155963302754, "acc_stderr,none": 0.018517539247711934, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.049938324531150324}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.25287356321839083, "acc_stderr,none": 0.04687049503854671}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.323943661971831, "acc_stderr,none": 0.05593416612923642}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.23943661971830985, "acc_stderr,none": 0.05100514327793383}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.2413793103448276, "acc_stderr,none": 0.04614377668264891}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.051995626882956975}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.19718309859154928, "acc_stderr,none": 0.04755476905953276}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.1875, "acc_stderr,none": 0.10077822185373188}, "include_base_44_italian": {"acc,none": 0.2572992700729927, "acc_stderr,none": 0.018668571387726075, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.17142857142857143, "acc_stderr,none": 0.06463490595976273}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.2994011976047904, "acc_stderr,none": 0.03554736535384932}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.25, "acc_stderr,none": 0.1305582419667734}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.2903225806451613, "acc_stderr,none": 0.03657720706540911}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.20359281437125748, "acc_stderr,none": 0.031253219622833416}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.25, "acc_stderr,none": 0.1305582419667734}, "include_base_44_turkish": {"acc,none": 0.29014598540145986, "acc_stderr,none": 0.01938786453909353, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.3192771084337349, "acc_stderr,none": 0.036293353299478595}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.25903614457831325, "acc_stderr,none": 0.034106466140718564}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.26506024096385544, "acc_stderr,none": 0.03436024037944966}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.38, "acc_stderr,none": 0.06934092056863767}, "piqa": {"alias": "piqa", "acc,none": 0.7230685527747551, "acc_stderr,none": 0.01044049996933452, "acc_norm,none": 0.7219804134929271, "acc_norm_stderr,none": 0.010453117358332813}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.5465863453815261, "acc_stderr,none": 0.009978476483838969}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.4799196787148594, "acc_stderr,none": 0.010013987419234073}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.3449799196787149, "acc_stderr,none": 0.009528219800053311}}
|
| 10 |
+
{"created_at": "2025-09-05T19:00:31.160212", "global_step": 100000, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.3046075085324232, "acc_stderr,none": 0.013449522109932487, "acc_norm,none": 0.3319112627986348, "acc_norm_stderr,none": 0.013760988200880538}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.47895622895622897, "acc_stderr,none": 0.010250692602022583, "acc_norm,none": 0.49957912457912457, "acc_norm_stderr,none": 0.010259779886094427}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_pes_Arab": {"alias": "belebele_pes_Arab", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884525, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884525}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.42362079267078273, "acc_stderr,none": 0.004931219148182243, "acc_norm,none": 0.5553674566819359, "acc_norm_stderr,none": 0.0049590941464715136}, "include_base_44_chinese": {"acc,none": 0.24587155963302754, "acc_stderr,none": 0.018517539247711934, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.049938324531150324}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.25287356321839083, "acc_stderr,none": 0.04687049503854671}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.323943661971831, "acc_stderr,none": 0.05593416612923642}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.23943661971830985, "acc_stderr,none": 0.05100514327793383}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.2413793103448276, "acc_stderr,none": 0.04614377668264891}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.051995626882956975}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.19718309859154928, "acc_stderr,none": 0.04755476905953276}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.1875, "acc_stderr,none": 0.10077822185373188}, "include_base_44_italian": {"acc,none": 0.2572992700729927, "acc_stderr,none": 0.018668571387726075, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.17142857142857143, "acc_stderr,none": 0.06463490595976273}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.2994011976047904, "acc_stderr,none": 0.03554736535384932}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.25, "acc_stderr,none": 0.1305582419667734}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.2903225806451613, "acc_stderr,none": 0.03657720706540911}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.20359281437125748, "acc_stderr,none": 0.031253219622833416}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.25, "acc_stderr,none": 0.1305582419667734}, "include_base_44_turkish": {"acc,none": 0.29014598540145986, "acc_stderr,none": 0.01938786453909353, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.3192771084337349, "acc_stderr,none": 0.036293353299478595}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.25903614457831325, "acc_stderr,none": 0.034106466140718564}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.26506024096385544, "acc_stderr,none": 0.03436024037944966}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.38, "acc_stderr,none": 0.06934092056863767}, "piqa": {"alias": "piqa", "acc,none": 0.7230685527747551, "acc_stderr,none": 0.01044049996933452, "acc_norm,none": 0.7263329706202394, "acc_norm_stderr,none": 0.010402184206229213}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.536144578313253, "acc_stderr,none": 0.00999585228282235}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.4979919678714859, "acc_stderr,none": 0.010021992045038411}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.35542168674698793, "acc_stderr,none": 0.009593947957927137}}
|