craffel HF Staff commited on
Commit
943b025
·
verified ·
1 Parent(s): 2898dc9

Upload tiktoken-gpt-4o/metrics.eval.jsonl with huggingface_hub

Browse files
Files changed (1) hide show
  1. tiktoken-gpt-4o/metrics.eval.jsonl +9 -0
tiktoken-gpt-4o/metrics.eval.jsonl ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {"created_at": "2025-09-03T05:36:48.892610", "global_step": 20000, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.2721843003412969, "acc_stderr,none": 0.013006600406423702, "acc_norm,none": 0.295221843003413, "acc_norm_stderr,none": 0.013329750293382321}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.43434343434343436, "acc_stderr,none": 0.010170943451269425, "acc_norm,none": 0.4524410774410774, "acc_norm_stderr,none": 0.010213265860171392}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_pes_Arab": {"alias": "belebele_pes_Arab", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884525, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884525}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.3542123083051185, "acc_stderr,none": 0.004772964697941339, "acc_norm,none": 0.43975303724357695, "acc_norm_stderr,none": 0.004953426186069838}, "include_base_44_chinese": {"acc,none": 0.24587155963302754, "acc_stderr,none": 0.018517539247711934, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.049938324531150324}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.25287356321839083, "acc_stderr,none": 0.04687049503854671}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.323943661971831, "acc_stderr,none": 0.05593416612923642}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.23943661971830985, "acc_stderr,none": 0.05100514327793383}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.2413793103448276, "acc_stderr,none": 0.04614377668264891}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.051995626882956975}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.19718309859154928, "acc_stderr,none": 0.04755476905953276}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.1875, "acc_stderr,none": 0.10077822185373188}, "include_base_44_italian": {"acc,none": 0.2572992700729927, "acc_stderr,none": 0.018668571387726075, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.17142857142857143, "acc_stderr,none": 0.06463490595976273}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.2994011976047904, "acc_stderr,none": 0.03554736535384932}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.25, "acc_stderr,none": 0.1305582419667734}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.2903225806451613, "acc_stderr,none": 0.03657720706540911}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.20359281437125748, "acc_stderr,none": 0.031253219622833416}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.25, "acc_stderr,none": 0.1305582419667734}, "include_base_44_turkish": {"acc,none": 0.29014598540145986, "acc_stderr,none": 0.01938786453909353, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.3192771084337349, "acc_stderr,none": 0.036293353299478595}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.25903614457831325, "acc_stderr,none": 0.034106466140718564}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.26506024096385544, "acc_stderr,none": 0.03436024037944966}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.38, "acc_stderr,none": 0.06934092056863767}, "piqa": {"alias": "piqa", "acc,none": 0.6784548422198041, "acc_stderr,none": 0.010897500107575649, "acc_norm,none": 0.675734494015234, "acc_norm_stderr,none": 0.01092153904134797}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.4943775100401606, "acc_stderr,none": 0.010021439203777306}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.42248995983935744, "acc_stderr,none": 0.009900919227857793}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.3385542168674699, "acc_stderr,none": 0.009485250208516876}}
2
+ {"created_at": "2025-09-03T07:36:31.262546", "global_step": 30000, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.2832764505119454, "acc_stderr,none": 0.013167478735134576, "acc_norm,none": 0.29692832764505117, "acc_norm_stderr,none": 0.01335202597672522}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.43434343434343436, "acc_stderr,none": 0.010170943451269423, "acc_norm,none": 0.460016835016835, "acc_norm_stderr,none": 0.01022692723349151}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_pes_Arab": {"alias": "belebele_pes_Arab", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884525, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884525}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.36516630153355906, "acc_stderr,none": 0.004804927608773129, "acc_norm,none": 0.4629555865365465, "acc_norm_stderr,none": 0.004976067726432575}, "include_base_44_chinese": {"acc,none": 0.24587155963302754, "acc_stderr,none": 0.018517539247711934, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.049938324531150324}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.25287356321839083, "acc_stderr,none": 0.04687049503854671}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.323943661971831, "acc_stderr,none": 0.05593416612923642}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.23943661971830985, "acc_stderr,none": 0.05100514327793383}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.2413793103448276, "acc_stderr,none": 0.04614377668264891}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.051995626882956975}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.19718309859154928, "acc_stderr,none": 0.04755476905953276}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.1875, "acc_stderr,none": 0.10077822185373188}, "include_base_44_italian": {"acc,none": 0.2572992700729927, "acc_stderr,none": 0.018668571387726075, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.17142857142857143, "acc_stderr,none": 0.06463490595976273}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.2994011976047904, "acc_stderr,none": 0.03554736535384932}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.25, "acc_stderr,none": 0.1305582419667734}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.2903225806451613, "acc_stderr,none": 0.03657720706540911}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.20359281437125748, "acc_stderr,none": 0.031253219622833416}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.25, "acc_stderr,none": 0.1305582419667734}, "include_base_44_turkish": {"acc,none": 0.29014598540145986, "acc_stderr,none": 0.01938786453909353, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.3192771084337349, "acc_stderr,none": 0.036293353299478595}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.25903614457831325, "acc_stderr,none": 0.034106466140718564}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.26506024096385544, "acc_stderr,none": 0.03436024037944966}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.38, "acc_stderr,none": 0.06934092056863767}, "piqa": {"alias": "piqa", "acc,none": 0.6746463547334058, "acc_stderr,none": 0.010931036623525195, "acc_norm,none": 0.676278563656148, "acc_norm_stderr,none": 0.010916765010708767}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.4959839357429719, "acc_stderr,none": 0.010021749574555898}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.46626506024096387, "acc_stderr,none": 0.009999235684721611}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.336144578313253, "acc_stderr,none": 0.009468634669293527}}
3
+ {"created_at": "2025-09-03T07:55:23.897881", "global_step": 40000, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.27986348122866894, "acc_stderr,none": 0.013119040897725922, "acc_norm,none": 0.31313993174061433, "acc_norm_stderr,none": 0.013552671543623497}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.4553872053872054, "acc_stderr,none": 0.010218861787618726, "acc_norm,none": 0.4722222222222222, "acc_norm_stderr,none": 0.010243938285881118}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_pes_Arab": {"alias": "belebele_pes_Arab", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884525, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884525}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.37731527584146585, "acc_stderr,none": 0.004837242015191119, "acc_norm,none": 0.4778928500298745, "acc_norm_stderr,none": 0.0049849017528463884}, "include_base_44_chinese": {"acc,none": 0.24587155963302754, "acc_stderr,none": 0.018517539247711934, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.049938324531150324}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.25287356321839083, "acc_stderr,none": 0.04687049503854671}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.323943661971831, "acc_stderr,none": 0.05593416612923642}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.23943661971830985, "acc_stderr,none": 0.05100514327793383}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.2413793103448276, "acc_stderr,none": 0.04614377668264891}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.051995626882956975}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.19718309859154928, "acc_stderr,none": 0.04755476905953276}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.1875, "acc_stderr,none": 0.10077822185373188}, "include_base_44_italian": {"acc,none": 0.2572992700729927, "acc_stderr,none": 0.018668571387726075, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.17142857142857143, "acc_stderr,none": 0.06463490595976273}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.2994011976047904, "acc_stderr,none": 0.03554736535384932}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.25, "acc_stderr,none": 0.1305582419667734}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.2903225806451613, "acc_stderr,none": 0.03657720706540911}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.20359281437125748, "acc_stderr,none": 0.031253219622833416}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.25, "acc_stderr,none": 0.1305582419667734}, "include_base_44_turkish": {"acc,none": 0.29014598540145986, "acc_stderr,none": 0.01938786453909353, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.3192771084337349, "acc_stderr,none": 0.036293353299478595}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.25903614457831325, "acc_stderr,none": 0.034106466140718564}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.26506024096385544, "acc_stderr,none": 0.03436024037944966}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.38, "acc_stderr,none": 0.06934092056863767}, "piqa": {"alias": "piqa", "acc,none": 0.7002176278563657, "acc_stderr,none": 0.010689686967138092, "acc_norm,none": 0.7018498367791077, "acc_norm_stderr,none": 0.010672964114008306}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.510441767068273, "acc_stderr,none": 0.010019887205677445}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.4566265060240964, "acc_stderr,none": 0.009984293410840318}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.3345381526104418, "acc_stderr,none": 0.009457404390939166}}
4
+ {"created_at": "2025-09-04T06:23:41.559893", "global_step": 50000, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.2841296928327645, "acc_stderr,none": 0.013179442447653887, "acc_norm,none": 0.32764505119453924, "acc_norm_stderr,none": 0.01371584794071934}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.46380471380471383, "acc_stderr,none": 0.010232865550346745, "acc_norm,none": 0.49242424242424243, "acc_norm_stderr,none": 0.010258605792153316}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_pes_Arab": {"alias": "belebele_pes_Arab", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884525, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884525}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.3859788886675961, "acc_stderr,none": 0.004858306877874621, "acc_norm,none": 0.49352718581955785, "acc_norm_stderr,none": 0.004989363276955167}, "include_base_44_chinese": {"acc,none": 0.24587155963302754, "acc_stderr,none": 0.018517539247711934, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.049938324531150324}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.25287356321839083, "acc_stderr,none": 0.04687049503854671}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.323943661971831, "acc_stderr,none": 0.05593416612923642}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.23943661971830985, "acc_stderr,none": 0.05100514327793383}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.2413793103448276, "acc_stderr,none": 0.04614377668264891}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.051995626882956975}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.19718309859154928, "acc_stderr,none": 0.04755476905953276}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.1875, "acc_stderr,none": 0.10077822185373188}, "include_base_44_italian": {"acc,none": 0.2572992700729927, "acc_stderr,none": 0.018668571387726075, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.17142857142857143, "acc_stderr,none": 0.06463490595976273}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.2994011976047904, "acc_stderr,none": 0.03554736535384932}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.25, "acc_stderr,none": 0.1305582419667734}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.2903225806451613, "acc_stderr,none": 0.03657720706540911}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.20359281437125748, "acc_stderr,none": 0.031253219622833416}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.25, "acc_stderr,none": 0.1305582419667734}, "include_base_44_turkish": {"acc,none": 0.29014598540145986, "acc_stderr,none": 0.01938786453909353, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.3192771084337349, "acc_stderr,none": 0.036293353299478595}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.25903614457831325, "acc_stderr,none": 0.034106466140718564}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.26506024096385544, "acc_stderr,none": 0.03436024037944966}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.38, "acc_stderr,none": 0.06934092056863767}, "piqa": {"alias": "piqa", "acc,none": 0.6980413492927094, "acc_stderr,none": 0.010711732891588352, "acc_norm,none": 0.6974972796517954, "acc_norm_stderr,none": 0.010717199698083891}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.5092369477911647, "acc_stderr,none": 0.010020362530631355}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.4530120481927711, "acc_stderr,none": 0.009977719904353732}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.3329317269076305, "acc_stderr,none": 0.009446051001358226}}
5
+ {"created_at": "2025-09-04T10:35:01.145637", "global_step": 60000, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.295221843003413, "acc_stderr,none": 0.013329750293382316, "acc_norm,none": 0.3319112627986348, "acc_norm_stderr,none": 0.013760988200880534}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.4675925925925926, "acc_stderr,none": 0.010238210368801896, "acc_norm,none": 0.49242424242424243, "acc_norm_stderr,none": 0.010258605792153316}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_pes_Arab": {"alias": "belebele_pes_Arab", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884525, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884525}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.39255128460466043, "acc_stderr,none": 0.0048732032693663075, "acc_norm,none": 0.5103565026887075, "acc_norm_stderr,none": 0.004988710917169336}, "include_base_44_chinese": {"acc,none": 0.24587155963302754, "acc_stderr,none": 0.018517539247711934, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.049938324531150324}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.25287356321839083, "acc_stderr,none": 0.04687049503854671}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.323943661971831, "acc_stderr,none": 0.05593416612923642}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.23943661971830985, "acc_stderr,none": 0.05100514327793383}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.2413793103448276, "acc_stderr,none": 0.04614377668264891}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.051995626882956975}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.19718309859154928, "acc_stderr,none": 0.04755476905953276}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.1875, "acc_stderr,none": 0.10077822185373188}, "include_base_44_italian": {"acc,none": 0.2572992700729927, "acc_stderr,none": 0.018668571387726075, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.17142857142857143, "acc_stderr,none": 0.06463490595976273}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.2994011976047904, "acc_stderr,none": 0.03554736535384932}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.25, "acc_stderr,none": 0.1305582419667734}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.2903225806451613, "acc_stderr,none": 0.03657720706540911}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.20359281437125748, "acc_stderr,none": 0.031253219622833416}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.25, "acc_stderr,none": 0.1305582419667734}, "include_base_44_turkish": {"acc,none": 0.29014598540145986, "acc_stderr,none": 0.01938786453909353, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.3192771084337349, "acc_stderr,none": 0.036293353299478595}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.25903614457831325, "acc_stderr,none": 0.034106466140718564}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.26506024096385544, "acc_stderr,none": 0.03436024037944966}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.38, "acc_stderr,none": 0.06934092056863767}, "piqa": {"alias": "piqa", "acc,none": 0.7007616974972797, "acc_stderr,none": 0.010684130673134581, "acc_norm,none": 0.7083786724700761, "acc_norm_stderr,none": 0.010604441527428784}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.5160642570281124, "acc_stderr,none": 0.010016898932355679}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.45823293172690766, "acc_stderr,none": 0.009987044882812574}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.3373493975903614, "acc_stderr,none": 0.009476976849778588}}
6
+ {"created_at": "2025-09-05T18:52:46.755669", "global_step": 70000, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.30204778156996587, "acc_stderr,none": 0.013417519144716417, "acc_norm,none": 0.3370307167235495, "acc_norm_stderr,none": 0.013813476652902272}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.4730639730639731, "acc_stderr,none": 0.010244884740620084, "acc_norm,none": 0.49537037037037035, "acc_norm_stderr,none": 0.010259343705889726}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_pes_Arab": {"alias": "belebele_pes_Arab", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884525, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884525}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.4063931487751444, "acc_stderr,none": 0.004901558132335524, "acc_norm,none": 0.5370444134634534, "acc_norm_stderr,none": 0.00497606772643256}, "include_base_44_chinese": {"acc,none": 0.24587155963302754, "acc_stderr,none": 0.018517539247711934, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.049938324531150324}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.25287356321839083, "acc_stderr,none": 0.04687049503854671}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.323943661971831, "acc_stderr,none": 0.05593416612923642}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.23943661971830985, "acc_stderr,none": 0.05100514327793383}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.2413793103448276, "acc_stderr,none": 0.04614377668264891}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.051995626882956975}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.19718309859154928, "acc_stderr,none": 0.04755476905953276}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.1875, "acc_stderr,none": 0.10077822185373188}, "include_base_44_italian": {"acc,none": 0.2572992700729927, "acc_stderr,none": 0.018668571387726075, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.17142857142857143, "acc_stderr,none": 0.06463490595976273}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.2994011976047904, "acc_stderr,none": 0.03554736535384932}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.25, "acc_stderr,none": 0.1305582419667734}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.2903225806451613, "acc_stderr,none": 0.03657720706540911}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.20359281437125748, "acc_stderr,none": 0.031253219622833416}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.25, "acc_stderr,none": 0.1305582419667734}, "include_base_44_turkish": {"acc,none": 0.29014598540145986, "acc_stderr,none": 0.01938786453909353, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.3192771084337349, "acc_stderr,none": 0.036293353299478595}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.25903614457831325, "acc_stderr,none": 0.034106466140718564}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.26506024096385544, "acc_stderr,none": 0.03436024037944966}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.38, "acc_stderr,none": 0.06934092056863767}, "piqa": {"alias": "piqa", "acc,none": 0.7018498367791077, "acc_stderr,none": 0.010672964114008305, "acc_norm,none": 0.7100108813928183, "acc_norm_stderr,none": 0.010586899128169328}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.5176706827309236, "acc_stderr,none": 0.010015812066461158}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.45180722891566266, "acc_stderr,none": 0.009975410845717854}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.3353413654618474, "acc_stderr,none": 0.009463034891512708}}
7
+ {"created_at": "2025-09-05T19:01:04.616794", "global_step": 80000, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.30631399317406144, "acc_stderr,none": 0.01347058441727651, "acc_norm,none": 0.33361774744027306, "acc_norm_stderr,none": 0.013778687054176536}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.47264309764309764, "acc_stderr,none": 0.01024441516439054, "acc_norm,none": 0.49747474747474746, "acc_norm_stderr,none": 0.01025965266878347}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_pes_Arab": {"alias": "belebele_pes_Arab", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884525, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884525}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.41814379605656243, "acc_stderr,none": 0.004922459820434779, "acc_norm,none": 0.548994224258116, "acc_norm_stderr,none": 0.004965768348628071}, "include_base_44_chinese": {"acc,none": 0.24587155963302754, "acc_stderr,none": 0.018517539247711934, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.049938324531150324}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.25287356321839083, "acc_stderr,none": 0.04687049503854671}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.323943661971831, "acc_stderr,none": 0.05593416612923642}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.23943661971830985, "acc_stderr,none": 0.05100514327793383}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.2413793103448276, "acc_stderr,none": 0.04614377668264891}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.051995626882956975}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.19718309859154928, "acc_stderr,none": 0.04755476905953276}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.1875, "acc_stderr,none": 0.10077822185373188}, "include_base_44_italian": {"acc,none": 0.2572992700729927, "acc_stderr,none": 0.018668571387726075, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.17142857142857143, "acc_stderr,none": 0.06463490595976273}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.2994011976047904, "acc_stderr,none": 0.03554736535384932}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.25, "acc_stderr,none": 0.1305582419667734}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.2903225806451613, "acc_stderr,none": 0.03657720706540911}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.20359281437125748, "acc_stderr,none": 0.031253219622833416}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.25, "acc_stderr,none": 0.1305582419667734}, "include_base_44_turkish": {"acc,none": 0.29014598540145986, "acc_stderr,none": 0.01938786453909353, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.3192771084337349, "acc_stderr,none": 0.036293353299478595}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.25903614457831325, "acc_stderr,none": 0.034106466140718564}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.26506024096385544, "acc_stderr,none": 0.03436024037944966}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.38, "acc_stderr,none": 0.06934092056863767}, "piqa": {"alias": "piqa", "acc,none": 0.7121871599564744, "acc_stderr,none": 0.01056325038305919, "acc_norm,none": 0.7285092491838956, "acc_norm_stderr,none": 0.010376251176596138}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.5140562248995983, "acc_stderr,none": 0.010018111813088551}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.4526104417670683, "acc_stderr,none": 0.009976956772510008}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.342570281124498, "acc_stderr,none": 0.009512333319470372}}
8
+ {"created_at": "2025-09-06T15:30:19.295432", "global_step": 90000, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.3191126279863481, "acc_stderr,none": 0.013621696119173306, "acc_norm,none": 0.3506825938566553, "acc_norm_stderr,none": 0.01394463593072609}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.4764309764309764, "acc_stderr,none": 0.010248378585554037, "acc_norm,none": 0.5050505050505051, "acc_norm_stderr,none": 0.010259260102565875}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_pes_Arab": {"alias": "belebele_pes_Arab", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884525, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884525}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.42411870145389363, "acc_stderr,none": 0.004931984642695339, "acc_norm,none": 0.5617406891057558, "acc_norm_stderr,none": 0.00495159406327205}, "include_base_44_chinese": {"acc,none": 0.24587155963302754, "acc_stderr,none": 0.018517539247711934, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.049938324531150324}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.25287356321839083, "acc_stderr,none": 0.04687049503854671}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.323943661971831, "acc_stderr,none": 0.05593416612923642}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.23943661971830985, "acc_stderr,none": 0.05100514327793383}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.2413793103448276, "acc_stderr,none": 0.04614377668264891}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.051995626882956975}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.19718309859154928, "acc_stderr,none": 0.04755476905953276}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.1875, "acc_stderr,none": 0.10077822185373188}, "include_base_44_italian": {"acc,none": 0.2572992700729927, "acc_stderr,none": 0.018668571387726075, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.17142857142857143, "acc_stderr,none": 0.06463490595976273}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.2994011976047904, "acc_stderr,none": 0.03554736535384932}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.25, "acc_stderr,none": 0.1305582419667734}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.2903225806451613, "acc_stderr,none": 0.03657720706540911}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.20359281437125748, "acc_stderr,none": 0.031253219622833416}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.25, "acc_stderr,none": 0.1305582419667734}, "include_base_44_turkish": {"acc,none": 0.29014598540145986, "acc_stderr,none": 0.01938786453909353, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.3192771084337349, "acc_stderr,none": 0.036293353299478595}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.25903614457831325, "acc_stderr,none": 0.034106466140718564}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.26506024096385544, "acc_stderr,none": 0.03436024037944966}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.38, "acc_stderr,none": 0.06934092056863767}, "piqa": {"alias": "piqa", "acc,none": 0.7252448313384113, "acc_stderr,none": 0.010415033676676037, "acc_norm,none": 0.7328618063112078, "acc_norm_stderr,none": 0.010323440492612435}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.5148594377510041, "acc_stderr,none": 0.01001764608425538}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.4678714859437751, "acc_stderr,none": 0.010001361068173077}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.3381526104417671, "acc_stderr,none": 0.009482500057981033}}
9
+ {"created_at": "2025-09-10T05:42:34.303882", "global_step": 100000, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.3199658703071672, "acc_stderr,none": 0.013631345807016196, "acc_norm,none": 0.3506825938566553, "acc_norm_stderr,none": 0.01394463593072609}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.4802188552188552, "acc_stderr,none": 0.010251751199542735, "acc_norm,none": 0.5063131313131313, "acc_norm_stderr,none": 0.010258965668044438}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_pes_Arab": {"alias": "belebele_pes_Arab", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884525, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884525}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.4270065723959371, "acc_stderr,none": 0.004936323537147924, "acc_norm,none": 0.5653256323441546, "acc_norm_stderr,none": 0.004947010937455358}, "include_base_44_chinese": {"acc,none": 0.24587155963302754, "acc_stderr,none": 0.018517539247711934, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.049938324531150324}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.25287356321839083, "acc_stderr,none": 0.04687049503854671}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.323943661971831, "acc_stderr,none": 0.05593416612923642}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.23943661971830985, "acc_stderr,none": 0.05100514327793383}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.2413793103448276, "acc_stderr,none": 0.04614377668264891}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.051995626882956975}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.19718309859154928, "acc_stderr,none": 0.04755476905953276}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.1875, "acc_stderr,none": 0.10077822185373188}, "include_base_44_italian": {"acc,none": 0.2572992700729927, "acc_stderr,none": 0.018668571387726075, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.17142857142857143, "acc_stderr,none": 0.06463490595976273}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.2994011976047904, "acc_stderr,none": 0.03554736535384932}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.25, "acc_stderr,none": 0.1305582419667734}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.2903225806451613, "acc_stderr,none": 0.03657720706540911}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.20359281437125748, "acc_stderr,none": 0.031253219622833416}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.25, "acc_stderr,none": 0.1305582419667734}, "include_base_44_turkish": {"acc,none": 0.29014598540145986, "acc_stderr,none": 0.01938786453909353, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.3192771084337349, "acc_stderr,none": 0.036293353299478595}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.25903614457831325, "acc_stderr,none": 0.034106466140718564}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.26506024096385544, "acc_stderr,none": 0.03436024037944966}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.38, "acc_stderr,none": 0.06934092056863767}, "piqa": {"alias": "piqa", "acc,none": 0.7252448313384113, "acc_stderr,none": 0.010415033676676039, "acc_norm,none": 0.7306855277475517, "acc_norm_stderr,none": 0.01035000407058876}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.5244979919678715, "acc_stderr,none": 0.010010036112667896}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.4542168674698795, "acc_stderr,none": 0.00997996999168044}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.3469879518072289, "acc_stderr,none": 0.009541251561568398}}