Upload CohereLabs-aya-expanse-8b/metrics.eval.jsonl with huggingface_hub
Browse files
CohereLabs-aya-expanse-8b/metrics.eval.jsonl
CHANGED
|
@@ -3,3 +3,6 @@
|
|
| 3 |
{"created_at": "2025-09-13T16:04:03.867167", "global_step": 50000, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.31313993174061433, "acc_stderr,none": 0.013552671543623496, "acc_norm,none": 0.33276450511945393, "acc_norm_stderr,none": 0.013769863046192307}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.6662457912457912, "acc_stderr,none": 0.009676065683575477, "acc_norm,none": 0.6035353535353535, "acc_norm_stderr,none": 0.010037412763064526}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.2644444444444444, "acc_stderr,none": 0.01470940541341315, "acc_norm,none": 0.2644444444444444, "acc_norm_stderr,none": 0.01470940541341315}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.2633333333333333, "acc_stderr,none": 0.01468955304734253, "acc_norm,none": 0.2633333333333333, "acc_norm_stderr,none": 0.01468955304734253}, "belebele_pes_Arab": {"alias": "belebele_pes_Arab", "acc,none": 0.2922222222222222, "acc_stderr,none": 0.015167892258807002, "acc_norm,none": 0.2922222222222222, "acc_norm_stderr,none": 0.015167892258807002}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.2677777777777778, "acc_stderr,none": 0.014768244481214545, "acc_norm,none": 0.2677777777777778, "acc_norm_stderr,none": 0.014768244481214545}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.29, "acc_stderr,none": 0.015133811749341875, "acc_norm,none": 0.29, "acc_norm_stderr,none": 0.015133811749341875}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.3919537940649273, "acc_stderr,none": 0.004871887422893585, "acc_norm,none": 0.5066719776936865, "acc_norm_stderr,none": 0.0049893371485720765}, "include_base_44_chinese": {"acc,none": 0.24587155963302754, "acc_stderr,none": 0.018429010238306554, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.352112676056338, "acc_stderr,none": 0.05708756925195619}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.22988505747126436, "acc_stderr,none": 0.045371581852507746}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.29577464788732394, "acc_stderr,none": 0.05454906121418898}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.18309859154929578, "acc_stderr,none": 0.04622514734921429}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.20689655172413793, "acc_stderr,none": 0.04368097459950702}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.19718309859154928, "acc_stderr,none": 0.04755476905953275}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295697}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.3125, "acc_stderr,none": 0.11967838846954226}, "include_base_44_italian": {"acc,none": 0.2572992700729927, "acc_stderr,none": 0.018723981167064634, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.2, "acc_stderr,none": 0.06859943405700354}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.2934131736526946, "acc_stderr,none": 0.03534016139050469}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.16666666666666666, "acc_stderr,none": 0.1123666437438737}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.2645161290322581, "acc_stderr,none": 0.03554285382300394}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.23952095808383234, "acc_stderr,none": 0.03312541599851862}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.16666666666666666, "acc_stderr,none": 0.11236664374387367}, "include_base_44_turkish": {"acc,none": 0.26277372262773724, "acc_stderr,none": 0.018847974003617287, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.2469879518072289, "acc_stderr,none": 0.03357351982064536}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.2469879518072289, "acc_stderr,none": 0.03357351982064536}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.29518072289156627, "acc_stderr,none": 0.0355092018568963}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.26, "acc_stderr,none": 0.06266203485560373}, "piqa": {"alias": "piqa", "acc,none": 0.70620239390642, "acc_stderr,none": 0.010627574080514797, "acc_norm,none": 0.705114254624592, "acc_norm_stderr,none": 0.010639030620157008}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.5076305220883535, "acc_stderr,none": 0.01002090573154232}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.44779116465863456, "acc_stderr,none": 0.009967287545636125}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.3453815261044177, "acc_stderr,none": 0.009530841175865182}}
|
| 4 |
{"created_at": "2025-09-14T12:23:13.549188", "global_step": 60000, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.3191126279863481, "acc_stderr,none": 0.013621696119173313, "acc_norm,none": 0.3430034129692833, "acc_norm_stderr,none": 0.013872423223718164}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.6788720538720538, "acc_stderr,none": 0.009580787536986797, "acc_norm,none": 0.6094276094276094, "acc_norm_stderr,none": 0.010011059112064236}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.2544444444444444, "acc_stderr,none": 0.014526354751055183, "acc_norm,none": 0.2544444444444444, "acc_norm_stderr,none": 0.014526354751055183}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.24666666666666667, "acc_stderr,none": 0.014377023375409376, "acc_norm,none": 0.24666666666666667, "acc_norm_stderr,none": 0.014377023375409376}, "belebele_pes_Arab": {"alias": "belebele_pes_Arab", "acc,none": 0.23777777777777778, "acc_stderr,none": 0.014198634809308198, "acc_norm,none": 0.23777777777777778, "acc_norm_stderr,none": 0.014198634809308198}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.27111111111111114, "acc_stderr,none": 0.014826016446581951, "acc_norm,none": 0.27111111111111114, "acc_norm_stderr,none": 0.014826016446581951}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.27111111111111114, "acc_stderr,none": 0.014826016446581963, "acc_norm,none": 0.27111111111111114, "acc_norm_stderr,none": 0.014826016446581963}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.407787293367855, "acc_stderr,none": 0.004904189257891272, "acc_norm,none": 0.5253933479386577, "acc_norm_stderr,none": 0.004983342213776255}, "include_base_44_chinese": {"acc,none": 0.24587155963302754, "acc_stderr,none": 0.018454121026570052, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.3380281690140845, "acc_stderr,none": 0.056538877391335146}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.2413793103448276, "acc_stderr,none": 0.046143776682648914}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295697}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.2112676056338028, "acc_stderr,none": 0.04879016359348949}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.2413793103448276, "acc_stderr,none": 0.046143776682648914}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.16901408450704225, "acc_stderr,none": 0.04479290819909659}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.23943661971830985, "acc_stderr,none": 0.05100514327793383}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.375, "acc_stderr,none": 0.125}, "include_base_44_italian": {"acc,none": 0.24452554744525548, "acc_stderr,none": 0.01841689517685107, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.2, "acc_stderr,none": 0.06859943405700354}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.2754491017964072, "acc_stderr,none": 0.03467377173717454}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.16666666666666666, "acc_stderr,none": 0.11236664374387369}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.25806451612903225, "acc_stderr,none": 0.03526036935484003}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.2215568862275449, "acc_stderr,none": 0.03223309610157497}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.16666666666666666, "acc_stderr,none": 0.11236664374387367}, "include_base_44_turkish": {"acc,none": 0.2591240875912409, "acc_stderr,none": 0.018698440415429975, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.25301204819277107, "acc_stderr,none": 0.03384429155233136}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.20481927710843373, "acc_stderr,none": 0.03141784291663926}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.30120481927710846, "acc_stderr,none": 0.035716092300534796}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.32, "acc_stderr,none": 0.06663945022680343}, "piqa": {"alias": "piqa", "acc,none": 0.7100108813928183, "acc_stderr,none": 0.010586899128169328, "acc_norm,none": 0.7100108813928183, "acc_norm_stderr,none": 0.010586899128169328}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.48032128514056227, "acc_stderr,none": 0.010014307727112712}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.4566265060240964, "acc_stderr,none": 0.009984293410840315}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.3413654618473896, "acc_stderr,none": 0.009504288078880218}}
|
| 5 |
{"created_at": "2025-09-15T07:40:23.920922", "global_step": 70000, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.32764505119453924, "acc_stderr,none": 0.01371584794071934, "acc_norm,none": 0.35409556313993173, "acc_norm_stderr,none": 0.013975454122756553}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.6944444444444444, "acc_stderr,none": 0.009452181213593468, "acc_norm,none": 0.6064814814814815, "acc_norm_stderr,none": 0.010024426884292562}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.27111111111111114, "acc_stderr,none": 0.014826016446581932, "acc_norm,none": 0.27111111111111114, "acc_norm_stderr,none": 0.014826016446581932}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.28, "acc_stderr,none": 0.014974951276705731, "acc_norm,none": 0.28, "acc_norm_stderr,none": 0.014974951276705731}, "belebele_pes_Arab": {"alias": "belebele_pes_Arab", "acc,none": 0.2733333333333333, "acc_stderr,none": 0.014863944409417471, "acc_norm,none": 0.2733333333333333, "acc_norm_stderr,none": 0.014863944409417471}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.2677777777777778, "acc_stderr,none": 0.014768244481214539, "acc_norm,none": 0.2677777777777778, "acc_norm_stderr,none": 0.014768244481214539}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.2311111111111111, "acc_stderr,none": 0.014059256663218164, "acc_norm,none": 0.2311111111111111, "acc_norm_stderr,none": 0.014059256663218164}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.41635132443736306, "acc_stderr,none": 0.0049194578501042295, "acc_norm,none": 0.5419239195379406, "acc_norm_stderr,none": 0.004972210244020565}, "include_base_44_chinese": {"acc,none": 0.27522935779816515, "acc_stderr,none": 0.019036574518446477, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.23943661971830985, "acc_stderr,none": 0.05100514327793383}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.21839080459770116, "acc_stderr,none": 0.044551545932103705}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.30985915492957744, "acc_stderr,none": 0.055271596817883316}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.28169014084507044, "acc_stderr,none": 0.053764141713832536}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.22988505747126436, "acc_stderr,none": 0.045371581852507746}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.29577464788732394, "acc_stderr,none": 0.054549061214188996}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.29577464788732394, "acc_stderr,none": 0.054549061214188996}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.625, "acc_stderr,none": 0.125}, "include_base_44_italian": {"acc,none": 0.2427007299270073, "acc_stderr,none": 0.018380839396644175, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.2857142857142857, "acc_stderr,none": 0.07747516350666293}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.20958083832335328, "acc_stderr,none": 0.03159006158827181}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.3333333333333333, "acc_stderr,none": 0.14213381090374033}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.25161290322580643, "acc_stderr,none": 0.034967874881680024}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.25149700598802394, "acc_stderr,none": 0.03367511880168704}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.25, "acc_stderr,none": 0.1305582419667734}, "include_base_44_turkish": {"acc,none": 0.24087591240875914, "acc_stderr,none": 0.018277324409299855, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.26506024096385544, "acc_stderr,none": 0.03436024037944967}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.21686746987951808, "acc_stderr,none": 0.032082844503563655}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.26506024096385544, "acc_stderr,none": 0.03436024037944967}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.16, "acc_stderr,none": 0.052372293656638154}, "piqa": {"alias": "piqa", "acc,none": 0.7165397170837867, "acc_stderr,none": 0.010515057791152065, "acc_norm,none": 0.7127312295973884, "acc_norm_stderr,none": 0.010557291761528637}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.4887550200803213, "acc_stderr,none": 0.010019537972975076}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.45582329317269077, "acc_stderr,none": 0.009982878443738423}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.363855421686747, "acc_stderr,none": 0.009643393577626726}}
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
{"created_at": "2025-09-13T16:04:03.867167", "global_step": 50000, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.31313993174061433, "acc_stderr,none": 0.013552671543623496, "acc_norm,none": 0.33276450511945393, "acc_norm_stderr,none": 0.013769863046192307}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.6662457912457912, "acc_stderr,none": 0.009676065683575477, "acc_norm,none": 0.6035353535353535, "acc_norm_stderr,none": 0.010037412763064526}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.2644444444444444, "acc_stderr,none": 0.01470940541341315, "acc_norm,none": 0.2644444444444444, "acc_norm_stderr,none": 0.01470940541341315}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.2633333333333333, "acc_stderr,none": 0.01468955304734253, "acc_norm,none": 0.2633333333333333, "acc_norm_stderr,none": 0.01468955304734253}, "belebele_pes_Arab": {"alias": "belebele_pes_Arab", "acc,none": 0.2922222222222222, "acc_stderr,none": 0.015167892258807002, "acc_norm,none": 0.2922222222222222, "acc_norm_stderr,none": 0.015167892258807002}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.2677777777777778, "acc_stderr,none": 0.014768244481214545, "acc_norm,none": 0.2677777777777778, "acc_norm_stderr,none": 0.014768244481214545}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.29, "acc_stderr,none": 0.015133811749341875, "acc_norm,none": 0.29, "acc_norm_stderr,none": 0.015133811749341875}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.3919537940649273, "acc_stderr,none": 0.004871887422893585, "acc_norm,none": 0.5066719776936865, "acc_norm_stderr,none": 0.0049893371485720765}, "include_base_44_chinese": {"acc,none": 0.24587155963302754, "acc_stderr,none": 0.018429010238306554, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.352112676056338, "acc_stderr,none": 0.05708756925195619}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.22988505747126436, "acc_stderr,none": 0.045371581852507746}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.29577464788732394, "acc_stderr,none": 0.05454906121418898}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.18309859154929578, "acc_stderr,none": 0.04622514734921429}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.20689655172413793, "acc_stderr,none": 0.04368097459950702}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.19718309859154928, "acc_stderr,none": 0.04755476905953275}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295697}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.3125, "acc_stderr,none": 0.11967838846954226}, "include_base_44_italian": {"acc,none": 0.2572992700729927, "acc_stderr,none": 0.018723981167064634, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.2, "acc_stderr,none": 0.06859943405700354}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.2934131736526946, "acc_stderr,none": 0.03534016139050469}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.16666666666666666, "acc_stderr,none": 0.1123666437438737}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.2645161290322581, "acc_stderr,none": 0.03554285382300394}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.23952095808383234, "acc_stderr,none": 0.03312541599851862}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.16666666666666666, "acc_stderr,none": 0.11236664374387367}, "include_base_44_turkish": {"acc,none": 0.26277372262773724, "acc_stderr,none": 0.018847974003617287, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.2469879518072289, "acc_stderr,none": 0.03357351982064536}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.2469879518072289, "acc_stderr,none": 0.03357351982064536}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.29518072289156627, "acc_stderr,none": 0.0355092018568963}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.26, "acc_stderr,none": 0.06266203485560373}, "piqa": {"alias": "piqa", "acc,none": 0.70620239390642, "acc_stderr,none": 0.010627574080514797, "acc_norm,none": 0.705114254624592, "acc_norm_stderr,none": 0.010639030620157008}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.5076305220883535, "acc_stderr,none": 0.01002090573154232}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.44779116465863456, "acc_stderr,none": 0.009967287545636125}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.3453815261044177, "acc_stderr,none": 0.009530841175865182}}
|
| 4 |
{"created_at": "2025-09-14T12:23:13.549188", "global_step": 60000, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.3191126279863481, "acc_stderr,none": 0.013621696119173313, "acc_norm,none": 0.3430034129692833, "acc_norm_stderr,none": 0.013872423223718164}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.6788720538720538, "acc_stderr,none": 0.009580787536986797, "acc_norm,none": 0.6094276094276094, "acc_norm_stderr,none": 0.010011059112064236}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.2544444444444444, "acc_stderr,none": 0.014526354751055183, "acc_norm,none": 0.2544444444444444, "acc_norm_stderr,none": 0.014526354751055183}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.24666666666666667, "acc_stderr,none": 0.014377023375409376, "acc_norm,none": 0.24666666666666667, "acc_norm_stderr,none": 0.014377023375409376}, "belebele_pes_Arab": {"alias": "belebele_pes_Arab", "acc,none": 0.23777777777777778, "acc_stderr,none": 0.014198634809308198, "acc_norm,none": 0.23777777777777778, "acc_norm_stderr,none": 0.014198634809308198}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.27111111111111114, "acc_stderr,none": 0.014826016446581951, "acc_norm,none": 0.27111111111111114, "acc_norm_stderr,none": 0.014826016446581951}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.27111111111111114, "acc_stderr,none": 0.014826016446581963, "acc_norm,none": 0.27111111111111114, "acc_norm_stderr,none": 0.014826016446581963}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.407787293367855, "acc_stderr,none": 0.004904189257891272, "acc_norm,none": 0.5253933479386577, "acc_norm_stderr,none": 0.004983342213776255}, "include_base_44_chinese": {"acc,none": 0.24587155963302754, "acc_stderr,none": 0.018454121026570052, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.3380281690140845, "acc_stderr,none": 0.056538877391335146}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.2413793103448276, "acc_stderr,none": 0.046143776682648914}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295697}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.2112676056338028, "acc_stderr,none": 0.04879016359348949}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.2413793103448276, "acc_stderr,none": 0.046143776682648914}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.16901408450704225, "acc_stderr,none": 0.04479290819909659}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.23943661971830985, "acc_stderr,none": 0.05100514327793383}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.375, "acc_stderr,none": 0.125}, "include_base_44_italian": {"acc,none": 0.24452554744525548, "acc_stderr,none": 0.01841689517685107, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.2, "acc_stderr,none": 0.06859943405700354}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.2754491017964072, "acc_stderr,none": 0.03467377173717454}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.16666666666666666, "acc_stderr,none": 0.11236664374387369}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.25806451612903225, "acc_stderr,none": 0.03526036935484003}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.2215568862275449, "acc_stderr,none": 0.03223309610157497}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.16666666666666666, "acc_stderr,none": 0.11236664374387367}, "include_base_44_turkish": {"acc,none": 0.2591240875912409, "acc_stderr,none": 0.018698440415429975, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.25301204819277107, "acc_stderr,none": 0.03384429155233136}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.20481927710843373, "acc_stderr,none": 0.03141784291663926}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.30120481927710846, "acc_stderr,none": 0.035716092300534796}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.32, "acc_stderr,none": 0.06663945022680343}, "piqa": {"alias": "piqa", "acc,none": 0.7100108813928183, "acc_stderr,none": 0.010586899128169328, "acc_norm,none": 0.7100108813928183, "acc_norm_stderr,none": 0.010586899128169328}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.48032128514056227, "acc_stderr,none": 0.010014307727112712}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.4566265060240964, "acc_stderr,none": 0.009984293410840315}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.3413654618473896, "acc_stderr,none": 0.009504288078880218}}
|
| 5 |
{"created_at": "2025-09-15T07:40:23.920922", "global_step": 70000, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.32764505119453924, "acc_stderr,none": 0.01371584794071934, "acc_norm,none": 0.35409556313993173, "acc_norm_stderr,none": 0.013975454122756553}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.6944444444444444, "acc_stderr,none": 0.009452181213593468, "acc_norm,none": 0.6064814814814815, "acc_norm_stderr,none": 0.010024426884292562}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.27111111111111114, "acc_stderr,none": 0.014826016446581932, "acc_norm,none": 0.27111111111111114, "acc_norm_stderr,none": 0.014826016446581932}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.28, "acc_stderr,none": 0.014974951276705731, "acc_norm,none": 0.28, "acc_norm_stderr,none": 0.014974951276705731}, "belebele_pes_Arab": {"alias": "belebele_pes_Arab", "acc,none": 0.2733333333333333, "acc_stderr,none": 0.014863944409417471, "acc_norm,none": 0.2733333333333333, "acc_norm_stderr,none": 0.014863944409417471}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.2677777777777778, "acc_stderr,none": 0.014768244481214539, "acc_norm,none": 0.2677777777777778, "acc_norm_stderr,none": 0.014768244481214539}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.2311111111111111, "acc_stderr,none": 0.014059256663218164, "acc_norm,none": 0.2311111111111111, "acc_norm_stderr,none": 0.014059256663218164}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.41635132443736306, "acc_stderr,none": 0.0049194578501042295, "acc_norm,none": 0.5419239195379406, "acc_norm_stderr,none": 0.004972210244020565}, "include_base_44_chinese": {"acc,none": 0.27522935779816515, "acc_stderr,none": 0.019036574518446477, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.23943661971830985, "acc_stderr,none": 0.05100514327793383}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.21839080459770116, "acc_stderr,none": 0.044551545932103705}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.30985915492957744, "acc_stderr,none": 0.055271596817883316}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.28169014084507044, "acc_stderr,none": 0.053764141713832536}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.22988505747126436, "acc_stderr,none": 0.045371581852507746}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.29577464788732394, "acc_stderr,none": 0.054549061214188996}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.29577464788732394, "acc_stderr,none": 0.054549061214188996}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.625, "acc_stderr,none": 0.125}, "include_base_44_italian": {"acc,none": 0.2427007299270073, "acc_stderr,none": 0.018380839396644175, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.2857142857142857, "acc_stderr,none": 0.07747516350666293}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.20958083832335328, "acc_stderr,none": 0.03159006158827181}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.3333333333333333, "acc_stderr,none": 0.14213381090374033}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.25161290322580643, "acc_stderr,none": 0.034967874881680024}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.25149700598802394, "acc_stderr,none": 0.03367511880168704}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.25, "acc_stderr,none": 0.1305582419667734}, "include_base_44_turkish": {"acc,none": 0.24087591240875914, "acc_stderr,none": 0.018277324409299855, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.26506024096385544, "acc_stderr,none": 0.03436024037944967}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.21686746987951808, "acc_stderr,none": 0.032082844503563655}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.26506024096385544, "acc_stderr,none": 0.03436024037944967}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.16, "acc_stderr,none": 0.052372293656638154}, "piqa": {"alias": "piqa", "acc,none": 0.7165397170837867, "acc_stderr,none": 0.010515057791152065, "acc_norm,none": 0.7127312295973884, "acc_norm_stderr,none": 0.010557291761528637}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.4887550200803213, "acc_stderr,none": 0.010019537972975076}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.45582329317269077, "acc_stderr,none": 0.009982878443738423}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.363855421686747, "acc_stderr,none": 0.009643393577626726}}
|
| 6 |
+
{"created_at": "2025-09-16T03:37:31.739199", "global_step": 80000, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.33532423208191126, "acc_stderr,none": 0.013796182947785562, "acc_norm,none": 0.3677474402730375, "acc_norm_stderr,none": 0.01409099561816847}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.7062289562289562, "acc_stderr,none": 0.009346423298166723, "acc_norm,none": 0.6355218855218855, "acc_norm_stderr,none": 0.009875729282482438}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.2411111111111111, "acc_stderr,none": 0.014266513886578912, "acc_norm,none": 0.2411111111111111, "acc_norm_stderr,none": 0.014266513886578912}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.2311111111111111, "acc_stderr,none": 0.014059256663218176, "acc_norm,none": 0.2311111111111111, "acc_norm_stderr,none": 0.014059256663218176}, "belebele_pes_Arab": {"alias": "belebele_pes_Arab", "acc,none": 0.26555555555555554, "acc_stderr,none": 0.0147291377875851, "acc_norm,none": 0.26555555555555554, "acc_norm_stderr,none": 0.0147291377875851}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.23777777777777778, "acc_stderr,none": 0.014198634809308172, "acc_norm,none": 0.23777777777777778, "acc_norm_stderr,none": 0.014198634809308172}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.22333333333333333, "acc_stderr,none": 0.013890384297198735, "acc_norm,none": 0.22333333333333333, "acc_norm_stderr,none": 0.013890384297198735}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.42959569806811393, "acc_stderr,none": 0.004940067402031033, "acc_norm,none": 0.5617406891057558, "acc_norm_stderr,none": 0.004951594063272053}, "include_base_44_chinese": {"acc,none": 0.26422018348623855, "acc_stderr,none": 0.018922035489799614, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.28169014084507044, "acc_stderr,none": 0.05376414171383253}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.25287356321839083, "acc_stderr,none": 0.046870495038546706}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.23943661971830985, "acc_stderr,none": 0.05100514327793383}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.18309859154929578, "acc_stderr,none": 0.04622514734921429}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.26436781609195403, "acc_stderr,none": 0.04755382188278444}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.29577464788732394, "acc_stderr,none": 0.054549061214189}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.29577464788732394, "acc_stderr,none": 0.05454906121418899}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.4375, "acc_stderr,none": 0.128086884574495}, "include_base_44_italian": {"acc,none": 0.23175182481751824, "acc_stderr,none": 0.018046940763569056, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.2571428571428571, "acc_stderr,none": 0.07495496847387484}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.19161676646706588, "acc_stderr,none": 0.030547196475366606}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.3333333333333333, "acc_stderr,none": 0.14213381090374033}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.2129032258064516, "acc_stderr,none": 0.032987152383729575}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.2694610778443114, "acc_stderr,none": 0.03443623453899477}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.3333333333333333, "acc_stderr,none": 0.14213381090374033}, "include_base_44_turkish": {"acc,none": 0.23175182481751824, "acc_stderr,none": 0.01806122796157954, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.20481927710843373, "acc_stderr,none": 0.03141784291663926}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.22289156626506024, "acc_stderr,none": 0.03240004825594688}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.26506024096385544, "acc_stderr,none": 0.03436024037944967}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.24, "acc_stderr,none": 0.061011875725893214}, "piqa": {"alias": "piqa", "acc,none": 0.7219804134929271, "acc_stderr,none": 0.01045311735833281, "acc_norm,none": 0.721436343852013, "acc_norm_stderr,none": 0.010459397235965178}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.5040160642570282, "acc_stderr,none": 0.0100217495745559}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.457429718875502, "acc_stderr,none": 0.009985682220227462}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.35542168674698793, "acc_stderr,none": 0.009593947957927137}}
|
| 7 |
+
{"created_at": "2025-09-16T23:24:37.609138", "global_step": 90000, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.3438566552901024, "acc_stderr,none": 0.01388064457015622, "acc_norm,none": 0.3779863481228669, "acc_norm_stderr,none": 0.014169664520303103}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.7053872053872053, "acc_stderr,none": 0.009354224395837095, "acc_norm,none": 0.6430976430976431, "acc_norm_stderr,none": 0.009830630210347016}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.23333333333333334, "acc_stderr,none": 0.014106258477755677, "acc_norm,none": 0.23333333333333334, "acc_norm_stderr,none": 0.014106258477755677}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.01401170515888451, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.01401170515888451}, "belebele_pes_Arab": {"alias": "belebele_pes_Arab", "acc,none": 0.25666666666666665, "acc_stderr,none": 0.014567891342380042, "acc_norm,none": 0.25666666666666665, "acc_norm_stderr,none": 0.014567891342380042}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.22333333333333333, "acc_stderr,none": 0.013890384297198738, "acc_norm,none": 0.22333333333333333, "acc_norm_stderr,none": 0.013890384297198738}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.22333333333333333, "acc_stderr,none": 0.013890384297198738, "acc_norm,none": 0.22333333333333333, "acc_norm_stderr,none": 0.013890384297198738}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.4358693487353117, "acc_stderr,none": 0.004948567856373871, "acc_norm,none": 0.5695080661222864, "acc_norm_stderr,none": 0.0049413312155985505}, "include_base_44_chinese": {"acc,none": 0.26972477064220185, "acc_stderr,none": 0.019089241579820938, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.23943661971830985, "acc_stderr,none": 0.05100514327793383}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.21839080459770116, "acc_stderr,none": 0.044551545932103705}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.2676056338028169, "acc_stderr,none": 0.05291406220869697}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.30985915492957744, "acc_stderr,none": 0.055271596817883316}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.26436781609195403, "acc_stderr,none": 0.04755382188278443}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.30985915492957744, "acc_stderr,none": 0.055271596817883316}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.2676056338028169, "acc_stderr,none": 0.05291406220869697}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.375, "acc_stderr,none": 0.125}, "include_base_44_italian": {"acc,none": 0.22627737226277372, "acc_stderr,none": 0.017902394382046435, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.2857142857142857, "acc_stderr,none": 0.07747516350666292}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.19760479041916168, "acc_stderr,none": 0.03090571916724061}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.3333333333333333, "acc_stderr,none": 0.14213381090374033}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.2, "acc_stderr,none": 0.03223291856101518}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.25149700598802394, "acc_stderr,none": 0.033675118801687026}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.3333333333333333, "acc_stderr,none": 0.14213381090374033}, "include_base_44_turkish": {"acc,none": 0.27007299270072993, "acc_stderr,none": 0.018994500791915527, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.25301204819277107, "acc_stderr,none": 0.03384429155233135}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.2710843373493976, "acc_stderr,none": 0.03460579907553026}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.25903614457831325, "acc_stderr,none": 0.03410646614071856}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.36, "acc_stderr,none": 0.06857142857142856}, "piqa": {"alias": "piqa", "acc,none": 0.7187159956474428, "acc_stderr,none": 0.010490509832327424, "acc_norm,none": 0.7306855277475517, "acc_norm_stderr,none": 0.010350004070588762}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.5, "acc_stderr,none": 0.010022072867228943}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.47309236947791167, "acc_stderr,none": 0.010007549970702514}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.3530120481927711, "acc_stderr,none": 0.009579225840709719}}
|
| 8 |
+
{"created_at": "2025-09-17T19:09:33.467218", "global_step": 100000, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.34982935153583616, "acc_stderr,none": 0.013936809212158287, "acc_norm,none": 0.3779863481228669, "acc_norm_stderr,none": 0.014169664520303103}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.7070707070707071, "acc_stderr,none": 0.009338583737393597, "acc_norm,none": 0.6477272727272727, "acc_norm_stderr,none": 0.009801753933112771}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.24555555555555555, "acc_stderr,none": 0.014355180865342966, "acc_norm,none": 0.24555555555555555, "acc_norm_stderr,none": 0.014355180865342966}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.23555555555555555, "acc_stderr,none": 0.01415271607913263, "acc_norm,none": 0.23555555555555555, "acc_norm_stderr,none": 0.01415271607913263}, "belebele_pes_Arab": {"alias": "belebele_pes_Arab", "acc,none": 0.25222222222222224, "acc_stderr,none": 0.014484319811433905, "acc_norm,none": 0.25222222222222224, "acc_norm_stderr,none": 0.014484319811433905}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.21222222222222223, "acc_stderr,none": 0.013636956209422683, "acc_norm,none": 0.21222222222222223, "acc_norm_stderr,none": 0.013636956209422683}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.22777777777777777, "acc_stderr,none": 0.013987721523687944, "acc_norm,none": 0.22777777777777777, "acc_norm_stderr,none": 0.013987721523687944}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.4386576379207329, "acc_stderr,none": 0.004952087083128894, "acc_norm,none": 0.5737900816570405, "acc_norm_stderr,none": 0.004935143791573811}, "include_base_44_chinese": {"acc,none": 0.28623853211009176, "acc_stderr,none": 0.019328692429481053, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295697}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.19540229885057472, "acc_stderr,none": 0.042756781109738705}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.2676056338028169, "acc_stderr,none": 0.05291406220869697}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.323943661971831, "acc_stderr,none": 0.055934166129236414}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.27586206896551724, "acc_stderr,none": 0.04819560289115226}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.352112676056338, "acc_stderr,none": 0.05708756925195619}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.30985915492957744, "acc_stderr,none": 0.055271596817883316}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.5, "acc_stderr,none": 0.12909944487358055}, "include_base_44_italian": {"acc,none": 0.23905109489051096, "acc_stderr,none": 0.018283743688816693, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.2857142857142857, "acc_stderr,none": 0.07747516350666292}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.20958083832335328, "acc_stderr,none": 0.03159006158827181}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.3333333333333333, "acc_stderr,none": 0.14213381090374033}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.23225806451612904, "acc_stderr,none": 0.034027706051285155}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.25748502994011974, "acc_stderr,none": 0.03393708648569707}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.25, "acc_stderr,none": 0.1305582419667734}, "include_base_44_turkish": {"acc,none": 0.2572992700729927, "acc_stderr,none": 0.018687219332969945, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.23493975903614459, "acc_stderr,none": 0.03300533186128922}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.25301204819277107, "acc_stderr,none": 0.03384429155233136}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.25301204819277107, "acc_stderr,none": 0.03384429155233136}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.36, "acc_stderr,none": 0.06857142857142856}, "piqa": {"alias": "piqa", "acc,none": 0.7181719260065288, "acc_stderr,none": 0.010496675231258171, "acc_norm,none": 0.735038084874864, "acc_norm_stderr,none": 0.010296557993316052}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.5044176706827309, "acc_stderr,none": 0.010021681681769338}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.4779116465863454, "acc_stderr,none": 0.01001228864559178}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.3550200803212851, "acc_stderr,none": 0.00959151273097429}}
|