craffel HF Staff commited on
Commit
5626798
·
verified ·
1 Parent(s): dc84b37

Upload bigscience-bloom/metrics.eval.jsonl with huggingface_hub

Browse files
bigscience-bloom/metrics.eval.jsonl CHANGED
@@ -43,3 +43,7 @@
43
  {"created_at": "2025-08-26T17:07:42.187545", "global_step": 88000, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.3378839590443686, "acc_stderr,none": 0.013822047922283514, "acc_norm,none": 0.3677474402730375, "acc_norm_stderr,none": 0.01409099561816847}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.6957070707070707, "acc_stderr,none": 0.009441202922359183, "acc_norm,none": 0.6388888888888888, "acc_norm_stderr,none": 0.00985601342581124}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.22444444444444445, "acc_stderr,none": 0.013914930474237287, "acc_norm,none": 0.22444444444444445, "acc_norm_stderr,none": 0.013914930474237287}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.20777777777777778, "acc_stderr,none": 0.013531414972025832, "acc_norm,none": 0.20777777777777778, "acc_norm_stderr,none": 0.013531414972025832}, "belebele_pes_Arab": {"alias": "belebele_pes_Arab", "acc,none": 0.2411111111111111, "acc_stderr,none": 0.014266513886578923, "acc_norm,none": 0.2411111111111111, "acc_norm_stderr,none": 0.014266513886578923}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.23, "acc_stderr,none": 0.014035549969945596, "acc_norm,none": 0.23, "acc_norm_stderr,none": 0.014035549969945596}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.24333333333333335, "acc_stderr,none": 0.014311107963683041, "acc_norm,none": 0.24333333333333335, "acc_norm_stderr,none": 0.014311107963683041}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.4223262298346943, "acc_stderr,none": 0.0049292048643159725, "acc_norm,none": 0.5621390161322446, "acc_norm_stderr,none": 0.004951097802775946}, "include_base_44_chinese": {"acc,none": 0.24770642201834864, "acc_stderr,none": 0.018504824228214076, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.2112676056338028, "acc_stderr,none": 0.048790163593489484}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.19540229885057472, "acc_stderr,none": 0.0427567811097387}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.29577464788732394, "acc_stderr,none": 0.05454906121418899}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.18309859154929578, "acc_stderr,none": 0.046225147349214284}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.3218390804597701, "acc_stderr,none": 0.05037749206122548}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295698}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295697}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.3125, "acc_stderr,none": 0.11967838846954226}, "include_base_44_italian": {"acc,none": 0.23175182481751824, "acc_stderr,none": 0.01805240391805261, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.2571428571428571, "acc_stderr,none": 0.07495496847387484}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.2754491017964072, "acc_stderr,none": 0.03467377173717454}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.08333333333333333, "acc_stderr,none": 0.08333333333333331}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.2064516129032258, "acc_stderr,none": 0.03261635957463419}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.2215568862275449, "acc_stderr,none": 0.03223309610157498}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.16666666666666666, "acc_stderr,none": 0.1123666437438737}, "include_base_44_turkish": {"acc,none": 0.2664233576642336, "acc_stderr,none": 0.018873901735993865, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.2710843373493976, "acc_stderr,none": 0.034605799075530276}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.2891566265060241, "acc_stderr,none": 0.03529486801511114}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.27710843373493976, "acc_stderr,none": 0.034843315926805875}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.14, "acc_stderr,none": 0.04956957592256418}, "piqa": {"alias": "piqa", "acc,none": 0.7279651795429815, "acc_stderr,none": 0.01038276378624738, "acc_norm,none": 0.7263329706202394, "acc_norm_stderr,none": 0.010402184206229216}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.5160642570281124, "acc_stderr,none": 0.01001689893235568}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.4666666666666667, "acc_stderr,none": 0.009999776793187629}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.3377510040160643, "acc_stderr,none": 0.00947974227395648}}
44
  {"created_at": "2025-08-26T19:03:26.581616", "global_step": 90000, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.3455631399317406, "acc_stderr,none": 0.013896938461145683, "acc_norm,none": 0.36945392491467577, "acc_norm_stderr,none": 0.014104578366491897}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.696969696969697, "acc_stderr,none": 0.00943014066927896, "acc_norm,none": 0.6393097643097643, "acc_norm_stderr,none": 0.009853512108416744}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.24555555555555555, "acc_stderr,none": 0.014355180865342966, "acc_norm,none": 0.24555555555555555, "acc_norm_stderr,none": 0.014355180865342966}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.20666666666666667, "acc_stderr,none": 0.013504646568022659, "acc_norm,none": 0.20666666666666667, "acc_norm_stderr,none": 0.013504646568022659}, "belebele_pes_Arab": {"alias": "belebele_pes_Arab", "acc,none": 0.24, "acc_stderr,none": 0.01424401987979264, "acc_norm,none": 0.24, "acc_norm_stderr,none": 0.01424401987979264}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.2477777777777778, "acc_stderr,none": 0.0143987373773361, "acc_norm,none": 0.2477777777777778, "acc_norm_stderr,none": 0.0143987373773361}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.26555555555555554, "acc_stderr,none": 0.014729137787585096, "acc_norm,none": 0.26555555555555554, "acc_norm_stderr,none": 0.014729137787585096}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.4239195379406493, "acc_stderr,none": 0.004931679059919372, "acc_norm,none": 0.5621390161322446, "acc_norm_stderr,none": 0.004951097802775946}, "include_base_44_chinese": {"acc,none": 0.24036697247706423, "acc_stderr,none": 0.018323865179184166, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.28169014084507044, "acc_stderr,none": 0.053764141713832536}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.22988505747126436, "acc_stderr,none": 0.045371581852507746}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.30985915492957744, "acc_stderr,none": 0.055271596817883316}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.15492957746478872, "acc_stderr,none": 0.0432478576664078}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.26436781609195403, "acc_stderr,none": 0.04755382188278443}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.2112676056338028, "acc_stderr,none": 0.04879016359348949}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.2112676056338028, "acc_stderr,none": 0.04879016359348948}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.3125, "acc_stderr,none": 0.11967838846954226}, "include_base_44_italian": {"acc,none": 0.21350364963503649, "acc_stderr,none": 0.017567906003196875, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.2, "acc_stderr,none": 0.06859943405700354}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.2155688622754491, "acc_stderr,none": 0.031916582528798025}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.08333333333333333, "acc_stderr,none": 0.08333333333333331}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.23870967741935484, "acc_stderr,none": 0.03435182440245766}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.20359281437125748, "acc_stderr,none": 0.03125321962283342}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.16666666666666666, "acc_stderr,none": 0.11236664374387366}, "include_base_44_turkish": {"acc,none": 0.2755474452554745, "acc_stderr,none": 0.01912421056964195, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.2710843373493976, "acc_stderr,none": 0.03460579907553027}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.29518072289156627, "acc_stderr,none": 0.03550920185689628}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.28313253012048195, "acc_stderr,none": 0.03507295431370519}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.2, "acc_stderr,none": 0.057142857142857155}, "piqa": {"alias": "piqa", "acc,none": 0.7290533188248096, "acc_stderr,none": 0.010369718937426841, "acc_norm,none": 0.7268770402611534, "acc_norm_stderr,none": 0.010395730264453278}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.5180722891566265, "acc_stderr,none": 0.010015524156629813}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.4598393574297189, "acc_stderr,none": 0.00998969181016967}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.3377510040160643, "acc_stderr,none": 0.00947974227395648}}
45
  {"created_at": "2025-08-26T20:59:13.286634", "global_step": 92000, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.3464163822525597, "acc_stderr,none": 0.013905011180063246, "acc_norm,none": 0.36945392491467577, "acc_norm_stderr,none": 0.0141045783664919}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.694023569023569, "acc_stderr,none": 0.009455822036426621, "acc_norm,none": 0.6464646464646465, "acc_norm_stderr,none": 0.0098097289481515}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.21888888888888888, "acc_stderr,none": 0.013790766978256945, "acc_norm,none": 0.21888888888888888, "acc_norm_stderr,none": 0.013790766978256945}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.2, "acc_stderr,none": 0.013340746919301446, "acc_norm,none": 0.2, "acc_norm_stderr,none": 0.013340746919301446}, "belebele_pes_Arab": {"alias": "belebele_pes_Arab", "acc,none": 0.25, "acc_stderr,none": 0.014441782171967503, "acc_norm,none": 0.25, "acc_norm_stderr,none": 0.014441782171967503}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.25666666666666665, "acc_stderr,none": 0.014567891342380034, "acc_norm,none": 0.25666666666666665, "acc_norm_stderr,none": 0.014567891342380034}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.24888888888888888, "acc_stderr,none": 0.014420323451642549, "acc_norm,none": 0.24888888888888888, "acc_norm_stderr,none": 0.014420323451642549}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.42620991834295957, "acc_stderr,none": 0.004935143791573816, "acc_norm,none": 0.5655247958573989, "acc_norm_stderr,none": 0.004946748608271351}, "include_base_44_chinese": {"acc,none": 0.25321100917431194, "acc_stderr,none": 0.018690291369139276, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.30985915492957744, "acc_stderr,none": 0.05527159681788331}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.21839080459770116, "acc_stderr,none": 0.044551545932103705}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.30985915492957744, "acc_stderr,none": 0.05527159681788331}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.2112676056338028, "acc_stderr,none": 0.04879016359348947}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.26436781609195403, "acc_stderr,none": 0.04755382188278444}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.04993832453115032}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295697}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.1875, "acc_stderr,none": 0.10077822185373188}, "include_base_44_italian": {"acc,none": 0.24087591240875914, "acc_stderr,none": 0.018287259600319905, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.2, "acc_stderr,none": 0.06859943405700356}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.2634730538922156, "acc_stderr,none": 0.034190730421806675}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.16666666666666666, "acc_stderr,none": 0.11236664374387367}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.2838709677419355, "acc_stderr,none": 0.036332540727054406}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.19760479041916168, "acc_stderr,none": 0.030905719167240605}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.16666666666666666, "acc_stderr,none": 0.1123666437438737}, "include_base_44_turkish": {"acc,none": 0.2846715328467153, "acc_stderr,none": 0.019259133065062176, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.2469879518072289, "acc_stderr,none": 0.03357351982064536}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.28313253012048195, "acc_stderr,none": 0.03507295431370519}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.3433734939759036, "acc_stderr,none": 0.03696584317010601}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.22, "acc_stderr,none": 0.05917804336345136}, "piqa": {"alias": "piqa", "acc,none": 0.733949945593036, "acc_stderr,none": 0.010310039263352833, "acc_norm,none": 0.7247007616974973, "acc_norm_stderr,none": 0.010421429277369528}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.5156626506024097, "acc_stderr,none": 0.010017154458106742}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.4650602409638554, "acc_stderr,none": 0.009997573294114558}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.3389558232931727, "acc_stderr,none": 0.009487992732201522}}
 
 
 
 
 
43
  {"created_at": "2025-08-26T17:07:42.187545", "global_step": 88000, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.3378839590443686, "acc_stderr,none": 0.013822047922283514, "acc_norm,none": 0.3677474402730375, "acc_norm_stderr,none": 0.01409099561816847}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.6957070707070707, "acc_stderr,none": 0.009441202922359183, "acc_norm,none": 0.6388888888888888, "acc_norm_stderr,none": 0.00985601342581124}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.22444444444444445, "acc_stderr,none": 0.013914930474237287, "acc_norm,none": 0.22444444444444445, "acc_norm_stderr,none": 0.013914930474237287}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.20777777777777778, "acc_stderr,none": 0.013531414972025832, "acc_norm,none": 0.20777777777777778, "acc_norm_stderr,none": 0.013531414972025832}, "belebele_pes_Arab": {"alias": "belebele_pes_Arab", "acc,none": 0.2411111111111111, "acc_stderr,none": 0.014266513886578923, "acc_norm,none": 0.2411111111111111, "acc_norm_stderr,none": 0.014266513886578923}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.23, "acc_stderr,none": 0.014035549969945596, "acc_norm,none": 0.23, "acc_norm_stderr,none": 0.014035549969945596}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.24333333333333335, "acc_stderr,none": 0.014311107963683041, "acc_norm,none": 0.24333333333333335, "acc_norm_stderr,none": 0.014311107963683041}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.4223262298346943, "acc_stderr,none": 0.0049292048643159725, "acc_norm,none": 0.5621390161322446, "acc_norm_stderr,none": 0.004951097802775946}, "include_base_44_chinese": {"acc,none": 0.24770642201834864, "acc_stderr,none": 0.018504824228214076, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.2112676056338028, "acc_stderr,none": 0.048790163593489484}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.19540229885057472, "acc_stderr,none": 0.0427567811097387}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.29577464788732394, "acc_stderr,none": 0.05454906121418899}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.18309859154929578, "acc_stderr,none": 0.046225147349214284}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.3218390804597701, "acc_stderr,none": 0.05037749206122548}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295698}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295697}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.3125, "acc_stderr,none": 0.11967838846954226}, "include_base_44_italian": {"acc,none": 0.23175182481751824, "acc_stderr,none": 0.01805240391805261, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.2571428571428571, "acc_stderr,none": 0.07495496847387484}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.2754491017964072, "acc_stderr,none": 0.03467377173717454}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.08333333333333333, "acc_stderr,none": 0.08333333333333331}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.2064516129032258, "acc_stderr,none": 0.03261635957463419}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.2215568862275449, "acc_stderr,none": 0.03223309610157498}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.16666666666666666, "acc_stderr,none": 0.1123666437438737}, "include_base_44_turkish": {"acc,none": 0.2664233576642336, "acc_stderr,none": 0.018873901735993865, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.2710843373493976, "acc_stderr,none": 0.034605799075530276}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.2891566265060241, "acc_stderr,none": 0.03529486801511114}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.27710843373493976, "acc_stderr,none": 0.034843315926805875}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.14, "acc_stderr,none": 0.04956957592256418}, "piqa": {"alias": "piqa", "acc,none": 0.7279651795429815, "acc_stderr,none": 0.01038276378624738, "acc_norm,none": 0.7263329706202394, "acc_norm_stderr,none": 0.010402184206229216}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.5160642570281124, "acc_stderr,none": 0.01001689893235568}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.4666666666666667, "acc_stderr,none": 0.009999776793187629}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.3377510040160643, "acc_stderr,none": 0.00947974227395648}}
44
  {"created_at": "2025-08-26T19:03:26.581616", "global_step": 90000, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.3455631399317406, "acc_stderr,none": 0.013896938461145683, "acc_norm,none": 0.36945392491467577, "acc_norm_stderr,none": 0.014104578366491897}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.696969696969697, "acc_stderr,none": 0.00943014066927896, "acc_norm,none": 0.6393097643097643, "acc_norm_stderr,none": 0.009853512108416744}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.24555555555555555, "acc_stderr,none": 0.014355180865342966, "acc_norm,none": 0.24555555555555555, "acc_norm_stderr,none": 0.014355180865342966}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.20666666666666667, "acc_stderr,none": 0.013504646568022659, "acc_norm,none": 0.20666666666666667, "acc_norm_stderr,none": 0.013504646568022659}, "belebele_pes_Arab": {"alias": "belebele_pes_Arab", "acc,none": 0.24, "acc_stderr,none": 0.01424401987979264, "acc_norm,none": 0.24, "acc_norm_stderr,none": 0.01424401987979264}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.2477777777777778, "acc_stderr,none": 0.0143987373773361, "acc_norm,none": 0.2477777777777778, "acc_norm_stderr,none": 0.0143987373773361}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.26555555555555554, "acc_stderr,none": 0.014729137787585096, "acc_norm,none": 0.26555555555555554, "acc_norm_stderr,none": 0.014729137787585096}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.4239195379406493, "acc_stderr,none": 0.004931679059919372, "acc_norm,none": 0.5621390161322446, "acc_norm_stderr,none": 0.004951097802775946}, "include_base_44_chinese": {"acc,none": 0.24036697247706423, "acc_stderr,none": 0.018323865179184166, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.28169014084507044, "acc_stderr,none": 0.053764141713832536}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.22988505747126436, "acc_stderr,none": 0.045371581852507746}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.30985915492957744, "acc_stderr,none": 0.055271596817883316}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.15492957746478872, "acc_stderr,none": 0.0432478576664078}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.26436781609195403, "acc_stderr,none": 0.04755382188278443}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.2112676056338028, "acc_stderr,none": 0.04879016359348949}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.2112676056338028, "acc_stderr,none": 0.04879016359348948}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.3125, "acc_stderr,none": 0.11967838846954226}, "include_base_44_italian": {"acc,none": 0.21350364963503649, "acc_stderr,none": 0.017567906003196875, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.2, "acc_stderr,none": 0.06859943405700354}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.2155688622754491, "acc_stderr,none": 0.031916582528798025}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.08333333333333333, "acc_stderr,none": 0.08333333333333331}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.23870967741935484, "acc_stderr,none": 0.03435182440245766}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.20359281437125748, "acc_stderr,none": 0.03125321962283342}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.16666666666666666, "acc_stderr,none": 0.11236664374387366}, "include_base_44_turkish": {"acc,none": 0.2755474452554745, "acc_stderr,none": 0.01912421056964195, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.2710843373493976, "acc_stderr,none": 0.03460579907553027}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.29518072289156627, "acc_stderr,none": 0.03550920185689628}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.28313253012048195, "acc_stderr,none": 0.03507295431370519}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.2, "acc_stderr,none": 0.057142857142857155}, "piqa": {"alias": "piqa", "acc,none": 0.7290533188248096, "acc_stderr,none": 0.010369718937426841, "acc_norm,none": 0.7268770402611534, "acc_norm_stderr,none": 0.010395730264453278}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.5180722891566265, "acc_stderr,none": 0.010015524156629813}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.4598393574297189, "acc_stderr,none": 0.00998969181016967}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.3377510040160643, "acc_stderr,none": 0.00947974227395648}}
45
  {"created_at": "2025-08-26T20:59:13.286634", "global_step": 92000, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.3464163822525597, "acc_stderr,none": 0.013905011180063246, "acc_norm,none": 0.36945392491467577, "acc_norm_stderr,none": 0.0141045783664919}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.694023569023569, "acc_stderr,none": 0.009455822036426621, "acc_norm,none": 0.6464646464646465, "acc_norm_stderr,none": 0.0098097289481515}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.21888888888888888, "acc_stderr,none": 0.013790766978256945, "acc_norm,none": 0.21888888888888888, "acc_norm_stderr,none": 0.013790766978256945}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.2, "acc_stderr,none": 0.013340746919301446, "acc_norm,none": 0.2, "acc_norm_stderr,none": 0.013340746919301446}, "belebele_pes_Arab": {"alias": "belebele_pes_Arab", "acc,none": 0.25, "acc_stderr,none": 0.014441782171967503, "acc_norm,none": 0.25, "acc_norm_stderr,none": 0.014441782171967503}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.25666666666666665, "acc_stderr,none": 0.014567891342380034, "acc_norm,none": 0.25666666666666665, "acc_norm_stderr,none": 0.014567891342380034}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.24888888888888888, "acc_stderr,none": 0.014420323451642549, "acc_norm,none": 0.24888888888888888, "acc_norm_stderr,none": 0.014420323451642549}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.42620991834295957, "acc_stderr,none": 0.004935143791573816, "acc_norm,none": 0.5655247958573989, "acc_norm_stderr,none": 0.004946748608271351}, "include_base_44_chinese": {"acc,none": 0.25321100917431194, "acc_stderr,none": 0.018690291369139276, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.30985915492957744, "acc_stderr,none": 0.05527159681788331}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.21839080459770116, "acc_stderr,none": 0.044551545932103705}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.30985915492957744, "acc_stderr,none": 0.05527159681788331}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.2112676056338028, "acc_stderr,none": 0.04879016359348947}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.26436781609195403, "acc_stderr,none": 0.04755382188278444}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.04993832453115032}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295697}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.1875, "acc_stderr,none": 0.10077822185373188}, "include_base_44_italian": {"acc,none": 0.24087591240875914, "acc_stderr,none": 0.018287259600319905, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.2, "acc_stderr,none": 0.06859943405700356}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.2634730538922156, "acc_stderr,none": 0.034190730421806675}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.16666666666666666, "acc_stderr,none": 0.11236664374387367}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.2838709677419355, "acc_stderr,none": 0.036332540727054406}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.19760479041916168, "acc_stderr,none": 0.030905719167240605}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.16666666666666666, "acc_stderr,none": 0.1123666437438737}, "include_base_44_turkish": {"acc,none": 0.2846715328467153, "acc_stderr,none": 0.019259133065062176, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.2469879518072289, "acc_stderr,none": 0.03357351982064536}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.28313253012048195, "acc_stderr,none": 0.03507295431370519}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.3433734939759036, "acc_stderr,none": 0.03696584317010601}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.22, "acc_stderr,none": 0.05917804336345136}, "piqa": {"alias": "piqa", "acc,none": 0.733949945593036, "acc_stderr,none": 0.010310039263352833, "acc_norm,none": 0.7247007616974973, "acc_norm_stderr,none": 0.010421429277369528}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.5156626506024097, "acc_stderr,none": 0.010017154458106742}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.4650602409638554, "acc_stderr,none": 0.009997573294114558}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.3389558232931727, "acc_stderr,none": 0.009487992732201522}}
46
+ {"created_at": "2025-08-27T10:20:31.220572", "global_step": 94000, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.34726962457337884, "acc_stderr,none": 0.013913034529620444, "acc_norm,none": 0.36945392491467577, "acc_norm_stderr,none": 0.0141045783664919}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.6914983164983165, "acc_stderr,none": 0.009477472342978122, "acc_norm,none": 0.6376262626262627, "acc_norm_stderr,none": 0.009863468202583776}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.21555555555555556, "acc_stderr,none": 0.01371452783236968, "acc_norm,none": 0.21555555555555556, "acc_norm_stderr,none": 0.01371452783236968}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.2011111111111111, "acc_stderr,none": 0.013368459888798206, "acc_norm,none": 0.2011111111111111, "acc_norm_stderr,none": 0.013368459888798206}, "belebele_pes_Arab": {"alias": "belebele_pes_Arab", "acc,none": 0.23222222222222222, "acc_stderr,none": 0.014082825936214571, "acc_norm,none": 0.23222222222222222, "acc_norm_stderr,none": 0.014082825936214571}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.24333333333333335, "acc_stderr,none": 0.014311107963683024, "acc_norm,none": 0.24333333333333335, "acc_norm_stderr,none": 0.014311107963683024}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.22777777777777777, "acc_stderr,none": 0.013987721523687966, "acc_norm,none": 0.22777777777777777, "acc_norm_stderr,none": 0.013987721523687966}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.4253136825333599, "acc_stderr,none": 0.004933800927560528, "acc_norm,none": 0.5658235411272655, "acc_norm_stderr,none": 0.004946353590937017}, "include_base_44_chinese": {"acc,none": 0.24954128440366974, "acc_stderr,none": 0.018574999041892748, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.2676056338028169, "acc_stderr,none": 0.05291406220869697}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.21839080459770116, "acc_stderr,none": 0.044551545932103705}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.3380281690140845, "acc_stderr,none": 0.05653887739133513}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.18309859154929578, "acc_stderr,none": 0.04622514734921429}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.25287356321839083, "acc_stderr,none": 0.04687049503854671}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295698}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.04993832453115034}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.3125, "acc_stderr,none": 0.11967838846954226}, "include_base_44_italian": {"acc,none": 0.22627737226277372, "acc_stderr,none": 0.01793191527140125, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.2571428571428571, "acc_stderr,none": 0.07495496847387484}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.24550898203592814, "acc_stderr,none": 0.033404631539455894}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.08333333333333333, "acc_stderr,none": 0.08333333333333331}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.23225806451612904, "acc_stderr,none": 0.03402770605128516}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.20958083832335328, "acc_stderr,none": 0.03159006158827181}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.16666666666666666, "acc_stderr,none": 0.1123666437438737}, "include_base_44_turkish": {"acc,none": 0.2846715328467153, "acc_stderr,none": 0.019310477487834033, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.27710843373493976, "acc_stderr,none": 0.034843315926805875}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.27710843373493976, "acc_stderr,none": 0.03484331592680589}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.3192771084337349, "acc_stderr,none": 0.0362933532994786}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.22, "acc_stderr,none": 0.05917804336345136}, "piqa": {"alias": "piqa", "acc,none": 0.7312295973884657, "acc_stderr,none": 0.010343392940090016, "acc_norm,none": 0.7230685527747551, "acc_norm_stderr,none": 0.010440499969334533}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.5108433734939759, "acc_stderr,none": 0.010019715824483477}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.4646586345381526, "acc_stderr,none": 0.00999700613856723}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.3389558232931727, "acc_stderr,none": 0.009487992732201524}}
47
+ {"created_at": "2025-08-27T10:36:23.045889", "global_step": 96000, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.3455631399317406, "acc_stderr,none": 0.013896938461145683, "acc_norm,none": 0.3660409556313993, "acc_norm_stderr,none": 0.014077223108470142}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.6923400673400674, "acc_stderr,none": 0.00947029257583118, "acc_norm,none": 0.6359427609427609, "acc_norm_stderr,none": 0.009873293392779118}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.20444444444444446, "acc_stderr,none": 0.01345064366056428, "acc_norm,none": 0.20444444444444446, "acc_norm_stderr,none": 0.01345064366056428}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.19666666666666666, "acc_stderr,none": 0.013256638897118556, "acc_norm,none": 0.19666666666666666, "acc_norm_stderr,none": 0.013256638897118556}, "belebele_pes_Arab": {"alias": "belebele_pes_Arab", "acc,none": 0.24222222222222223, "acc_stderr,none": 0.01428887637569982, "acc_norm,none": 0.24222222222222223, "acc_norm_stderr,none": 0.01428887637569982}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.24444444444444444, "acc_stderr,none": 0.0143332092596324, "acc_norm,none": 0.24444444444444444, "acc_norm_stderr,none": 0.0143332092596324}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.2388888888888889, "acc_stderr,none": 0.014221393731276198, "acc_norm,none": 0.2388888888888889, "acc_norm_stderr,none": 0.014221393731276198}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.4260107548297152, "acc_stderr,none": 0.004934846809827196, "acc_norm,none": 0.566122286397132, "acc_norm_stderr,none": 0.004945956744943815}, "include_base_44_chinese": {"acc,none": 0.24770642201834864, "acc_stderr,none": 0.018494471883412737, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.28169014084507044, "acc_stderr,none": 0.05376414171383253}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.19540229885057472, "acc_stderr,none": 0.0427567811097387}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.323943661971831, "acc_stderr,none": 0.05593416612923642}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.15492957746478872, "acc_stderr,none": 0.043247857666407784}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.26436781609195403, "acc_stderr,none": 0.04755382188278444}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.23943661971830985, "acc_stderr,none": 0.05100514327793383}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.28169014084507044, "acc_stderr,none": 0.053764141713832536}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.25, "acc_stderr,none": 0.11180339887498948}, "include_base_44_italian": {"acc,none": 0.22445255474452555, "acc_stderr,none": 0.017887918922221645, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.2571428571428571, "acc_stderr,none": 0.07495496847387484}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.2275449101796407, "acc_stderr,none": 0.032539894331085194}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.16666666666666666, "acc_stderr,none": 0.11236664374387367}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.25161290322580643, "acc_stderr,none": 0.03496787488168003}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.19760479041916168, "acc_stderr,none": 0.030905719167240605}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.16666666666666666, "acc_stderr,none": 0.11236664374387366}, "include_base_44_turkish": {"acc,none": 0.29014598540145986, "acc_stderr,none": 0.019413279400520442, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.2710843373493976, "acc_stderr,none": 0.03460579907553027}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.29518072289156627, "acc_stderr,none": 0.03550920185689629}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.3253012048192771, "acc_stderr,none": 0.03647168523683228}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.22, "acc_stderr,none": 0.05917804336345136}, "piqa": {"alias": "piqa", "acc,none": 0.7301414581066377, "acc_stderr,none": 0.010356595421852214, "acc_norm,none": 0.7252448313384113, "acc_norm_stderr,none": 0.010415033676676044}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.5196787148594377, "acc_stderr,none": 0.010014307727112695}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.46265060240963857, "acc_stderr,none": 0.00999407262056142}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.3405622489959839, "acc_stderr,none": 0.009498886690274443}}
48
+ {"created_at": "2025-08-27T10:43:35.189139", "global_step": 98000, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.3455631399317406, "acc_stderr,none": 0.013896938461145683, "acc_norm,none": 0.3660409556313993, "acc_norm_stderr,none": 0.014077223108470142}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.6957070707070707, "acc_stderr,none": 0.009441202922359183, "acc_norm,none": 0.6397306397306397, "acc_norm_stderr,none": 0.009851002584732382}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.21444444444444444, "acc_stderr,none": 0.013688819788367383, "acc_norm,none": 0.21444444444444444, "acc_norm_stderr,none": 0.013688819788367383}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.19666666666666666, "acc_stderr,none": 0.013256638897118554, "acc_norm,none": 0.19666666666666666, "acc_norm_stderr,none": 0.013256638897118554}, "belebele_pes_Arab": {"alias": "belebele_pes_Arab", "acc,none": 0.25333333333333335, "acc_stderr,none": 0.014505399844356798, "acc_norm,none": 0.25333333333333335, "acc_norm_stderr,none": 0.014505399844356798}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.2544444444444444, "acc_stderr,none": 0.01452635475105518, "acc_norm,none": 0.2544444444444444, "acc_norm_stderr,none": 0.01452635475105518}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.24444444444444444, "acc_stderr,none": 0.014333209259632431, "acc_norm,none": 0.24444444444444444, "acc_norm_stderr,none": 0.014333209259632431}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.4260107548297152, "acc_stderr,none": 0.004934846809827196, "acc_norm,none": 0.5670185222067318, "acc_norm_stderr,none": 0.004944755230598396}, "include_base_44_chinese": {"acc,none": 0.25688073394495414, "acc_stderr,none": 0.0186999816033704, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295696}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.2413793103448276, "acc_stderr,none": 0.046143776682648914}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.352112676056338, "acc_stderr,none": 0.05708756925195619}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.15492957746478872, "acc_stderr,none": 0.043247857666407784}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.2413793103448276, "acc_stderr,none": 0.04614377668264891}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.23943661971830985, "acc_stderr,none": 0.05100514327793383}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.30985915492957744, "acc_stderr,none": 0.055271596817883316}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.3125, "acc_stderr,none": 0.11967838846954226}, "include_base_44_italian": {"acc,none": 0.23722627737226276, "acc_stderr,none": 0.01821235669063419, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.2857142857142857, "acc_stderr,none": 0.07747516350666293}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.23353293413173654, "acc_stderr,none": 0.03283724952964298}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.16666666666666666, "acc_stderr,none": 0.11236664374387367}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.27741935483870966, "acc_stderr,none": 0.03607872492487903}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.20359281437125748, "acc_stderr,none": 0.03125321962283342}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.16666666666666666, "acc_stderr,none": 0.11236664374387366}, "include_base_44_turkish": {"acc,none": 0.2956204379562044, "acc_stderr,none": 0.019363146856363523, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.24096385542168675, "acc_stderr,none": 0.03329394119073529}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.30120481927710846, "acc_stderr,none": 0.0357160923005348}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.3795180722891566, "acc_stderr,none": 0.03777798822748018}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.18, "acc_stderr,none": 0.0548839220351387}, "piqa": {"alias": "piqa", "acc,none": 0.7295973884657236, "acc_stderr,none": 0.010363167031620798, "acc_norm,none": 0.7257889009793254, "acc_norm_stderr,none": 0.01040861866493338}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.5144578313253012, "acc_stderr,none": 0.010017882185606017}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.4646586345381526, "acc_stderr,none": 0.009997006138567226}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.3393574297188755, "acc_stderr,none": 0.009490727635646755}}
49
+ {"created_at": "2025-08-27T10:50:41.218351", "global_step": 100000, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.3438566552901024, "acc_stderr,none": 0.013880644570156217, "acc_norm,none": 0.36860068259385664, "acc_norm_stderr,none": 0.01409781067804219}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.6961279461279462, "acc_stderr,none": 0.009437524848293738, "acc_norm,none": 0.6359427609427609, "acc_norm_stderr,none": 0.00987329339277912}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.2088888888888889, "acc_stderr,none": 0.013558029238322257, "acc_norm,none": 0.2088888888888889, "acc_norm_stderr,none": 0.013558029238322257}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.19777777777777777, "acc_stderr,none": 0.013284837443113323, "acc_norm,none": 0.19777777777777777, "acc_norm_stderr,none": 0.013284837443113323}, "belebele_pes_Arab": {"alias": "belebele_pes_Arab", "acc,none": 0.24888888888888888, "acc_stderr,none": 0.014420323451642533, "acc_norm,none": 0.24888888888888888, "acc_norm_stderr,none": 0.014420323451642533}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.24888888888888888, "acc_stderr,none": 0.014420323451642517, "acc_norm,none": 0.24888888888888888, "acc_norm_stderr,none": 0.014420323451642517}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.24333333333333335, "acc_stderr,none": 0.014311107963683073, "acc_norm,none": 0.24333333333333335, "acc_norm_stderr,none": 0.014311107963683073}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.42630950009958174, "acc_stderr,none": 0.004935291975579175, "acc_norm,none": 0.5653256323441546, "acc_norm_stderr,none": 0.004947010937455354}, "include_base_44_chinese": {"acc,none": 0.25321100917431194, "acc_stderr,none": 0.01865712903476862, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.04993832453115033}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.25287356321839083, "acc_stderr,none": 0.04687049503854671}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.3380281690140845, "acc_stderr,none": 0.05653887739133513}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.16901408450704225, "acc_stderr,none": 0.04479290819909664}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.27586206896551724, "acc_stderr,none": 0.048195602891152295}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.23943661971830985, "acc_stderr,none": 0.05100514327793383}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295697}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.3125, "acc_stderr,none": 0.11967838846954226}, "include_base_44_italian": {"acc,none": 0.22627737226277372, "acc_stderr,none": 0.01793054237173905, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.22857142857142856, "acc_stderr,none": 0.07201440432144052}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.2275449101796407, "acc_stderr,none": 0.032539894331085194}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.16666666666666666, "acc_stderr,none": 0.11236664374387367}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.2645161290322581, "acc_stderr,none": 0.035542853823003935}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.19760479041916168, "acc_stderr,none": 0.030905719167240605}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.16666666666666666, "acc_stderr,none": 0.11236664374387366}, "include_base_44_turkish": {"acc,none": 0.29014598540145986, "acc_stderr,none": 0.019402159287449018, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.25301204819277107, "acc_stderr,none": 0.033844291552331346}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.2891566265060241, "acc_stderr,none": 0.03529486801511114}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.3373493975903614, "acc_stderr,none": 0.0368078369072758}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.26, "acc_stderr,none": 0.06266203485560375}, "piqa": {"alias": "piqa", "acc,none": 0.7290533188248096, "acc_stderr,none": 0.010369718937426841, "acc_norm,none": 0.7279651795429815, "acc_norm_stderr,none": 0.01038276378624737}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.5160642570281124, "acc_stderr,none": 0.01001689893235568}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.4578313253012048, "acc_stderr,none": 0.009986366819196476}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.3377510040160643, "acc_stderr,none": 0.009479742273956478}}