craffel HF Staff commited on
Commit
8e7b61d
·
verified ·
1 Parent(s): 7f7047a

Upload facebook-xglm-564M/metrics.eval.jsonl with huggingface_hub

Browse files
facebook-xglm-564M/metrics.eval.jsonl CHANGED
@@ -43,3 +43,8 @@
43
  {"created_at": "2025-08-26T15:35:59.152998", "global_step": 86000, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.35921501706484643, "acc_stderr,none": 0.014020224155839152, "acc_norm,none": 0.37542662116040953, "acc_norm_stderr,none": 0.01415063143511173}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.6868686868686869, "acc_stderr,none": 0.009516303879309537, "acc_norm,none": 0.6485690235690236, "acc_norm_stderr,none": 0.009796395582817722}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.23333333333333334, "acc_stderr,none": 0.014106258477755675, "acc_norm,none": 0.23333333333333334, "acc_norm_stderr,none": 0.014106258477755675}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.22777777777777777, "acc_stderr,none": 0.013987721523687938, "acc_norm,none": 0.22777777777777777, "acc_norm_stderr,none": 0.013987721523687938}, "belebele_pes_Arab": {"alias": "belebele_pes_Arab", "acc,none": 0.25222222222222224, "acc_stderr,none": 0.014484319811433903, "acc_norm,none": 0.25222222222222224, "acc_norm_stderr,none": 0.014484319811433903}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.2388888888888889, "acc_stderr,none": 0.014221393731276203, "acc_norm,none": 0.2388888888888889, "acc_norm_stderr,none": 0.014221393731276203}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.2611111111111111, "acc_stderr,none": 0.01464948638526211, "acc_norm,none": 0.2611111111111111, "acc_norm_stderr,none": 0.01464948638526211}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.43686516630153355, "acc_stderr,none": 0.0049498429673314115, "acc_norm,none": 0.5781716789484167, "acc_norm_stderr,none": 0.004928420903026554}, "include_base_44_chinese": {"acc,none": 0.23302752293577983, "acc_stderr,none": 0.018114874181389816, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.15492957746478872, "acc_stderr,none": 0.04324785766640779}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.26436781609195403, "acc_stderr,none": 0.047553821882784425}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.2676056338028169, "acc_stderr,none": 0.05291406220869697}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.051995626882956975}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.2988505747126437, "acc_stderr,none": 0.04936090495978011}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.16901408450704225, "acc_stderr,none": 0.04479290819909659}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.2112676056338028, "acc_stderr,none": 0.048790163593489484}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.1875, "acc_stderr,none": 0.10077822185373188}, "include_base_44_italian": {"acc,none": 0.26277372262773724, "acc_stderr,none": 0.018851536661927463, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.2571428571428571, "acc_stderr,none": 0.07495496847387484}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.281437125748503, "acc_stderr,none": 0.03490350467428358}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.08333333333333333, "acc_stderr,none": 0.08333333333333333}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.2838709677419355, "acc_stderr,none": 0.036332540727054406}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.23952095808383234, "acc_stderr,none": 0.03312541599851861}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.25, "acc_stderr,none": 0.1305582419667734}, "include_base_44_turkish": {"acc,none": 0.20802919708029197, "acc_stderr,none": 0.01734644868470675, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.21084337349397592, "acc_stderr,none": 0.031755547866299194}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.24096385542168675, "acc_stderr,none": 0.033293941190735296}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.19879518072289157, "acc_stderr,none": 0.03106939026078942}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.12, "acc_stderr,none": 0.04642307659791979}, "piqa": {"alias": "piqa", "acc,none": 0.7241566920565833, "acc_stderr,none": 0.010427805502729115, "acc_norm,none": 0.7317736670293797, "acc_norm_stderr,none": 0.010336761992404485}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.5325301204819277, "acc_stderr,none": 0.010000839483876004}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.4650602409638554, "acc_stderr,none": 0.009997573294114558}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.3538152610441767, "acc_stderr,none": 0.009584162770582848}}
44
  {"created_at": "2025-08-26T17:32:36.921875", "global_step": 88000, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.35665529010238906, "acc_stderr,none": 0.013998056902620196, "acc_norm,none": 0.3728668941979522, "acc_norm_stderr,none": 0.014131176760131163}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.6919191919191919, "acc_stderr,none": 0.009473887075826332, "acc_norm,none": 0.6418350168350169, "acc_norm_stderr,none": 0.009838331651451853}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.22333333333333333, "acc_stderr,none": 0.013890384297198737, "acc_norm,none": 0.22333333333333333, "acc_norm_stderr,none": 0.013890384297198737}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.23555555555555555, "acc_stderr,none": 0.014152716079132618, "acc_norm,none": 0.23555555555555555, "acc_norm_stderr,none": 0.014152716079132618}, "belebele_pes_Arab": {"alias": "belebele_pes_Arab", "acc,none": 0.2411111111111111, "acc_stderr,none": 0.014266513886578938, "acc_norm,none": 0.2411111111111111, "acc_norm_stderr,none": 0.014266513886578938}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.24, "acc_stderr,none": 0.014244019879792662, "acc_norm,none": 0.24, "acc_norm_stderr,none": 0.014244019879792662}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.25666666666666665, "acc_stderr,none": 0.01456789134238004, "acc_norm,none": 0.25666666666666665, "acc_norm_stderr,none": 0.01456789134238004}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.4382593108942442, "acc_stderr,none": 0.004951594063272055, "acc_norm,none": 0.5795658235411273, "acc_norm_stderr,none": 0.004926198483948709}, "include_base_44_chinese": {"acc,none": 0.24220183486238533, "acc_stderr,none": 0.01839035439902614, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.2112676056338028, "acc_stderr,none": 0.04879016359348948}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.22988505747126436, "acc_stderr,none": 0.045371581852507746}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.323943661971831, "acc_stderr,none": 0.05593416612923642}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295697}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.25287356321839083, "acc_stderr,none": 0.04687049503854672}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.16901408450704225, "acc_stderr,none": 0.04479290819909663}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.2676056338028169, "acc_stderr,none": 0.05291406220869697}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.1875, "acc_stderr,none": 0.10077822185373188}, "include_base_44_italian": {"acc,none": 0.2773722627737226, "acc_stderr,none": 0.01905996802831519, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.37142857142857144, "acc_stderr,none": 0.08286583553358692}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.281437125748503, "acc_stderr,none": 0.034903504674283575}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.0, "acc_stderr,none": 0.0}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.3225806451612903, "acc_stderr,none": 0.03766930374975112}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.23952095808383234, "acc_stderr,none": 0.03312541599851863}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.16666666666666666, "acc_stderr,none": 0.11236664374387367}, "include_base_44_turkish": {"acc,none": 0.20985401459854014, "acc_stderr,none": 0.01743974212745487, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.21084337349397592, "acc_stderr,none": 0.031755547866299194}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.23493975903614459, "acc_stderr,none": 0.03300533186128922}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.1927710843373494, "acc_stderr,none": 0.030709824050565274}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.18, "acc_stderr,none": 0.054883922035138706}, "piqa": {"alias": "piqa", "acc,none": 0.7279651795429815, "acc_stderr,none": 0.010382763786247374, "acc_norm,none": 0.7295973884657236, "acc_norm_stderr,none": 0.010363167031620789}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.529718875502008, "acc_stderr,none": 0.010004353982613828}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.4610441767068273, "acc_stderr,none": 0.009991608448389063}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.3485943775100402, "acc_stderr,none": 0.009551542053301817}}
45
  {"created_at": "2025-08-26T19:29:01.431918", "global_step": 90000, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.35238907849829354, "acc_stderr,none": 0.013960142600598682, "acc_norm,none": 0.3796928327645051, "acc_norm_stderr,none": 0.014182119866974872}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.694023569023569, "acc_stderr,none": 0.009455822036426621, "acc_norm,none": 0.6473063973063973, "acc_norm_stderr,none": 0.009804420599378657}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.23222222222222222, "acc_stderr,none": 0.014082825936214571, "acc_norm,none": 0.23222222222222222, "acc_norm_stderr,none": 0.014082825936214571}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.2411111111111111, "acc_stderr,none": 0.014266513886578938, "acc_norm,none": 0.2411111111111111, "acc_norm_stderr,none": 0.014266513886578938}, "belebele_pes_Arab": {"alias": "belebele_pes_Arab", "acc,none": 0.25333333333333335, "acc_stderr,none": 0.0145053998443568, "acc_norm,none": 0.25333333333333335, "acc_norm_stderr,none": 0.0145053998443568}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.25333333333333335, "acc_stderr,none": 0.0145053998443568, "acc_norm,none": 0.25333333333333335, "acc_norm_stderr,none": 0.0145053998443568}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.2544444444444444, "acc_stderr,none": 0.014526354751055171, "acc_norm,none": 0.2544444444444444, "acc_norm_stderr,none": 0.014526354751055171}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.4400517825134435, "acc_stderr,none": 0.004953787146510921, "acc_norm,none": 0.5819557857000598, "acc_norm_stderr,none": 0.004922294797766664}, "include_base_44_chinese": {"acc,none": 0.22385321100917432, "acc_stderr,none": 0.017871328505760887, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.2112676056338028, "acc_stderr,none": 0.048790163593489484}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.2413793103448276, "acc_stderr,none": 0.046143776682648914}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.323943661971831, "acc_stderr,none": 0.05593416612923642}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.16901408450704225, "acc_stderr,none": 0.04479290819909662}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.21839080459770116, "acc_stderr,none": 0.044551545932103705}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.16901408450704225, "acc_stderr,none": 0.04479290819909662}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.23943661971830985, "acc_stderr,none": 0.05100514327793383}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.1875, "acc_stderr,none": 0.10077822185373188}, "include_base_44_italian": {"acc,none": 0.3083941605839416, "acc_stderr,none": 0.019690545218233064, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.4, "acc_stderr,none": 0.0840168050416806}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.31736526946107785, "acc_stderr,none": 0.03612599731403395}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.08333333333333333, "acc_stderr,none": 0.08333333333333333}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.3548387096774194, "acc_stderr,none": 0.03855576159143203}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.25748502994011974, "acc_stderr,none": 0.03393708648569706}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.25, "acc_stderr,none": 0.1305582419667734}, "include_base_44_turkish": {"acc,none": 0.25364963503649635, "acc_stderr,none": 0.018637237203320108, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.2469879518072289, "acc_stderr,none": 0.03357351982064536}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.26506024096385544, "acc_stderr,none": 0.03436024037944967}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.23493975903614459, "acc_stderr,none": 0.03300533186128922}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.3, "acc_stderr,none": 0.06546536707079771}, "piqa": {"alias": "piqa", "acc,none": 0.7236126224156693, "acc_stderr,none": 0.01043416238827563, "acc_norm,none": 0.7317736670293797, "acc_norm_stderr,none": 0.010336761992404483}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.5337349397590362, "acc_stderr,none": 0.009999235684721594}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.46907630522088356, "acc_stderr,none": 0.010002886789051677}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.3481927710843373, "acc_stderr,none": 0.00954898064915339}}
 
 
 
 
 
 
43
  {"created_at": "2025-08-26T15:35:59.152998", "global_step": 86000, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.35921501706484643, "acc_stderr,none": 0.014020224155839152, "acc_norm,none": 0.37542662116040953, "acc_norm_stderr,none": 0.01415063143511173}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.6868686868686869, "acc_stderr,none": 0.009516303879309537, "acc_norm,none": 0.6485690235690236, "acc_norm_stderr,none": 0.009796395582817722}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.23333333333333334, "acc_stderr,none": 0.014106258477755675, "acc_norm,none": 0.23333333333333334, "acc_norm_stderr,none": 0.014106258477755675}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.22777777777777777, "acc_stderr,none": 0.013987721523687938, "acc_norm,none": 0.22777777777777777, "acc_norm_stderr,none": 0.013987721523687938}, "belebele_pes_Arab": {"alias": "belebele_pes_Arab", "acc,none": 0.25222222222222224, "acc_stderr,none": 0.014484319811433903, "acc_norm,none": 0.25222222222222224, "acc_norm_stderr,none": 0.014484319811433903}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.2388888888888889, "acc_stderr,none": 0.014221393731276203, "acc_norm,none": 0.2388888888888889, "acc_norm_stderr,none": 0.014221393731276203}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.2611111111111111, "acc_stderr,none": 0.01464948638526211, "acc_norm,none": 0.2611111111111111, "acc_norm_stderr,none": 0.01464948638526211}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.43686516630153355, "acc_stderr,none": 0.0049498429673314115, "acc_norm,none": 0.5781716789484167, "acc_norm_stderr,none": 0.004928420903026554}, "include_base_44_chinese": {"acc,none": 0.23302752293577983, "acc_stderr,none": 0.018114874181389816, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.15492957746478872, "acc_stderr,none": 0.04324785766640779}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.26436781609195403, "acc_stderr,none": 0.047553821882784425}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.2676056338028169, "acc_stderr,none": 0.05291406220869697}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.051995626882956975}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.2988505747126437, "acc_stderr,none": 0.04936090495978011}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.16901408450704225, "acc_stderr,none": 0.04479290819909659}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.2112676056338028, "acc_stderr,none": 0.048790163593489484}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.1875, "acc_stderr,none": 0.10077822185373188}, "include_base_44_italian": {"acc,none": 0.26277372262773724, "acc_stderr,none": 0.018851536661927463, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.2571428571428571, "acc_stderr,none": 0.07495496847387484}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.281437125748503, "acc_stderr,none": 0.03490350467428358}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.08333333333333333, "acc_stderr,none": 0.08333333333333333}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.2838709677419355, "acc_stderr,none": 0.036332540727054406}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.23952095808383234, "acc_stderr,none": 0.03312541599851861}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.25, "acc_stderr,none": 0.1305582419667734}, "include_base_44_turkish": {"acc,none": 0.20802919708029197, "acc_stderr,none": 0.01734644868470675, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.21084337349397592, "acc_stderr,none": 0.031755547866299194}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.24096385542168675, "acc_stderr,none": 0.033293941190735296}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.19879518072289157, "acc_stderr,none": 0.03106939026078942}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.12, "acc_stderr,none": 0.04642307659791979}, "piqa": {"alias": "piqa", "acc,none": 0.7241566920565833, "acc_stderr,none": 0.010427805502729115, "acc_norm,none": 0.7317736670293797, "acc_norm_stderr,none": 0.010336761992404485}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.5325301204819277, "acc_stderr,none": 0.010000839483876004}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.4650602409638554, "acc_stderr,none": 0.009997573294114558}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.3538152610441767, "acc_stderr,none": 0.009584162770582848}}
44
  {"created_at": "2025-08-26T17:32:36.921875", "global_step": 88000, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.35665529010238906, "acc_stderr,none": 0.013998056902620196, "acc_norm,none": 0.3728668941979522, "acc_norm_stderr,none": 0.014131176760131163}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.6919191919191919, "acc_stderr,none": 0.009473887075826332, "acc_norm,none": 0.6418350168350169, "acc_norm_stderr,none": 0.009838331651451853}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.22333333333333333, "acc_stderr,none": 0.013890384297198737, "acc_norm,none": 0.22333333333333333, "acc_norm_stderr,none": 0.013890384297198737}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.23555555555555555, "acc_stderr,none": 0.014152716079132618, "acc_norm,none": 0.23555555555555555, "acc_norm_stderr,none": 0.014152716079132618}, "belebele_pes_Arab": {"alias": "belebele_pes_Arab", "acc,none": 0.2411111111111111, "acc_stderr,none": 0.014266513886578938, "acc_norm,none": 0.2411111111111111, "acc_norm_stderr,none": 0.014266513886578938}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.24, "acc_stderr,none": 0.014244019879792662, "acc_norm,none": 0.24, "acc_norm_stderr,none": 0.014244019879792662}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.25666666666666665, "acc_stderr,none": 0.01456789134238004, "acc_norm,none": 0.25666666666666665, "acc_norm_stderr,none": 0.01456789134238004}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.4382593108942442, "acc_stderr,none": 0.004951594063272055, "acc_norm,none": 0.5795658235411273, "acc_norm_stderr,none": 0.004926198483948709}, "include_base_44_chinese": {"acc,none": 0.24220183486238533, "acc_stderr,none": 0.01839035439902614, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.2112676056338028, "acc_stderr,none": 0.04879016359348948}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.22988505747126436, "acc_stderr,none": 0.045371581852507746}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.323943661971831, "acc_stderr,none": 0.05593416612923642}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295697}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.25287356321839083, "acc_stderr,none": 0.04687049503854672}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.16901408450704225, "acc_stderr,none": 0.04479290819909663}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.2676056338028169, "acc_stderr,none": 0.05291406220869697}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.1875, "acc_stderr,none": 0.10077822185373188}, "include_base_44_italian": {"acc,none": 0.2773722627737226, "acc_stderr,none": 0.01905996802831519, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.37142857142857144, "acc_stderr,none": 0.08286583553358692}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.281437125748503, "acc_stderr,none": 0.034903504674283575}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.0, "acc_stderr,none": 0.0}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.3225806451612903, "acc_stderr,none": 0.03766930374975112}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.23952095808383234, "acc_stderr,none": 0.03312541599851863}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.16666666666666666, "acc_stderr,none": 0.11236664374387367}, "include_base_44_turkish": {"acc,none": 0.20985401459854014, "acc_stderr,none": 0.01743974212745487, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.21084337349397592, "acc_stderr,none": 0.031755547866299194}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.23493975903614459, "acc_stderr,none": 0.03300533186128922}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.1927710843373494, "acc_stderr,none": 0.030709824050565274}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.18, "acc_stderr,none": 0.054883922035138706}, "piqa": {"alias": "piqa", "acc,none": 0.7279651795429815, "acc_stderr,none": 0.010382763786247374, "acc_norm,none": 0.7295973884657236, "acc_norm_stderr,none": 0.010363167031620789}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.529718875502008, "acc_stderr,none": 0.010004353982613828}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.4610441767068273, "acc_stderr,none": 0.009991608448389063}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.3485943775100402, "acc_stderr,none": 0.009551542053301817}}
45
  {"created_at": "2025-08-26T19:29:01.431918", "global_step": 90000, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.35238907849829354, "acc_stderr,none": 0.013960142600598682, "acc_norm,none": 0.3796928327645051, "acc_norm_stderr,none": 0.014182119866974872}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.694023569023569, "acc_stderr,none": 0.009455822036426621, "acc_norm,none": 0.6473063973063973, "acc_norm_stderr,none": 0.009804420599378657}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.23222222222222222, "acc_stderr,none": 0.014082825936214571, "acc_norm,none": 0.23222222222222222, "acc_norm_stderr,none": 0.014082825936214571}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.2411111111111111, "acc_stderr,none": 0.014266513886578938, "acc_norm,none": 0.2411111111111111, "acc_norm_stderr,none": 0.014266513886578938}, "belebele_pes_Arab": {"alias": "belebele_pes_Arab", "acc,none": 0.25333333333333335, "acc_stderr,none": 0.0145053998443568, "acc_norm,none": 0.25333333333333335, "acc_norm_stderr,none": 0.0145053998443568}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.25333333333333335, "acc_stderr,none": 0.0145053998443568, "acc_norm,none": 0.25333333333333335, "acc_norm_stderr,none": 0.0145053998443568}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.2544444444444444, "acc_stderr,none": 0.014526354751055171, "acc_norm,none": 0.2544444444444444, "acc_norm_stderr,none": 0.014526354751055171}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.4400517825134435, "acc_stderr,none": 0.004953787146510921, "acc_norm,none": 0.5819557857000598, "acc_norm_stderr,none": 0.004922294797766664}, "include_base_44_chinese": {"acc,none": 0.22385321100917432, "acc_stderr,none": 0.017871328505760887, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.2112676056338028, "acc_stderr,none": 0.048790163593489484}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.2413793103448276, "acc_stderr,none": 0.046143776682648914}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.323943661971831, "acc_stderr,none": 0.05593416612923642}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.16901408450704225, "acc_stderr,none": 0.04479290819909662}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.21839080459770116, "acc_stderr,none": 0.044551545932103705}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.16901408450704225, "acc_stderr,none": 0.04479290819909662}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.23943661971830985, "acc_stderr,none": 0.05100514327793383}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.1875, "acc_stderr,none": 0.10077822185373188}, "include_base_44_italian": {"acc,none": 0.3083941605839416, "acc_stderr,none": 0.019690545218233064, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.4, "acc_stderr,none": 0.0840168050416806}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.31736526946107785, "acc_stderr,none": 0.03612599731403395}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.08333333333333333, "acc_stderr,none": 0.08333333333333333}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.3548387096774194, "acc_stderr,none": 0.03855576159143203}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.25748502994011974, "acc_stderr,none": 0.03393708648569706}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.25, "acc_stderr,none": 0.1305582419667734}, "include_base_44_turkish": {"acc,none": 0.25364963503649635, "acc_stderr,none": 0.018637237203320108, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.2469879518072289, "acc_stderr,none": 0.03357351982064536}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.26506024096385544, "acc_stderr,none": 0.03436024037944967}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.23493975903614459, "acc_stderr,none": 0.03300533186128922}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.3, "acc_stderr,none": 0.06546536707079771}, "piqa": {"alias": "piqa", "acc,none": 0.7236126224156693, "acc_stderr,none": 0.01043416238827563, "acc_norm,none": 0.7317736670293797, "acc_norm_stderr,none": 0.010336761992404483}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.5337349397590362, "acc_stderr,none": 0.009999235684721594}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.46907630522088356, "acc_stderr,none": 0.010002886789051677}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.3481927710843373, "acc_stderr,none": 0.00954898064915339}}
46
+ {"created_at": "2025-08-27T10:01:58.836495", "global_step": 92000, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.3515358361774744, "acc_stderr,none": 0.013952413699600933, "acc_norm,none": 0.37627986348122866, "acc_norm_stderr,none": 0.014157022555407163}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.6927609427609428, "acc_stderr,none": 0.009466688832475376, "acc_norm,none": 0.6510942760942761, "acc_norm_stderr,none": 0.009780119894465769}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.22111111111111112, "acc_stderr,none": 0.013840863699859514, "acc_norm,none": 0.22111111111111112, "acc_norm_stderr,none": 0.013840863699859514}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.23, "acc_stderr,none": 0.014035549969945573, "acc_norm,none": 0.23, "acc_norm_stderr,none": 0.014035549969945573}, "belebele_pes_Arab": {"alias": "belebele_pes_Arab", "acc,none": 0.2544444444444444, "acc_stderr,none": 0.01452635475105517, "acc_norm,none": 0.2544444444444444, "acc_norm_stderr,none": 0.01452635475105517}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.24333333333333335, "acc_stderr,none": 0.014311107963683048, "acc_norm,none": 0.24333333333333335, "acc_norm_stderr,none": 0.014311107963683048}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.2644444444444444, "acc_stderr,none": 0.014709405413413137, "acc_norm,none": 0.2644444444444444, "acc_norm_stderr,none": 0.014709405413413137}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.4400517825134435, "acc_stderr,none": 0.00495378714651092, "acc_norm,none": 0.5826528579964151, "acc_norm_stderr,none": 0.004921133864931887}, "include_base_44_chinese": {"acc,none": 0.23302752293577983, "acc_stderr,none": 0.01810143322676417, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.19718309859154928, "acc_stderr,none": 0.04755476905953275}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.2413793103448276, "acc_stderr,none": 0.046143776682648914}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.352112676056338, "acc_stderr,none": 0.0570875692519562}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.16901408450704225, "acc_stderr,none": 0.04479290819909662}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.21839080459770116, "acc_stderr,none": 0.044551545932103705}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.2112676056338028, "acc_stderr,none": 0.048790163593489484}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.051995626882956975}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.1875, "acc_stderr,none": 0.10077822185373188}, "include_base_44_italian": {"acc,none": 0.28102189781021897, "acc_stderr,none": 0.019196099527346216, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.3142857142857143, "acc_stderr,none": 0.07961491954505553}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.2754491017964072, "acc_stderr,none": 0.034673771737174536}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.08333333333333333, "acc_stderr,none": 0.08333333333333333}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.33548387096774196, "acc_stderr,none": 0.038047683965235654}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.25149700598802394, "acc_stderr,none": 0.03367511880168704}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.16666666666666666, "acc_stderr,none": 0.11236664374387367}, "include_base_44_turkish": {"acc,none": 0.2116788321167883, "acc_stderr,none": 0.017457229785514655, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.18072289156626506, "acc_stderr,none": 0.02995573785581014}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.24096385542168675, "acc_stderr,none": 0.0332939411907353}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.1927710843373494, "acc_stderr,none": 0.030709824050565274}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.28, "acc_stderr,none": 0.06414269805898185}, "piqa": {"alias": "piqa", "acc,none": 0.7247007616974973, "acc_stderr,none": 0.01042142927736953, "acc_norm,none": 0.7328618063112078, "acc_norm_stderr,none": 0.010323440492612438}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.5373493975903615, "acc_stderr,none": 0.009994072620561402}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.46626506024096387, "acc_stderr,none": 0.00999923568472161}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.3522088353413655, "acc_stderr,none": 0.009574259292495745}}
47
+ {"created_at": "2025-08-27T10:29:40.238175", "global_step": 94000, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.35665529010238906, "acc_stderr,none": 0.013998056902620199, "acc_norm,none": 0.3779863481228669, "acc_norm_stderr,none": 0.0141696645203031}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.6927609427609428, "acc_stderr,none": 0.009466688832475378, "acc_norm,none": 0.6536195286195287, "acc_norm_stderr,none": 0.009763542075695731}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.22444444444444445, "acc_stderr,none": 0.013914930474237287, "acc_norm,none": 0.22444444444444445, "acc_norm_stderr,none": 0.013914930474237287}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.2311111111111111, "acc_stderr,none": 0.014059256663218169, "acc_norm,none": 0.2311111111111111, "acc_norm_stderr,none": 0.014059256663218169}, "belebele_pes_Arab": {"alias": "belebele_pes_Arab", "acc,none": 0.2511111111111111, "acc_stderr,none": 0.014463114105170803, "acc_norm,none": 0.2511111111111111, "acc_norm_stderr,none": 0.014463114105170803}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.25555555555555554, "acc_stderr,none": 0.014547185072254266, "acc_norm,none": 0.25555555555555554, "acc_norm_stderr,none": 0.014547185072254266}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.27111111111111114, "acc_stderr,none": 0.014826016446581956, "acc_norm,none": 0.27111111111111114, "acc_norm_stderr,none": 0.014826016446581956}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.4418442541326429, "acc_stderr,none": 0.004955914693717953, "acc_norm,none": 0.5832503485361482, "acc_norm_stderr,none": 0.004920130733271768}, "include_base_44_chinese": {"acc,none": 0.24587155963302754, "acc_stderr,none": 0.018517539247711934, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.049938324531150324}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.25287356321839083, "acc_stderr,none": 0.046870495038546706}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.30985915492957744, "acc_stderr,none": 0.055271596817883316}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.18309859154929578, "acc_stderr,none": 0.046225147349214284}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.2413793103448276, "acc_stderr,none": 0.046143776682648914}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295697}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.2676056338028169, "acc_stderr,none": 0.05291406220869696}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.1875, "acc_stderr,none": 0.10077822185373188}, "include_base_44_italian": {"acc,none": 0.28102189781021897, "acc_stderr,none": 0.01919766164297119, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.34285714285714286, "acc_stderr,none": 0.08140424227436863}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.2874251497005988, "acc_stderr,none": 0.035125586477990835}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.08333333333333333, "acc_stderr,none": 0.08333333333333333}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.3096774193548387, "acc_stderr,none": 0.0372580781179416}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.2634730538922156, "acc_stderr,none": 0.034190730421806675}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.08333333333333333, "acc_stderr,none": 0.08333333333333331}, "include_base_44_turkish": {"acc,none": 0.21715328467153286, "acc_stderr,none": 0.017673348051431236, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.21686746987951808, "acc_stderr,none": 0.03208284450356365}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.2289156626506024, "acc_stderr,none": 0.03270745277352477}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.21084337349397592, "acc_stderr,none": 0.03175554786629921}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.2, "acc_stderr,none": 0.05714285714285715}, "piqa": {"alias": "piqa", "acc,none": 0.7225244831338411, "acc_stderr,none": 0.010446818281039955, "acc_norm,none": 0.7317736670293797, "acc_norm_stderr,none": 0.010336761992404483}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.5313253012048192, "acc_stderr,none": 0.01000238471976213}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.4791164658634538, "acc_stderr,none": 0.010013327358568523}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.3526104417670683, "acc_stderr,none": 0.009576746271768752}}
48
+ {"created_at": "2025-08-27T10:41:21.476594", "global_step": 96000, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.3575085324232082, "acc_stderr,none": 0.014005494275916573, "acc_norm,none": 0.37457337883959047, "acc_norm_stderr,none": 0.014144193471893444}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.6957070707070707, "acc_stderr,none": 0.009441202922359185, "acc_norm,none": 0.6536195286195287, "acc_norm_stderr,none": 0.00976354207569573}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.2311111111111111, "acc_stderr,none": 0.01405925666321816, "acc_norm,none": 0.2311111111111111, "acc_norm_stderr,none": 0.01405925666321816}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.23333333333333334, "acc_stderr,none": 0.014106258477755677, "acc_norm,none": 0.23333333333333334, "acc_norm_stderr,none": 0.014106258477755677}, "belebele_pes_Arab": {"alias": "belebele_pes_Arab", "acc,none": 0.25222222222222224, "acc_stderr,none": 0.014484319811433903, "acc_norm,none": 0.25222222222222224, "acc_norm_stderr,none": 0.014484319811433903}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.2511111111111111, "acc_stderr,none": 0.014463114105170807, "acc_norm,none": 0.2511111111111111, "acc_norm_stderr,none": 0.014463114105170807}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.27111111111111114, "acc_stderr,none": 0.014826016446581956, "acc_norm,none": 0.27111111111111114, "acc_norm_stderr,none": 0.014826016446581956}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.4420434176458873, "acc_stderr,none": 0.004956147046108963, "acc_norm,none": 0.5840470025891257, "acc_norm_stderr,none": 0.004918781662373954}, "include_base_44_chinese": {"acc,none": 0.23669724770642203, "acc_stderr,none": 0.018256164246066568, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.15492957746478872, "acc_stderr,none": 0.04324785766640779}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.2413793103448276, "acc_stderr,none": 0.046143776682648914}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.28169014084507044, "acc_stderr,none": 0.05376414171383253}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.04993832453115032}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.26436781609195403, "acc_stderr,none": 0.04755382188278444}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.2112676056338028, "acc_stderr,none": 0.04879016359348947}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.28169014084507044, "acc_stderr,none": 0.05376414171383254}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.1875, "acc_stderr,none": 0.10077822185373188}, "include_base_44_italian": {"acc,none": 0.291970802919708, "acc_stderr,none": 0.019385460572339714, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.4, "acc_stderr,none": 0.0840168050416806}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.2994011976047904, "acc_stderr,none": 0.03554736535384932}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.0, "acc_stderr,none": 0.0}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.3032258064516129, "acc_stderr,none": 0.03703980981843178}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.281437125748503, "acc_stderr,none": 0.034903504674283596}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.16666666666666666, "acc_stderr,none": 0.11236664374387367}, "include_base_44_turkish": {"acc,none": 0.22262773722627738, "acc_stderr,none": 0.0178080517681108, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.22289156626506024, "acc_stderr,none": 0.03240004825594688}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.2469879518072289, "acc_stderr,none": 0.03357351982064536}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.21686746987951808, "acc_stderr,none": 0.032082844503563655}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.16, "acc_stderr,none": 0.052372293656638154}, "piqa": {"alias": "piqa", "acc,none": 0.7241566920565833, "acc_stderr,none": 0.010427805502729115, "acc_norm,none": 0.7285092491838956, "acc_norm_stderr,none": 0.010376251176596137}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.5293172690763053, "acc_stderr,none": 0.01000483004554399}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.46546184738955826, "acc_stderr,none": 0.009998133936261186}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.3530120481927711, "acc_stderr,none": 0.009579225840709716}}
49
+ {"created_at": "2025-08-27T10:46:07.850983", "global_step": 98000, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.3575085324232082, "acc_stderr,none": 0.014005494275916571, "acc_norm,none": 0.378839590443686, "acc_norm_stderr,none": 0.014175915490000324}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.6961279461279462, "acc_stderr,none": 0.009437524848293738, "acc_norm,none": 0.6561447811447811, "acc_norm_stderr,none": 0.009746660584852442}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.22333333333333333, "acc_stderr,none": 0.013890384297198731, "acc_norm,none": 0.22333333333333333, "acc_norm_stderr,none": 0.013890384297198731}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.23777777777777778, "acc_stderr,none": 0.01419863480930818, "acc_norm,none": 0.23777777777777778, "acc_norm_stderr,none": 0.01419863480930818}, "belebele_pes_Arab": {"alias": "belebele_pes_Arab", "acc,none": 0.2577777777777778, "acc_stderr,none": 0.014588474089651663, "acc_norm,none": 0.2577777777777778, "acc_norm_stderr,none": 0.014588474089651663}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.24888888888888888, "acc_stderr,none": 0.014420323451642524, "acc_norm,none": 0.24888888888888888, "acc_norm_stderr,none": 0.014420323451642524}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.27666666666666667, "acc_stderr,none": 0.014919965280947392, "acc_norm,none": 0.27666666666666667, "acc_norm_stderr,none": 0.014919965280947392}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.44303923521210914, "acc_stderr,none": 0.004957296691391587, "acc_norm,none": 0.5827524397530373, "acc_norm_stderr,none": 0.0049209671922553}, "include_base_44_chinese": {"acc,none": 0.24770642201834864, "acc_stderr,none": 0.018516732678650116, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.18309859154929578, "acc_stderr,none": 0.046225147349214284}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.22988505747126436, "acc_stderr,none": 0.045371581852507746}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.323943661971831, "acc_stderr,none": 0.05593416612923642}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.19718309859154928, "acc_stderr,none": 0.04755476905953276}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.27586206896551724, "acc_stderr,none": 0.04819560289115227}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.23943661971830985, "acc_stderr,none": 0.05100514327793383}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.29577464788732394, "acc_stderr,none": 0.05454906121418898}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.1875, "acc_stderr,none": 0.10077822185373188}, "include_base_44_italian": {"acc,none": 0.28832116788321166, "acc_stderr,none": 0.01930685269072535, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.4, "acc_stderr,none": 0.0840168050416806}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.31137724550898205, "acc_stderr,none": 0.03594016584565771}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.0, "acc_stderr,none": 0.0}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.2838709677419355, "acc_stderr,none": 0.036332540727054406}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.2754491017964072, "acc_stderr,none": 0.03467377173717454}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.16666666666666666, "acc_stderr,none": 0.11236664374387367}, "include_base_44_turkish": {"acc,none": 0.2116788321167883, "acc_stderr,none": 0.017467089267484137, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.19879518072289157, "acc_stderr,none": 0.03106939026078942}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.2469879518072289, "acc_stderr,none": 0.03357351982064536}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.21084337349397592, "acc_stderr,none": 0.03175554786629921}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.14, "acc_stderr,none": 0.04956957592256421}, "piqa": {"alias": "piqa", "acc,none": 0.7241566920565833, "acc_stderr,none": 0.010427805502729115, "acc_norm,none": 0.7306855277475517, "acc_norm_stderr,none": 0.01035000407058876}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.5289156626506024, "acc_stderr,none": 0.010005299609236084}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.4674698795180723, "acc_stderr,none": 0.010000839483876022}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.3530120481927711, "acc_stderr,none": 0.009579225840709719}}
50
+ {"created_at": "2025-08-27T10:50:42.598606", "global_step": 100000, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.36006825938566556, "acc_stderr,none": 0.014027516814585188, "acc_norm,none": 0.37627986348122866, "acc_norm_stderr,none": 0.014157022555407163}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.6927609427609428, "acc_stderr,none": 0.009466688832475376, "acc_norm,none": 0.6544612794612794, "acc_norm_stderr,none": 0.009757948730670308}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.21888888888888888, "acc_stderr,none": 0.013790766978256937, "acc_norm,none": 0.21888888888888888, "acc_norm_stderr,none": 0.013790766978256937}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.24, "acc_stderr,none": 0.014244019879792623, "acc_norm,none": 0.24, "acc_norm_stderr,none": 0.014244019879792623}, "belebele_pes_Arab": {"alias": "belebele_pes_Arab", "acc,none": 0.25333333333333335, "acc_stderr,none": 0.014505399844356793, "acc_norm,none": 0.25333333333333335, "acc_norm_stderr,none": 0.014505399844356793}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.25, "acc_stderr,none": 0.014441782171967503, "acc_norm,none": 0.25, "acc_norm_stderr,none": 0.014441782171967503}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.2688888888888889, "acc_stderr,none": 0.014787619747567614, "acc_norm,none": 0.2688888888888889, "acc_norm_stderr,none": 0.014787619747567614}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.4419438358892651, "acc_stderr,none": 0.004956030970911512, "acc_norm,none": 0.5826528579964151, "acc_norm_stderr,none": 0.004921133864931886}, "include_base_44_chinese": {"acc,none": 0.24587155963302754, "acc_stderr,none": 0.018476419210408022, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.18309859154929578, "acc_stderr,none": 0.046225147349214284}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.22988505747126436, "acc_stderr,none": 0.045371581852507746}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.323943661971831, "acc_stderr,none": 0.05593416612923642}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.19718309859154928, "acc_stderr,none": 0.04755476905953276}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.28735632183908044, "acc_stderr,none": 0.04879747731496575}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.23943661971830985, "acc_stderr,none": 0.05100514327793383}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.2676056338028169, "acc_stderr,none": 0.05291406220869697}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.1875, "acc_stderr,none": 0.10077822185373188}, "include_base_44_italian": {"acc,none": 0.29014598540145986, "acc_stderr,none": 0.0194004561734131, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.34285714285714286, "acc_stderr,none": 0.08140424227436863}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.2754491017964072, "acc_stderr,none": 0.03467377173717454}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.08333333333333333, "acc_stderr,none": 0.08333333333333333}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.32903225806451614, "acc_stderr,none": 0.03786253598588386}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.281437125748503, "acc_stderr,none": 0.034903504674283596}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.16666666666666666, "acc_stderr,none": 0.11236664374387367}, "include_base_44_turkish": {"acc,none": 0.21532846715328466, "acc_stderr,none": 0.017617806459671245, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.20481927710843373, "acc_stderr,none": 0.03141784291663926}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.21686746987951808, "acc_stderr,none": 0.03208284450356365}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.2289156626506024, "acc_stderr,none": 0.03270745277352477}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.2, "acc_stderr,none": 0.05714285714285715}, "piqa": {"alias": "piqa", "acc,none": 0.720348204570185, "acc_stderr,none": 0.010471899530306562, "acc_norm,none": 0.7328618063112078, "acc_norm_stderr,none": 0.01032344049261244}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.5317269076305221, "acc_stderr,none": 0.010001876146466682}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.4650602409638554, "acc_stderr,none": 0.009997573294114558}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.3570281124497992, "acc_stderr,none": 0.009603615216109774}}