Upload google-bert-bert-base-multilingual-cased/metrics.eval.jsonl with huggingface_hub
Browse files
google-bert-bert-base-multilingual-cased/metrics.eval.jsonl
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{"created_at": "2025-08-31T07:16:10.196273", "global_step": 10000, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.2568259385665529, "acc_stderr,none": 0.012766923794116798, "acc_norm,none": 0.2901023890784983, "acc_norm_stderr,none": 0.013261573677520764}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.577020202020202, "acc_stderr,none": 0.010137328382209097, "acc_norm,none": 0.5256734006734006, "acc_norm_stderr,none": 0.010246249665591227}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.2311111111111111, "acc_stderr,none": 0.014059256663218197, "acc_norm,none": 0.2311111111111111, "acc_norm_stderr,none": 0.014059256663218197}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.25555555555555554, "acc_stderr,none": 0.01454718507225427, "acc_norm,none": 0.25555555555555554, "acc_norm_stderr,none": 0.01454718507225427}, "belebele_pes_Arab": {"alias": "belebele_pes_Arab", "acc,none": 0.2222222222222222, "acc_stderr,none": 0.013865695626579381, "acc_norm,none": 0.2222222222222222, "acc_norm_stderr,none": 0.013865695626579381}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.2544444444444444, "acc_stderr,none": 0.014526354751055178, "acc_norm,none": 0.2544444444444444, "acc_norm_stderr,none": 0.014526354751055178}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.26, "acc_stderr,none": 0.014629271097998378, "acc_norm,none": 0.26, "acc_norm_stderr,none": 0.014629271097998378}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.3298147779326827, "acc_stderr,none": 0.004691848665399068, "acc_norm,none": 0.3997211710814579, "acc_norm_stderr,none": 0.004888398535520491}, "include_base_44_chinese": {"acc,none": 0.24036697247706423, "acc_stderr,none": 0.018296408421773217, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.30985915492957744, "acc_stderr,none": 0.055271596817883316}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.21839080459770116, "acc_stderr,none": 0.044551545932103705}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.29577464788732394, "acc_stderr,none": 0.05454906121418899}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295697}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.1724137931034483, "acc_stderr,none": 0.04073271884266333}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.23943661971830985, "acc_stderr,none": 0.05100514327793383}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.18309859154929578, "acc_stderr,none": 0.046225147349214284}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.375, "acc_stderr,none": 0.125}, "include_base_44_italian": {"acc,none": 0.22445255474452555, "acc_stderr,none": 0.017836736627743197, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.14285714285714285, "acc_stderr,none": 0.06001200360120039}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.2634730538922156, "acc_stderr,none": 0.03419073042180667}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.08333333333333333, "acc_stderr,none": 0.08333333333333333}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.2, "acc_stderr,none": 0.032232918561015164}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.23952095808383234, "acc_stderr,none": 0.03312541599851862}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.16666666666666666, "acc_stderr,none": 0.11236664374387367}, "include_base_44_turkish": {"acc,none": 0.2773722627737226, "acc_stderr,none": 0.019154301267794303, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.24096385542168675, "acc_stderr,none": 0.033293941190735296}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.2710843373493976, "acc_stderr,none": 0.03460579907553027}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.3132530120481928, "acc_stderr,none": 0.036108050180310235}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.3, "acc_stderr,none": 0.0654653670707977}, "piqa": {"alias": "piqa", "acc,none": 0.6653971708378672, "acc_stderr,none": 0.011009071725162507, "acc_norm,none": 0.6512513601741022, "acc_norm_stderr,none": 0.011119263056159599}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.4751004016064257, "acc_stderr,none": 0.01000963798302251}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.40120481927710844, "acc_stderr,none": 0.009824484469158972}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.3345381526104418, "acc_stderr,none": 0.009457404390939167}}
|
2 |
+
{"created_at": "2025-08-31T19:38:06.890472", "global_step": 20000, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.27986348122866894, "acc_stderr,none": 0.013119040897725922, "acc_norm,none": 0.29692832764505117, "acc_norm_stderr,none": 0.013352025976725228}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.6031144781144782, "acc_stderr,none": 0.010039236800583206, "acc_norm,none": 0.5534511784511784, "acc_norm_stderr,none": 0.010200990076245316}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.2311111111111111, "acc_stderr,none": 0.014059256663218193, "acc_norm,none": 0.2311111111111111, "acc_norm_stderr,none": 0.014059256663218193}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.22111111111111112, "acc_stderr,none": 0.01384086369985956, "acc_norm,none": 0.22111111111111112, "acc_norm_stderr,none": 0.01384086369985956}, "belebele_pes_Arab": {"alias": "belebele_pes_Arab", "acc,none": 0.22444444444444445, "acc_stderr,none": 0.013914930474237287, "acc_norm,none": 0.22444444444444445, "acc_norm_stderr,none": 0.013914930474237287}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.2388888888888889, "acc_stderr,none": 0.014221393731276203, "acc_norm,none": 0.2388888888888889, "acc_norm_stderr,none": 0.014221393731276203}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.2511111111111111, "acc_stderr,none": 0.01446311410517081, "acc_norm,none": 0.2511111111111111, "acc_norm_stderr,none": 0.01446311410517081}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.35530770762796254, "acc_stderr,none": 0.004776283203468099, "acc_norm,none": 0.44722166899024096, "acc_norm_stderr,none": 0.004961904949171384}, "include_base_44_chinese": {"acc,none": 0.23853211009174313, "acc_stderr,none": 0.01835045661767852, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.049938324531150324}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.25287356321839083, "acc_stderr,none": 0.04687049503854671}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.29577464788732394, "acc_stderr,none": 0.054549061214188996}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.23943661971830985, "acc_stderr,none": 0.05100514327793383}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.2413793103448276, "acc_stderr,none": 0.04614377668264891}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.04993832453115032}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.19718309859154928, "acc_stderr,none": 0.04755476905953276}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.1875, "acc_stderr,none": 0.10077822185373188}, "include_base_44_italian": {"acc,none": 0.26824817518248173, "acc_stderr,none": 0.018820407592822497, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.2, "acc_stderr,none": 0.06859943405700354}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.3652694610778443, "acc_stderr,none": 0.037372085604367215}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.16666666666666666, "acc_stderr,none": 0.11236664374387369}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.23870967741935484, "acc_stderr,none": 0.03435182440245766}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.2215568862275449, "acc_stderr,none": 0.03223309610157498}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.25, "acc_stderr,none": 0.1305582419667734}, "include_base_44_turkish": {"acc,none": 0.2718978102189781, "acc_stderr,none": 0.019012939178842038, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.3132530120481928, "acc_stderr,none": 0.03610805018031023}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.22289156626506024, "acc_stderr,none": 0.032400048255946876}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.2710843373493976, "acc_stderr,none": 0.03460579907553027}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.3, "acc_stderr,none": 0.06546536707079771}, "piqa": {"alias": "piqa", "acc,none": 0.6751904243743199, "acc_stderr,none": 0.010926296238294029, "acc_norm,none": 0.6833514689880305, "acc_norm_stderr,none": 0.010853160531978481}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.4931726907630522, "acc_stderr,none": 0.010021138522919163}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.44056224899598395, "acc_stderr,none": 0.009951008027814033}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.3389558232931727, "acc_stderr,none": 0.009487992732201526}}
|
3 |
+
{"created_at": "2025-09-03T05:24:31.937297", "global_step": 30000, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.28071672354948807, "acc_stderr,none": 0.013131238126975578, "acc_norm,none": 0.2977815699658703, "acc_norm_stderr,none": 0.01336308010724448}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.6119528619528619, "acc_stderr,none": 0.009999295905750662, "acc_norm,none": 0.5509259259259259, "acc_norm_stderr,none": 0.010206428316323363}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.21333333333333335, "acc_stderr,none": 0.013662962863011163, "acc_norm,none": 0.21333333333333335, "acc_norm_stderr,none": 0.013662962863011163}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.2222222222222222, "acc_stderr,none": 0.013865695626579374, "acc_norm,none": 0.2222222222222222, "acc_norm_stderr,none": 0.013865695626579374}, "belebele_pes_Arab": {"alias": "belebele_pes_Arab", "acc,none": 0.21666666666666667, "acc_stderr,none": 0.01374008783070018, "acc_norm,none": 0.21666666666666667, "acc_norm_stderr,none": 0.01374008783070018}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.20666666666666667, "acc_stderr,none": 0.013504646568022659, "acc_norm,none": 0.20666666666666667, "acc_norm_stderr,none": 0.013504646568022659}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.21333333333333335, "acc_stderr,none": 0.01366296286301119, "acc_norm,none": 0.21333333333333335, "acc_norm_stderr,none": 0.01366296286301119}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.3717386974706234, "acc_stderr,none": 0.0048228145013589005, "acc_norm,none": 0.4738099980083649, "acc_norm_stderr,none": 0.004982931565945951}, "include_base_44_chinese": {"acc,none": 0.24403669724770644, "acc_stderr,none": 0.018472145893881665, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.2112676056338028, "acc_stderr,none": 0.04879016359348947}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.2413793103448276, "acc_stderr,none": 0.046143776682648914}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.04993832453115032}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.04993832453115031}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.3218390804597701, "acc_stderr,none": 0.05037749206122548}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.23943661971830985, "acc_stderr,none": 0.05100514327793383}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.04993832453115032}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.25, "acc_stderr,none": 0.11180339887498948}, "include_base_44_italian": {"acc,none": 0.23357664233576642, "acc_stderr,none": 0.018129366822397682, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.2857142857142857, "acc_stderr,none": 0.07747516350666293}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.24550898203592814, "acc_stderr,none": 0.033404631539455894}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.08333333333333333, "acc_stderr,none": 0.08333333333333333}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.21935483870967742, "acc_stderr,none": 0.033345718921890366}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.23952095808383234, "acc_stderr,none": 0.03312541599851861}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.16666666666666666, "acc_stderr,none": 0.11236664374387369}, "include_base_44_turkish": {"acc,none": 0.2755474452554745, "acc_stderr,none": 0.0190698640831809, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.3313253012048193, "acc_stderr,none": 0.036643147772880864}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.2710843373493976, "acc_stderr,none": 0.03460579907553027}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.22289156626506024, "acc_stderr,none": 0.032400048255946896}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.28, "acc_stderr,none": 0.06414269805898185}, "piqa": {"alias": "piqa", "acc,none": 0.6724700761697497, "acc_stderr,none": 0.010949830482825485, "acc_norm,none": 0.6751904243743199, "acc_norm_stderr,none": 0.01092629623829403}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.4959839357429719, "acc_stderr,none": 0.010021749574555898}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.44016064257028115, "acc_stderr,none": 0.009950040960088074}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.342570281124498, "acc_stderr,none": 0.00951233331947037}}
|
4 |
+
{"created_at": "2025-09-03T07:27:31.021311", "global_step": 40000, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.2832764505119454, "acc_stderr,none": 0.013167478735134575, "acc_norm,none": 0.3122866894197952, "acc_norm_stderr,none": 0.013542598541688065}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.6334175084175084, "acc_stderr,none": 0.009887786585323952, "acc_norm,none": 0.5631313131313131, "acc_norm_stderr,none": 0.010177672928157697}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.22444444444444445, "acc_stderr,none": 0.013914930474237287, "acc_norm,none": 0.22444444444444445, "acc_norm_stderr,none": 0.013914930474237287}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.24444444444444444, "acc_stderr,none": 0.014333209259632407, "acc_norm,none": 0.24444444444444444, "acc_norm_stderr,none": 0.014333209259632407}, "belebele_pes_Arab": {"alias": "belebele_pes_Arab", "acc,none": 0.22777777777777777, "acc_stderr,none": 0.013987721523687945, "acc_norm,none": 0.22777777777777777, "acc_norm_stderr,none": 0.013987721523687945}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.01401170515888451, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.01401170515888451}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.24, "acc_stderr,none": 0.01424401987979265, "acc_norm,none": 0.24, "acc_norm_stderr,none": 0.01424401987979265}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.37870942043417644, "acc_stderr,none": 0.0048407422067180865, "acc_norm,none": 0.4881497709619598, "acc_norm_stderr,none": 0.004988379805261153}, "include_base_44_chinese": {"acc,none": 0.25137614678899084, "acc_stderr,none": 0.018544741314436525, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.14084507042253522, "acc_stderr,none": 0.041577421166542894}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.28735632183908044, "acc_stderr,none": 0.048797477314965754}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.19718309859154928, "acc_stderr,none": 0.04755476905953275}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.04993832453115032}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.3218390804597701, "acc_stderr,none": 0.05037749206122547}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295697}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.30985915492957744, "acc_stderr,none": 0.055271596817883316}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.25, "acc_stderr,none": 0.11180339887498948}, "include_base_44_italian": {"acc,none": 0.24452554744525548, "acc_stderr,none": 0.018390844593010474, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.22857142857142856, "acc_stderr,none": 0.07201440432144052}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.2934131736526946, "acc_stderr,none": 0.035340161390504665}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.25, "acc_stderr,none": 0.1305582419667734}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.2064516129032258, "acc_stderr,none": 0.0326163595746342}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.2275449101796407, "acc_stderr,none": 0.032539894331085194}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.3333333333333333, "acc_stderr,none": 0.14213381090374033}, "include_base_44_turkish": {"acc,none": 0.24087591240875914, "acc_stderr,none": 0.018322862812609465, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.25301204819277107, "acc_stderr,none": 0.03384429155233134}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.2469879518072289, "acc_stderr,none": 0.03357351982064536}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.23493975903614459, "acc_stderr,none": 0.03300533186128922}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.2, "acc_stderr,none": 0.05714285714285715}, "piqa": {"alias": "piqa", "acc,none": 0.6784548422198041, "acc_stderr,none": 0.010897500107575642, "acc_norm,none": 0.6828073993471164, "acc_norm_stderr,none": 0.01085815545438087}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.5048192771084338, "acc_stderr,none": 0.010021607322475501}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.40843373493975904, "acc_stderr,none": 0.009852581919032235}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.3365461847389558, "acc_stderr,none": 0.00947142305417714}}
|
5 |
+
{"created_at": "2025-09-03T07:43:24.085929", "global_step": 50000, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.2935153583617747, "acc_stderr,none": 0.013307250444941111, "acc_norm,none": 0.32764505119453924, "acc_norm_stderr,none": 0.013715847940719342}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.6456228956228957, "acc_stderr,none": 0.009815004030251746, "acc_norm,none": 0.5824915824915825, "acc_norm_stderr,none": 0.01011918737777602}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.23333333333333334, "acc_stderr,none": 0.014106258477755675, "acc_norm,none": 0.23333333333333334, "acc_norm_stderr,none": 0.014106258477755675}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.20222222222222222, "acc_stderr,none": 0.013396013014924442, "acc_norm,none": 0.20222222222222222, "acc_norm_stderr,none": 0.013396013014924442}, "belebele_pes_Arab": {"alias": "belebele_pes_Arab", "acc,none": 0.23333333333333334, "acc_stderr,none": 0.014106258477755672, "acc_norm,none": 0.23333333333333334, "acc_norm_stderr,none": 0.014106258477755672}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.23333333333333334, "acc_stderr,none": 0.014106258477755675, "acc_norm,none": 0.23333333333333334, "acc_norm_stderr,none": 0.014106258477755675}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.24222222222222223, "acc_stderr,none": 0.014288876375699816, "acc_norm,none": 0.24222222222222223, "acc_norm_stderr,none": 0.014288876375699816}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.3932483569010157, "acc_stderr,none": 0.0048747287565282125, "acc_norm,none": 0.5126468830910177, "acc_norm_stderr,none": 0.0049881849883452855}, "include_base_44_chinese": {"acc,none": 0.23669724770642203, "acc_stderr,none": 0.018324091587880113, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.2112676056338028, "acc_stderr,none": 0.04879016359348946}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.25287356321839083, "acc_stderr,none": 0.046870495038546706}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295698}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.23943661971830985, "acc_stderr,none": 0.05100514327793383}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.25287356321839083, "acc_stderr,none": 0.04687049503854671}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.2112676056338028, "acc_stderr,none": 0.04879016359348948}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.23943661971830985, "acc_stderr,none": 0.05100514327793383}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.1875, "acc_stderr,none": 0.10077822185373188}, "include_base_44_italian": {"acc,none": 0.2572992700729927, "acc_stderr,none": 0.018754324326283885, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.2, "acc_stderr,none": 0.06859943405700354}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.23952095808383234, "acc_stderr,none": 0.03312541599851861}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.25, "acc_stderr,none": 0.1305582419667734}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.2709677419354839, "acc_stderr,none": 0.03581556513964113}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.2754491017964072, "acc_stderr,none": 0.034673771737174536}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.25, "acc_stderr,none": 0.1305582419667734}, "include_base_44_turkish": {"acc,none": 0.22445255474452555, "acc_stderr,none": 0.017884753815273425, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.2289156626506024, "acc_stderr,none": 0.03270745277352477}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.22289156626506024, "acc_stderr,none": 0.032400048255946876}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.2289156626506024, "acc_stderr,none": 0.03270745277352477}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.2, "acc_stderr,none": 0.05714285714285715}, "piqa": {"alias": "piqa", "acc,none": 0.6920565832426551, "acc_stderr,none": 0.010770892367463685, "acc_norm,none": 0.7013057671381937, "acc_norm_stderr,none": 0.010678556398149238}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.4795180722891566, "acc_stderr,none": 0.010013660629930816}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.42208835341365464, "acc_stderr,none": 0.00989965271489542}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.3397590361445783, "acc_stderr,none": 0.00949345492543825}}
|
6 |
+
{"created_at": "2025-09-03T07:57:09.280741", "global_step": 60000, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.3037542662116041, "acc_stderr,none": 0.013438909184778768, "acc_norm,none": 0.3293515358361775, "acc_norm_stderr,none": 0.013734057652635473}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.6561447811447811, "acc_stderr,none": 0.009746660584852445, "acc_norm,none": 0.5904882154882155, "acc_norm_stderr,none": 0.010090368160990062}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.25222222222222224, "acc_stderr,none": 0.0144843198114339, "acc_norm,none": 0.25222222222222224, "acc_norm_stderr,none": 0.0144843198114339}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.21666666666666667, "acc_stderr,none": 0.013740087830700177, "acc_norm,none": 0.21666666666666667, "acc_norm_stderr,none": 0.013740087830700177}, "belebele_pes_Arab": {"alias": "belebele_pes_Arab", "acc,none": 0.2311111111111111, "acc_stderr,none": 0.014059256663218186, "acc_norm,none": 0.2311111111111111, "acc_norm_stderr,none": 0.014059256663218186}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.25333333333333335, "acc_stderr,none": 0.014505399844356793, "acc_norm,none": 0.25333333333333335, "acc_norm_stderr,none": 0.014505399844356793}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.2311111111111111, "acc_stderr,none": 0.014059256663218193, "acc_norm,none": 0.2311111111111111, "acc_norm_stderr,none": 0.014059256663218193}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.4001194981079466, "acc_stderr,none": 0.004889210628907949, "acc_norm,none": 0.5276837283409679, "acc_norm_stderr,none": 0.004982127315605207}, "include_base_44_chinese": {"acc,none": 0.20917431192660552, "acc_stderr,none": 0.01749303732528358, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.19718309859154928, "acc_stderr,none": 0.04755476905953275}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.27586206896551724, "acc_stderr,none": 0.048195602891152295}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.18309859154929578, "acc_stderr,none": 0.046225147349214284}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.19718309859154928, "acc_stderr,none": 0.04755476905953275}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.21839080459770116, "acc_stderr,none": 0.044551545932103705}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.19718309859154928, "acc_stderr,none": 0.047554769059532744}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.19718309859154928, "acc_stderr,none": 0.04755476905953276}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.125, "acc_stderr,none": 0.08539125638299665}, "include_base_44_italian": {"acc,none": 0.27007299270072993, "acc_stderr,none": 0.018947752997512255, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.2571428571428571, "acc_stderr,none": 0.07495496847387485}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.31736526946107785, "acc_stderr,none": 0.036125997314033945}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.16666666666666666, "acc_stderr,none": 0.11236664374387367}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.23870967741935484, "acc_stderr,none": 0.03435182440245766}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.24550898203592814, "acc_stderr,none": 0.03340463153945588}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.5, "acc_stderr,none": 0.15075567228888181}, "include_base_44_turkish": {"acc,none": 0.2208029197080292, "acc_stderr,none": 0.017780501685203974, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.2289156626506024, "acc_stderr,none": 0.03270745277352477}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.21686746987951808, "acc_stderr,none": 0.032082844503563655}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.22289156626506024, "acc_stderr,none": 0.032400048255946876}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.2, "acc_stderr,none": 0.057142857142857155}, "piqa": {"alias": "piqa", "acc,none": 0.6926006528835691, "acc_stderr,none": 0.010765602506939073, "acc_norm,none": 0.7029379760609358, "acc_norm_stderr,none": 0.01066172540481479}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.4835341365461847, "acc_stderr,none": 0.010016636930829975}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.43052208835341366, "acc_stderr,none": 0.009924844537285525}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.3313253012048193, "acc_stderr,none": 0.009434574056101966}}
|
7 |
+
{"created_at": "2025-09-04T05:11:31.722386", "global_step": 70000, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.3250853242320819, "acc_stderr,none": 0.013688147309729125, "acc_norm,none": 0.35580204778157, "acc_norm_stderr,none": 0.013990571137918757}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.6788720538720538, "acc_stderr,none": 0.009580787536986797, "acc_norm,none": 0.6035353535353535, "acc_norm_stderr,none": 0.010037412763064522}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.2311111111111111, "acc_stderr,none": 0.014059256663218195, "acc_norm,none": 0.2311111111111111, "acc_norm_stderr,none": 0.014059256663218195}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.22777777777777777, "acc_stderr,none": 0.013987721523687963, "acc_norm,none": 0.22777777777777777, "acc_norm_stderr,none": 0.013987721523687963}, "belebele_pes_Arab": {"alias": "belebele_pes_Arab", "acc,none": 0.23222222222222222, "acc_stderr,none": 0.014082825936214571, "acc_norm,none": 0.23222222222222222, "acc_norm_stderr,none": 0.014082825936214571}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.23222222222222222, "acc_stderr,none": 0.014082825936214571, "acc_norm,none": 0.23222222222222222, "acc_norm_stderr,none": 0.014082825936214571}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.22444444444444445, "acc_stderr,none": 0.013914930474237287, "acc_norm,none": 0.22444444444444445, "acc_norm_stderr,none": 0.013914930474237287}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.41127265484963155, "acc_stderr,none": 0.0049105884493300155, "acc_norm,none": 0.5483967337183828, "acc_norm_stderr,none": 0.004966351835028204}, "include_base_44_chinese": {"acc,none": 0.24403669724770644, "acc_stderr,none": 0.01848372282421882, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.049938324531150324}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.25287356321839083, "acc_stderr,none": 0.04687049503854671}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.30985915492957744, "acc_stderr,none": 0.05527159681788331}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.23943661971830985, "acc_stderr,none": 0.05100514327793383}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.2413793103448276, "acc_stderr,none": 0.04614377668264891}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.051995626882956975}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.19718309859154928, "acc_stderr,none": 0.04755476905953276}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.1875, "acc_stderr,none": 0.10077822185373188}, "include_base_44_italian": {"acc,none": 0.25364963503649635, "acc_stderr,none": 0.018569247333205148, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.14285714285714285, "acc_stderr,none": 0.06001200360120039}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.2934131736526946, "acc_stderr,none": 0.03534016139050467}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.16666666666666666, "acc_stderr,none": 0.11236664374387367}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.2838709677419355, "acc_stderr,none": 0.03633254072705441}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.20958083832335328, "acc_stderr,none": 0.03159006158827181}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.3333333333333333, "acc_stderr,none": 0.14213381090374033}, "include_base_44_turkish": {"acc,none": 0.28102189781021897, "acc_stderr,none": 0.01915219118227231, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.3132530120481928, "acc_stderr,none": 0.03610805018031023}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.2289156626506024, "acc_stderr,none": 0.03270745277352477}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.26506024096385544, "acc_stderr,none": 0.03436024037944967}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.4, "acc_stderr,none": 0.06998542122237653}, "piqa": {"alias": "piqa", "acc,none": 0.7094668117519043, "acc_stderr,none": 0.010592765034696536, "acc_norm,none": 0.7127312295973884, "acc_norm_stderr,none": 0.010557291761528632}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.5004016064257029, "acc_stderr,none": 0.010022069634353863}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.4353413654618474, "acc_stderr,none": 0.009937920221480507}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.3353413654618474, "acc_stderr,none": 0.009463034891512704}}
|
8 |
+
{"created_at": "2025-09-04T06:31:48.827890", "global_step": 80000, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.3191126279863481, "acc_stderr,none": 0.013621696119173314, "acc_norm,none": 0.34982935153583616, "acc_norm_stderr,none": 0.013936809212158284}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.6771885521885522, "acc_stderr,none": 0.009593950220366744, "acc_norm,none": 0.6069023569023569, "acc_norm_stderr,none": 0.010022540618945305}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.22777777777777777, "acc_stderr,none": 0.013987721523687961, "acc_norm,none": 0.22777777777777777, "acc_norm_stderr,none": 0.013987721523687961}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.2311111111111111, "acc_stderr,none": 0.01405925666321819, "acc_norm,none": 0.2311111111111111, "acc_norm_stderr,none": 0.01405925666321819}, "belebele_pes_Arab": {"alias": "belebele_pes_Arab", "acc,none": 0.23555555555555555, "acc_stderr,none": 0.014152716079132635, "acc_norm,none": 0.23555555555555555, "acc_norm_stderr,none": 0.014152716079132635}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.24333333333333335, "acc_stderr,none": 0.014311107963683052, "acc_norm,none": 0.24333333333333335, "acc_norm_stderr,none": 0.014311107963683052}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.2388888888888889, "acc_stderr,none": 0.014221393731276207, "acc_norm,none": 0.2388888888888889, "acc_norm_stderr,none": 0.014221393731276207}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.425911173073093, "acc_stderr,none": 0.004934698012050244, "acc_norm,none": 0.5656243776140211, "acc_norm_stderr,none": 0.004946617138983514}, "include_base_44_chinese": {"acc,none": 0.24587155963302754, "acc_stderr,none": 0.01850104240213322, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.2112676056338028, "acc_stderr,none": 0.048790163593489484}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.2413793103448276, "acc_stderr,none": 0.046143776682648914}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.30985915492957744, "acc_stderr,none": 0.055271596817883316}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.23943661971830985, "acc_stderr,none": 0.05100514327793383}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.2988505747126437, "acc_stderr,none": 0.049360904959780114}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.04993832453115034}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.19718309859154928, "acc_stderr,none": 0.04755476905953276}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.1875, "acc_stderr,none": 0.10077822185373188}, "include_base_44_italian": {"acc,none": 0.24087591240875914, "acc_stderr,none": 0.018306114689962162, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.22857142857142856, "acc_stderr,none": 0.0720144043214405}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.2754491017964072, "acc_stderr,none": 0.03467377173717454}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.08333333333333333, "acc_stderr,none": 0.08333333333333333}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.22580645161290322, "acc_stderr,none": 0.03369244953981245}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.2275449101796407, "acc_stderr,none": 0.032539894331085194}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.3333333333333333, "acc_stderr,none": 0.14213381090374033}, "include_base_44_turkish": {"acc,none": 0.25364963503649635, "acc_stderr,none": 0.018616234216771214, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.22289156626506024, "acc_stderr,none": 0.032400048255946876}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.2891566265060241, "acc_stderr,none": 0.03529486801511115}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.24096385542168675, "acc_stderr,none": 0.033293941190735296}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.28, "acc_stderr,none": 0.06414269805898185}, "piqa": {"alias": "piqa", "acc,none": 0.7132752992383025, "acc_stderr,none": 0.01055131450310806, "acc_norm,none": 0.7170837867247007, "acc_norm_stderr,none": 0.010508949177489688}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.5108433734939759, "acc_stderr,none": 0.01001971582448348}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.44899598393574297, "acc_stderr,none": 0.00996979347724083}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.3417670682730924, "acc_stderr,none": 0.009506977398287618}}
|