ThomasTheMaker committed on
Commit
800d91b
·
verified ·
1 Parent(s): 7174962

Upload results_2025-05-11T03-57-31.270083.json with huggingface_hub

Browse files
results_2025-05-11T03-57-31.270083.json ADDED
@@ -0,0 +1,120 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "config_general": {
3
+ "lighteval_sha": "?",
4
+ "num_fewshot_seeds": 1,
5
+ "max_samples": null,
6
+ "job_id": 0,
7
+ "start_time": 7391.813776305,
8
+ "end_time": 7504.824690543,
9
+ "total_evaluation_time_secondes": "113.01091423799971",
10
+ "model_name": "Qwen/Qwen3-0.6B",
11
+ "model_sha": "",
12
+ "model_dtype": null,
13
+ "model_size": null,
14
+ "generation_parameters": {
15
+ "early_stopping": null,
16
+ "repetition_penalty": null,
17
+ "frequency_penalty": null,
18
+ "length_penalty": null,
19
+ "presence_penalty": null,
20
+ "max_new_tokens": null,
21
+ "min_new_tokens": null,
22
+ "seed": null,
23
+ "stop_tokens": null,
24
+ "temperature": null,
25
+ "top_k": null,
26
+ "min_p": null,
27
+ "top_p": null,
28
+ "truncate_prompt": null,
29
+ "response_format": null
30
+ }
31
+ },
32
+ "results": {
33
+ "original|mmlu:anatomy|0": {
34
+ "acc": 0.1925925925925926,
35
+ "acc_stderr": 0.03406542058502653
36
+ },
37
+ "all": {
38
+ "acc": 0.1925925925925926,
39
+ "acc_stderr": 0.03406542058502653
40
+ }
41
+ },
42
+ "versions": {
43
+ "original|mmlu:anatomy|0": 0
44
+ },
45
+ "config_tasks": {
46
+ "original|mmlu:anatomy": {
47
+ "name": "mmlu:anatomy",
48
+ "prompt_function": "mmlu_anatomy",
49
+ "hf_repo": "cais/mmlu",
50
+ "hf_subset": "anatomy",
51
+ "metric": [
52
+ {
53
+ "metric_name": "acc",
54
+ "higher_is_better": true,
55
+ "category": "8",
56
+ "use_case": "1",
57
+ "sample_level_fn": "compute",
58
+ "corpus_level_fn": "mean"
59
+ }
60
+ ],
61
+ "hf_revision": null,
62
+ "hf_filter": null,
63
+ "hf_avail_splits": [
64
+ "auxiliary_train",
65
+ "test",
66
+ "validation",
67
+ "dev"
68
+ ],
69
+ "trust_dataset": true,
70
+ "evaluation_splits": [
71
+ "test"
72
+ ],
73
+ "few_shots_split": "dev",
74
+ "few_shots_select": "sequential",
75
+ "generation_size": 1,
76
+ "generation_grammar": null,
77
+ "stop_sequence": [
78
+ "\n"
79
+ ],
80
+ "num_samples": null,
81
+ "suite": [
82
+ "original",
83
+ "mmlu"
84
+ ],
85
+ "original_num_docs": 135,
86
+ "effective_num_docs": 135,
87
+ "must_remove_duplicate_docs": false,
88
+ "version": 0
89
+ }
90
+ },
91
+ "summary_tasks": {
92
+ "original|mmlu:anatomy|0": {
93
+ "hashes": {
94
+ "hash_examples": "2ace6ded4afc2a5e",
95
+ "hash_full_prompts": "2ace6ded4afc2a5e",
96
+ "hash_input_tokens": "2208f6c9b1418fb3",
97
+ "hash_cont_tokens": "b6f29f2efe9d60e5"
98
+ },
99
+ "truncated": 0,
100
+ "non_truncated": 135,
101
+ "padded": 0,
102
+ "non_padded": 540,
103
+ "effective_few_shots": 0.0,
104
+ "num_truncated_few_shots": 0
105
+ }
106
+ },
107
+ "summary_general": {
108
+ "hashes": {
109
+ "hash_examples": "b4397c7c7be2716f",
110
+ "hash_full_prompts": "b4397c7c7be2716f",
111
+ "hash_input_tokens": "b6d70b8b90cdad6b",
112
+ "hash_cont_tokens": "d0f0c11bb15a1909"
113
+ },
114
+ "truncated": 0,
115
+ "non_truncated": 135,
116
+ "padded": 0,
117
+ "non_padded": 540,
118
+ "num_truncated_few_shots": 0
119
+ }
120
+ }