vicgalle leaderboard-pr-bot committed on
Commit
d0b0740
1 Parent(s): 1333a88

Adding Evaluation Results (#2)

Browse files

- Adding Evaluation Results (755ab580b7846c4c9a8c727453a7e2fe470f074e)


Co-authored-by: Open LLM Leaderboard PR Bot <[email protected]>

Files changed (1) hide show
  1. README.md +114 -15
README.md CHANGED
@@ -1,6 +1,8 @@
1
  ---
2
  license: apache-2.0
3
  library_name: transformers
 
 
4
  model-index:
5
  - name: ConfigurableHermes-7B
6
  results:
@@ -19,8 +21,7 @@ model-index:
19
  value: 66.04
20
  name: normalized accuracy
21
  source:
22
- url: >-
23
- https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=vicgalle/ConfigurableHermes-7B
24
  name: Open LLM Leaderboard
25
  - task:
26
  type: text-generation
@@ -36,8 +37,7 @@ model-index:
36
  value: 84.31
37
  name: normalized accuracy
38
  source:
39
- url: >-
40
- https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=vicgalle/ConfigurableHermes-7B
41
  name: Open LLM Leaderboard
42
  - task:
43
  type: text-generation
@@ -54,8 +54,7 @@ model-index:
54
  value: 62.44
55
  name: accuracy
56
  source:
57
- url: >-
58
- https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=vicgalle/ConfigurableHermes-7B
59
  name: Open LLM Leaderboard
60
  - task:
61
  type: text-generation
@@ -71,8 +70,7 @@ model-index:
71
  - type: mc2
72
  value: 61.71
73
  source:
74
- url: >-
75
- https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=vicgalle/ConfigurableHermes-7B
76
  name: Open LLM Leaderboard
77
  - task:
78
  type: text-generation
@@ -89,8 +87,7 @@ model-index:
89
  value: 77.43
90
  name: accuracy
91
  source:
92
- url: >-
93
- https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=vicgalle/ConfigurableHermes-7B
94
  name: Open LLM Leaderboard
95
  - task:
96
  type: text-generation
@@ -107,11 +104,100 @@ model-index:
107
  value: 61.41
108
  name: accuracy
109
  source:
110
- url: >-
111
- https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=vicgalle/ConfigurableHermes-7B
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
112
  name: Open LLM Leaderboard
113
- datasets:
114
- - vicgalle/configurable-system-prompt-multitask
115
  ---
116
 
117
  # ConfigurableHermes-7B
@@ -155,4 +241,17 @@ If you find this work, data and/or models useful for your research, please consi
155
  archivePrefix={arXiv},
156
  primaryClass={cs.CL}
157
  }
158
- ```
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
  license: apache-2.0
3
  library_name: transformers
4
+ datasets:
5
+ - vicgalle/configurable-system-prompt-multitask
6
  model-index:
7
  - name: ConfigurableHermes-7B
8
  results:
 
21
  value: 66.04
22
  name: normalized accuracy
23
  source:
24
+ url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=vicgalle/ConfigurableHermes-7B
 
25
  name: Open LLM Leaderboard
26
  - task:
27
  type: text-generation
 
37
  value: 84.31
38
  name: normalized accuracy
39
  source:
40
+ url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=vicgalle/ConfigurableHermes-7B
 
41
  name: Open LLM Leaderboard
42
  - task:
43
  type: text-generation
 
54
  value: 62.44
55
  name: accuracy
56
  source:
57
+ url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=vicgalle/ConfigurableHermes-7B
 
58
  name: Open LLM Leaderboard
59
  - task:
60
  type: text-generation
 
70
  - type: mc2
71
  value: 61.71
72
  source:
73
+ url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=vicgalle/ConfigurableHermes-7B
 
74
  name: Open LLM Leaderboard
75
  - task:
76
  type: text-generation
 
87
  value: 77.43
88
  name: accuracy
89
  source:
90
+ url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=vicgalle/ConfigurableHermes-7B
 
91
  name: Open LLM Leaderboard
92
  - task:
93
  type: text-generation
 
104
  value: 61.41
105
  name: accuracy
106
  source:
107
+ url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=vicgalle/ConfigurableHermes-7B
108
+ name: Open LLM Leaderboard
109
+ - task:
110
+ type: text-generation
111
+ name: Text Generation
112
+ dataset:
113
+ name: IFEval (0-Shot)
114
+ type: HuggingFaceH4/ifeval
115
+ args:
116
+ num_few_shot: 0
117
+ metrics:
118
+ - type: inst_level_strict_acc and prompt_level_strict_acc
119
+ value: 54.11
120
+ name: strict accuracy
121
+ source:
122
+ url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=vicgalle/ConfigurableHermes-7B
123
+ name: Open LLM Leaderboard
124
+ - task:
125
+ type: text-generation
126
+ name: Text Generation
127
+ dataset:
128
+ name: BBH (3-Shot)
129
+ type: BBH
130
+ args:
131
+ num_few_shot: 3
132
+ metrics:
133
+ - type: acc_norm
134
+ value: 23.16
135
+ name: normalized accuracy
136
+ source:
137
+ url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=vicgalle/ConfigurableHermes-7B
138
+ name: Open LLM Leaderboard
139
+ - task:
140
+ type: text-generation
141
+ name: Text Generation
142
+ dataset:
143
+ name: MATH Lvl 5 (4-Shot)
144
+ type: hendrycks/competition_math
145
+ args:
146
+ num_few_shot: 4
147
+ metrics:
148
+ - type: exact_match
149
+ value: 4.31
150
+ name: exact match
151
+ source:
152
+ url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=vicgalle/ConfigurableHermes-7B
153
+ name: Open LLM Leaderboard
154
+ - task:
155
+ type: text-generation
156
+ name: Text Generation
157
+ dataset:
158
+ name: GPQA (0-shot)
159
+ type: Idavidrein/gpqa
160
+ args:
161
+ num_few_shot: 0
162
+ metrics:
163
+ - type: acc_norm
164
+ value: 3.58
165
+ name: acc_norm
166
+ source:
167
+ url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=vicgalle/ConfigurableHermes-7B
168
+ name: Open LLM Leaderboard
169
+ - task:
170
+ type: text-generation
171
+ name: Text Generation
172
+ dataset:
173
+ name: MuSR (0-shot)
174
+ type: TAUR-Lab/MuSR
175
+ args:
176
+ num_few_shot: 0
177
+ metrics:
178
+ - type: acc_norm
179
+ value: 9.11
180
+ name: acc_norm
181
+ source:
182
+ url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=vicgalle/ConfigurableHermes-7B
183
+ name: Open LLM Leaderboard
184
+ - task:
185
+ type: text-generation
186
+ name: Text Generation
187
+ dataset:
188
+ name: MMLU-PRO (5-shot)
189
+ type: TIGER-Lab/MMLU-Pro
190
+ config: main
191
+ split: test
192
+ args:
193
+ num_few_shot: 5
194
+ metrics:
195
+ - type: acc
196
+ value: 22.5
197
+ name: accuracy
198
+ source:
199
+ url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=vicgalle/ConfigurableHermes-7B
200
  name: Open LLM Leaderboard
 
 
201
  ---
202
 
203
  # ConfigurableHermes-7B
 
241
  archivePrefix={arXiv},
242
  primaryClass={cs.CL}
243
  }
244
+ ```
245
+ # [Open LLM Leaderboard Evaluation Results](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard)
246
+ Detailed results can be found [here](https://huggingface.co/datasets/open-llm-leaderboard/details_vicgalle__ConfigurableHermes-7B)
247
+
248
+ | Metric |Value|
249
+ |-------------------|----:|
250
+ |Avg. |19.46|
251
+ |IFEval (0-Shot) |54.11|
252
+ |BBH (3-Shot) |23.16|
253
+ |MATH Lvl 5 (4-Shot)| 4.31|
254
+ |GPQA (0-shot) | 3.58|
255
+ |MuSR (0-shot) | 9.11|
256
+ |MMLU-PRO (5-shot) |22.50|
257
+