jerryzh168 committed on
Commit
96c9f92
·
verified ·
1 Parent(s): ab391d2

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +8 -8
README.md CHANGED
@@ -101,19 +101,19 @@ lm_eval --model hf --model_args pretrained=pytorch/Phi-4-mini-instruct-float8dq
101
 
102
  | Benchmark | | |
103
  |----------------------------------|----------------|---------------------|
104
- | | Phi-4 mini-Ins | phi4-mini-int4wo |
105
  | **Popular aggregated benchmark** | | |
106
- | mmlu (0-shot) | | x |
107
- | mmlu_pro (5-shot) | | x |
108
  | **Reasoning** | | |
109
  | arc_challenge (0-shot) | 56.91 | 56.66 |
110
  | gpqa_main_zeroshot | 30.13 | x |
111
  | HellaSwag | 54.57 | 54.55 |
112
- | openbookqa | 33.00 | x |
113
- | piqa (0-shot) | 77.64 | x |
114
- | social_iqa | 49.59 | x |
115
- | truthfulqa_mc2 (0-shot) | 48.39 | x |
116
- | winogrande (0-shot) | 71.11 | x |
117
  | **Multilingual** | | |
118
  | mgsm_en_cot_en | 60.8 | 60.0 |
119
  | **Math** | | |
 
101
 
102
  | Benchmark | | |
103
  |----------------------------------|----------------|---------------------|
104
+ | | Phi-4 mini-Ins | phi4-mini-float8dq |
105
  | **Popular aggregated benchmark** | | |
106
+ | mmlu (0-shot) | 66.73 | x |
107
+ | mmlu_pro (5-shot) | 46.43 | x |
108
  | **Reasoning** | | |
109
  | arc_challenge (0-shot) | 56.91 | 56.66 |
110
  | gpqa_main_zeroshot | 30.13 | x |
111
  | HellaSwag | 54.57 | 54.55 |
112
+ | openbookqa | 33.00 | 33.60 |
113
+ | piqa (0-shot) | 77.64 | 77.48 |
114
+ | social_iqa | 49.59 | 49.28 |
115
+ | truthfulqa_mc2 (0-shot) | 48.39 | 48.09 |
116
+ | winogrande (0-shot) | 71.11 | 72.77 |
117
  | **Multilingual** | | |
118
  | mgsm_en_cot_en | 60.8 | 60.0 |
119
  | **Math** | | |