Update README.md
Browse files
README.md
CHANGED
@@ -101,19 +101,19 @@ lm_eval --model hf --model_args pretrained=pytorch/Phi-4-mini-instruct-float8dq
|
|
101 |
|
102 |
| Benchmark | | |
|
103 |
|----------------------------------|----------------|---------------------|
|
104 |
-
| | Phi-4 mini-Ins | phi4-mini-
|
105 |
| **Popular aggregated benchmark** | | |
|
106 |
-
| mmlu (0-shot) |
|
107 |
-
| mmlu_pro (5-shot) |
|
108 |
| **Reasoning** | | |
|
109 |
| arc_challenge (0-shot) | 56.91 | 56.66 |
|
110 |
| gpqa_main_zeroshot | 30.13 | x |
|
111 |
| HellaSwag | 54.57 | 54.55 |
|
112 |
-
| openbookqa | 33.00 |
|
113 |
-
| piqa (0-shot) | 77.64 |
|
114 |
-
| social_iqa | 49.59 |
|
115 |
-
| truthfulqa_mc2 (0-shot) | 48.39 |
|
116 |
-
| winogrande (0-shot) | 71.11 |
|
117 |
| **Multilingual** | | |
|
118 |
| mgsm_en_cot_en | 60.8 | 60.0 |
|
119 |
| **Math** | | |
|
|
|
101 |
|
102 |
| Benchmark | | |
|
103 |
|----------------------------------|----------------|---------------------|
|
104 |
+
| | Phi-4-mini-instruct | phi4-mini-float8dq |
|
105 |
| **Popular aggregated benchmark** | | |
|
106 |
+
| mmlu (0-shot) | 66.73 | x |
|
107 |
+
| mmlu_pro (5-shot) | 46.43 | x |
|
108 |
| **Reasoning** | | |
|
109 |
| arc_challenge (0-shot) | 56.91 | 56.66 |
|
110 |
| gpqa_main_zeroshot | 30.13 | x |
|
111 |
| HellaSwag | 54.57 | 54.55 |
|
112 |
+
| openbookqa | 33.00 | 33.60 |
|
113 |
+
| piqa (0-shot) | 77.64 | 77.48 |
|
114 |
+
| social_iqa | 49.59 | 49.28 |
|
115 |
+
| truthfulqa_mc2 (0-shot) | 48.39 | 48.09 |
|
116 |
+
| winogrande (0-shot) | 71.11 | 72.77 |
|
117 |
| **Multilingual** | | |
|
118 |
| mgsm_en_cot_en | 60.8 | 60.0 |
|
119 |
| **Math** | | |
|