Initial commit
Browse files- .gitattributes +3 -0
- math_eval/aime24/test_qwen-boxed_-1_seed0_t0.0_s0_e-1.jsonl +0 -0
- math_eval/aime24/test_qwen-boxed_-1_seed0_t0.0_s0_e-1_qwen-boxed_metrics.json +9 -0
- math_eval/amc23/test_qwen-boxed_-1_seed0_t0.0_s0_e-1.jsonl +0 -0
- math_eval/amc23/test_qwen-boxed_-1_seed0_t0.0_s0_e-1_qwen-boxed_metrics.json +9 -0
- math_eval/college_math/test_qwen-boxed_-1_seed0_t0.0_s0_e-1.jsonl +3 -0
- math_eval/college_math/test_qwen-boxed_-1_seed0_t0.0_s0_e-1_qwen-boxed_metrics.json +9 -0
- math_eval/gaokao2023en/test_qwen-boxed_-1_seed0_t0.0_s0_e-1.jsonl +0 -0
- math_eval/gaokao2023en/test_qwen-boxed_-1_seed0_t0.0_s0_e-1_qwen-boxed_metrics.json +9 -0
- math_eval/gsm8k/test_qwen-boxed_-1_seed0_t0.0_s0_e-1.jsonl +0 -0
- math_eval/gsm8k/test_qwen-boxed_-1_seed0_t0.0_s0_e-1_qwen-boxed_metrics.json +9 -0
- math_eval/math/test_qwen-boxed_-1_seed0_t0.0_s0_e-1.jsonl +3 -0
- math_eval/math/test_qwen-boxed_-1_seed0_t0.0_s0_e-1_qwen-boxed_metrics.json +9 -0
- math_eval/minerva_math/test_qwen-boxed_-1_seed0_t0.0_s0_e-1.jsonl +0 -0
- math_eval/minerva_math/test_qwen-boxed_-1_seed0_t0.0_s0_e-1_qwen-boxed_metrics.json +20 -0
- math_eval/olympiadbench/test_qwen-boxed_-1_seed0_t0.0_s0_e-1.jsonl +3 -0
- math_eval/olympiadbench/test_qwen-boxed_-1_seed0_t0.0_s0_e-1_qwen-boxed_metrics.json +9 -0
.gitattributes
CHANGED
@@ -33,3 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
math_eval/college_math/test_qwen-boxed_-1_seed0_t0.0_s0_e-1.jsonl filter=lfs diff=lfs merge=lfs -text
|
37 |
+
math_eval/math/test_qwen-boxed_-1_seed0_t0.0_s0_e-1.jsonl filter=lfs diff=lfs merge=lfs -text
|
38 |
+
math_eval/olympiadbench/test_qwen-boxed_-1_seed0_t0.0_s0_e-1.jsonl filter=lfs diff=lfs merge=lfs -text
|
math_eval/aime24/test_qwen-boxed_-1_seed0_t0.0_s0_e-1.jsonl
ADDED
The diff for this file is too large to render.
See raw diff
|
|
math_eval/aime24/test_qwen-boxed_-1_seed0_t0.0_s0_e-1_qwen-boxed_metrics.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"num_samples": 30,
|
3 |
+
"num_scores": 30,
|
4 |
+
"timeout_samples": 0,
|
5 |
+
"empty_samples": 0,
|
6 |
+
"acc": 13.3,
|
7 |
+
"time_use_in_second": 43.51688075065613,
|
8 |
+
"time_use_in_minite": "0:43"
|
9 |
+
}
|
math_eval/amc23/test_qwen-boxed_-1_seed0_t0.0_s0_e-1.jsonl
ADDED
The diff for this file is too large to render.
See raw diff
|
|
math_eval/amc23/test_qwen-boxed_-1_seed0_t0.0_s0_e-1_qwen-boxed_metrics.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"num_samples": 40,
|
3 |
+
"num_scores": 40,
|
4 |
+
"timeout_samples": 0,
|
5 |
+
"empty_samples": 0,
|
6 |
+
"acc": 40.0,
|
7 |
+
"time_use_in_second": 45.59167838096619,
|
8 |
+
"time_use_in_minite": "0:45"
|
9 |
+
}
|
math_eval/college_math/test_qwen-boxed_-1_seed0_t0.0_s0_e-1.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:368a2ac394e55fcd050fa1140f4ecf28d6a0cbd2ceebd6c2eb86fcba012b7527
|
3 |
+
size 18199361
|
math_eval/college_math/test_qwen-boxed_-1_seed0_t0.0_s0_e-1_qwen-boxed_metrics.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"num_samples": 2818,
|
3 |
+
"num_scores": 2818,
|
4 |
+
"timeout_samples": 13,
|
5 |
+
"empty_samples": 21,
|
6 |
+
"acc": 42.8,
|
7 |
+
"time_use_in_second": 783.5445840358734,
|
8 |
+
"time_use_in_minite": "13:03"
|
9 |
+
}
|
math_eval/gaokao2023en/test_qwen-boxed_-1_seed0_t0.0_s0_e-1.jsonl
ADDED
The diff for this file is too large to render.
See raw diff
|
|
math_eval/gaokao2023en/test_qwen-boxed_-1_seed0_t0.0_s0_e-1_qwen-boxed_metrics.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"num_samples": 385,
|
3 |
+
"num_scores": 385,
|
4 |
+
"timeout_samples": 0,
|
5 |
+
"empty_samples": 3,
|
6 |
+
"acc": 54.3,
|
7 |
+
"time_use_in_second": 130.78554272651672,
|
8 |
+
"time_use_in_minite": "2:10"
|
9 |
+
}
|
math_eval/gsm8k/test_qwen-boxed_-1_seed0_t0.0_s0_e-1.jsonl
ADDED
The diff for this file is too large to render.
See raw diff
|
|
math_eval/gsm8k/test_qwen-boxed_-1_seed0_t0.0_s0_e-1_qwen-boxed_metrics.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"num_samples": 1319,
|
3 |
+
"num_scores": 1319,
|
4 |
+
"timeout_samples": 0,
|
5 |
+
"empty_samples": 1,
|
6 |
+
"acc": 80.0,
|
7 |
+
"time_use_in_second": 240.4810688495636,
|
8 |
+
"time_use_in_minite": "4:00"
|
9 |
+
}
|
math_eval/math/test_qwen-boxed_-1_seed0_t0.0_s0_e-1.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cf4503235d1b51251d8c5cb7311d3a60e67278013556e3881800dfae7a648cbf
|
3 |
+
size 50309114
|
math_eval/math/test_qwen-boxed_-1_seed0_t0.0_s0_e-1_qwen-boxed_metrics.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"num_samples": 5000,
|
3 |
+
"num_scores": 5000,
|
4 |
+
"timeout_samples": 1,
|
5 |
+
"empty_samples": 46,
|
6 |
+
"acc": 61.8,
|
7 |
+
"time_use_in_second": 2204.3763966560364,
|
8 |
+
"time_use_in_minite": "36:44"
|
9 |
+
}
|
math_eval/minerva_math/test_qwen-boxed_-1_seed0_t0.0_s0_e-1.jsonl
ADDED
The diff for this file is too large to render.
See raw diff
|
|
math_eval/minerva_math/test_qwen-boxed_-1_seed0_t0.0_s0_e-1_qwen-boxed_metrics.json
ADDED
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"num_samples": 272,
|
3 |
+
"num_scores": 272,
|
4 |
+
"timeout_samples": 0,
|
5 |
+
"empty_samples": 4,
|
6 |
+
"acc": 23.9,
|
7 |
+
"type_acc": {
|
8 |
+
"Differential Equations (18.03 Spring 2010)": 54.2,
|
9 |
+
"Dynamics and Control (2.003 Spring 2005)": 50.0,
|
10 |
+
"Ecology I (1.018J Fall 2009)": 40.0,
|
11 |
+
"Information and Entropy (6.050J Spring 2008)": 33.3,
|
12 |
+
"Introduction to Astronomy (8.282J Spring 2006)": 9.4,
|
13 |
+
"Introduction to Solid State Chemistry (3.091 Fall 2010)": 9.3,
|
14 |
+
"Physical Chemistry (5.61 Fall 2017)": 0.0,
|
15 |
+
"Principles of Microeconomics (14.01 Fall 2011)": 50.0,
|
16 |
+
"Relativity (8.033 Fall 2006)": 0.0
|
17 |
+
},
|
18 |
+
"time_use_in_second": 116.57392597198486,
|
19 |
+
"time_use_in_minite": "1:56"
|
20 |
+
}
|
math_eval/olympiadbench/test_qwen-boxed_-1_seed0_t0.0_s0_e-1.jsonl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:102cedef8e54800640a93e6a2842e221d78c6e39eb1c785c74baf7c82de8a84e
|
3 |
+
size 11002333
|
math_eval/olympiadbench/test_qwen-boxed_-1_seed0_t0.0_s0_e-1_qwen-boxed_metrics.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"num_samples": 675,
|
3 |
+
"num_scores": 675,
|
4 |
+
"timeout_samples": 0,
|
5 |
+
"empty_samples": 8,
|
6 |
+
"acc": 28.9,
|
7 |
+
"time_use_in_second": 445.92377376556396,
|
8 |
+
"time_use_in_minite": "7:25"
|
9 |
+
}
|