PeterV09 committed on
Commit
f74dace
·
verified ·
1 Parent(s): 92c3f15

Initial commit

Browse files
.gitattributes CHANGED
@@ -33,3 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ math_eval/college_math/test_qwen-boxed_-1_seed0_t0.0_s0_e-1.jsonl filter=lfs diff=lfs merge=lfs -text
37
+ math_eval/math/test_qwen-boxed_-1_seed0_t0.0_s0_e-1.jsonl filter=lfs diff=lfs merge=lfs -text
38
+ math_eval/olympiadbench/test_qwen-boxed_-1_seed0_t0.0_s0_e-1.jsonl filter=lfs diff=lfs merge=lfs -text
math_eval/aime24/test_qwen-boxed_-1_seed0_t0.0_s0_e-1.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
math_eval/aime24/test_qwen-boxed_-1_seed0_t0.0_s0_e-1_qwen-boxed_metrics.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "num_samples": 30,
3
+ "num_scores": 30,
4
+ "timeout_samples": 0,
5
+ "empty_samples": 0,
6
+ "acc": 13.3,
7
+ "time_use_in_second": 43.51688075065613,
8
+ "time_use_in_minite": "0:43"
9
+ }
math_eval/amc23/test_qwen-boxed_-1_seed0_t0.0_s0_e-1.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
math_eval/amc23/test_qwen-boxed_-1_seed0_t0.0_s0_e-1_qwen-boxed_metrics.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "num_samples": 40,
3
+ "num_scores": 40,
4
+ "timeout_samples": 0,
5
+ "empty_samples": 0,
6
+ "acc": 40.0,
7
+ "time_use_in_second": 45.59167838096619,
8
+ "time_use_in_minite": "0:45"
9
+ }
math_eval/college_math/test_qwen-boxed_-1_seed0_t0.0_s0_e-1.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:368a2ac394e55fcd050fa1140f4ecf28d6a0cbd2ceebd6c2eb86fcba012b7527
3
+ size 18199361
math_eval/college_math/test_qwen-boxed_-1_seed0_t0.0_s0_e-1_qwen-boxed_metrics.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "num_samples": 2818,
3
+ "num_scores": 2818,
4
+ "timeout_samples": 13,
5
+ "empty_samples": 21,
6
+ "acc": 42.8,
7
+ "time_use_in_second": 783.5445840358734,
8
+ "time_use_in_minite": "13:03"
9
+ }
math_eval/gaokao2023en/test_qwen-boxed_-1_seed0_t0.0_s0_e-1.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
math_eval/gaokao2023en/test_qwen-boxed_-1_seed0_t0.0_s0_e-1_qwen-boxed_metrics.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "num_samples": 385,
3
+ "num_scores": 385,
4
+ "timeout_samples": 0,
5
+ "empty_samples": 3,
6
+ "acc": 54.3,
7
+ "time_use_in_second": 130.78554272651672,
8
+ "time_use_in_minite": "2:10"
9
+ }
math_eval/gsm8k/test_qwen-boxed_-1_seed0_t0.0_s0_e-1.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
math_eval/gsm8k/test_qwen-boxed_-1_seed0_t0.0_s0_e-1_qwen-boxed_metrics.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "num_samples": 1319,
3
+ "num_scores": 1319,
4
+ "timeout_samples": 0,
5
+ "empty_samples": 1,
6
+ "acc": 80.0,
7
+ "time_use_in_second": 240.4810688495636,
8
+ "time_use_in_minite": "4:00"
9
+ }
math_eval/math/test_qwen-boxed_-1_seed0_t0.0_s0_e-1.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cf4503235d1b51251d8c5cb7311d3a60e67278013556e3881800dfae7a648cbf
3
+ size 50309114
math_eval/math/test_qwen-boxed_-1_seed0_t0.0_s0_e-1_qwen-boxed_metrics.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "num_samples": 5000,
3
+ "num_scores": 5000,
4
+ "timeout_samples": 1,
5
+ "empty_samples": 46,
6
+ "acc": 61.8,
7
+ "time_use_in_second": 2204.3763966560364,
8
+ "time_use_in_minite": "36:44"
9
+ }
math_eval/minerva_math/test_qwen-boxed_-1_seed0_t0.0_s0_e-1.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
math_eval/minerva_math/test_qwen-boxed_-1_seed0_t0.0_s0_e-1_qwen-boxed_metrics.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "num_samples": 272,
3
+ "num_scores": 272,
4
+ "timeout_samples": 0,
5
+ "empty_samples": 4,
6
+ "acc": 23.9,
7
+ "type_acc": {
8
+ "Differential Equations (18.03 Spring 2010)": 54.2,
9
+ "Dynamics and Control (2.003 Spring 2005)": 50.0,
10
+ "Ecology I (1.018J Fall 2009)": 40.0,
11
+ "Information and Entropy (6.050J Spring 2008)": 33.3,
12
+ "Introduction to Astronomy (8.282J Spring 2006)": 9.4,
13
+ "Introduction to Solid State Chemistry (3.091 Fall 2010)": 9.3,
14
+ "Physical Chemistry (5.61 Fall 2017)": 0.0,
15
+ "Principles of Microeconomics (14.01 Fall 2011)": 50.0,
16
+ "Relativity (8.033 Fall 2006)": 0.0
17
+ },
18
+ "time_use_in_second": 116.57392597198486,
19
+ "time_use_in_minite": "1:56"
20
+ }
math_eval/olympiadbench/test_qwen-boxed_-1_seed0_t0.0_s0_e-1.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:102cedef8e54800640a93e6a2842e221d78c6e39eb1c785c74baf7c82de8a84e
3
+ size 11002333
math_eval/olympiadbench/test_qwen-boxed_-1_seed0_t0.0_s0_e-1_qwen-boxed_metrics.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "num_samples": 675,
3
+ "num_scores": 675,
4
+ "timeout_samples": 0,
5
+ "empty_samples": 8,
6
+ "acc": 28.9,
7
+ "time_use_in_second": 445.92377376556396,
8
+ "time_use_in_minite": "7:25"
9
+ }