{
  "amc-cot": {
    "cot": {
      "accuracy": 0.075,
      "n_samples": 40
    },
    "tool": {
      "n_samples": 0
    }
  },
  "asdiv-cot": {
    "cot": {
      "accuracy": 0.8532731376975169,
      "n_samples": 2215
    },
    "tool": {
      "n_samples": 0
    }
  },
  "gsm8k-cot": {
    "cot": {
      "accuracy": 0.7710386656557998,
      "n_samples": 1319
    },
    "tool": {
      "n_samples": 0
    }
  },
  "math-500-cot": {
    "cot": {
      "accuracy": 0.386,
      "n_samples": 500
    },
    "tool": {
      "n_samples": 0
    }
  },
  "math-cot": {
    "cot": {
      "accuracy": 0.3854,
      "n_samples": 5000
    },
    "tool": {
      "n_samples": 0
    }
  },
  "math_sat-cot": {
    "cot": {
      "accuracy": 0.875,
      "n_samples": 32
    },
    "tool": {
      "n_samples": 0
    }
  },
  "mathqa-cot": {
    "cot": {
      "accuracy": 0.628,
      "n_samples": 1000
    },
    "tool": {
      "n_samples": 0
    }
  },
  "mawps-cot": {
    "cot": {
      "accuracy": 0.9554479418886198,
      "n_samples": 2065
    },
    "tool": {
      "n_samples": 0
    }
  },
  "mmlu-stem-cot": {
    "cot": {
      "accuracy": 0.6438038436050364,
      "n_samples": 3018
    },
    "tool": {
      "n_samples": 0
    }
  },
  "ocw-courses-cot": {
    "cot": {
      "accuracy": 0.13970588235294118,
      "n_samples": 272
    },
    "tool": {
      "n_samples": 0
    }
  },
  "olympiad-bench-cot": {
    "cot": {
      "accuracy": 0.10074074074074074,
      "n_samples": 675
    },
    "tool": {
      "n_samples": 0
    }
  },
  "svamp-cot": {
    "cot": {
      "accuracy": 0.864,
      "n_samples": 1000
    },
    "tool": {
      "n_samples": 0
    }
  },
  "tabmwp-cot": {
    "cot": {
      "accuracy": 0.716,
      "n_samples": 1000
    },
    "tool": {
      "n_samples": 0
    }
  }
}