{ | |
"amc-cot": { | |
"cot": { | |
"accuracy": 0.075, | |
"n_samples": 40 | |
}, | |
"tool": { | |
"n_samples": 0 | |
} | |
}, | |
"asdiv-cot": { | |
"cot": { | |
"accuracy": 0.8532731376975169, | |
"n_samples": 2215 | |
}, | |
"tool": { | |
"n_samples": 0 | |
} | |
}, | |
"gsm8k-cot": { | |
"cot": { | |
"accuracy": 0.7710386656557998, | |
"n_samples": 1319 | |
}, | |
"tool": { | |
"n_samples": 0 | |
} | |
}, | |
"math-500-cot": { | |
"cot": { | |
"accuracy": 0.386, | |
"n_samples": 500 | |
}, | |
"tool": { | |
"n_samples": 0 | |
} | |
}, | |
"math-cot": { | |
"cot": { | |
"accuracy": 0.3854, | |
"n_samples": 5000 | |
}, | |
"tool": { | |
"n_samples": 0 | |
} | |
}, | |
"math_sat-cot": { | |
"cot": { | |
"accuracy": 0.875, | |
"n_samples": 32 | |
}, | |
"tool": { | |
"n_samples": 0 | |
} | |
}, | |
"mathqa-cot": { | |
"cot": { | |
"accuracy": 0.628, | |
"n_samples": 1000 | |
}, | |
"tool": { | |
"n_samples": 0 | |
} | |
}, | |
"mawps-cot": { | |
"cot": { | |
"accuracy": 0.9554479418886198, | |
"n_samples": 2065 | |
}, | |
"tool": { | |
"n_samples": 0 | |
} | |
}, | |
"mmlu-stem-cot": { | |
"cot": { | |
"accuracy": 0.6438038436050364, | |
"n_samples": 3018 | |
}, | |
"tool": { | |
"n_samples": 0 | |
} | |
}, | |
"ocw-courses-cot": { | |
"cot": { | |
"accuracy": 0.13970588235294118, | |
"n_samples": 272 | |
}, | |
"tool": { | |
"n_samples": 0 | |
} | |
}, | |
"olympiad-bench-cot": { | |
"cot": { | |
"accuracy": 0.10074074074074074, | |
"n_samples": 675 | |
}, | |
"tool": { | |
"n_samples": 0 | |
} | |
}, | |
"svamp-cot": { | |
"cot": { | |
"accuracy": 0.864, | |
"n_samples": 1000 | |
}, | |
"tool": { | |
"n_samples": 0 | |
} | |
}, | |
"tabmwp-cot": { | |
"cot": { | |
"accuracy": 0.716, | |
"n_samples": 1000 | |
}, | |
"tool": { | |
"n_samples": 0 | |
} | |
} | |
} |