{
  "gpt-4o": {
    "display_name": "GPT-4o",
    "provider": "openai",
    "open": false,
    "size": "?B",
    "benchmarks": {
      "math_500": {
        "subset": "lighteval|math_500|0",
        "metrics": ["extractive_match"],
        "tags": {
          "latest": "2025-02-26T10-14-16.106571"
        }
      },
      "gpqa_diamond": {
        "subset": "lighteval|gpqa:diamond|0",
        "metrics": ["extractive_match"],
        "tags": {
          "latest": "2025-02-26T10-14-16.106571"
        }
      },
      "aime_24": {
        "subset": "lighteval|aime24|0",
        "metrics": ["extractive_match"],
        "tags": {
          "latest": "2025-02-26T10-14-16.106571"
        }
      },
      "aime_25": {
        "subset": "lighteval|aime25|0",
        "metrics": ["extractive_match"],
        "tags": {
          "latest": "2025-02-26T10-14-16.106571"
        }
      },
      "ifeval": {
        "subset": "extended|ifeval|0",
        "metrics": ["prompt_level_strict_acc"],
        "tags": {
          "latest": "2025-02-26T10-14-16.106571"
        }
      }
    }
  },
"claude-3-7-sonnet-20250219": { | |
"display_name": "Claude 3.7 Sonnet", | |
"provider": "anthropic", | |
"open": false, | |
"size": "?B", | |
"benchmarks": { | |
"math_500": { | |
"subset": "lighteval|math_500|0", | |
"metrics": [ | |
"extractive_match" | |
], | |
"tags": { | |
"default": "2025-02-25T14-35-15.137825", | |
"thinking": "2025-03-05T10-14-44.802711" | |
} | |
}, | |
"gpqa_diamond": { | |
"subset": "lighteval|gpqa:diamond|0", | |
"metrics": [ | |
"extractive_match" | |
], | |
"tags": { | |
"default": "2025-02-25T12-43-49.294245", | |
"thinking": "2025-03-05T15-37-37.180318" | |
} | |
}, | |
"aime_24": { | |
"subset": "lighteval|aime24|0", | |
"metrics": [ | |
"extractive_match" | |
], | |
"tags": { | |
"default": "2025-02-25T12-37-52.771787", | |
"thinking": "2025-03-05T12-39-13.627801" | |
} | |
}, | |
"aime_25": { | |
"subset": "lighteval|aime25|0", | |
"metrics": [ | |
"extractive_match" | |
], | |
"tags": { | |
"default": "2025-02-25T12-37-52.771787", | |
"thinking": "2025-03-05T12-39-13.627801" | |
} | |
}, | |
"ifeval": { | |
"subset": "extended|ifeval|0", | |
"metrics": [ | |
"prompt_level_strict_acc" | |
], | |
"tags": { | |
"default": "2025-02-25T12-24-45.750753", | |
"thinking": "2025-03-05T15-37-37.180318" | |
} | |
} | |
} | |
}, | |
"o3-mini-2025-01-31": { | |
"display_name": "o3-mini", | |
"provider": "openai", | |
"open": false, | |
"size": "?B", | |
"benchmarks": { | |
"math_500": { | |
"subset": "lighteval|math_500|0", | |
"metrics": [ | |
"extractive_match" | |
], | |
"tags": { | |
"latest": "2025-02-26T11-37-01.193437" | |
} | |
}, | |
"gpqa_diamond": { | |
"subset": "lighteval|gpqa:diamond|0", | |
"metrics": [ | |
"extractive_match" | |
], | |
"tags": { | |
"latest": "2025-02-26T11-37-01.193437" | |
} | |
}, | |
"aime_24": { | |
"subset": "lighteval|aime24|0", | |
"metrics": [ | |
"extractive_match" | |
], | |
"tags": { | |
"latest": "2025-02-26T11-37-01.193437" | |
} | |
}, | |
"aime_25": { | |
"subset": "lighteval|aime25|0", | |
"metrics": [ | |
"extractive_match" | |
], | |
"tags": { | |
"latest": "2025-02-26T11-37-01.193437" | |
} | |
}, | |
"ifeval": { | |
"subset": "extended|ifeval|0", | |
"metrics": [ | |
"prompt_level_strict_acc" | |
], | |
"tags": { | |
"latest": "2025-02-26T11-37-01.193437" | |
} | |
} | |
} | |
}, | |
"moonshotai/Moonlight-16B-A3B-Instruct": { | |
"display_name": "Moonlight", | |
"provider": "moonshotai", | |
"open": true, | |
"size": "16B", | |
"benchmarks": { | |
"math_500": { | |
"subset": "lighteval|math_500|0", | |
"metrics": [ | |
"extractive_match" | |
], | |
"tags": { | |
"latest": "2025_02_26T13_32_06.104265" | |
} | |
}, | |
"gpqa_diamond": { | |
"subset": "lighteval|gpqa:diamond|0", | |
"metrics": [ | |
"extractive_match" | |
], | |
"tags": { | |
"latest": "2025_02_26T13_32_06.104265" | |
} | |
}, | |
"aime_24": { | |
"subset": "lighteval|aime24|0", | |
"metrics": [ | |
"extractive_match" | |
], | |
"tags": { | |
"latest": "2025_02_26T13_32_06.104265" | |
} | |
}, | |
"aime_25": { | |
"subset": "lighteval|aime25|0", | |
"metrics": [ | |
"extractive_match" | |
], | |
"tags": { | |
"latest": "2025_02_26T13_32_06.104265" | |
} | |
}, | |
"ifeval": { | |
"subset": "extended|ifeval|0", | |
"metrics": [ | |
"prompt_level_strict_acc" | |
], | |
"tags": { | |
"latest": "2025_02_26T13_32_06.104265" | |
} | |
} | |
} | |
}, | |
"meta-llama/Llama-3.3-70B-Instruct": { | |
"display_name": "Llama 3.3 70B", | |
"provider": "meta", | |
"open": true, | |
"size": "70B", | |
"benchmarks": { | |
"math_500": { | |
"subset": "lighteval|math_500|0", | |
"metrics": ["extractive_match"], | |
"tags": { | |
"latest": "2025-02-26T17-13-13.448521" | |
} | |
}, | |
"gpqa_diamond": { | |
"subset": "lighteval|gpqa:diamond|0", | |
"metrics": ["extractive_match"], | |
"tags": { | |
"latest": "2025-02-26T17-13-13.448521" | |
} | |
}, | |
"aime_24": { | |
"subset": "lighteval|aime24|0", | |
"metrics": ["extractive_match"], | |
"tags": { | |
"latest": "2025-02-26T17-13-13.448521" | |
} | |
}, | |
"aime_25": { | |
"subset": "lighteval|aime25|0", | |
"metrics": ["extractive_match"], | |
"tags": { | |
"latest": "2025-02-26T17-13-13.448521" | |
} | |
}, | |
"ifeval": { | |
"subset": "extended|ifeval|0", | |
"metrics": ["prompt_level_strict_acc"], | |
"tags": { | |
"latest": "2025-02-26T17-13-13.448521" | |
} | |
} | |
} | |
}, | |
"deepseek-ai/DeepSeek-R1-Distill-Llama-70B": { | |
"display_name": "DeepSeek Llama 70B", | |
"provider": "deepseek", | |
"open": true, | |
"size": "70B", | |
"benchmarks": { | |
"math_500": { | |
"subset": "lighteval|math_500|0", | |
"metrics": ["extractive_match"], | |
"tags": { | |
"latest": "2025-02-27T11-09-04.037858" | |
} | |
}, | |
"gpqa_diamond": { | |
"subset": "lighteval|gpqa:diamond|0", | |
"metrics": ["extractive_match"], | |
"tags": { | |
"latest": "2025-02-27T11-09-04.037858" | |
} | |
}, | |
"aime_24": { | |
"subset": "lighteval|aime24|0", | |
"metrics": ["extractive_match"], | |
"tags": { | |
"latest": "2025-02-27T11-09-04.037858" | |
} | |
}, | |
"aime_25": { | |
"subset": "lighteval|aime25|0", | |
"metrics": ["extractive_match"], | |
"tags": { | |
"latest": "2025-02-27T11-09-04.037858" | |
} | |
}, | |
"ifeval": { | |
"subset": "extended|ifeval|0", | |
"metrics": ["prompt_level_strict_acc"], | |
"tags": { | |
"latest": "2025-02-27T14-02-02.414381" | |
} | |
} | |
} | |
}, | |
"qihoo360/TinyR1-32B-Preview": { | |
"display_name": "TinyR1 32B", | |
"provider": "qihoo360", | |
"open": true, | |
"size": "32B", | |
"benchmarks": { | |
"math_500": { | |
"subset": "lighteval|math_500|0", | |
"metrics": ["extractive_match"], | |
"tags": { | |
"latest": "2025-02-27T13-32-41.564652" | |
} | |
}, | |
"gpqa_diamond": { | |
"subset": "lighteval|gpqa:diamond|0", | |
"metrics": ["extractive_match"], | |
"tags": { | |
"latest": "2025-02-27T13-32-41.564652" | |
} | |
}, | |
"aime_24": { | |
"subset": "lighteval|aime24|0", | |
"metrics": ["extractive_match"], | |
"tags": { | |
"latest": "2025-02-27T13-32-41.564652" | |
} | |
}, | |
"aime_25": { | |
"subset": "lighteval|aime25|0", | |
"metrics": ["extractive_match"], | |
"tags": { | |
"latest": "2025-02-27T13-32-41.564652" | |
} | |
}, | |
"ifeval": { | |
"subset": "extended|ifeval|0", | |
"metrics": ["prompt_level_strict_acc"], | |
"tags": { | |
"latest": "2025-02-27T13-32-41.564652" | |
} | |
} | |
} | |
}, | |
"openai/gpt-4.5-preview-2025-02-27": { | |
"display_name": "gpt 4.5", | |
"provider": "openai", | |
"open": false, | |
"size": "?B", | |
"benchmarks": { | |
"math_500": { | |
"subset": "lighteval|math_500|0", | |
"metrics": ["extractive_match"], | |
"tags": { | |
"latest": "2025-03-03T11-17-20.767980" | |
} | |
}, | |
"gpqa_diamond": { | |
"subset": "lighteval|gpqa:diamond|0", | |
"metrics": ["extractive_match"], | |
"tags": { | |
"latest": "2025-03-03T11-35-34.241611" | |
} | |
}, | |
"aime_24": { | |
"subset": "lighteval|aime24|0", | |
"metrics": ["extractive_match"], | |
"tags": { | |
"latest": "2025-03-03T11-15-32.836958" | |
} | |
}, | |
"aime_25": { | |
"subset": "lighteval|aime25|0", | |
"metrics": ["extractive_match"], | |
"tags": { | |
"latest": "2025-03-03T11-15-32.836958" | |
} | |
}, | |
"ifeval": { | |
"subset": "extended|ifeval|0", | |
"metrics": ["prompt_level_strict_acc"], | |
"tags": { | |
"latest": "2025-03-03T11-17-20.767980" | |
} | |
} | |
} | |
}, | |
"deepseek-ai/DeepSeek-R1-Distill-Qwen-32B": { | |
"display_name": "DeepSeek Qwen 32B", | |
"provider": "deepseek", | |
"open": true, | |
"size": "32B", | |
"benchmarks": { | |
"math_500": { | |
"subset": "lighteval|math_500|0", | |
"metrics": ["extractive_match"], | |
"tags": { | |
"latest": "2025-03-03T14-51-09.849491" | |
} | |
}, | |
"gpqa_diamond": { | |
"subset": "lighteval|gpqa:diamond|0", | |
"metrics": ["extractive_match"], | |
"tags": { | |
"latest": "2025-03-03T14-51-09.849491" | |
} | |
}, | |
"aime_24": { | |
"subset": "lighteval|aime24|0", | |
"metrics": ["extractive_match"], | |
"tags": { | |
"latest": "2025-03-03T14-51-09.849491" | |
} | |
}, | |
"aime_25": { | |
"subset": "lighteval|aime25|0", | |
"metrics": ["extractive_match"], | |
"tags": { | |
"latest": "2025-03-03T14-51-09.849491" | |
} | |
}, | |
"ifeval": { | |
"subset": "extended|ifeval|0", | |
"metrics": ["prompt_level_strict_acc"], | |
"tags": { | |
"latest": "2025-03-03T15-06-10.838105" | |
} | |
} | |
} | |
}, | |
"openai/deepseek-ai/DeepSeek-R1": { | |
"display_name": "DeepSeek R1", | |
"provider": "deepseek", | |
"open": true, | |
"size": "671B", | |
"benchmarks": { | |
"math_500": { | |
"subset": "lighteval|math_500|0", | |
"metrics": ["extractive_match"], | |
"tags": { | |
"latest": "2025-03-04T17-06-33.124766" | |
} | |
}, | |
"gpqa_diamond": { | |
"subset": "lighteval|gpqa:diamond|0", | |
"metrics": ["extractive_match"], | |
"tags": { | |
"latest": "2025-03-04T17-06-33.124766" | |
} | |
}, | |
"aime_24": { | |
"subset": "lighteval|aime24|0", | |
"metrics": ["extractive_match"], | |
"tags": { | |
"latest": "2025-03-04T14-52-35.594174" | |
} | |
}, | |
"aime_25": { | |
"subset": "lighteval|aime25|0", | |
"metrics": ["extractive_match"], | |
"tags": { | |
"latest": "2025-03-04T14-25-05.009799" | |
} | |
}, | |
"ifeval": { | |
"subset": "extended|ifeval|0", | |
"metrics": ["prompt_level_strict_acc"], | |
"tags": { | |
"latest": "2025-03-04T15-24-42.488745" | |
} | |
} | |
} | |
}, | |
"Qwen/QwQ-32B": { | |
"display_name": "QwQ 32B", | |
"provider": "Qwen", | |
"open": true, | |
"size": "32B", | |
"benchmarks": { | |
"math_500": { | |
"subset": "lighteval|math_500|0", | |
"metrics": ["extractive_match"], | |
"tags": { | |
"latest": "2025-03-10T11-47-46.303371" | |
} | |
}, | |
"gpqa_diamond": { | |
"subset": "lighteval|gpqa:diamond|0", | |
"metrics": ["extractive_match"], | |
"tags": { | |
"latest": "2025-03-10T11-47-46.303371" | |
} | |
}, | |
"aime_24": { | |
"subset": "lighteval|aime24|0", | |
"metrics": ["extractive_match"], | |
"tags": { | |
"latest": "2025-03-10T10-36-07.886033" | |
} | |
}, | |
"aime_25": { | |
"subset": "lighteval|aime25|0", | |
"metrics": ["extractive_match"], | |
"tags": { | |
"latest": "2025-03-10T10-36-07.886033" | |
} | |
}, | |
"ifeval": { | |
"subset": "extended|ifeval|0", | |
"metrics": ["prompt_level_strict_acc"], | |
"tags": { | |
"latest": "2025-03-10T12-21-36.862202" | |
} | |
} | |
} | |
}, | |
"google/gemma-3-1b-it": { | |
"display_name": "Gemma 3", | |
"provider": "google", | |
"open": true, | |
"size": "1B", | |
"benchmarks": { | |
"aime_25": { | |
"subset": "lighteval|aime25|0", | |
"metrics": ["extractive_match"], | |
"tags": { | |
"latest": "2025-03-18T14-25-56.178612" | |
} | |
} | |
} | |
}, | |
"google/gemma-3-12b-it": { | |
"display_name": "Gemma 3 12B", | |
"provider": "google", | |
"open": true, | |
"size": "12B", | |
"benchmarks": { | |
"aime_25": { | |
"subset": "lighteval|aime25|0", | |
"metrics": ["extractive_match"], | |
"tags": { | |
"latest": "2025-03-18T14-36-23.368081" | |
} | |
} | |
} | |
}, | |
"google/gemma-3-27b-it": { | |
"display_name": "Gemma 3 27B", | |
"provider": "google", | |
"open": true, | |
"size": "27B", | |
"benchmarks": { | |
"aime_25": { | |
"subset": "lighteval|aime25|0", | |
"metrics": ["extractive_match"], | |
"tags": { | |
"latest": "2025-03-18T14-41-33.181467" | |
} | |
}, | |
"aime_24": { | |
"subset": "lighteval|aime24|0", | |
"metrics": ["extractive_match"], | |
"tags": { | |
"latest": "2025-03-18T15-11-34.174477" | |
} | |
}, | |
"ifeval": { | |
"subset": "extended|ifeval|0", | |
"metrics": ["prompt_level_strict_acc"], | |
"tags": { | |
"latest": "2025-03-18T15-20-14.979833" | |
} | |
}, | |
"gpqa_diamond": { | |
"subset": "lighteval|gpqa:diamond|0", | |
"metrics": ["extractive_match"], | |
"tags": { | |
"latest": "2025-03-18T15-20-14.979833" | |
} | |
}, | |
"math_500": { | |
"subset": "lighteval|math_500|0", | |
"metrics": ["extractive_match"], | |
"tags": { | |
"latest": "2025-03-18T15-20-14.979833" | |
} | |
} | |
} | |
} | |
} |
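
The registry above maps each model to its evaluation runs: every benchmark entry names a lighteval task subset, the metric to read, and one or more tagged run timestamps ("latest" for a single run; "default" and "thinking" where Claude 3.7 Sonnet was evaluated in both modes). A minimal sketch of how a script might walk it, assuming the JSON is saved as models.json (the filename and the printed layout are illustrative, not taken from this file):

```python
import json

# Load the registry; "models.json" is an assumed filename.
with open("models.json") as f:
    registry = json.load(f)

# Walk every model / benchmark / tagged run. Each timestamp identifies
# one evaluation run of the lighteval task named in "subset".
for model_id, entry in registry.items():
    for bench, cfg in entry["benchmarks"].items():
        metric = cfg["metrics"][0]  # e.g. "extractive_match"
        for tag, timestamp in cfg["tags"].items():
            print(f"{entry['display_name']:<22} {bench:<13} "
                  f"{tag:<9} {metric:<24} {timestamp}")
```

Keying runs by tag rather than storing a single timestamp lets one benchmark hold several result sets per model (here, Claude's default versus extended-thinking runs) without changing the schema for models that only have a "latest" run.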