{ "gpt-4o": { "display_name": "gpt 4o", "provider": "openai", "open": false, "size": "?B", "benchmarks": { "math_500": { "subset": "lighteval|math_500|0", "metrics": [ "extractive_match" ], "tags": { "latest": "2025-02-26T10-14-16.106571" } }, "gpqa_diamond": { "subset": "lighteval|gpqa:diamond|0", "metrics": [ "extractive_match" ], "tags": { "latest": "2025-02-26T10-14-16.106571" } }, "aime_24": { "subset": "lighteval|aime24|0", "metrics": [ "extractive_match" ], "tags": { "latest": "2025-02-26T10-14-16.106571" } }, "aime_25": { "subset": "lighteval|aime25|0", "metrics": [ "extractive_match" ], "tags": { "latest": "2025-02-26T10-14-16.106571" } }, "ifeval": { "subset": "extended|ifeval|0", "metrics": [ "prompt_level_strict_acc" ], "tags": { "latest": "2025-02-26T10-14-16.106571" } } } }, "claude-3-7-sonnet-20250219": { "display_name": "Claude 3.7 Sonnet", "provider": "anthropic", "open": false, "size": "?B", "benchmarks": { "math_500": { "subset": "lighteval|math_500|0", "metrics": [ "extractive_match" ], "tags": { "default": "2025-02-25T14-35-15.137825", "thinking": "2025-03-05T10-14-44.802711" } }, "gpqa_diamond": { "subset": "lighteval|gpqa:diamond|0", "metrics": [ "extractive_match" ], "tags": { "default": "2025-02-25T12-43-49.294245", "thinking": "2025-03-05T15-37-37.180318" } }, "aime_24": { "subset": "lighteval|aime24|0", "metrics": [ "extractive_match" ], "tags": { "default": "2025-02-25T12-37-52.771787", "thinking": "2025-03-05T12-39-13.627801" } }, "aime_25": { "subset": "lighteval|aime25|0", "metrics": [ "extractive_match" ], "tags": { "default": "2025-02-25T12-37-52.771787", "thinking": "2025-03-05T12-39-13.627801" } }, "ifeval": { "subset": "extended|ifeval|0", "metrics": [ "prompt_level_strict_acc" ], "tags": { "default": "2025-02-25T12-24-45.750753", "thinking": "2025-03-05T15-37-37.180318" } } } }, "o3-mini-2025-01-31": { "display_name": "o3-mini", "provider": "openai", "open": false, "size": "?B", "benchmarks": { "math_500": { "subset": 
"lighteval|math_500|0", "metrics": [ "extractive_match" ], "tags": { "latest": "2025-02-26T11-37-01.193437" } }, "gpqa_diamond": { "subset": "lighteval|gpqa:diamond|0", "metrics": [ "extractive_match" ], "tags": { "latest": "2025-02-26T11-37-01.193437" } }, "aime_24": { "subset": "lighteval|aime24|0", "metrics": [ "extractive_match" ], "tags": { "latest": "2025-02-26T11-37-01.193437" } }, "aime_25": { "subset": "lighteval|aime25|0", "metrics": [ "extractive_match" ], "tags": { "latest": "2025-02-26T11-37-01.193437" } }, "ifeval": { "subset": "extended|ifeval|0", "metrics": [ "prompt_level_strict_acc" ], "tags": { "latest": "2025-02-26T11-37-01.193437" } } } }, "moonshotai/Moonlight-16B-A3B-Instruct": { "display_name": "Moonlight", "provider": "moonshotai", "open": true, "size": "16B", "benchmarks": { "math_500": { "subset": "lighteval|math_500|0", "metrics": [ "extractive_match" ], "tags": { "latest": "2025_02_26T13_32_06.104265" } }, "gpqa_diamond": { "subset": "lighteval|gpqa:diamond|0", "metrics": [ "extractive_match" ], "tags": { "latest": "2025_02_26T13_32_06.104265" } }, "aime_24": { "subset": "lighteval|aime24|0", "metrics": [ "extractive_match" ], "tags": { "latest": "2025_02_26T13_32_06.104265" } }, "aime_25": { "subset": "lighteval|aime25|0", "metrics": [ "extractive_match" ], "tags": { "latest": "2025_02_26T13_32_06.104265" } }, "ifeval": { "subset": "extended|ifeval|0", "metrics": [ "prompt_level_strict_acc" ], "tags": { "latest": "2025_02_26T13_32_06.104265" } } } }, "meta-llama/Llama-3.3-70B-Instruct": { "display_name": "Llama 3.3 70B", "provider": "meta", "open": true, "size": "70B", "benchmarks": { "math_500": { "subset": "lighteval|math_500|0", "metrics": ["extractive_match"], "tags": { "latest": "2025-02-26T17-13-13.448521" } }, "gpqa_diamond": { "subset": "lighteval|gpqa:diamond|0", "metrics": ["extractive_match"], "tags": { "latest": "2025-02-26T17-13-13.448521" } }, "aime_24": { "subset": "lighteval|aime24|0", "metrics": ["extractive_match"], 
"tags": { "latest": "2025-02-26T17-13-13.448521" } }, "aime_25": { "subset": "lighteval|aime25|0", "metrics": ["extractive_match"], "tags": { "latest": "2025-02-26T17-13-13.448521" } }, "ifeval": { "subset": "extended|ifeval|0", "metrics": ["prompt_level_strict_acc"], "tags": { "latest": "2025-02-26T17-13-13.448521" } } } }, "deepseek-ai/DeepSeek-R1-Distill-Llama-70B": { "display_name": "DeepSeek Llama 70B", "provider": "deepseek", "open": true, "size": "70B", "benchmarks": { "math_500": { "subset": "lighteval|math_500|0", "metrics": ["extractive_match"], "tags": { "latest": "2025-02-27T11-09-04.037858" } }, "gpqa_diamond": { "subset": "lighteval|gpqa:diamond|0", "metrics": ["extractive_match"], "tags": { "latest": "2025-02-27T11-09-04.037858" } }, "aime_24": { "subset": "lighteval|aime24|0", "metrics": ["extractive_match"], "tags": { "latest": "2025-02-27T11-09-04.037858" } }, "aime_25": { "subset": "lighteval|aime25|0", "metrics": ["extractive_match"], "tags": { "latest": "2025-02-27T11-09-04.037858" } }, "ifeval": { "subset": "extended|ifeval|0", "metrics": ["prompt_level_strict_acc"], "tags": { "latest": "2025-02-27T14-02-02.414381" } } } }, "qihoo360/TinyR1-32B-Preview": { "display_name": "TinyR1 32B", "provider": "qihoo360", "open": true, "size": "32B", "benchmarks": { "math_500": { "subset": "lighteval|math_500|0", "metrics": ["extractive_match"], "tags": { "latest": "2025-02-27T13-32-41.564652" } }, "gpqa_diamond": { "subset": "lighteval|gpqa:diamond|0", "metrics": ["extractive_match"], "tags": { "latest": "2025-02-27T13-32-41.564652" } }, "aime_24": { "subset": "lighteval|aime24|0", "metrics": ["extractive_match"], "tags": { "latest": "2025-02-27T13-32-41.564652" } }, "aime_25": { "subset": "lighteval|aime25|0", "metrics": ["extractive_match"], "tags": { "latest": "2025-02-27T13-32-41.564652" } }, "ifeval": { "subset": "extended|ifeval|0", "metrics": ["prompt_level_strict_acc"], "tags": { "latest": "2025-02-27T13-32-41.564652" } } } }, 
"openai/gpt-4.5-preview-2025-02-27": { "display_name": "gpt 4.5", "provider": "openai", "open": false, "size": "?B", "benchmarks": { "math_500": { "subset": "lighteval|math_500|0", "metrics": ["extractive_match"], "tags": { "latest": "2025-03-03T11-17-20.767980" } }, "gpqa_diamond": { "subset": "lighteval|gpqa:diamond|0", "metrics": ["extractive_match"], "tags": { "latest": "2025-03-03T11-35-34.241611" } }, "aime_24": { "subset": "lighteval|aime24|0", "metrics": ["extractive_match"], "tags": { "latest": "2025-03-03T11-15-32.836958" } }, "aime_25": { "subset": "lighteval|aime25|0", "metrics": ["extractive_match"], "tags": { "latest": "2025-03-03T11-15-32.836958" } }, "ifeval": { "subset": "extended|ifeval|0", "metrics": ["prompt_level_strict_acc"], "tags": { "latest": "2025-03-03T11-17-20.767980" } } } }, "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B": { "display_name": "DeepSeek Qwen 32B", "provider": "deepseek", "open": true, "size": "32B", "benchmarks": { "math_500": { "subset": "lighteval|math_500|0", "metrics": ["extractive_match"], "tags": { "latest": "2025-03-03T14-51-09.849491" } }, "gpqa_diamond": { "subset": "lighteval|gpqa:diamond|0", "metrics": ["extractive_match"], "tags": { "latest": "2025-03-03T14-51-09.849491" } }, "aime_24": { "subset": "lighteval|aime24|0", "metrics": ["extractive_match"], "tags": { "latest": "2025-03-03T14-51-09.849491" } }, "aime_25": { "subset": "lighteval|aime25|0", "metrics": ["extractive_match"], "tags": { "latest": "2025-03-03T14-51-09.849491" } }, "ifeval": { "subset": "extended|ifeval|0", "metrics": ["prompt_level_strict_acc"], "tags": { "latest": "2025-03-03T15-06-10.838105" } } } }, "openai/deepseek-ai/DeepSeek-R1": { "display_name": "DeepSeek R1", "provider": "deepseek", "open": true, "size": "671B", "benchmarks": { "math_500": { "subset": "lighteval|math_500|0", "metrics": ["extractive_match"], "tags": { "latest": "2025-03-04T17-06-33.124766" } }, "gpqa_diamond": { "subset": "lighteval|gpqa:diamond|0", "metrics": 
["extractive_match"], "tags": { "latest": "2025-03-04T17-06-33.124766" } }, "aime_24": { "subset": "lighteval|aime24|0", "metrics": ["extractive_match"], "tags": { "latest": "2025-03-04T14-52-35.594174" } }, "aime_25": { "subset": "lighteval|aime25|0", "metrics": ["extractive_match"], "tags": { "latest": "2025-03-04T14-25-05.009799" } }, "ifeval": { "subset": "extended|ifeval|0", "metrics": ["prompt_level_strict_acc"], "tags": { "latest": "2025-03-04T15-24-42.488745" } } } }, "Qwen/QwQ-32B": { "display_name": "QwQ 32B", "provider": "Qwen", "open": true, "size": "32B", "benchmarks": { "math_500": { "subset": "lighteval|math_500|0", "metrics": ["extractive_match"], "tags": { "latest": "2025-03-10T11-47-46.303371" } }, "gpqa_diamond": { "subset": "lighteval|gpqa:diamond|0", "metrics": ["extractive_match"], "tags": { "latest": "2025-03-10T11-47-46.303371" } }, "aime_24": { "subset": "lighteval|aime24|0", "metrics": ["extractive_match"], "tags": { "latest": "2025-03-10T10-36-07.886033" } }, "aime_25": { "subset": "lighteval|aime25|0", "metrics": ["extractive_match"], "tags": { "latest": "2025-03-10T10-36-07.886033" } }, "ifeval": { "subset": "extended|ifeval|0", "metrics": ["prompt_level_strict_acc"], "tags": { "latest": "2025-03-10T12-21-36.862202" } } } }, "google/gemma-3-1b-it": { "display_name": "Gemma 3 1B", "provider": "google", "open": true, "size": "1B", "benchmarks": { "aime_25": { "subset": "lighteval|aime25|0", "metrics": ["extractive_match"], "tags": { "latest": "2025-03-18T14-25-56.178612" } } } }, "google/gemma-3-12b-it": { "display_name": "Gemma 3 12B", "provider": "google", "open": true, "size": "12B", "benchmarks": { "aime_25": { "subset": "lighteval|aime25|0", "metrics": ["extractive_match"], "tags": { "latest": "2025-03-18T14-36-23.368081" } } } }, "google/gemma-3-27b-it": { "display_name": "Gemma 3 27B", "provider": "google", "open": true, "size": "27B", "benchmarks": { "aime_25": { "subset": "lighteval|aime25|0", "metrics": ["extractive_match"], 
"tags": { "latest": "2025-03-18T14-41-33.181467" } }, "aime_24": { "subset": "lighteval|aime24|0", "metrics": ["extractive_match"], "tags": { "latest": "2025-03-18T15-11-34.174477" } }, "ifeval": { "subset": "extended|ifeval|0", "metrics": ["prompt_level_strict_acc"], "tags": { "latest": "2025-03-18T15-20-14.979833" } }, "gpqa_diamond": { "subset": "lighteval|gpqa:diamond|0", "metrics": ["extractive_match"], "tags": { "latest": "2025-03-18T15-20-14.979833" } }, "math_500": { "subset": "lighteval|math_500|0", "metrics": ["extractive_match"], "tags": { "latest": "2025-03-18T15-20-14.979833" } } } } }