/*
 * Leaderboard page: fetches benchmark results and shows them in a searchable,
 * sortable table with one row per model/agent-type entry.
 *
 * NOTE(review): the JSX element tags appear to be missing from this copy of the
 * file. The `return (` bodies contain only text and `{...}` expressions, and a
 * dangling `/>` follows the search handler, so the file does not parse as-is.
 * The notes below describe only the logic that is still visible; restore the
 * markup from the real source before changing any code.
 *
 * ScoreBar({ score, isVanilla = false })
 *   - Returns null when `score` is undefined or null.
 *   - A score <= 1 is multiplied by 100; a larger score is used unchanged
 *     (presumably fraction vs. already-scaled percentage; confirm with the API).
 *   - Builds an HSL colour whose hue is percentage * 1.2, capped at 120, and
 *     picks the "vanilla-bar" or "score-bar" class name from `isVanilla`.
 *     How these two values are applied is not visible here.
 *   - The one-decimal percentage label is rendered only when `isVanilla` is
 *     false.
 *
 * App()
 *   State
 *   - allData: parsed JSON response, initially [].
 *   - loading / error: request status; `error` holds the error's message.
 *   - sortConfig: { key, direction }, initially { 'Average', 'desc' }.
 *   - searchQuery: text used to filter rows by model id.
 *   - showVanilla: toggles the vanilla-score parts of the table (default true).
 *   - showToolCalling: declared, but neither the value nor its setter is
 *     referenced in the visible code.
 *
 *   Effect (empty dependency list, so it runs after the first render only)
 *   - Fetches the results URL, throws on a non-OK HTTP status, and stores the
 *     parsed JSON in `allData`. Failures are logged with console.error and
 *     stored in `error`; `loading` is cleared in `finally` either way.
 *
 *   Helpers
 *   - handleSort(key): flips to 'asc' only when `key` is already the sort key
 *     and the direction is 'desc'; every other case selects 'desc'.
 *   - getFilteredData(): keeps entries whose `agent_action_type` is
 *     'tool-calling', 'tool_calling' or 'code'.
 *   - getVanillaScore(modelId, metric): finds the first entry with the same
 *     `model_id` and `agent_action_type === 'vanilla'` and returns its
 *     `scores[metric]`, or undefined when there is no such entry.
 *     NOTE(review): only the entry is optional-chained, so an entry without
 *     `scores` would throw. The lookup is a linear scan and is called per
 *     rendered cell.
 *   - filteredAndSortedData: a lodash chain over getFilteredData() that keeps
 *     rows whose `model_id` contains `searchQuery` (case-insensitive) and
 *     orders them by `model_id` when the sort key is 'model', otherwise by
 *     `scores[sortConfig.key] || 0` (missing or falsy scores sort as 0).
 *     NOTE(review): an entry without `model_id` would throw in the filter.
 *
 *   Rendering
 *   - Returns a loading message while `loading` is true, then an error
 *     message when `error` is set, otherwise the main page.
 *   - Column headers and cells iterate over "Average", "GAIA", "MATH" and
 *     "SimpleQA"; every header except "Average" gets a " subset" suffix, and
 *     the active sort column shows a down arrow for 'desc', up for 'asc'.
 *   - Rows from tool-calling entries display the model id with " - JSON"
 *     appended.
 *   - Vanilla parts render only when `showVanilla` is true and
 *     getVanillaScore(...) is not undefined for that model and metric.
 */
import React, { useState, useEffect } from 'react'; import { chain } from 'lodash'; import './App.css'; const ScoreBar = ({ score, isVanilla = false }) => { if (score === undefined || score === null) return null; const percentage = score <= 1 ? score * 100 : score; const hue = Math.min(percentage * 1.2, 120); // Maps 0-100% to 0-120 (red to green) const backgroundColor = `hsl(${hue}, 80%, 50%)`; const className = isVanilla ? "vanilla-bar" : "score-bar"; return (

{!isVanilla && ( {percentage.toFixed(1)}% )}

); }; const App = () => { const [allData, setAllData] = useState([]); const [loading, setLoading] = useState(true); const [error, setError] = useState(null); const [sortConfig, setSortConfig] = useState({ key: 'Average', direction: 'desc' }); const [searchQuery, setSearchQuery] = useState(''); const [showVanilla, setShowVanilla] = useState(true); const [showToolCalling, setShowToolCalling] = useState(false); useEffect(() => { const fetchData = async () => { try { setLoading(true); // Fetch all data from API const response = await fetch('https://smolagents-smolagents-leaderboard.hf.space/api/results'); if (!response.ok) { throw new Error(`HTTP error! status: ${response.status}`); } const jsonData = await response.json(); setAllData(jsonData); } catch (err) { console.error('Error fetching data:', err); setError(err.message); } finally { setLoading(false); } }; fetchData(); }, []); const handleSort = (key) => { const direction = sortConfig.key === key && sortConfig.direction === 'desc' ? 'asc' : 'desc'; setSortConfig({ key, direction }); }; const getFilteredData = () => { const validActionTypes = ['tool-calling', 'tool_calling', 'code']; return allData.filter(item => validActionTypes.includes(item.agent_action_type)); }; // Get vanilla score for a model const getVanillaScore = (modelId, metric) => { const vanillaEntry = allData.find(item => item.model_id === modelId && item.agent_action_type === 'vanilla' ); return vanillaEntry?.scores[metric]; }; const filteredAndSortedData = chain(getFilteredData()) .filter(item => item.model_id.toLowerCase().includes(searchQuery.toLowerCase())) .orderBy( [item => { if (sortConfig.key === 'model') { return item.model_id; } return item.scores[sortConfig.key] || 0; }], [sortConfig.direction] ) .value(); if (loading) return

Loading benchmark results...

; if (error) return

Error: {error}

; return (

Smolagents LLM Leaderboard

How do different LLMs compare for powering agents?

Uses smolagents with smolagents benchmark.

Models marked with "JSON" are ran on a ToolCallingAgent (for proprietary models, this uses their provider's built-in tool calling modes) - others are using CodeAgent.

setSearchQuery(e.target.value)} />

setShowVanilla(!showVanilla)} /> Show Vanilla Scores

{["Average", "GAIA", "MATH", "SimpleQA"].map(benchmark => ( ))} {filteredAndSortedData.map((item, index) => { const displayModelId = item.agent_action_type === "tool-calling" || item.agent_action_type === "tool_calling" ? `${item.model_id} - JSON` : item.model_id; return ( {["Average", "GAIA", "MATH", "SimpleQA"].map(metric => ( ))} ); })}

handleSort('model')}> Model {sortConfig.key === 'model' && ( sortConfig.direction === 'desc' ? '↓' : '↑' )}	handleSort(benchmark)}> {benchmark === "Average" ? benchmark : benchmark + ` subset`} {sortConfig.key === benchmark && ( sortConfig.direction === 'desc' ? '↓' : '↑' )}
{displayModelId} {showVanilla && ( getVanillaScore(item.model_id, "Average") !== undefined && ( Vanilla score below ) )}	{showVanilla && getVanillaScore(item.model_id, metric) !== undefined && ( )}

Hugging Face smolagents 2025

); }; export default App;