import React, { useState, useEffect } from 'react';
import { chain } from 'lodash';
import './App.css';
/**
 * Score bar for a single benchmark value.
 *
 * @param {object} props
 * @param {number|null|undefined} props.score - Value to display. Values at or
 *   below 1 are scaled by 100; larger values are used unchanged.
 * @param {boolean} [props.isVanilla=false] - Selects the "vanilla-bar" class
 *   name instead of "score-bar" and suppresses the percentage label.
 * @returns null when score is undefined or null; otherwise the bar markup.
 *
 * NOTE(review): the markup inside the return statement appears to have lost
 * its JSX tags (only the label expression remains), so how className and
 * backgroundColor are applied cannot be confirmed from here. Restore the tags
 * before building.
 */
const ScoreBar = ({ score, isVanilla = false }) => {
// Nothing to draw when no score was supplied.
if (score === undefined || score === null) return null;
// Heuristic: a score at or below 1 is treated as a fraction and converted to a percentage.
const percentage = score <= 1 ? score * 100 : score;
const hue = Math.min(percentage * 1.2, 120); // Maps 0-100% to 0-120 (red to green)
// Saturation and lightness are fixed; only the hue depends on the score.
const backgroundColor = `hsl(${hue}, 80%, 50%)`;
const className = isVanilla ? "vanilla-bar" : "score-bar";
return (
{!isVanilla && (
{percentage.toFixed(1)}%
)}
);
};
/**
 * Leaderboard component. Fetches the benchmark results once on mount, then
 * renders the agent entries filtered by the search query and ordered by the
 * active sort key and direction.
 */
const App = () => {
// Result entries exactly as returned by the results API (empty until the fetch completes).
const [allData, setAllData] = useState([]);
// True from the start until the fetch settles, whether it succeeded or failed.
const [loading, setLoading] = useState(true);
// Message of the error caught while fetching, or null when there was none.
const [error, setError] = useState(null);
// Active sort: key is 'model' or a key of an entry's scores object; direction is 'asc' or 'desc'.
const [sortConfig, setSortConfig] = useState({ key: 'Average', direction: 'desc' });
// Text matched case-insensitively against each entry's model_id.
const [searchQuery, setSearchQuery] = useState('');
// Gates the vanilla-score output in the rendered entries.
const [showVanilla, setShowVanilla] = useState(true);
// NOTE(review): showToolCalling is not read anywhere in the visible component - confirm whether it is still needed.
const [showToolCalling, setShowToolCalling] = useState(false);
// Load the leaderboard results once, when the component mounts.
useEffect(() => {
  // Lets the cleanup function cancel the request if the component unmounts
  // before the response arrives, so no state is set on an unmounted component.
  const controller = new AbortController();

  const fetchData = async () => {
    try {
      setLoading(true);
      // Fetch all data from API
      const response = await fetch(
        'https://smolagents-smolagents-leaderboard.hf.space/api/results',
        { signal: controller.signal },
      );
      if (!response.ok) {
        throw new Error(`HTTP error! status: ${response.status}`);
      }
      const jsonData = await response.json();
      // The component calls array methods (filter/find) on this value, so a
      // non-array payload is reported as an error instead of crashing render.
      if (!Array.isArray(jsonData)) {
        throw new Error('Unexpected response format: expected an array of results');
      }
      setAllData(jsonData);
    } catch (err) {
      // An aborted request means the component is gone; nothing to report.
      if (controller.signal.aborted) {
        return;
      }
      console.error('Error fetching data:', err);
      setError(err.message);
    } finally {
      if (!controller.signal.aborted) {
        setLoading(false);
      }
    }
  };

  fetchData();

  return () => controller.abort();
}, []);
/**
 * Makes `key` the active sort key.
 *
 * Selecting the key that is already active and descending flips it to
 * ascending; every other case (a different key, or the active key while
 * ascending) results in descending order.
 *
 * @param {string} key - Sort key to activate.
 */
const handleSort = (key) => {
  const wasDescendingOnKey = sortConfig.key === key && sortConfig.direction === 'desc';
  let direction = 'desc';
  if (wasDescendingOnKey) {
    direction = 'asc';
  }
  setSortConfig({ key, direction });
};
/**
 * Returns the entries of `allData` that come from an agent run, i.e. those
 * whose `agent_action_type` is 'tool-calling', 'tool_calling' or 'code'.
 * Entries of any other type (for example 'vanilla') are left out.
 *
 * @returns {Array<object>} A new array; `allData` itself is not modified.
 */
const getFilteredData = () => {
  const isAgentRun = (entry) => {
    switch (entry.agent_action_type) {
      case 'tool-calling':
      case 'tool_calling':
      case 'code':
        return true;
      default:
        return false;
    }
  };
  return allData.filter(isAgentRun);
};
/**
 * Get vanilla score for a model.
 *
 * Looks in `allData` for the first entry with the given `model_id` whose
 * `agent_action_type` is 'vanilla' and reads `metric` from its `scores`.
 *
 * @param {string} modelId - Value compared against each entry's `model_id`.
 * @param {string} metric - Key to read from the matching entry's `scores`.
 * @returns {*} The stored score, or `undefined` when there is no vanilla entry
 *   for the model, the entry has no `scores` object, or the metric is absent.
 */
const getVanillaScore = (modelId, metric) => {
  const vanillaEntry = allData.find(
    (item) => item.model_id === modelId && item.agent_action_type === 'vanilla',
  );
  // A vanilla entry without a `scores` object must read as "no score" rather
  // than throw while rendering.
  return vanillaEntry?.scores?.[metric];
};
// Entries to render: agent runs whose model id contains the search text
// (compared case-insensitively), ordered by the active sort key and direction.
const matchesSearch = (row) => row.model_id.toLowerCase().includes(searchQuery.toLowerCase());
const sortValue = (row) => {
  if (sortConfig.key !== 'model') {
    // A missing (or otherwise falsy) score is ordered as 0.
    return row.scores[sortConfig.key] || 0;
  }
  return row.model_id;
};
const filteredAndSortedData = chain(getFilteredData())
  .filter(matchesSearch)
  .orderBy([sortValue], [sortConfig.direction])
  .value();
// NOTE(review): from here to the end of the component the markup tags appear
// to have been stripped (only text and {...} expressions remain). Restore the
// JSX before building; the comments below describe only what is still visible.
// While the request has not settled, return a loading message instead of the results.
if (loading) return Loading benchmark results...
;
// When the fetch failed, return the stored error message instead of the results.
if (error) return Error: {error}
;
// Main view. Sort controls call handleSort with 'model' or a benchmark name and
// show an arrow next to the active sort key. Each entry is labelled with its
// model_id, with a " - JSON" suffix when agent_action_type is tool-calling or
// tool_calling. Vanilla output is emitted only when showVanilla is set and
// getVanillaScore returns something other than undefined for that model and metric.
return (
Smolagents LLM Leaderboard
How do different LLMs compare for powering agents?
Uses smolagents with smolagents benchmark.
Models marked with "JSON" are ran on a ToolCallingAgent (for proprietary models, this uses their provider's built-in tool calling modes) - others are using CodeAgent.
handleSort('model')}>
Model {sortConfig.key === 'model' && (
sortConfig.direction === 'desc' ? '↓' : '↑'
)}
|
{["Average", "GAIA", "MATH", "SimpleQA"].map(benchmark => (
handleSort(benchmark)}>
{benchmark === "Average" ? benchmark : benchmark + ` subset`} {sortConfig.key === benchmark && (
sortConfig.direction === 'desc' ? '↓' : '↑'
)}
|
))}
{filteredAndSortedData.map((item, index) => {
const displayModelId = item.agent_action_type === "tool-calling" || item.agent_action_type === "tool_calling"
? `${item.model_id} - JSON`
: item.model_id;
return (
{displayModelId}
{showVanilla && (
getVanillaScore(item.model_id, "Average") !== undefined && (
Vanilla score below
)
)}
|
{["Average", "GAIA", "MATH", "SimpleQA"].map(metric => (
{showVanilla && getVanillaScore(item.model_id, metric) !== undefined && (
)}
|
))}
);
})}
Hugging Face smolagents 2025
);
};
export default App;