Spaces:
Running
Running
import json
import re
from html import escape

import pandas as pd
# Load the raw benchmark rows.  The file is split by hand instead of with the
# csv module: maxsplit=13 caps every row at 14 fields, so any further commas
# stay inside the last field (presumably the free-text "Error" column --
# confirm against the CSV layout).
with open("benchmark_results.csv", "r") as f:
    header = [h.strip() for h in f.readline().strip().split(",")]
    leaderboard_df = [line.strip().split(",", 13) for line in f]

# Load metadata.  The file is opened in a with-block so the handle is closed
# deterministically instead of being left to the garbage collector.
with open('metadata.json', encoding="utf-8") as metadata_file:
    metadata = json.load(metadata_file)
# Also register every entry under the part of its key before the first comma,
# so a lookup by that prefix alone succeeds.  Keys without a comma map to
# themselves and are simply rewritten with the same value.
metadata.update({k.split(",")[0]: v for k, v in metadata.items()})

# Create DataFrame (every cell is still a string at this point).
leaderboard_df = pd.DataFrame(leaderboard_df, columns=header)

# Keep only the two benchmark versions handled by this script and the columns
# used further down.  The explicit copy() makes the result an independent
# frame, so the later column assignments do not write into a slice of the
# unfiltered data (pandas SettingWithCopyWarning).
leaderboard_df = leaderboard_df[
    leaderboard_df["Benchmark Version"].isin(["eq-bench_v2_pl", "eq-bench_pl"])
]
leaderboard_df = leaderboard_df[
    ["Model Path", "Benchmark Score", "Num Questions Parseable", "Error"]
].copy()
def parse_parseable(x):
    """Return the number of parseable questions recorded for one row.

    Args:
        x: Row-like mapping with the keys "Num Questions Parseable" and
            "Error".

    Returns:
        The stored "Num Questions Parseable" value unchanged, unless it is
        the marker 'FAILED'.  For a failed row the count is read from an
        error text that starts with "<N>.0 questions were parseable" and
        returned as the digit string N.  If the error text is missing or has
        any other form, "0" is returned, so the row counts as having no
        parseable answers instead of aborting the whole script.
    """
    if x["Num Questions Parseable"] != 'FAILED':
        return x["Num Questions Parseable"]

    error_text = x["Error"]
    # A short CSV row can leave the error cell as None/NaN; only real text
    # can be matched.
    if not isinstance(error_text, str):
        return "0"

    m = re.match(r'(\d+)\.0 questions were parseable', error_text)
    # re.match returns None when the run failed for an unrelated reason.
    return m.group(1) if m is not None else "0"
# Resolve the parseable-question count row by row; rows marked 'FAILED' get
# their count from the accompanying error text via parse_parseable.
leaderboard_df["Num Questions Parseable"] = leaderboard_df[
    ["Num Questions Parseable", "Error"]
].apply(parse_parseable, axis=1)

# Denominator used when the parseable count is turned into a percentage.
NUMBER_OF_QUESTIONS = 171.0
def fraction_to_percentage(numerator: float, denominator: float) -> float:
    """Return ``numerator / denominator`` expressed as a percentage.

    Raises:
        ZeroDivisionError: If ``denominator`` is zero.
    """
    # Divide first, then scale, so rounding matches (numerator / denominator) * 100.
    ratio = numerator / denominator
    return ratio * 100
# Turn each parseable-question count into a percentage of NUMBER_OF_QUESTIONS.
# The counts are still strings here, hence the float() conversion.
leaderboard_df["Num Questions Parseable"] = leaderboard_df[
    "Num Questions Parseable"
].apply(
    lambda count: fraction_to_percentage(float(count), NUMBER_OF_QUESTIONS)
)
def get_params(model_name):
    """Return the ``metadata`` entry stored for ``model_name``.

    When the model has no entry, its name is printed to stdout and ``None``
    is returned.
    """
    try:
        return metadata[model_name]
    except KeyError:
        # Print the unknown name so models missing from the metadata are visible.
        print(model_name)
        return None
# Attach the metadata entry of every model as the "Params" column.
leaderboard_df["Params"] = leaderboard_df["Model Path"].apply(get_params)

# Failed runs store the text 'FAILED' instead of a score.  Blank those cells
# with mask() rather than replace('FAILED', None): older pandas releases
# ignored an explicit None replacement value and pad-filled instead, which
# silently copied the previous row's score into the failed row.
leaderboard_df["Benchmark Score"] = (
    leaderboard_df["Benchmark Score"]
    .mask(leaderboard_df["Benchmark Score"] == "FAILED")
    .astype(float)
)

# Scale the score by the fraction of questions that could be parsed, so a
# model is not rewarded for answering only part of the benchmark.
leaderboard_df["Benchmark Score"] = leaderboard_df["Benchmark Score"] * (
    leaderboard_df["Num Questions Parseable"].astype(float) / 100
)

# Negative scores are shown as 0; missing (NaN) scores stay missing.
leaderboard_df["Benchmark Score"] = leaderboard_df["Benchmark Score"].clip(lower=0)

# Best score first, ties broken by the parseable percentage.
leaderboard_df = leaderboard_df.sort_values(
    by=["Benchmark Score", "Num Questions Parseable"],
    ascending=[False, False],
)
leaderboard_df = leaderboard_df.rename(
    columns={
        "Model Path": "Model",
        "Num Questions Parseable": "Percentage Questions Parseable",
    }
)
# Generate HTML with DataTables.
#
# This is the static first part of the page: document head, styles, the
# client-side script and the table header.  The string deliberately ends
# inside an open <tbody>; the table rows and the closing tags are appended to
# `html` afterwards.
#
# Client-side behaviour defined here:
#   * $.fn.colorize -- jQuery plugin that shades cells between three theme
#     colours according to their numeric value.  A column whose values are all
#     equal has a zero-width range; it is painted with the middle colour
#     instead of dividing by zero.
#   * Search box -- terms are separated by ';' and matched against the model
#     name with OR logic.  Empty terms (for example after a trailing ';') are
#     ignored, because an empty string is contained in every model name.
#   * Benchmark Score sorting -- non-numeric scores sort as -Infinity.
html = """
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Leaderboard</title>
    <link rel="stylesheet" href="https://cdn.datatables.net/1.11.5/css/jquery.dataTables.min.css">
    <script src="https://code.jquery.com/jquery-3.6.0.min.js"></script>
    <script src="https://cdn.datatables.net/1.11.5/js/jquery.dataTables.min.js"></script>
    <style>
        body {
            font: 90%/1.45em "Helvetica Neue", HelveticaNeue, Verdana, Arial, Helvetica, sans-serif;
            margin: 0;
            padding: 20px;
            color: #333;
            background-color: #fff;
        }
        .numeric-cell {
            text-align: right;
            padding: 8px !important;
        }
    </style>
    <script>
        (function($) {
            $.fn.colorize = function(oOptions) {
                var settings = $.extend({
                    parse: function(e) {
                        return parseFloat(e.html());
                    },
                    min: undefined,
                    max: undefined,
                    readable: true,
                    themes: {
                        "default": {
                            color_min: "#C80000",
                            color_mid: "#FFFFFF",
                            color_max: "#10A54A"
                        }
                    },
                    theme: "default",
                    center: undefined,
                    percent: false
                }, oOptions);

                // Blend two hex colours: ratio 1 yields color1, ratio 0 yields color2.
                function getColor(color1, color2, ratio) {
                    var hex = function(x) {
                        x = x.toString(16);
                        return (x.length == 1) ? '0' + x : x;
                    };
                    color1 = (color1.charAt(0) == "#") ? color1.slice(1) : color1;
                    color2 = (color2.charAt(0) == "#") ? color2.slice(1) : color2;
                    var r = Math.ceil(parseInt(color1.substring(0,2), 16) * ratio + parseInt(color2.substring(0,2), 16) * (1-ratio));
                    var g = Math.ceil(parseInt(color1.substring(2,4), 16) * ratio + parseInt(color2.substring(2,4), 16) * (1-ratio));
                    var b = Math.ceil(parseInt(color1.substring(4,6), 16) * ratio + parseInt(color2.substring(4,6), 16) * (1-ratio));
                    return "#" + (hex(r) + hex(g) + hex(b)).toUpperCase();
                }

                // Choose black or white text depending on the background brightness (YIQ).
                function getContrastYIQ(hexcolor) {
                    var hex = (hexcolor.charAt(0) == "#") ? hexcolor.slice(1) : hexcolor;
                    var r = parseInt(hex.substring(0,2), 16);
                    var g = parseInt(hex.substring(2,4), 16);
                    var b = parseInt(hex.substring(4,6), 16);
                    var yiq = ((r*299)+(g*587)+(b*114))/1000;
                    return (yiq >= 128) ? 'black' : 'white';
                }

                var min = settings.min;
                var max = settings.max;
                // Derive the value range from the selected cells when the caller gave none.
                if (min === undefined || max === undefined) {
                    min = Infinity;
                    max = -Infinity;
                    this.each(function() {
                        var value = parseFloat(settings.parse($(this)));
                        if (!isNaN(value) && isFinite(value)) {
                            min = Math.min(min, value);
                            max = Math.max(max, value);
                        }
                    });
                }

                var center = settings.center !== undefined ? settings.center : (max + min) / 2;
                var adj = Math.max(Math.abs(max - center), Math.abs(center - min));

                this.each(function() {
                    var value = parseFloat(settings.parse($(this)));
                    if (isNaN(value) || !isFinite(value)) return;

                    // adj is 0 when every value in the range is identical; use the
                    // middle colour then instead of computing 0 / 0.
                    var ratio = (adj > 0) ? Math.min(Math.abs((value - center) / adj), 1) : 0;
                    var theme = settings.themes[settings.theme];
                    var color1 = (value < center) ? theme.color_min : theme.color_max;
                    var color2 = theme.color_mid;

                    var color = getColor(color1, color2, ratio);
                    $(this).css('background-color', color);
                    if (settings.readable)
                        $(this).css('color', getContrastYIQ(color));
                });
                return this;
            };
        }(jQuery));

        $(document).ready(function() {
            // Add custom filtering function
            $.fn.dataTable.ext.search.push(function(settings, data, dataIndex) {
                var searchValue = $('.dataTables_filter input').val();
                if (!searchValue) return true;

                // Split search terms by semicolon, trim whitespace and drop empty
                // terms: an empty term would match every row.
                var searchTerms = searchValue.split(';')
                    .map(term => term.trim().toLowerCase())
                    .filter(term => term.length > 0);
                if (searchTerms.length === 0) return true;

                var modelName = data[0].toLowerCase(); // Model name is in first column

                // Return true if ANY search terms are found in the model name (OR logic)
                return searchTerms.some(term => modelName.includes(term));
            });

            // Custom sorting function for benchmark scores
            $.fn.dataTable.ext.type.order['score-pre'] = function(data) {
                var score = parseFloat(data);
                return isNaN(score) ? -Infinity : score;
            };

            // Get min/max values for each numeric column before initializing DataTables
            var columnRanges = {
                1: { min: Infinity, max: -Infinity }, // Params
                2: { min: Infinity, max: -Infinity }, // Benchmark Score
                3: { min: Infinity, max: -Infinity }  // Percentage Questions Parseable
            };

            $('#leaderboard tbody td').each(function() {
                var columnIdx = $(this).index();
                if (columnIdx in columnRanges) {
                    var value = parseFloat($(this).text());
                    if (!isNaN(value) && isFinite(value)) {
                        columnRanges[columnIdx].min = Math.min(columnRanges[columnIdx].min, value);
                        columnRanges[columnIdx].max = Math.max(columnRanges[columnIdx].max, value);
                    }
                }
            });

            var table = $('#leaderboard').DataTable({
                "order": [[2, "desc"]], // Sort by Benchmark Score by default
                "pageLength": 20, // Show 20 results per page
                "lengthMenu": [[10, 20, 50, 100, -1], [10, 20, 50, 100, "All"]], // Update length menu options
                "columnDefs": [
                    {
                        "targets": [1],
                        "className": "numeric-cell"
                    },
                    {
                        "type": "score",
                        "targets": [2], // Apply custom sorting to Benchmark Score column
                        "className": "numeric-cell"
                    },
                    {
                        "targets": [3],
                        "className": "numeric-cell"
                    }
                ],
                "drawCallback": function() {
                    // Apply colorization with pre-calculated ranges
                    $("#leaderboard tbody td:nth-child(2)").colorize({
                        parse: function(e) { return parseFloat($(e).text()); },
                        min: columnRanges[1].min,
                        max: columnRanges[1].max,
                        themes: {
                            "default": {
                                color_min: "#10A54A", // Green for smaller models
                                color_mid: "#FFD700", // Gold/yellow for medium models
                                color_max: "#C80000"  // Red for larger models
                            }
                        }
                    });
                    $("#leaderboard tbody td:nth-child(3)").colorize({
                        parse: function(e) { return parseFloat($(e).text()); },
                        min: columnRanges[2].min,
                        max: columnRanges[2].max,
                        themes: {
                            "default": {
                                color_min: "#C80000", // Red for lower scores
                                color_mid: "#FFD700", // Gold/yellow for medium scores
                                color_max: "#10A54A"  // Green for higher scores
                            }
                        }
                    });
                    $("#leaderboard tbody td:nth-child(4)").colorize({
                        parse: function(e) { return parseFloat($(e).text()); },
                        min: columnRanges[3].min,
                        max: columnRanges[3].max,
                        themes: {
                            "default": {
                                color_min: "#C80000", // Red for lower percentages
                                color_mid: "#FFD700", // Gold/yellow for medium percentages
                                color_max: "#10A54A"  // Green for higher percentages
                            }
                        }
                    });
                },
                // Override the default search behavior
                "search": {
                    "smart": false
                },
                // Update search on input change
                "initComplete": function() {
                    var table = this.api();
                    $('.dataTables_filter input')
                        .off() // Remove default binding
                        .on('input', function() {
                            table.draw();
                        });
                }
            });
        });
    </script>
</head>
<body>
    <h1>Leaderboard</h1>
    <table id="leaderboard" class="display" style="width:100%">
        <thead>
            <tr>
                <th>Model</th>
                <th>Params</th>
                <th>Benchmark Score</th>
                <th>Percentage Questions Parseable</th>
                <th>Error</th>
            </tr>
        </thead>
        <tbody>
"""
# Add rows to the HTML table.
# The text cells come straight from the CSV and metadata files, so they are
# HTML-escaped: a '<' or '&' in a model name or error message must not be
# interpreted as markup.  `escape` is html.escape, imported by name because
# the module-level variable `html` holds the page text.
# All rows are joined in one pass instead of growing the string with += per row.
html += "".join(
    f"""
            <tr>
                <td>{escape(str(row['Model']))}</td>
                <td>{escape(str(row['Params']))}</td>
                <td>{row['Benchmark Score']:.2f}</td>
                <td>{row['Percentage Questions Parseable']:.2f}</td>
                <td>{escape(str(row['Error']))}</td>
            </tr>
"""
    for _, row in leaderboard_df.iterrows()
)

# Close the HTML tags
html += """
        </tbody>
    </table>
</body>
</html>
"""

# Save the HTML to a file.  The page declares <meta charset="UTF-8">, so the
# file is written as UTF-8 explicitly instead of with the platform default.
with open("leaderboard.html", "w", encoding="utf-8") as file:
    file.write(html)

print("HTML leaderboard generated and saved as leaderboard.html")