|
|
|
|
|
import streamlit as st
import pandas as pd
import plotly.express as px
import time
import re
import json
import signal
import fnmatch
from contextlib import contextmanager
from huggingface_hub import HfApi, model_info
|
|
|
|
|
st.set_page_config( |
|
page_title="Quantized Model Comparison", |
|
page_icon="📊", |
|
layout="wide", |
|
initial_sidebar_state="expanded" |
|
) |
|
|
|
|
|
@contextmanager
def timeout(time_seconds=60):
    """Raise TimeoutError if the wrapped block runs longer than time_seconds.

    Relies on SIGALRM, so it only works on Unix and in the main thread.
    """
    def signal_handler(signum, frame):
        raise TimeoutError("Timed out!")

    old_handler = signal.signal(signal.SIGALRM, signal_handler)
    signal.alarm(time_seconds)
    try:
        yield
    finally:
        signal.alarm(0)  # cancel any pending alarm
        signal.signal(signal.SIGALRM, old_handler)  # restore the previous handler
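
# A minimal usage sketch (hypothetical; `timeout` is not otherwise called in
# this app): guard a potentially slow Hub request so it cannot hang the UI.
#
#     try:
#         with timeout(30):
#             meta = model_info(repo_id="fbaldassarri/some-model")
#     except TimeoutError:
#         st.warning("Hub request timed out")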
|
|
|
|
|
QUANTIZATION_KEYWORDS = [ |
|
"auto_round", "auto-round", "autoround", |
|
"autogptq", "auto_gptq", "auto-gptq", |
|
"autoawq", "auto_awq", "auto-awq" |
|
] |
|
|
|
|
|
def read_models_ignore_file(file_path=".modelsignore"): |
|
""" |
|
Read the .modelsignore file and return a list of patterns to ignore. |
|
Each line in the file represents a pattern. |
|
""" |
|
ignore_patterns = [] |
|
try: |
|
with open(file_path, 'r') as f: |
|
for line in f: |
|
|
|
line = line.strip() |
|
if line and not line.startswith('#'): |
|
ignore_patterns.append(line) |
|
return ignore_patterns |
|
except FileNotFoundError: |
|
return [] |
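
# Illustrative behavior (hypothetical file contents): a .modelsignore containing
#
#     # skip all mistral models
#     *mistral*
#
# yields ["*mistral*"]; comment lines and blank lines are dropped.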
|
|
|
|
|
def should_ignore_model(model_id, ignore_patterns): |
|
""" |
|
Check if a model ID matches any pattern in the ignore list. |
|
|
|
Supports: |
|
- Exact matches |
|
- Glob patterns with wildcards (e.g., "*mistral*") |
|
""" |
|
if not ignore_patterns: |
|
return False |
|
|
|
for pattern in ignore_patterns: |
|
|
|
if fnmatch.fnmatch(model_id.lower(), pattern.lower()): |
|
return True |
|
|
|
return False |
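
# For example, with hypothetical model IDs:
#     should_ignore_model("fbaldassarri/Mistral-7B-autoawq", ["*mistral*"])    # True
#     should_ignore_model("fbaldassarri/gemma-2b-autoround", ["*mistral*"])    # False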
|
|
|
|
|
@st.cache_data(ttl=3600) |
|
def get_user_models(username, ignore_patterns=None):
    """Return (kept_model_ids, ignored_model_ids, total_model_count) for a user."""
    api = HfApi()
    try:
        all_models = list(api.list_models(author=username))
        model_ids = [model.id for model in all_models]
|
|
|
|
|
if ignore_patterns: |
|
filtered_models = [] |
|
ignored_models = [] |
|
|
|
for model_id in model_ids: |
|
if should_ignore_model(model_id, ignore_patterns): |
|
ignored_models.append(model_id) |
|
else: |
|
filtered_models.append(model_id) |
|
|
|
return filtered_models, ignored_models, len(all_models) |
|
|
|
return model_ids, [], len(all_models) |
|
except Exception as e: |
|
st.error(f"Error fetching models: {str(e)}") |
|
return [], [], 0 |
|
|
|
|
|
@st.cache_data(ttl=3600) |
|
def get_model_metadata(model_id): |
|
    try:
        return model_info(repo_id=model_id)
|
except Exception as e: |
|
st.warning(f"Failed to fetch metadata for {model_id}: {str(e)}") |
|
return None |
|
|
|
|
|
def model_matches_keywords(model_id): |
|
model_name = model_id.lower() |
|
return any(keyword.lower() in model_name for keyword in QUANTIZATION_KEYWORDS) |
|
|
|
|
|
def extract_quantization_method(model_id): |
|
model_name = model_id.lower() |
|
|
|
if any(kw in model_name for kw in ["auto_round", "auto-round", "autoround"]): |
|
return "Intel AutoRound" |
|
elif any(kw in model_name for kw in ["autogptq", "auto_gptq", "auto-gptq"]): |
|
return "AutoGPTQ" |
|
elif any(kw in model_name for kw in ["autoawq", "auto_awq", "auto-awq"]): |
|
return "AutoAWQ" |
|
else: |
|
return "Unknown" |
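
# E.g. (hypothetical ID) "user/TinyLlama-1.1B-autoawq-int4" -> "AutoAWQ".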
|
|
|
|
|
def extract_model_metadata(model_id, repo_metadata=None): |
|
model_name = model_id.split("/")[-1] |
|
|
|
|
|
quant_method = extract_quantization_method(model_id) |
|
|
|
|
|
precision = "Unknown" |
|
if "int8" in model_name.lower(): |
|
precision = "INT8" |
|
elif "int4" in model_name.lower(): |
|
precision = "INT4" |
|
elif "fp16" in model_name.lower(): |
|
precision = "FP16" |
|
elif "fp32" in model_name.lower(): |
|
precision = "FP32" |
|
|
|
|
|
group_size = None |
|
gs_match = re.search(r'gs(\d+)', model_name.lower()) |
|
if gs_match: |
|
group_size = int(gs_match.group(1)) |
|
|
|
|
|
size_patterns = [r'(\d+(\.\d+)?)b', r'(\d+(\.\d+)?)m'] |
|
model_size = None |
|
|
|
for pattern in size_patterns: |
|
match = re.search(pattern, model_name.lower()) |
|
if match: |
|
size = float(match.group(1)) |
|
unit = match.group(0)[-1].lower() |
|
if unit == 'b': |
|
model_size = size |
|
elif unit == 'm': |
|
model_size = size / 1000 |
|
break |
|
|
|
|
|
    # [-_]? inside the method names also catches hyphenated forms like "auto-round"
    base_model = re.sub(r'[-_]?(auto[-_]?round|auto[-_]?gptq|auto[-_]?awq|intel)[-_]?', '', model_name, flags=re.IGNORECASE)
|
base_model = re.sub(r'[-_]?(int4|int8|fp16|fp32)[-_]?', '', base_model, flags=re.IGNORECASE) |
|
base_model = re.sub(r'[-_]?gs\d+[-_]?', '', base_model, flags=re.IGNORECASE) |
|
|
|
|
|
downloads = None |
|
likes = None |
|
last_modified = None |
|
library_name = None |
|
model_tags = [] |
|
|
|
if repo_metadata: |
|
downloads = repo_metadata.downloads |
|
likes = repo_metadata.likes |
|
last_modified = repo_metadata.last_modified |
|
|
|
|
|
if hasattr(repo_metadata, "tags") and repo_metadata.tags: |
|
model_tags = repo_metadata.tags |
|
|
|
library_mapping = { |
|
"autoawq": "AutoAWQ", |
|
"gptq": "AutoGPTQ", |
|
"autogptq": "AutoGPTQ", |
|
"auto-gptq": "AutoGPTQ", |
|
"awq": "AutoAWQ", |
|
"quantization": "Quantized", |
|
"quantized": "Quantized", |
|
"intel": "Intel", |
|
"auto-round": "Intel AutoRound", |
|
"autoround": "Intel AutoRound" |
|
} |
|
|
|
for tag in model_tags: |
|
if tag.lower() in library_mapping: |
|
library_name = library_mapping[tag.lower()] |
|
break |
|
|
|
|
|
if not library_name: |
|
library_name = quant_method |
|
|
|
return { |
|
"model_name": model_name, |
|
"base_model": base_model, |
|
"quant_method": quant_method, |
|
"precision": precision, |
|
"group_size": group_size, |
|
"model_size": model_size, |
|
"downloads": downloads, |
|
"likes": likes, |
|
"last_modified": last_modified, |
|
"library": library_name, |
|
"tags": model_tags |
|
} |
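
# A sketch of the parsing above on a hypothetical ID:
#     extract_model_metadata("user/TinyLlama-1.1B-autoround-int4-gs128")
# would yield roughly:
#     {"base_model": "TinyLlama-1.1B", "quant_method": "Intel AutoRound",
#      "precision": "INT4", "group_size": 128, "model_size": 1.1, ...}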
|
|
|
|
|
@st.cache_data(ttl=3600) |
|
def get_model_stats(model_id): |
|
try: |
|
api = HfApi() |
|
sibling_files = api.list_repo_files(repo_id=model_id) |
|
|
|
|
|
        config_file = None
        for file in sibling_files:
            # Match config.json itself (possibly nested), but not files such as
            # generation_config.json that merely end with "config.json".
            if file == "config.json" or file.endswith("/config.json"):
                config_file = file
                break
|
|
|
if config_file: |
|
|
|
            # hf_hub_download returns a local cache path, not the file contents
            config_path = api.hf_hub_download(repo_id=model_id, filename=config_file)

            with open(config_path, 'r') as f:
                config = json.load(f)
|
|
|
|
|
stats = {} |
|
|
|
|
|
if "hidden_size" in config: |
|
stats["hidden_size"] = config["hidden_size"] |
|
|
|
|
|
if "vocab_size" in config: |
|
stats["vocab_size"] = config["vocab_size"] |
|
|
|
|
|
for key in ["num_hidden_layers", "n_layer", "num_layers"]: |
|
if key in config: |
|
stats["num_layers"] = config[key] |
|
break |
|
|
|
|
|
if "num_attention_heads" in config: |
|
stats["num_attention_heads"] = config["num_attention_heads"] |
|
|
|
|
|
for key in ["max_position_embeddings", "n_positions", "max_seq_len"]: |
|
if key in config: |
|
stats["max_seq_len"] = config[key] |
|
break |
|
|
|
return stats |
|
|
|
return {} |
|
except Exception as e: |
|
st.warning(f"Failed to fetch stats for {model_id}: {str(e)}") |
|
return {} |
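
# A typical result shape (illustrative values, not from any real config):
#     {"hidden_size": 2048, "vocab_size": 32000, "num_layers": 22,
#      "num_attention_heads": 32, "max_seq_len": 4096}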
|
|
|
|
|
@st.cache_data(ttl=3600)
def estimate_model_size_from_files(model_id):
|
try: |
|
api = HfApi() |
|
sibling_files = list(api.list_repo_files(repo_id=model_id)) |
|
|
|
|
|
model_files = [f for f in sibling_files if f.endswith('.bin') or f.endswith('.safetensors')] |
|
|
|
        # get_paths_info fetches size metadata for all files in one batched call
        total_size = 0
        for file_info in api.get_paths_info(repo_id=model_id, paths=model_files):
            size = getattr(file_info, "size", None)
            if size:
                total_size += size

        size_gb = total_size / (1024 ** 3)
        return size_gb
|
except Exception as e: |
|
st.warning(f"Failed to estimate size for {model_id}: {str(e)}") |
|
return None |
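
# An alternative sketch: a single model_info call with files_metadata=True asks
# the Hub to include per-file sizes directly (assumes .bin/.safetensors weights):
#
#     meta = HfApi().model_info(repo_id=model_id, files_metadata=True)
#     total = sum((s.size or 0) for s in meta.siblings
#                 if s.rfilename.endswith((".bin", ".safetensors")))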
|
|
|
|
|
def main(): |
|
st.title("🔍 Quantized Model Comparison Tool") |
|
st.write("Compare Intel AutoRound, AutoGPTQ, and AutoAWQ models") |
|
|
|
|
|
ignore_patterns = read_models_ignore_file() |
|
|
|
|
|
st.sidebar.header("Configuration") |
|
username = st.sidebar.text_input("HuggingFace Username", "fbaldassarri") |
|
|
|
|
|
    if ignore_patterns:
        st.sidebar.info(f".modelsignore found: {len(ignore_patterns)} ignore patterns loaded")
        with st.sidebar.expander("Show ignored patterns"):
            for pattern in ignore_patterns:
                st.code(pattern)
|
else: |
|
st.sidebar.info("No .modelsignore file found. All models will be included.") |
|
|
|
|
|
with st.spinner("Fetching models..."): |
|
all_model_ids, ignored_models, total_models = get_user_models(username, ignore_patterns) |
|
|
|
|
|
if ignored_models: |
|
st.sidebar.warning(f"{len(ignored_models)} models ignored based on .modelsignore patterns") |
|
        with st.sidebar.expander("Show ignored models"):
            for ignored in ignored_models:
                st.text(ignored)
|
|
|
|
|
quantized_model_ids = [model_id for model_id in all_model_ids if model_matches_keywords(model_id)] |
|
|
|
st.sidebar.write(f"Found {len(quantized_model_ids)} quantized models out of {total_models} total models") |
|
|
|
|
|
quant_methods = ["Intel AutoRound", "AutoGPTQ", "AutoAWQ"] |
|
selected_quant_methods = st.sidebar.multiselect( |
|
"Filter by quantization method", |
|
options=quant_methods, |
|
default=quant_methods |
|
) |
|
|
|
|
|
additional_filter = st.sidebar.text_input("Additional model name filter", "") |
|
|
|
|
|
filtered_models = [] |
|
for model_id in quantized_model_ids: |
|
quant_method = extract_quantization_method(model_id) |
|
if quant_method in selected_quant_methods: |
|
            if not additional_filter or additional_filter.lower() in model_id.lower():
|
filtered_models.append(model_id) |
|
|
|
|
|
model_groups = {} |
|
for model_id in filtered_models: |
|
metadata = extract_model_metadata(model_id) |
|
base_model = metadata["base_model"] |
|
if base_model not in model_groups: |
|
model_groups[base_model] = [] |
|
model_groups[base_model].append(model_id) |
|
|
|
|
|
base_model_options = list(model_groups.keys()) |
|
base_model_options.sort() |
|
|
|
selected_base_model = st.sidebar.selectbox( |
|
"Select base model to compare", |
|
options=["All"] + base_model_options |
|
) |
|
|
|
|
|
if selected_base_model == "All": |
|
model_selection_options = filtered_models |
|
else: |
|
model_selection_options = model_groups[selected_base_model] |
|
|
|
|
|
    # Keep the slider bounds valid even when few quantized models are found
    slider_max = max(2, len(quantized_model_ids))
    max_models_comparison = st.sidebar.slider("Maximum models to compare", 2, slider_max, min(5, slider_max))
    default_models = model_selection_options[:min(max_models_comparison, len(model_selection_options))]
|
|
|
selected_models = st.sidebar.multiselect( |
|
"Select models to compare", |
|
options=model_selection_options, |
|
default=default_models |
|
) |
|
|
|
|
|
if len(selected_models) > max_models_comparison: |
|
st.warning(f"⚠️ Limited to {max_models_comparison} models for comparison (CPU constraints)") |
|
selected_models = selected_models[:max_models_comparison] |
|
|
|
|
|
st.sidebar.header("Comparison Method") |
|
|
|
compare_method = st.sidebar.radio( |
|
"Choose comparison method", |
|
["Metadata Comparison Only", "Metadata + Estimated Size"] |
|
) |
|
|
|
    if st.button("Run Comparison") and selected_models:
        # Record that a comparison has run so the intro text below is hidden
        st.session_state["comparison_run"] = True

        progress_bar = st.progress(0)
        status_text = st.empty()
|
|
|
results = [] |
|
|
|
|
|
for i, model_id in enumerate(selected_models): |
|
status_text.text(f"Analyzing {model_id} ({i+1}/{len(selected_models)})") |
|
|
|
|
|
repo_meta = get_model_metadata(model_id) |
|
|
|
|
|
metadata = extract_model_metadata(model_id, repo_meta) |
|
model_result = metadata.copy() |
|
|
|
|
|
model_stats = get_model_stats(model_id) |
|
model_result.update(model_stats) |
|
|
|
|
|
if compare_method == "Metadata + Estimated Size": |
|
with st.spinner(f"Estimating size for {model_id}..."): |
|
try: |
|
estimated_size = estimate_model_size_from_files(model_id) |
|
model_result["estimated_size_gb"] = estimated_size |
|
except Exception as e: |
|
st.warning(f"Size estimation failed for {model_id}: {str(e)}") |
|
|
|
|
|
results.append(model_result) |
|
|
|
|
|
progress_bar.progress((i + 1) / len(selected_models)) |
|
|
|
|
|
progress_bar.empty() |
|
status_text.empty() |
|
|
|
|
|
if results: |
|
|
|
results_df = pd.DataFrame(results) |
|
|
|
|
|
if "last_modified" in results_df.columns: |
|
|
|
results_df["last_modified"] = pd.to_datetime(results_df["last_modified"]) |
|
|
|
|
|
|
|
now_utc = pd.Timestamp.now(tz='UTC') |
|
|
|
|
|
if results_df["last_modified"].dt.tz is None: |
|
|
|
results_df["last_modified"] = results_df["last_modified"].dt.tz_localize('UTC') |
|
|
|
|
|
results_df["days_since_update"] = (now_utc - results_df["last_modified"]).dt.days |
|
|
|
|
|
if "quant_method" in results_df.columns and "model_name" in results_df.columns: |
|
results_df = results_df.sort_values(["quant_method", "model_name"]) |
|
|
|
|
|
results_tabs = st.tabs(["Model Comparison", "Model Details", "Visualizations"]) |
|
|
|
with results_tabs[0]: |
|
st.subheader("Model Comparison") |
|
|
|
|
|
basic_cols = ["model_name", "quant_method", "precision", "group_size"] |
|
|
|
size_cols = [] |
|
if "model_size" in results_df.columns: |
|
size_cols.append("model_size") |
|
if "estimated_size_gb" in results_df.columns: |
|
size_cols.append("estimated_size_gb") |
|
|
|
arch_cols = [] |
|
for col in ["num_layers", "hidden_size", "num_attention_heads", "max_seq_len"]: |
|
if col in results_df.columns: |
|
arch_cols.append(col) |
|
|
|
stats_cols = [] |
|
for col in ["downloads", "likes", "days_since_update"]: |
|
if col in results_df.columns: |
|
stats_cols.append(col) |
|
|
|
|
|
display_cols = basic_cols + size_cols + arch_cols + stats_cols |
|
display_df = results_df[display_cols].copy() |
|
|
|
|
|
if "estimated_size_gb" in display_df.columns: |
|
display_df["estimated_size_gb"] = display_df["estimated_size_gb"].apply( |
|
lambda x: f"{x:.2f} GB" if pd.notna(x) else "Unknown" |
|
) |
|
|
|
if "model_size" in display_df.columns: |
|
display_df["model_size"] = display_df["model_size"].apply( |
|
lambda x: f"{x:.2f}B" if pd.notna(x) else "Unknown" |
|
) |
|
|
|
|
|
st.dataframe(display_df) |
|
|
|
with results_tabs[1]: |
|
st.subheader("Detailed Model Information") |
|
|
|
|
|
model_tabs = st.tabs([m.split("/")[-1] for m in selected_models]) |
|
|
|
for i, model_id in enumerate(selected_models): |
|
with model_tabs[i]: |
|
|
|
                        matching = results_df[results_df["model_name"] == model_id.split("/")[-1]]
                        if matching.empty:
                            st.warning(f"No results available for {model_id}")
                            continue
                        model_row = matching.iloc[0]
|
|
|
|
|
col1, col2 = st.columns(2) |
|
|
|
with col1: |
|
st.markdown("#### Model Information") |
|
st.markdown(f"**Repository:** {model_id}") |
|
st.markdown(f"**Base Model:** {model_row.get('base_model', 'Unknown')}") |
|
st.markdown(f"**Quantization:** {model_row.get('quant_method', 'Unknown')}") |
|
st.markdown(f"**Precision:** {model_row.get('precision', 'Unknown')}") |
|
|
|
if "group_size" in model_row and pd.notna(model_row["group_size"]): |
|
st.markdown(f"**Group Size:** {int(model_row['group_size'])}") |
|
|
|
if "estimated_size_gb" in model_row and pd.notna(model_row["estimated_size_gb"]): |
|
st.markdown(f"**Model Size:** {model_row['estimated_size_gb']:.2f} GB") |
|
|
|
with col2: |
|
st.markdown("#### Architecture Details") |
|
|
|
for col in ["hidden_size", "num_layers", "num_attention_heads", "max_seq_len", "vocab_size"]: |
|
if col in model_row and pd.notna(model_row[col]): |
|
st.markdown(f"**{col.replace('_', ' ').title()}:** {int(model_row[col])}") |
|
|
|
|
|
st.markdown("#### Repository Statistics") |
|
stat_cols = st.columns(3) |
|
|
|
with stat_cols[0]: |
|
if "downloads" in model_row and pd.notna(model_row["downloads"]): |
|
st.metric("Downloads", f"{int(model_row['downloads']):,}") |
|
|
|
with stat_cols[1]: |
|
if "likes" in model_row and pd.notna(model_row["likes"]): |
|
st.metric("Likes", f"{int(model_row['likes']):,}") |
|
|
|
with stat_cols[2]: |
|
if "days_since_update" in model_row and pd.notna(model_row["days_since_update"]): |
|
st.metric("Days Since Update", f"{int(model_row['days_since_update'])}") |
|
|
|
|
|
if "tags" in model_row and model_row["tags"]: |
|
st.markdown("#### Model Tags") |
|
tags_html = " ".join([f"<span style='background-color: #eee; padding: 0.2rem 0.5rem; border-radius: 0.5rem; margin-right: 0.5rem;'>{tag}</span>" for tag in model_row["tags"]]) |
|
st.markdown(tags_html, unsafe_allow_html=True) |
|
|
|
|
|
st.markdown(f"[View on HuggingFace 🤗]({'https://huggingface.co/' + model_id})") |
|
|
|
with results_tabs[2]: |
|
st.subheader("Visualizations") |
|
|
|
viz_tabs = st.tabs(["Quantization Methods", "Model Architecture", "Repository Stats"]) |
|
|
|
with viz_tabs[0]: |
|
|
|
if "quant_method" in results_df.columns: |
|
method_counts = results_df["quant_method"].value_counts().reset_index() |
|
method_counts.columns = ["Method", "Count"] |
|
|
|
fig = px.pie( |
|
method_counts, |
|
names="Method", |
|
values="Count", |
|
title="Distribution of Quantization Methods", |
|
color="Method", |
|
color_discrete_map={ |
|
"Intel AutoRound": "#0071c5", |
|
"AutoGPTQ": "#ff4b4b", |
|
"AutoAWQ": "#1e88e5" |
|
} |
|
) |
|
st.plotly_chart(fig, use_container_width=True) |
|
|
|
|
|
if "precision" in results_df.columns: |
|
precision_counts = results_df["precision"].value_counts().reset_index() |
|
precision_counts.columns = ["Precision", "Count"] |
|
|
|
fig = px.bar( |
|
precision_counts, |
|
x="Precision", |
|
y="Count", |
|
title="Distribution of Precision Formats", |
|
color="Precision" |
|
) |
|
st.plotly_chart(fig, use_container_width=True) |
|
|
|
|
|
if "group_size" in results_df.columns and results_df["group_size"].notna().any(): |
|
valid_gs_data = results_df[results_df["group_size"].notna()] |
|
gs_counts = valid_gs_data["group_size"].value_counts().reset_index() |
|
gs_counts.columns = ["Group Size", "Count"] |
|
|
|
fig = px.bar( |
|
gs_counts, |
|
x="Group Size", |
|
y="Count", |
|
title="Distribution of Group Sizes", |
|
color="Group Size" |
|
) |
|
st.plotly_chart(fig, use_container_width=True) |
|
|
|
with viz_tabs[1]: |
|
|
|
if "estimated_size_gb" in results_df.columns and results_df["estimated_size_gb"].notna().any(): |
|
valid_size_data = results_df[results_df["estimated_size_gb"].notna()].sort_values("estimated_size_gb") |
|
|
|
fig = px.bar( |
|
valid_size_data, |
|
x="model_name", |
|
y="estimated_size_gb", |
|
color="quant_method", |
|
title="Model Size Comparison (GB)", |
|
labels={"estimated_size_gb": "Size (GB)", "model_name": "Model", "quant_method": "Method"} |
|
) |
|
fig.update_layout(xaxis_tickangle=-45) |
|
st.plotly_chart(fig, use_container_width=True) |
|
|
|
|
|
for arch_col in ["num_layers", "hidden_size", "num_attention_heads"]: |
|
if arch_col in results_df.columns and results_df[arch_col].notna().any(): |
|
valid_data = results_df[results_df[arch_col].notna()].sort_values(arch_col) |
|
|
|
fig = px.bar( |
|
valid_data, |
|
x="model_name", |
|
y=arch_col, |
|
color="quant_method", |
|
title=f"{arch_col.replace('_', ' ').title()} Comparison", |
|
labels={arch_col: arch_col.replace('_', ' ').title(), "model_name": "Model", "quant_method": "Method"} |
|
) |
|
fig.update_layout(xaxis_tickangle=-45) |
|
st.plotly_chart(fig, use_container_width=True) |
|
|
|
with viz_tabs[2]: |
|
|
|
if "downloads" in results_df.columns and results_df["downloads"].notna().any(): |
|
valid_data = results_df[results_df["downloads"].notna()].sort_values("downloads", ascending=False) |
|
|
|
fig = px.bar( |
|
valid_data, |
|
x="model_name", |
|
y="downloads", |
|
color="quant_method", |
|
title="Downloads Comparison", |
|
labels={"downloads": "Downloads", "model_name": "Model", "quant_method": "Method"} |
|
) |
|
fig.update_layout(xaxis_tickangle=-45) |
|
st.plotly_chart(fig, use_container_width=True) |
|
|
|
|
|
if "likes" in results_df.columns and results_df["likes"].notna().any(): |
|
valid_data = results_df[results_df["likes"].notna()].sort_values("likes", ascending=False) |
|
|
|
fig = px.bar( |
|
valid_data, |
|
x="model_name", |
|
y="likes", |
|
color="quant_method", |
|
title="Likes Comparison", |
|
labels={"likes": "Likes", "model_name": "Model", "quant_method": "Method"} |
|
) |
|
fig.update_layout(xaxis_tickangle=-45) |
|
st.plotly_chart(fig, use_container_width=True) |
|
|
|
|
|
if "days_since_update" in results_df.columns and results_df["days_since_update"].notna().any(): |
|
valid_data = results_df[results_df["days_since_update"].notna()].sort_values("days_since_update") |
|
|
|
fig = px.bar( |
|
valid_data, |
|
x="model_name", |
|
y="days_since_update", |
|
color="quant_method", |
|
title="Days Since Last Update", |
|
labels={"days_since_update": "Days", "model_name": "Model", "quant_method": "Method"} |
|
) |
|
fig.update_layout(xaxis_tickangle=-45) |
|
st.plotly_chart(fig, use_container_width=True) |
|
|
|
|
|
st.subheader("Export Results") |
|
|
|
|
|
export_df = results_df.copy() |
|
if "last_modified" in export_df.columns: |
|
|
|
export_df["last_modified"] = export_df["last_modified"].dt.strftime('%Y-%m-%d %H:%M:%S') |
|
|
|
csv_data = export_df.to_csv(index=False) |
|
|
|
st.download_button( |
|
"Download Results as CSV", |
|
data=csv_data, |
|
file_name=f"quantized_model_comparison_{username}_{time.strftime('%Y%m%d_%H%M')}.csv", |
|
mime="text/csv" |
|
) |
|
else: |
|
st.warning("No results were obtained. Please check for errors and try again.") |
|
|
|
|
|
if not st.session_state.get('comparison_run', False): |
|
st.info(""" |
|
## CPU-Optimized Model Comparison |
|
|
|
This tool compares your quantized models without requiring GPU resources, so it runs comfortably on a free-tier Hugging Face Space.
|
|
|
### Features: |
|
|
|
- **Metadata Analysis**: Compare model architectures without loading models |
|
- **Repository Stats**: View downloads, likes, and update frequency |
|
- **Visualization**: Compare models across multiple dimensions |
|
- **Filtering**: Focus on specific quantization methods or model families |
|
- **Model Ignoring**: Use .modelsignore file to exclude specific models |
|
|
|
### Supported Quantization Methods: |
|
|
|
- **Intel AutoRound**: Intel's quantization solution |
|
- **AutoGPTQ**: Automatic GPTQ quantization |
|
- **AutoAWQ**: Activation-aware weight quantization |
|
|
|
### Instructions: |
|
|
|
1. Create a .modelsignore file to exclude models (optional) |
|
2. Select models using the sidebar filters |
|
3. Click "Run Comparison" to analyze without loading full models |
|
4. View results in the tabs and charts |
|
5. Download results as CSV for further analysis |
|
|
|
### .modelsignore Format: |
|
|
|
Add one pattern per line to ignore specific models: |
|
```
# Comments must be on their own line, starting with #

# Ignore all llama-2-7b models
fbaldassarri/llama-2-7b-*

# Ignore anything with "mistral" in the name
*mistral*

# Ignore a specific model
fbaldassarri/exact-model-name
```
|
""") |
|
|
|
|
|
with st.expander("How to use .modelsignore file"): |
|
st.markdown(""" |
|
### .modelsignore File Format |
|
|
|
Create a file named `.modelsignore` in the same directory as app.py. Each line in this file represents a pattern for models to exclude from comparison. |
|
|
|
#### Pattern Format: |
|
- **Exact match**: `fbaldassarri/model-name` |
|
- **Wildcard match**: `*keyword*` (matches any model with "keyword" in the name) |
|
- **Prefix match**: `fbaldassarri/prefix*` (matches models starting with "prefix") |
|
- **Suffix match**: `*suffix` (matches models ending with "suffix") |
|
|
|
#### Example .modelsignore file: |
|
``` |
|
# Comments start with # |
|
|
|
# Ignore specific models |
|
fbaldassarri/llama-2-7b-auto-gptq |
|
|
|
# Ignore all models containing "phi" and "3b" |
|
*phi*3b* |
|
|
|
# Ignore all models starting with "gemma-" |
|
fbaldassarri/gemma-* |
|
|
|
# Ignore all mistral models |
|
*mistral* |
|
``` |
|
|
|
The tool will read this file at startup and filter out any matching models before analysis. |
|
""") |
|
|
|
if __name__ == "__main__": |
|
main() |
|
|