Ritvik
Updated app
d129378
raw
history blame
7 kB
import logging
from concurrent.futures import ThreadPoolExecutor, as_completed
from datetime import datetime

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import streamlit as st
from huggingface_hub import HfApi
# Set the browser tab title and use the wide page layout for the app.
st.set_page_config(page_title="HF Contributions", layout="wide")
# Single Hugging Face Hub client shared by the fetch helpers defined below.
api = HfApi()
def _author_matches(author, wanted):
    """Return True when a commit author entry names ``wanted`` (lower-cased)."""
    if isinstance(author, str):
        return author.lower() == wanted
    if isinstance(author, dict):
        # Dict-shaped authors carry the login under "user"; ignore non-string values
        # instead of letting one odd entry discard the whole repository.
        user = author.get("user")
        return isinstance(user, str) and user.lower() == wanted
    return False


# Function to fetch commits for a repository (optimized)
def fetch_commits_for_repo(repo_id, repo_type, username, selected_year):
    """Return the dates of ``username``'s commits to one repo in ``selected_year``.

    Args:
        repo_id: Repository id forwarded to the Hub client.
        repo_type: Repository type forwarded to the Hub client
            ("model", "dataset" or "space" in this file).
        username: Author to match; the comparison is case-insensitive.
        selected_year: Year that a commit's ``created_at`` must fall in.

    Returns:
        A list of ``datetime.date`` objects, one per matching commit. Private
        or gated repositories yield ``[]``. Any error while querying the repo
        is logged and also yields ``[]`` so one bad repo cannot abort a scan.
    """
    try:
        wanted = username.lower()

        # Skip private/gated repos upfront
        repo_info = api.repo_info(repo_id=repo_id, repo_type=repo_type)
        if repo_info.private or getattr(repo_info, "gated", False):
            return []

        commit_dates = []
        for commit in api.list_repo_commits(repo_id=repo_id, repo_type=repo_type):
            # Parse the timestamp once and reuse it for both the year filter
            # and the resulting date.
            created = pd.to_datetime(commit.created_at)
            if created.year != selected_year:
                continue
            if any(_author_matches(author, wanted) for author in commit.authors):
                commit_dates.append(created.tz_localize(None).date())
        return commit_dates
    except Exception as exc:
        # Best-effort: inaccessible or errored repos are skipped, but leave a
        # trace so undercounts (e.g. from rate limiting) can be diagnosed.
        logging.getLogger(__name__).warning(
            "Skipping %s %r: %s", repo_type, repo_id, exc
        )
        return []
# Function to get commit events for a user
def get_commit_events(username, kind=None, selected_year=None):
    """Gather commit dates for every repo ``username`` owns.

    Args:
        username: Hub user or organisation whose repos are listed.
        kind: One of "model", "dataset" or "space" to restrict the scan;
            a falsy value scans all three.
        selected_year: Year forwarded to ``fetch_commits_for_repo``.

    Returns:
        A ``(frame, items)`` tuple: ``frame`` is a DataFrame with a single
        "date" column (one row per commit) and ``items`` is a list of
        ``(repo_item, kind)`` pairs for every repo that was listed.
        A failure while processing one kind is reported with ``st.warning``
        and the remaining kinds are still processed.
    """
    # Name of the client method that lists repos of each kind.
    lister_names = (
        ("model", "list_models"),
        ("dataset", "list_datasets"),
        ("space", "list_spaces"),
    )
    collected_dates = []
    typed_items = []

    for repo_kind in ([kind] if kind else ["model", "dataset", "space"]):
        try:
            lister_name = next(
                (name for key, name in lister_names if repo_kind == key), None
            )
            if lister_name is None:
                repos = []
            else:
                repos = list(getattr(api, lister_name)(author=username))

            typed_items.extend((repo, repo_kind) for repo in repos)
            repo_ids = [repo.id for repo in repos]

            # Parallel fetch commits
            with ThreadPoolExecutor(max_workers=10) as pool:
                pending = [
                    pool.submit(
                        fetch_commits_for_repo, repo_id, repo_kind, username, selected_year
                    )
                    for repo_id in repo_ids
                ]
                for finished in as_completed(pending):
                    collected_dates.extend(finished.result())
        except Exception as exc:
            st.warning(f"Error fetching {repo_kind}s for {username}: {str(exc)}")

    frame = pd.DataFrame(collected_dates, columns=["date"])
    return frame, typed_items
def _build_calendar_grid(df, year):
    """Build the weekday-by-week count grid and month tick data for ``year``.

    Returns a ``(pivot, month_positions, month_labels)`` tuple where ``pivot``
    has weekdays (0 = Monday) as rows and calendar-week numbers as columns.
    """
    start = pd.Timestamp(f"{year}-01-01")
    end = pd.Timestamp(f"{year}-12-31")
    all_days = pd.date_range(start=start, end=end)

    # Count commits per day on a derived Series so the caller's frame is not
    # modified; reindexing drops any date outside the year and zero-fills
    # the days without commits.
    daily_counts = pd.to_datetime(df["date"]).value_counts()
    daily_counts = daily_counts.reindex(all_days, fill_value=0)

    grid = pd.DataFrame({"date": all_days, "count": daily_counts.to_numpy()})
    grid["dow"] = grid["date"].dt.dayofweek

    # Shift by the weekday of Jan 1 so every column is a Monday-to-Sunday
    # calendar week; counting weeks from Jan 1 alone would put days in the
    # same column out of chronological order whenever the year does not
    # start on a Monday.
    week_offset = start.dayofweek
    grid["week"] = ((grid["date"] - start).dt.days + week_offset) // 7

    # Cells before Jan 1 / after Dec 31 in the partial edge weeks have no
    # data; render them as zero like any other empty day.
    pivot = grid.pivot(index="dow", columns="week", values="count").fillna(0)

    month_starts = pd.date_range(start, end, freq="MS")
    month_labels = [day.strftime("%b") for day in month_starts]
    month_positions = [
        ((day - start).days + week_offset) // 7 for day in month_starts
    ]
    return pivot, month_positions, month_labels


# Calendar heatmap function
def make_calendar_heatmap(df, title, year, color_palette="Greens"):
    """Render a year-long commit calendar heatmap into the Streamlit page.

    Args:
        df: DataFrame with a "date" column holding one row per commit.
            It is not modified.
        title: Heading for the chart; also used (lower-cased) in the
            "nothing found" message.
        year: Calendar year to lay out.
        color_palette: Colormap name passed to seaborn.

    Returns:
        None. Shows an info message instead of a chart when ``df`` is empty.
    """
    if df.empty:
        st.info(f"No {title.lower()} found for {year}.")
        return

    pivot, month_positions, month_labels = _build_calendar_grid(df, year)

    fig, ax = plt.subplots(figsize=(12, 1.2))
    try:
        sns.heatmap(
            pivot,
            ax=ax,
            cmap=color_palette,
            linewidths=0.5,
            linecolor="white",
            square=True,
            cbar=False,
            yticklabels=["M", "T", "W", "T", "F", "S", "S"],
        )
        ax.set_title(f"{title} ({year})", fontsize=12, pad=10)
        ax.set_xlabel("")
        ax.set_ylabel("")
        ax.set_xticks(month_positions)
        ax.set_xticklabels(month_labels, fontsize=8)
        ax.set_yticklabels(ax.get_yticklabels(), rotation=0, fontsize=8)
        st.pyplot(fig)
    finally:
        # pyplot keeps every figure it creates alive until it is closed;
        # release it so repeated reruns do not accumulate figures.
        plt.close(fig)
# Sidebar: choose the contributor (preset or free-text) and the year to show.
with st.sidebar:
    st.title("👀 Contributor")
    username = st.selectbox(
        "Select or type a username",
        options=["ritvik77", "facebook", "google", "stabilityai", "Salesforce", "tiiuae", "bigscience"],
        index=0
    )
    st.markdown("<div style='text-align: center; margin: 10px 0;'>OR</div>", unsafe_allow_html=True)
    custom = st.text_input("", placeholder="Enter custom username/org")
    # A non-blank custom entry overrides the preset selection.
    if custom.strip():
        username = custom.strip()
    year_options = list(range(datetime.now().year, 2017, -1))
    selected_year = st.selectbox("🗓️ Year", options=year_options)

# Main Content
st.title("🤗 Hugging Face Contributions")
if username:
    kind_specs = [
        ("model", "🧠", "Models"),
        ("dataset", "📦", "Datasets"),
        ("space", "🚀", "Spaces"),
    ]

    # Fetch each repo kind exactly once. The combined view and the per-kind
    # repo totals are derived from these results instead of re-querying the
    # Hub for the same repositories.
    with st.spinner("Fetching commit data..."):
        results_by_kind = {
            kind: get_commit_events(username, kind=kind, selected_year=selected_year)
            for kind, _, _ in kind_specs
        }

    # Concatenate only the non-empty frames; fall back to an empty frame
    # with the expected column when there were no commits at all.
    non_empty = [frame for frame, _ in results_by_kind.values() if not frame.empty]
    if non_empty:
        all_df = pd.concat(non_empty, ignore_index=True)
    else:
        all_df = pd.DataFrame(columns=["date"])

    st.subheader(f"{username}'s Activity in {selected_year}")
    st.metric("Total Commits", len(all_df))
    make_calendar_heatmap(all_df, "All Commits", selected_year)

    # Updated Color Scheme Legend with five shades
    st.markdown("""
<div style='text-align: center; margin-top: -10px; margin-bottom: 20px;'>
<span style='font-size: 12px; margin-right: 10px;'>Less</span>
<span style='display: inline-block; width: 15px; height: 15px; background-color: #f0f7f0; border: 1px solid #ccc;'></span>
<span style='display: inline-block; width: 15px; height: 15px; background-color: #c6e0c6; border: 1px solid #ccc;'></span>
<span style='display: inline-block; width: 15px; height: 15px; background-color: #77b577; border: 1px solid #ccc;'></span>
<span style='display: inline-block; width: 15px; height: 15px; background-color: #2e6b2e; border: 1px solid #ccc;'></span>
<span style='display: inline-block; width: 15px; height: 15px; background-color: #1a3c1a; border: 1px solid #ccc;'></span>
<span style='font-size: 12px; margin-left: 10px;'>More</span>
</div>
""", unsafe_allow_html=True)

    # Metrics and heatmaps for each type
    for col, (kind, emoji, label) in zip(st.columns(3), kind_specs):
        df_kind, kind_items = results_by_kind[kind]
        with col:
            # Every listed repo is returned alongside the commit frame, so the
            # repo total is just the number of items for this kind.
            st.metric(f"{emoji} {label}", len(kind_items))
            st.metric(f"Commits in {selected_year}", len(df_kind))
            make_calendar_heatmap(df_kind, f"{label} Commits", selected_year)