"""Streamlit app showing a user's Hugging Face Hub commits as calendar heatmaps."""

from concurrent.futures import ThreadPoolExecutor, as_completed
from datetime import datetime

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import streamlit as st
from huggingface_hub import HfApi

st.set_page_config(page_title="HF Contributions", layout="wide")
api = HfApi()


def _is_authored_by(authors, username):
    """Return True if *username* matches any entry of *authors*, ignoring case.

    Entries may be plain strings or dicts carrying a ``"user"`` key; any other
    shape is ignored.
    """
    wanted = username.lower()
    for author in authors:
        if isinstance(author, str) and author.lower() == wanted:
            return True
        if (
            isinstance(author, dict)
            and "user" in author
            and author["user"].lower() == wanted
        ):
            return True
    return False


def fetch_commits_for_repo(repo_id, repo_type, username, selected_year):
    """Return the dates of *username*'s commits in one repository.

    Args:
        repo_id: Repository identifier passed to the Hub API.
        repo_type: Repository type passed to the Hub API.
        username: Author to match against each commit's ``authors``.
        selected_year: Only commits created in this year are kept; ``None``
            disables the year filter.

    Returns:
        A list of ``datetime.date`` objects (timezone dropped), one per
        matching commit. Private or gated repositories, and any repository
        whose lookup raises, yield an empty list.
    """
    try:
        # Skip private/gated repos upfront.
        repo_info = api.repo_info(repo_id=repo_id, repo_type=repo_type)
        if repo_info.private or getattr(repo_info, "gated", False):
            return []

        commit_dates = []
        for commit in api.list_repo_commits(repo_id=repo_id, repo_type=repo_type):
            if not _is_authored_by(commit.authors, username):
                continue
            created = pd.to_datetime(commit.created_at)
            # A missing year means "no filter" rather than "match nothing".
            if selected_year is not None and created.year != selected_year:
                continue
            commit_dates.append(created.tz_localize(None).date())
        return commit_dates
    except Exception:
        # Deliberate best-effort: inaccessible or errored repos are skipped.
        return []


def get_commit_events(username, kind=None, selected_year=None):
    """Collect commit dates and repository listings for a user.

    Args:
        username: Hub user or organisation whose repositories are listed.
        kind: One of ``"model"``, ``"dataset"``, ``"space"``; a falsy value
            means all three. Any other value lists nothing.
        selected_year: Forwarded to ``fetch_commits_for_repo``.

    Returns:
        A ``(DataFrame, items_with_type)`` tuple. The DataFrame has a single
        ``"date"`` column with one row per commit; ``items_with_type`` is a
        list of ``(item, kind)`` pairs for every listed repository.
    """
    listers = {
        "model": api.list_models,
        "dataset": api.list_datasets,
        "space": api.list_spaces,
    }
    commit_dates = []
    items_with_type = []
    kinds = [kind] if kind else ["model", "dataset", "space"]

    for k in kinds:
        try:
            lister = listers.get(k)
            items = list(lister(author=username)) if lister else []
            items_with_type.extend((item, k) for item in items)

            # Fetch commits for all repositories of this kind in parallel.
            with ThreadPoolExecutor(max_workers=10) as executor:
                futures = [
                    executor.submit(
                        fetch_commits_for_repo, item.id, k, username, selected_year
                    )
                    for item in items
                ]
                for future in as_completed(futures):
                    commit_dates.extend(future.result())
        except Exception as e:
            st.warning(f"Error fetching {k}s for {username}: {str(e)}")

    return pd.DataFrame(commit_dates, columns=["date"]), items_with_type


def make_calendar_heatmap(df, title, year, color_palette="Greens"):
    """Render a weekday-by-week heatmap of commit counts for one year.

    Args:
        df: DataFrame with a ``"date"`` column, one row per commit. It is not
            modified.
        title: Heading for the chart; also used in the "nothing found" notice.
        year: Calendar year to draw.
        color_palette: Colormap name handed to seaborn.

    Shows an info message instead of a chart when *df* is empty.
    """
    if df.empty:
        st.info(f"No {title.lower()} found for {year}.")
        return

    # Count commits per day on a copy so the caller's frame is untouched.
    daily = df.assign(count=1).groupby("date")["count"].sum()
    daily.index = pd.to_datetime(daily.index)

    start = pd.Timestamp(f"{year}-01-01")
    end = pd.Timestamp(f"{year}-12-31")
    all_days = pd.date_range(start=start, end=end)

    # reindex fills missing days with 0 and drops dates outside the year.
    heatmap_data = pd.DataFrame({"count": daily.reindex(all_days, fill_value=0)})

    # Shift by Jan 1's weekday so each column is a Monday-to-Sunday week,
    # matching the Monday-based "dow" rows.
    week_offset = start.dayofweek
    heatmap_data["dow"] = heatmap_data.index.dayofweek
    heatmap_data["week"] = ((heatmap_data.index - start).days + week_offset) // 7

    # Cells before Jan 1 / after Dec 31 in the edge weeks have no data -> 0.
    pivot = heatmap_data.pivot(index="dow", columns="week", values="count").fillna(0)

    month_starts = pd.date_range(start, end, freq="MS")
    month_labels = [d.strftime("%b") for d in month_starts]
    month_positions = [((d - start).days + week_offset) // 7 for d in month_starts]

    fig, ax = plt.subplots(figsize=(12, 1.2))
    sns.heatmap(
        pivot,
        ax=ax,
        cmap=color_palette,
        linewidths=0.5,
        linecolor="white",
        square=True,
        cbar=False,
        yticklabels=["M", "T", "W", "T", "F", "S", "S"],
    )
    ax.set_title(f"{title} ({year})", fontsize=12, pad=10)
    ax.set_xlabel("")
    ax.set_ylabel("")
    ax.set_xticks(month_positions)
    ax.set_xticklabels(month_labels, fontsize=8)
    ax.set_yticklabels(ax.get_yticklabels(), rotation=0, fontsize=8)
    st.pyplot(fig)
    # Release the figure; otherwise pyplot keeps one per rerun.
    plt.close(fig)


# Sidebar
with st.sidebar:
    st.title("👤 Contributor")
    username = st.selectbox(
        "Select or type a username",
        options=[
            "ritvik77",
            "facebook",
            "google",
            "stabilityai",
            "Salesforce",
            "tiiuae",
            "bigscience",
        ],
        index=0,
    )
    st.markdown("\nOR\n", unsafe_allow_html=True)
    # Non-empty label hidden from view instead of an empty label.
    custom = st.text_input(
        "Custom username",
        placeholder="Enter custom username/org",
        label_visibility="collapsed",
    )
    if custom.strip():
        username = custom.strip()

    year_options = list(range(datetime.now().year, 2017, -1))
    selected_year = st.selectbox("🗓️ Year", options=year_options)

# Main Content
st.title("🤗 Hugging Face Contributions")
if username:
    sections = [
        ("model", "🧠", "Models"),
        ("dataset", "📦", "Datasets"),
        ("space", "🚀", "Spaces"),
    ]

    # Fetch each kind once and reuse it for both the combined and per-kind views.
    with st.spinner("Fetching commit data..."):
        per_kind = {
            kind: get_commit_events(username, kind=kind, selected_year=selected_year)
            for kind, _, _ in sections
        }

    frames = [df_kind for df_kind, _ in per_kind.values() if not df_kind.empty]
    if frames:
        all_df = pd.concat(frames, ignore_index=True)
    else:
        all_df = pd.DataFrame(columns=["date"])

    st.subheader(f"{username}'s Activity in {selected_year}")
    st.metric("Total Commits", len(all_df))
    make_calendar_heatmap(all_df, "All Commits", selected_year)

    # Updated Color Scheme Legend with five shades
    st.markdown("""
Less More
""", unsafe_allow_html=True)

    # Metrics and heatmaps for each type
    columns = st.columns(3)
    for col, (kind, emoji, label) in zip(columns, sections):
        with col:
            df_kind, items = per_kind[kind]
            st.metric(f"{emoji} {label}", len(items))
            st.metric(f"Commits in {selected_year}", len(df_kind))
            make_calendar_heatmap(df_kind, f"{label} Commits", selected_year)