import streamlit as st
from huggingface_hub import HfApi
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
from concurrent.futures import ThreadPoolExecutor, as_completed
from functools import lru_cache
import time
import requests
from collections import Counter
import numpy as np

st.set_page_config(page_title="HF Contributions", layout="wide", initial_sidebar_state="expanded")

# Enhanced UI styling
st.markdown("""
""", unsafe_allow_html=True)

api = HfApi()


# Cache for API responses
@lru_cache(maxsize=1000)
def cached_repo_info(repo_id, repo_type):
    return api.repo_info(repo_id=repo_id, repo_type=repo_type)


@lru_cache(maxsize=1000)
def cached_list_commits(repo_id, repo_type):
    return list(api.list_repo_commits(repo_id=repo_id, repo_type=repo_type))


@lru_cache(maxsize=100)
def cached_list_items(username, kind):
    if kind == "model":
        return list(api.list_models(author=username))
    elif kind == "dataset":
        return list(api.list_datasets(author=username))
    elif kind == "space":
        return list(api.list_spaces(author=username))
    return []


# Function to fetch trending accounts and create stats
@lru_cache(maxsize=1)
def get_trending_accounts(limit=100):
    try:
        trending_data = {"spaces": [], "models": []}

        # Get spaces for stats calculation
        spaces_response = requests.get(
            "https://huggingface.co/api/spaces",
            params={"limit": 10000},
            timeout=30
        )

        # Get models for stats calculation
        models_response = requests.get(
            "https://huggingface.co/api/models",
            params={"limit": 10000},
            timeout=30
        )

        # Process spaces data
        spaces_owners = []
        if spaces_response.status_code == 200:
            spaces = spaces_response.json()

            # Count spaces by owner
            owner_counts_spaces = {}
            for space in spaces:
                if '/' in space.get('id', ''):
                    owner, _ = space.get('id', '').split('/', 1)
                else:
                    owner = space.get('owner', '')

                if owner != 'None':
                    owner_counts_spaces[owner] = owner_counts_spaces.get(owner, 0) + 1

            # Get top owners by count for spaces
            top_owners_spaces = sorted(owner_counts_spaces.items(), key=lambda x: x[1], reverse=True)[:limit]
            trending_data["spaces"] = top_owners_spaces
            spaces_owners = [owner for owner, _ in top_owners_spaces]

        # Process models data
        models_owners = []
        if models_response.status_code == 200:
            models = models_response.json()

            # Count models by owner
            owner_counts_models = {}
            for model in models:
                if '/' in model.get('id', ''):
                    owner, _ = model.get('id', '').split('/', 1)
                else:
                    owner = model.get('owner', '')

                if owner != 'None':
                    owner_counts_models[owner] = owner_counts_models.get(owner, 0) + 1

            # Get top owners by count for models
            top_owners_models = sorted(owner_counts_models.items(), key=lambda x: x[1], reverse=True)[:limit]
            trending_data["models"] = top_owners_models
            models_owners = [owner for owner, _ in top_owners_models]

        # Combine rankings for overall trending based on appearance in both lists
        combined_score = {}
        for i, owner in enumerate(spaces_owners):
            if owner not in combined_score:
                combined_score[owner] = 0
            combined_score[owner] += (limit - i)  # Higher rank gives more points

        for i, owner in enumerate(models_owners):
            if owner not in combined_score:
                combined_score[owner] = 0
            combined_score[owner] += (limit - i)  # Higher rank gives more points

        # Sort by combined score
        sorted_combined = sorted(combined_score.items(), key=lambda x: x[1], reverse=True)[:limit]
        trending_authors = [owner for owner, _ in sorted_combined]

        return trending_authors, trending_data["spaces"], trending_data["models"]
    except Exception as e:
        st.error(f"Error fetching trending accounts: {str(e)}")
        fallback_authors = ["ritvik77", "facebook", "google", "stabilityai", "Salesforce", "tiiuae", "bigscience"]
        return fallback_authors, [(author, 0) for author in fallback_authors], [(author, 0) for author in fallback_authors]
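# Worked example of the combined trending score above (illustrative, not executed): with
# limit=100, an account ranked 1st for spaces (i=0) and 3rd for models (i=2) scores
# (100 - 0) + (100 - 2) = 198, while an account that appears only in the models list at
# rank 10 scores 100 - 9 = 91, so owners present in both lists dominate the overall ranking.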
{str(e)}") fallback_authors = ["ritvik77", "facebook", "google", "stabilityai", "Salesforce", "tiiuae", "bigscience"] return fallback_authors, [(author, 0) for author in fallback_authors], [(author, 0) for author in fallback_authors] # Rate limiting class RateLimiter: def __init__(self, calls_per_second=10): self.calls_per_second = calls_per_second self.last_call = 0 def wait(self): current_time = time.time() time_since_last_call = current_time - self.last_call if time_since_last_call < (1.0 / self.calls_per_second): time.sleep((1.0 / self.calls_per_second) - time_since_last_call) self.last_call = time.time() rate_limiter = RateLimiter() # Function to fetch commits for a repository (optimized) def fetch_commits_for_repo(repo_id, repo_type, username, selected_year): try: rate_limiter.wait() # Skip private/gated repos upfront repo_info = cached_repo_info(repo_id, repo_type) if repo_info.private or (hasattr(repo_info, 'gated') and repo_info.gated): return [], 0 # Get initial commit date initial_commit_date = pd.to_datetime(repo_info.created_at).tz_localize(None).date() commit_dates = [] commit_count = 0 # Add initial commit if it's from the selected year if initial_commit_date.year == selected_year: commit_dates.append(initial_commit_date) commit_count += 1 # Get all commits commits = cached_list_commits(repo_id, repo_type) for commit in commits: commit_date = pd.to_datetime(commit.created_at).tz_localize(None).date() if commit_date.year == selected_year: commit_dates.append(commit_date) commit_count += 1 return commit_dates, commit_count except Exception as e: return [], 0 # Function to get commit events for a user (optimized) def get_commit_events(username, kind=None, selected_year=None): commit_dates = [] items_with_type = [] kinds = [kind] if kind else ["model", "dataset", "space"] for k in kinds: try: items = cached_list_items(username, k) items_with_type.extend((item, k) for item in items) repo_ids = [item.id for item in items] # Optimized parallel fetch with chunking chunk_size = 5 # Process 5 repos at a time for i in range(0, len(repo_ids), chunk_size): chunk = repo_ids[i:i + chunk_size] with ThreadPoolExecutor(max_workers=min(5, len(chunk))) as executor: future_to_repo = { executor.submit(fetch_commits_for_repo, repo_id, k, username, selected_year): repo_id for repo_id in chunk } for future in as_completed(future_to_repo): repo_commits, repo_count = future.result() if repo_commits: # Only extend if we got commits commit_dates.extend(repo_commits) except Exception as e: st.warning(f"Error fetching {k}s for {username}: {str(e)}") # Create DataFrame with all commits df = pd.DataFrame(commit_dates, columns=["date"]) if not df.empty: df = df.drop_duplicates() # Remove any duplicate dates return df, items_with_type # Calendar heatmap function (optimized) def make_calendar_heatmap(df, title, year): if df.empty: st.info(f"No {title.lower()} found for {year}.") return # Optimize DataFrame operations df["count"] = 1 df = df.groupby("date", as_index=False).sum() df["date"] = pd.to_datetime(df["date"]) # Create date range more efficiently start = pd.Timestamp(f"{year}-01-01") end = pd.Timestamp(f"{year}-12-31") all_days = pd.date_range(start=start, end=end) # Optimize DataFrame creation and merging heatmap_data = pd.DataFrame({"date": all_days, "count": 0}) heatmap_data = heatmap_data.merge(df, on="date", how="left", suffixes=("", "_y")) heatmap_data["count"] = heatmap_data["count_y"].fillna(0) heatmap_data = heatmap_data.drop("count_y", axis=1) # Calculate week and day of week more efficiently 
heatmap_data["dow"] = heatmap_data["date"].dt.dayofweek heatmap_data["week"] = (heatmap_data["date"] - start).dt.days // 7 # Create pivot table more efficiently pivot = heatmap_data.pivot(index="dow", columns="week", values="count").fillna(0) # Optimize month labels calculation month_labels = pd.date_range(start, end, freq="MS").strftime("%b") month_positions = pd.date_range(start, end, freq="MS").map(lambda x: (x - start).days // 7) # Create custom colormap with specific boundaries from matplotlib.colors import ListedColormap, BoundaryNorm colors = ['#ebedf0', '#9be9a8', '#40c463', '#30a14e', '#216e39'] # GitHub-style green colors bounds = [0, 1, 3, 11, 31, float('inf')] # Boundaries for color transitions cmap = ListedColormap(colors) norm = BoundaryNorm(bounds, cmap.N) # Create plot more efficiently fig, ax = plt.subplots(figsize=(12, 1.5)) # Convert pivot values to integers to ensure proper color mapping pivot_int = pivot.astype(int) # Create heatmap with explicit vmin and vmax sns.heatmap(pivot_int, ax=ax, cmap=cmap, norm=norm, linewidths=0.5, linecolor="white", square=True, cbar=False, yticklabels=["M", "T", "W", "T", "F", "S", "S"]) ax.set_title(f"{title}", fontsize=14, pad=10) ax.set_xlabel("") ax.set_ylabel("") ax.set_xticks(month_positions) ax.set_xticklabels(month_labels, fontsize=10) ax.set_yticklabels(ax.get_yticklabels(), rotation=0, fontsize=10) # 시각적 향상을 위한 figure 스타일링 fig.tight_layout() fig.patch.set_facecolor('#F8F9FA') st.pyplot(fig) # Function to create a fancy contribution radar chart def create_contribution_radar(username, models_count, spaces_count, datasets_count, commits_count): # Create radar chart for contribution metrics categories = ['Models', 'Spaces', 'Datasets', 'Activity'] values = [models_count, spaces_count, datasets_count, commits_count] # Normalize values for better visualization max_vals = [100, 100, 50, 500] # Reasonable max values for each category normalized = [min(v/m, 1.0) for v, m in zip(values, max_vals)] # Create radar chart angles = np.linspace(0, 2*np.pi, len(categories), endpoint=False).tolist() angles += angles[:1] # Close the loop normalized += normalized[:1] # Close the loop fig, ax = plt.subplots(figsize=(6, 6), subplot_kw={'polar': True}, facecolor='#F8F9FA') # Add background grid with improved styling ax.set_theta_offset(np.pi / 2) ax.set_theta_direction(-1) ax.set_thetagrids(np.degrees(angles[:-1]), categories, fontsize=12, fontweight='bold') # 그리드 스타일링 개선 ax.grid(color='#CCCCCC', linestyle='-', linewidth=0.5, alpha=0.7) # Draw the chart with improved color scheme ax.fill(angles, normalized, color='#4CAF50', alpha=0.25) ax.plot(angles, normalized, color='#4CAF50', linewidth=3) # Add value labels with improved styling for i, val in enumerate(values): angle = angles[i] x = (normalized[i] + 0.1) * np.cos(angle) y = (normalized[i] + 0.1) * np.sin(angle) ax.text(angle, normalized[i] + 0.1, str(val), ha='center', va='center', fontsize=12, fontweight='bold', color='#1976D2') # Add highlight circles circles = [0.25, 0.5, 0.75, 1.0] for circle in circles: ax.plot(angles, [circle] * len(angles), color='gray', alpha=0.3, linewidth=0.5, linestyle='--') ax.set_title(f"{username}'s Contribution Profile", fontsize=16, pad=20, fontweight='bold') # 배경 원 없애기 ax.set_facecolor('#F8F9FA') return fig # Function to create contribution distribution pie chart def create_contribution_pie(model_commits, dataset_commits, space_commits): labels = ['Models', 'Datasets', 'Spaces'] sizes = [model_commits, dataset_commits, space_commits] # Filter out zero values 
# Function to create contribution distribution pie chart
def create_contribution_pie(model_commits, dataset_commits, space_commits):
    labels = ['Models', 'Datasets', 'Spaces']
    sizes = [model_commits, dataset_commits, space_commits]

    # Filter out zero values
    filtered_labels = [label for label, size in zip(labels, sizes) if size > 0]
    filtered_sizes = [size for size in sizes if size > 0]

    if not filtered_sizes:
        return None  # No data to show

    # Use a more attractive color scheme
    colors = ['#FF9800', '#2196F3', '#4CAF50']
    filtered_colors = [color for color, size in zip(colors, sizes) if size > 0]

    fig, ax = plt.subplots(figsize=(7, 7), facecolor='#F8F9FA')

    # Create exploded pie chart with improved styling
    explode = [0.1] * len(filtered_sizes)  # Explode all slices for better visualization

    wedges, texts, autotexts = ax.pie(
        filtered_sizes,
        labels=None,  # We'll add custom labels
        colors=filtered_colors,
        autopct='%1.1f%%',
        startangle=90,
        shadow=True,
        explode=explode,
        textprops={'fontsize': 14, 'weight': 'bold'},
        wedgeprops={'edgecolor': 'white', 'linewidth': 2}
    )

    # Customize the percentage text
    for autotext in autotexts:
        autotext.set_color('white')
        autotext.set_fontsize(12)
        autotext.set_weight('bold')

    # Add legend with custom styling
    ax.legend(
        wedges,
        [f"{label} ({size})" for label, size in zip(filtered_labels, filtered_sizes)],
        title="Contribution Types",
        loc="center left",
        bbox_to_anchor=(0.85, 0.5),
        fontsize=12
    )

    ax.set_title('Distribution of Contributions by Type', fontsize=16, pad=20, fontweight='bold')
    ax.axis('equal')  # Equal aspect ratio ensures that pie is drawn as a circle

    return fig


# Function to create monthly activity chart
def create_monthly_activity(df, year):
    if df.empty:
        return None

    # Aggregate by month
    df['date'] = pd.to_datetime(df['date'])
    df['month'] = df['date'].dt.month
    df['month_name'] = df['date'].dt.strftime('%b')

    # Count by month and ensure all months are present
    month_order = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
    counts_by_month = df.groupby('month_name')['date'].count()
    monthly_counts = pd.Series([counts_by_month.get(m, 0) for m in month_order], index=month_order)

    # Create bar chart with improved styling
    fig, ax = plt.subplots(figsize=(14, 6), facecolor='#F8F9FA')

    # Create bars with gradient colors based on activity level
    norm = plt.Normalize(0, monthly_counts.max() if monthly_counts.max() > 0 else 1)
    colors = plt.cm.viridis(norm(monthly_counts.values))

    bars = ax.bar(monthly_counts.index, monthly_counts.values, color=colors, width=0.7)

    # Highlight the month with most activity
    if monthly_counts.max() > 0:
        max_idx = monthly_counts.argmax()
        bars[max_idx].set_color('#FF5722')
        bars[max_idx].set_edgecolor('black')
        bars[max_idx].set_linewidth(1.5)

    # Add labels and styling with enhanced design
    ax.set_title(f'Monthly Activity in {year}', fontsize=18, pad=20, fontweight='bold')
    ax.set_xlabel('Month', fontsize=14, labelpad=10)
    ax.set_ylabel('Number of Contributions', fontsize=14, labelpad=10)

    # Add value labels on top of bars with improved styling
    for i, count in enumerate(monthly_counts.values):
        if count > 0:
            ax.text(i, count + 0.5, str(int(count)), ha='center', fontsize=12, fontweight='bold')

    # Add grid for better readability with improved styling
    ax.grid(axis='y', linestyle='--', alpha=0.7, color='#CCCCCC')
    ax.set_axisbelow(True)  # Grid lines behind bars

    # Style the chart borders and background
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    ax.spines['left'].set_linewidth(0.5)
    ax.spines['bottom'].set_linewidth(0.5)

    # Adjust tick parameters for better look
    ax.tick_params(axis='x', labelsize=12, pad=5)
    ax.tick_params(axis='y', labelsize=12, pad=5)

    plt.tight_layout()
    return fig
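# Illustrative usage (commented out): build the monthly chart from the commit-date DataFrame
# returned by get_commit_events; months with no activity still appear because the counts are
# reindexed over the full Jan-Dec order. A copy is passed because the function adds columns.
#
#   fig = create_monthly_activity(df.copy(), 2024)
#   if fig is not None:
#       st.pyplot(fig)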
# Function to render follower growth simulation
def simulate_follower_data(username, spaces_count, models_count, total_commits):
    # Simulate follower growth based on contribution metrics
    # This is just a simulation for visual purposes
    import numpy as np
    from datetime import timedelta

    # Start with a base number of followers proportional to contribution metrics
    base_followers = max(10, int((spaces_count * 2 + models_count * 3 + total_commits / 10) / 6))

    # Generate timestamps for the past year
    end_date = datetime.now()
    start_date = end_date - timedelta(days=365)
    dates = pd.date_range(start=start_date, end=end_date, freq='W')  # Weekly data points

    # Generate follower growth with some randomness
    followers = []
    current = base_followers / 2  # Start from half the base
    for i in range(len(dates)):
        growth_factor = 1 + (np.random.random() * 0.1)  # Random growth between 0% and 10%
        current = current * growth_factor
        followers.append(int(current))

    # Ensure end value matches our base_followers estimate
    followers[-1] = base_followers

    # Create the chart with improved styling
    fig, ax = plt.subplots(figsize=(14, 6), facecolor='#F8F9FA')

    # Create gradient line for better visualization
    points = np.array([dates, followers]).T.reshape(-1, 1, 2)
    segments = np.concatenate([points[:-1], points[1:]], axis=1)

    from matplotlib.collections import LineCollection
    norm = plt.Normalize(0, len(segments))
    lc = LineCollection(segments, cmap='viridis', norm=norm, linewidth=3, alpha=0.8)
    lc.set_array(np.arange(len(segments)))
    line = ax.add_collection(lc)

    # Add markers
    ax.scatter(dates, followers, s=50, color='#9C27B0', alpha=0.8, zorder=10)

    # Add styling with enhanced design
    ax.set_title(f"Estimated Follower Growth for {username}", fontsize=18, pad=20, fontweight='bold')
    ax.set_xlabel("Date", fontsize=14, labelpad=10)
    ax.set_ylabel("Followers", fontsize=14, labelpad=10)

    # Format the axes limits
    ax.set_xlim(dates.min(), dates.max())
    ax.set_ylim(0, max(followers) * 1.1)

    # Add grid for better readability with improved styling
    ax.grid(True, linestyle='--', alpha=0.7, color='#CCCCCC')
    ax.set_axisbelow(True)  # Grid lines behind plot

    # Style the chart borders and background
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    ax.spines['left'].set_linewidth(0.5)
    ax.spines['bottom'].set_linewidth(0.5)

    # Adjust tick parameters for better look
    ax.tick_params(axis='x', labelsize=12, rotation=45)
    ax.tick_params(axis='y', labelsize=12)

    # Add annotations for start and end points
    ax.annotate(f"Start: {followers[0]}",
                xy=(dates[0], followers[0]),
                xytext=(10, 10),
                textcoords='offset points',
                fontsize=12,
                fontweight='bold',
                color='#9C27B0',
                bbox=dict(boxstyle="round,pad=0.3", fc="#F3E5F5", ec="#9C27B0", alpha=0.8))

    ax.annotate(f"Current: {followers[-1]}",
                xy=(dates[-1], followers[-1]),
                xytext=(-10, 10),
                textcoords='offset points',
                fontsize=12,
                fontweight='bold',
                color='#9C27B0',
                ha='right',
                bbox=dict(boxstyle="round,pad=0.3", fc="#F3E5F5", ec="#9C27B0", alpha=0.8))

    plt.tight_layout()
    return fig
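# How the simulated baseline above is derived (illustrative arithmetic): with 30 spaces,
# 20 models and 400 commits, base_followers = max(10, int((30*2 + 20*3 + 400/10) / 6)) = 26.
# The weekly series then compounds by a random 0-10% per step and is pinned to that value at
# the final point, so the curve is cosmetic rather than real follower data.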
# Function to create ranking position visualization
def create_ranking_chart(username, overall_rank, spaces_rank, models_rank):
    if not (overall_rank or spaces_rank or models_rank):
        return None

    # Create a horizontal bar chart for rankings with improved styling
    fig, ax = plt.subplots(figsize=(12, 5), facecolor='#F8F9FA')

    categories = []
    positions = []
    colors = []
    rank_values = []

    if overall_rank:
        categories.append('Overall')
        positions.append(101 - overall_rank)  # Invert rank for visualization (higher is better)
        colors.append('#673AB7')
        rank_values.append(overall_rank)

    if spaces_rank:
        categories.append('Spaces')
        positions.append(101 - spaces_rank)
        colors.append('#2196F3')
        rank_values.append(spaces_rank)

    if models_rank:
        categories.append('Models')
        positions.append(101 - models_rank)
        colors.append('#FF9800')
        rank_values.append(models_rank)

    # Create horizontal bars with enhanced styling
    bars = ax.barh(categories, positions, color=colors, alpha=0.8, height=0.6,
                   edgecolor='white', linewidth=1.5)

    # Add rank values as text with improved styling
    for i, bar in enumerate(bars):
        ax.text(bar.get_width() + 2, bar.get_y() + bar.get_height() / 2,
                f'Rank #{rank_values[i]}', va='center', fontsize=12,
                fontweight='bold', color=colors[i])

    # Set chart properties with enhanced styling
    ax.set_xlim(0, 105)
    ax.set_title(f"Ranking Positions for {username} (Top 100)", fontsize=18, pad=20, fontweight='bold')
    ax.set_xlabel("Percentile (higher is better)", fontsize=14, labelpad=10)

    # Add explanatory text
    ax.text(50, -0.6, "← Lower rank (higher number) | Higher rank (lower number) →",
            ha='center', va='center', fontsize=10, fontweight='bold', color='#666666')

    # Add a vertical line at 90th percentile to highlight top 10 with improved styling
    ax.axvline(x=90, color='#FF5252', linestyle='--', alpha=0.7, linewidth=2)
    ax.text(92, len(categories) / 2, 'Top 10', color='#D32F2F',
            fontsize=12, rotation=90, va='center', fontweight='bold')

    # Style the chart borders and background
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    ax.spines['left'].set_linewidth(0.5)
    ax.spines['bottom'].set_linewidth(0.5)

    # Adjust tick parameters for better look
    ax.tick_params(axis='x', labelsize=12)
    ax.tick_params(axis='y', labelsize=14, pad=5)

    # Add grid for better readability
    ax.grid(axis='x', linestyle='--', alpha=0.5, color='#CCCCCC')
    ax.set_axisbelow(True)  # Grid lines behind bars

    # Invert x-axis to show ranking position more intuitively
    ax.invert_xaxis()

    plt.tight_layout()
    return fig
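# Rank-to-bar mapping, worked through (illustrative): a contributor ranked #5 overall is drawn
# with bar length 101 - 5 = 96, past the dashed Top-10 guide line at x = 90, while rank #50
# maps to 101 - 50 = 51; the x-axis is then inverted so the bars read in the intended direction.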
# Fetch trending accounts with a loading spinner (do this once at the beginning)
with st.spinner("Loading trending accounts..."):
    trending_accounts, top_owners_spaces, top_owners_models = get_trending_accounts(limit=100)

# Sidebar and main layout.
# NOTE: only fragments of the original UI code survive from this point on; the HTML bodies of
# the st.markdown() calls and the widget/flow logic between them were lost. The code below is a
# minimal, hedged reconstruction that keeps every recoverable string; the sidebar widget labels
# and the exact layout are assumptions, not the original implementation.
with st.sidebar:
    # Assumed widgets: the original sidebar offered a trending-contributor picker and a year picker.
    username = st.selectbox("Select a trending contributor", [""] + trending_accounts)
    selected_year = st.selectbox("Year", list(range(datetime.now().year, 2017, -1)))

if username:
    st.markdown(
        f"This dashboard analyzes {username}'s contributions to Hugging Face in {selected_year}, "
        f"including models, datasets, and spaces.",
        unsafe_allow_html=True,
    )

    # Lost section: the original code fetched the selected user's repos and commits here and
    # rendered the stats cards, ranking/radar/pie/monthly charts, follower simulation, and
    # calendar heatmaps. Recoverable strings from that section:
    #   "* Some metrics like follower growth are simulated for visualization purposes."
    #   "📊 This is a simulation based on contribution metrics - for visualization purposes only"
    #   f"Could not load {kind.capitalize()}s data"   (shown when a repo type failed to load)

    st.markdown(
        "Hugging Face Contributions Dashboard | Data fetched from Hugging Face API",
        unsafe_allow_html=True,
    )
else:
    # If no username is selected, show welcome screen
    st.markdown("Please select a contributor from the sidebar to view their activity.")