"""Streamlit dashboard that charts a Hugging Face account's contributions.

The page ranks the most prolific space/model owners, lets the viewer pick one
(or type any username) and renders commit-activity charts for a chosen year.

NOTE(review): in the copy of this file that was reviewed, the inline HTML/CSS
inside the ``st.markdown`` string literals had been stripped and the code had
been collapsed onto a handful of lines.  The literals below keep exactly the
text that survived (including its newlines); restore the original markup from
version control if it is available.
"""

import html
import threading
import time
from collections import Counter
from concurrent.futures import ThreadPoolExecutor, as_completed
from datetime import datetime, timedelta
from functools import lru_cache

import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
import seaborn as sns
import streamlit as st
from huggingface_hub import HfApi
from matplotlib.collections import LineCollection
from matplotlib.colors import BoundaryNorm, ListedColormap

st.set_page_config(page_title="HF Contributions", layout="wide", initial_sidebar_state="expanded")

# Enhanced UI styling (the stylesheet itself is missing from this copy).
st.markdown(""" """, unsafe_allow_html=True)

api = HfApi()

# Upper bound on concurrent repository fetches.
MAX_WORKERS = 5
# Owners shown when the trending lists cannot be downloaded.
FALLBACK_AUTHORS = ["ritvik77", "facebook", "google", "stabilityai", "Salesforce", "tiiuae", "bigscience"]
# Prefixes for the first three rows of the per-type leaderboards.
MEDALS = ("🥇 ", "🥈 ", "🥉 ")
BACKGROUND = '#F8F9FA'


# ---------------------------------------------------------------------------
# Cached API access
# ---------------------------------------------------------------------------
@lru_cache(maxsize=1000)
def cached_repo_info(repo_id, repo_type):
    """Return ``api.repo_info`` for one repository, memoised per (id, type)."""
    return api.repo_info(repo_id=repo_id, repo_type=repo_type)


@lru_cache(maxsize=1000)
def cached_list_commits(repo_id, repo_type):
    """Return the repository's commits as a list, memoised per (id, type)."""
    return list(api.list_repo_commits(repo_id=repo_id, repo_type=repo_type))


@lru_cache(maxsize=100)
def cached_list_items(username, kind):
    """List the repos of ``kind`` ("model", "dataset" or "space") authored by ``username``.

    Any other ``kind`` yields an empty list.
    """
    listers = {
        "model": api.list_models,
        "dataset": api.list_datasets,
        "space": api.list_spaces,
    }
    lister = listers.get(kind)
    return list(lister(author=username)) if lister else []


# ---------------------------------------------------------------------------
# Trending accounts
# ---------------------------------------------------------------------------
def _rank_owners(repos, limit):
    """Return the ``limit`` most frequent owners in ``repos`` as (owner, count) pairs."""
    counts = {}
    for repo in repos:
        repo_id = repo.get('id', '') or ''
        if '/' in repo_id:
            owner = repo_id.split('/', 1)[0]
        else:
            owner = repo.get('owner', '')
        # Skip missing owners as well as the literal string 'None'.
        if owner and owner != 'None':
            counts[owner] = counts.get(owner, 0) + 1
    return sorted(counts.items(), key=lambda pair: pair[1], reverse=True)[:limit]


@lru_cache(maxsize=1)
def _fetch_trending_accounts(limit):
    """Download the spaces/models listings and rank their owners.

    Raises on network errors or when neither endpoint returned data, so that
    failures are never stored in the cache.
    """
    rankings = {}
    for key in ("spaces", "models"):
        response = requests.get(
            f"https://huggingface.co/api/{key}", params={"limit": 10000}, timeout=30
        )
        rankings[key] = _rank_owners(response.json(), limit) if response.status_code == 200 else []

    if not rankings["spaces"] and not rankings["models"]:
        raise RuntimeError("no spaces or models data returned by the Hugging Face API")

    # An owner earns (limit - position) points per list, so a higher rank gives more points.
    combined_score = {}
    for ranked in (rankings["spaces"], rankings["models"]):
        for position, (owner, _) in enumerate(ranked):
            combined_score[owner] = combined_score.get(owner, 0) + (limit - position)

    ordered = sorted(combined_score.items(), key=lambda pair: pair[1], reverse=True)[:limit]
    trending_authors = [owner for owner, _ in ordered]
    return trending_authors, rankings["spaces"], rankings["models"]


def get_trending_accounts(limit=100):
    """Return ``(trending_authors, top_spaces_owners, top_models_owners)``.

    The two ``top_*`` values are lists of ``(owner, repo_count)`` pairs.  On
    any failure an error is shown in the page and a fixed fallback list (with
    zero counts) is returned instead; only successful results are cached.
    """
    try:
        return _fetch_trending_accounts(limit)
    except Exception as e:  # top-level boundary: the page must still render
        st.error(f"Error fetching trending accounts: {str(e)}")
        fallback_authors = list(FALLBACK_AUTHORS)
        return (
            fallback_authors,
            [(author, 0) for author in fallback_authors],
            [(author, 0) for author in fallback_authors],
        )


# ---------------------------------------------------------------------------
# Rate limiting
# ---------------------------------------------------------------------------
class RateLimiter:
    """Spaces successive ``wait()`` calls at least ``1 / calls_per_second`` apart."""

    def __init__(self, calls_per_second=10):
        self.calls_per_second = calls_per_second
        self.last_call = 0
        # wait() is invoked from ThreadPoolExecutor workers, so guard last_call.
        self._lock = threading.Lock()

    def wait(self):
        """Block until the minimum interval since the previous call has elapsed."""
        min_interval = 1.0 / self.calls_per_second
        with self._lock:
            elapsed = time.time() - self.last_call
            if elapsed < min_interval:
                time.sleep(min_interval - elapsed)
            self.last_call = time.time()


rate_limiter = RateLimiter()


# ---------------------------------------------------------------------------
# Commit collection
# ---------------------------------------------------------------------------
def fetch_commits_for_repo(repo_id, repo_type, username, selected_year):
    """Return ``(commit_dates, commit_count)`` for one repo in ``selected_year``.

    Private and gated repositories are skipped.  ``username`` is accepted for
    interface compatibility and is not used.  Any error yields ``([], 0)`` so
    that one failing repository does not abort the whole scan.
    """
    try:
        rate_limiter.wait()

        repo_info = cached_repo_info(repo_id, repo_type)
        if repo_info.private or getattr(repo_info, 'gated', False):
            return [], 0

        commit_dates = []
        for commit in cached_list_commits(repo_id, repo_type):
            commit_date = pd.to_datetime(commit.created_at).tz_localize(None).date()
            if commit_date.year == selected_year:
                commit_dates.append(commit_date)

        # Count the repo creation as a contribution only when the commit list
        # does not already contain an entry for that day; otherwise the
        # creation would be counted twice.
        created = pd.to_datetime(repo_info.created_at).tz_localize(None).date()
        if created.year == selected_year and created not in commit_dates:
            commit_dates.insert(0, created)

        return commit_dates, len(commit_dates)
    except Exception:
        # Deliberate best effort: unreadable repos contribute nothing.
        return [], 0


def _collect_commits(repo_ids, repo_type, username, selected_year, on_progress=None):
    """Fetch commits for many repos in parallel.

    Returns ``(all_commit_dates, total_commit_count)``.  ``on_progress``, when
    given, is called from the calling thread with the completed fraction
    (0..1) after each repository finishes.
    """
    commit_dates = []
    commit_total = 0
    if not repo_ids:
        return commit_dates, commit_total

    with ThreadPoolExecutor(max_workers=min(MAX_WORKERS, len(repo_ids))) as executor:
        futures = [
            executor.submit(fetch_commits_for_repo, repo_id, repo_type, username, selected_year)
            for repo_id in repo_ids
        ]
        for finished, future in enumerate(as_completed(futures), start=1):
            repo_dates, repo_count = future.result()
            commit_dates.extend(repo_dates)
            commit_total += repo_count
            if on_progress is not None:
                on_progress(finished / len(repo_ids))
    return commit_dates, commit_total


def get_commit_events(username, kind=None, selected_year=None):
    """Return ``(df, items_with_type)`` for a user's repositories.

    ``df`` has one ``date`` row per commit found in ``selected_year``;
    ``items_with_type`` is a list of ``(item, kind)`` pairs.  When ``kind`` is
    falsy, models, datasets and spaces are all scanned.
    """
    commit_dates = []
    items_with_type = []
    kinds = [kind] if kind else ["model", "dataset", "space"]

    for k in kinds:
        try:
            items = cached_list_items(username, k)
            items_with_type.extend((item, k) for item in items)
            repo_ids = [item.id for item in items]
            dates, _ = _collect_commits(repo_ids, k, username, selected_year)
            commit_dates.extend(dates)
        except Exception as e:
            st.warning(f"Error fetching {k}s for {username}: {str(e)}")

    # Keep one row per commit: de-duplicating dates would cap every day at a
    # count of one and make the heatmap colour scale meaningless.
    df = pd.DataFrame(commit_dates, columns=["date"])
    return df, items_with_type


# ---------------------------------------------------------------------------
# Charts
# ---------------------------------------------------------------------------
def _show_figure(fig):
    """Render ``fig`` in the page and release it so figures do not accumulate."""
    st.pyplot(fig)
    plt.close(fig)


def make_calendar_heatmap(df, title, year):
    """Render a calendar heatmap of ``df['date']`` for ``year``.

    ``df`` holds one row per contribution; it is not modified.  Shows an info
    message instead of a chart when ``df`` is empty.
    """
    if df.empty:
        st.info(f"No {title.lower()} found for {year}.")
        return

    start = pd.Timestamp(f"{year}-01-01")
    end = pd.Timestamp(f"{year}-12-31")
    all_days = pd.date_range(start=start, end=end)

    # Contributions per calendar day, zero-filled over the whole year.
    per_day = pd.to_datetime(df["date"]).value_counts().reindex(all_days, fill_value=0)
    heatmap_data = pd.DataFrame({"date": all_days, "count": per_day.to_numpy()})

    # Shift by the weekday of Jan 1 so that every column is one Mon..Sun week.
    first_weekday = start.dayofweek
    heatmap_data["dow"] = heatmap_data["date"].dt.dayofweek
    heatmap_data["week"] = ((heatmap_data["date"] - start).dt.days + first_weekday) // 7

    pivot = heatmap_data.pivot(index="dow", columns="week", values="count").fillna(0)

    month_starts = pd.date_range(start, end, freq="MS")
    month_labels = month_starts.strftime("%b")
    month_positions = [((m - start).days + first_weekday) // 7 for m in month_starts]

    colors = ['#ebedf0', '#9be9a8', '#40c463', '#30a14e', '#216e39']  # GitHub-style green colors
    bounds = [0, 1, 3, 11, 31, float('inf')]  # Boundaries for color transitions
    cmap = ListedColormap(colors)
    norm = BoundaryNorm(bounds, cmap.N)

    fig, ax = plt.subplots(figsize=(12, 1.5))
    sns.heatmap(
        pivot.astype(int),  # integer counts so the bin boundaries apply exactly
        ax=ax,
        cmap=cmap,
        norm=norm,
        linewidths=0.5,
        linecolor="white",
        square=True,
        cbar=False,
        yticklabels=["M", "T", "W", "T", "F", "S", "S"],
    )

    ax.set_title(f"{title}", fontsize=14, pad=10)
    ax.set_xlabel("")
    ax.set_ylabel("")
    ax.set_xticks(month_positions)
    ax.set_xticklabels(month_labels, fontsize=10)
    ax.set_yticklabels(ax.get_yticklabels(), rotation=0, fontsize=10)

    # Figure styling for a better look
    fig.tight_layout()
    fig.patch.set_facecolor(BACKGROUND)
    _show_figure(fig)


def create_contribution_radar(username, models_count, spaces_count, datasets_count, commits_count):
    """Return a polar chart of the four contribution metrics.

    Each metric is scaled against a fixed ceiling (100 models, 100 spaces,
    50 datasets, 500 commits) and clipped at 1.0; labels show the raw values.
    """
    categories = ['Models', 'Spaces', 'Datasets', 'Activity']
    values = [models_count, spaces_count, datasets_count, commits_count]
    max_vals = [100, 100, 50, 500]
    normalized = [min(value / ceiling, 1.0) for value, ceiling in zip(values, max_vals)]

    angles = np.linspace(0, 2 * np.pi, len(categories), endpoint=False).tolist()
    # Repeat the first point so the outline closes.
    angles += angles[:1]
    normalized += normalized[:1]

    fig, ax = plt.subplots(figsize=(6, 6), subplot_kw={'polar': True}, facecolor=BACKGROUND)

    ax.set_theta_offset(np.pi / 2)
    ax.set_theta_direction(-1)
    ax.set_thetagrids(np.degrees(angles[:-1]), categories, fontsize=12, fontweight='bold')
    ax.grid(color='#CCCCCC', linestyle='-', linewidth=0.5, alpha=0.7)

    ax.fill(angles, normalized, color='#4CAF50', alpha=0.25)
    ax.plot(angles, normalized, color='#4CAF50', linewidth=3)

    # Raw value just outside each vertex.
    for i, val in enumerate(values):
        ax.text(angles[i], normalized[i] + 0.1, str(val),
                ha='center', va='center', fontsize=12,
                fontweight='bold', color='#1976D2')

    for radius in (0.25, 0.5, 0.75, 1.0):
        ax.plot(angles, [radius] * len(angles), color='gray', alpha=0.3, linewidth=0.5, linestyle='--')

    ax.set_title(f"{username}'s Contribution Profile", fontsize=16, pad=20, fontweight='bold')
    ax.set_facecolor(BACKGROUND)
    return fig


def create_contribution_pie(model_commits, dataset_commits, space_commits):
    """Return a pie chart of commits per repo type, or ``None`` if all are zero."""
    slices = [
        ('Models', model_commits, '#FF9800'),
        ('Datasets', dataset_commits, '#2196F3'),
        ('Spaces', space_commits, '#4CAF50'),
    ]
    shown = [entry for entry in slices if entry[1] > 0]
    if not shown:
        return None  # No data to show

    labels = [label for label, _, _ in shown]
    sizes = [size for _, size, _ in shown]
    colors = [color for _, _, color in shown]

    fig, ax = plt.subplots(figsize=(7, 7), facecolor=BACKGROUND)

    wedges, texts, autotexts = ax.pie(
        sizes,
        labels=None,  # labels go in the legend instead
        colors=colors,
        autopct='%1.1f%%',
        startangle=90,
        shadow=True,
        explode=[0.1] * len(sizes),
        textprops={'fontsize': 14, 'weight': 'bold'},
        wedgeprops={'edgecolor': 'white', 'linewidth': 2},
    )

    for autotext in autotexts:
        autotext.set_color('white')
        autotext.set_fontsize(12)
        autotext.set_weight('bold')

    ax.legend(
        wedges,
        [f"{label} ({size})" for label, size in zip(labels, sizes)],
        title="Contribution Types",
        loc="center left",
        bbox_to_anchor=(0.85, 0.5),
        fontsize=12,
    )

    ax.set_title('Distribution of Contributions by Type', fontsize=16, pad=20, fontweight='bold')
    ax.axis('equal')  # Equal aspect ratio ensures that pie is drawn as a circle
    return fig


def create_monthly_activity(df, year):
    """Return a bar chart of contributions per month, or ``None`` for empty ``df``.

    ``df`` holds one ``date`` row per contribution and is not modified.
    """
    if df.empty:
        return None

    month_order = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                   'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
    # Group by month number so the result does not depend on the locale.
    by_month = pd.to_datetime(df['date']).dt.month.value_counts()
    monthly_counts = pd.Series(
        [int(by_month.get(month, 0)) for month in range(1, 13)], index=month_order
    )

    fig, ax = plt.subplots(figsize=(14, 6), facecolor=BACKGROUND)

    peak = monthly_counts.max()
    norm = plt.Normalize(0, peak if peak > 0 else 1)
    colors = plt.cm.viridis(norm(monthly_counts.values))
    bars = ax.bar(monthly_counts.index, monthly_counts.values, color=colors, width=0.7)

    # Highlight the month with most activity
    if peak > 0:
        busiest = bars[int(monthly_counts.to_numpy().argmax())]
        busiest.set_color('#FF5722')
        busiest.set_edgecolor('black')
        busiest.set_linewidth(1.5)

    ax.set_title(f'Monthly Activity in {year}', fontsize=18, pad=20, fontweight='bold')
    ax.set_xlabel('Month', fontsize=14, labelpad=10)
    ax.set_ylabel('Number of Contributions', fontsize=14, labelpad=10)

    for i, count in enumerate(monthly_counts.values):
        if count > 0:
            ax.text(i, count + 0.5, str(int(count)), ha='center', fontsize=12, fontweight='bold')

    ax.grid(axis='y', linestyle='--', alpha=0.7, color='#CCCCCC')
    ax.set_axisbelow(True)  # Grid lines behind bars

    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    ax.spines['left'].set_linewidth(0.5)
    ax.spines['bottom'].set_linewidth(0.5)

    ax.tick_params(axis='x', labelsize=12, pad=5)
    ax.tick_params(axis='y', labelsize=12, pad=5)

    plt.tight_layout()
    return fig


def simulate_follower_data(username, spaces_count, models_count, total_commits):
    """Return a chart of *simulated* weekly follower growth over the past year.

    The series is random and purely illustrative: it starts at half of a base
    value derived from the contribution metrics, grows by 0-10 % per week and
    is forced to end exactly at that base value.
    """
    base_followers = max(10, int((spaces_count * 2 + models_count * 3 + total_commits / 10) / 6))

    end_date = datetime.now()
    start_date = end_date - timedelta(days=365)
    dates = pd.date_range(start=start_date, end=end_date, freq='W')  # Weekly data points

    followers = []
    current = base_followers / 2  # Start from half the base
    for _ in range(len(dates)):
        current *= 1 + (np.random.random() * 0.1)  # Random growth between 0% and 10%
        followers.append(int(current))
    followers[-1] = base_followers

    fig, ax = plt.subplots(figsize=(14, 6), facecolor=BACKGROUND)

    # LineCollection needs plain floats, so convert the dates to Matplotlib
    # date numbers instead of mixing Timestamps and ints in one array.
    x_values = mdates.date2num(dates.to_pydatetime())
    points = np.column_stack([x_values, followers]).reshape(-1, 1, 2)
    segments = np.concatenate([points[:-1], points[1:]], axis=1)

    norm = plt.Normalize(0, len(segments))
    lc = LineCollection(segments, cmap='viridis', norm=norm, linewidth=3, alpha=0.8)
    lc.set_array(np.arange(len(segments)))
    ax.add_collection(lc)

    ax.scatter(x_values, followers, s=50, color='#9C27B0', alpha=0.8, zorder=10)
    ax.xaxis_date()  # interpret the numeric x values as dates on the axis

    ax.set_title(f"Estimated Follower Growth for {username}", fontsize=18, pad=20, fontweight='bold')
    ax.set_xlabel("Date", fontsize=14, labelpad=10)
    ax.set_ylabel("Followers", fontsize=14, labelpad=10)

    ax.set_xlim(x_values[0], x_values[-1])
    ax.set_ylim(0, max(followers) * 1.1)

    ax.grid(True, linestyle='--', alpha=0.7, color='#CCCCCC')
    ax.set_axisbelow(True)  # Grid lines behind plot

    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    ax.spines['left'].set_linewidth(0.5)
    ax.spines['bottom'].set_linewidth(0.5)

    ax.tick_params(axis='x', labelsize=12, rotation=45)
    ax.tick_params(axis='y', labelsize=12)

    label_box = dict(boxstyle="round,pad=0.3", fc="#F3E5F5", ec="#9C27B0", alpha=0.8)
    ax.annotate(f"Start: {followers[0]}",
                xy=(x_values[0], followers[0]),
                xytext=(10, 10),
                textcoords='offset points',
                fontsize=12,
                fontweight='bold',
                color='#9C27B0',
                bbox=label_box)
    ax.annotate(f"Current: {followers[-1]}",
                xy=(x_values[-1], followers[-1]),
                xytext=(-10, 10),
                textcoords='offset points',
                fontsize=12,
                fontweight='bold',
                color='#9C27B0',
                ha='right',
                bbox=label_box)

    plt.tight_layout()
    return fig


def create_ranking_chart(username, overall_rank, spaces_rank, models_rank):
    """Return a horizontal bar chart of the given ranks, or ``None`` if all are falsy.

    Bar length is ``101 - rank`` so that a better (lower) rank draws a longer bar.
    """
    if not (overall_rank or spaces_rank or models_rank):
        return None

    candidates = [
        ('Overall', overall_rank, '#673AB7'),
        ('Spaces', spaces_rank, '#2196F3'),
        ('Models', models_rank, '#FF9800'),
    ]
    shown = [entry for entry in candidates if entry[1]]
    categories = [name for name, _, _ in shown]
    rank_values = [rank for _, rank, _ in shown]
    colors = [color for _, _, color in shown]
    positions = [101 - rank for rank in rank_values]

    fig, ax = plt.subplots(figsize=(12, 5), facecolor=BACKGROUND)

    bars = ax.barh(categories, positions, color=colors, alpha=0.8, height=0.6,
                   edgecolor='white', linewidth=1.5)

    for i, bar in enumerate(bars):
        ax.text(bar.get_width() + 2, bar.get_y() + bar.get_height() / 2,
                f'Rank #{rank_values[i]}', va='center', fontsize=12,
                fontweight='bold', color=colors[i])

    ax.set_xlim(0, 105)
    ax.set_title(f"Ranking Positions for {username} (Top 100)", fontsize=18, pad=20, fontweight='bold')
    ax.set_xlabel("Percentile (higher is better)", fontsize=14, labelpad=10)

    ax.text(50, -0.6, "← Lower rank (higher number) | Higher rank (lower number) →",
            ha='center', va='center', fontsize=10, fontweight='bold', color='#666666')

    # Vertical marker at 90 to highlight the top 10.
    ax.axvline(x=90, color='#FF5252', linestyle='--', alpha=0.7, linewidth=2)
    ax.text(92, len(categories) / 2, 'Top 10', color='#D32F2F', fontsize=12,
            rotation=90, va='center', fontweight='bold')

    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    ax.spines['left'].set_linewidth(0.5)
    ax.spines['bottom'].set_linewidth(0.5)

    ax.tick_params(axis='x', labelsize=12)
    ax.tick_params(axis='y', labelsize=14, pad=5)

    ax.grid(axis='x', linestyle='--', alpha=0.5, color='#CCCCCC')
    ax.set_axisbelow(True)  # Grid lines behind bars

    ax.invert_xaxis()

    plt.tight_layout()
    return fig


def _show_leaderboard(owners, count_column):
    """Render the first 50 ``(owner, count)`` pairs as a 1-indexed table with medals."""
    rows = [
        [f"{MEDALS[idx] if idx < len(MEDALS) else ''}{owner}", count]
        for idx, (owner, count) in enumerate(owners[:50])
    ]
    frame = pd.DataFrame(rows, columns=["Contributor", count_column])
    frame.index = frame.index + 1  # Start index from 1 for ranking
    st.dataframe(
        frame,
        column_config={
            "Contributor": st.column_config.TextColumn("Contributor"),
            # The key must equal the real column name or the config is ignored.
            count_column: st.column_config.NumberColumn(count_column, format="%d"),
        },
        use_container_width=True,
        hide_index=False,
    )


# ---------------------------------------------------------------------------
# Page
# ---------------------------------------------------------------------------
# Fetch trending accounts with a loading spinner (do this once at the beginning)
with st.spinner("Loading trending accounts..."):
    trending_accounts, top_owners_spaces, top_owners_models = get_trending_accounts(limit=100)

# Sidebar
with st.sidebar:
    st.markdown('\n\n👤 Contributor\n\n', unsafe_allow_html=True)

    tab1, tab2 = st.tabs(["Top 100 Overall", "Top Spaces & Models"])

    with tab1:
        st.markdown('\n\n🔥 Top 100 Contributors\n\n', unsafe_allow_html=True)

        if trending_accounts:
            spaces_rank_by_owner = {owner: idx + 1 for idx, (owner, _) in enumerate(top_owners_spaces)}
            models_rank_by_owner = {owner: idx + 1 for idx, (owner, _) in enumerate(top_owners_models)}

            overall_data = []
            for idx, account in enumerate(trending_accounts[:100]):
                rank_display = "🏆 " if idx < 3 else ""
                # Strings for every rank so the columns never mix ints and "-".
                overall_data.append([
                    f"{rank_display}{account}",
                    str(spaces_rank_by_owner.get(account, "-")),
                    str(models_rank_by_owner.get(account, "-")),
                ])

            ranking_data_overall = pd.DataFrame(
                overall_data, columns=["Contributor", "Spaces Rank", "Models Rank"]
            )
            ranking_data_overall.index = ranking_data_overall.index + 1  # Start index from 1 for ranking

            st.dataframe(
                ranking_data_overall,
                height=900,  # pixel height, roughly 30 visible rows; adjust as needed
                column_config={
                    "Contributor": st.column_config.TextColumn("Contributor"),
                    "Spaces Rank": st.column_config.TextColumn("Spaces Rank"),
                    "Models Rank": st.column_config.TextColumn("Models Rank"),
                },
                use_container_width=True,
                hide_index=False,
            )

    with tab2:
        st.markdown('\n\n🚀 Spaces Leaders\n\n', unsafe_allow_html=True)
        if top_owners_spaces:
            _show_leaderboard(top_owners_spaces, "Spaces Count(Top 500 positions)")

        st.markdown('\n\n🧠 Models Leaders\n\n', unsafe_allow_html=True)
        if top_owners_models:
            _show_leaderboard(top_owners_models, "Models Count(Top 500 positions)")

    # Visual divider
    st.markdown('\n\n', unsafe_allow_html=True)

    st.markdown('\n\nSelect Contributor\n\n', unsafe_allow_html=True)
    selected_trending = st.selectbox(
        "Choose from trending accounts",
        options=trending_accounts[:100],  # Limit to top 100
        index=0 if trending_accounts else None,
        key="trending_selectbox",
    )

    st.markdown('\n\n- OR -\n\n', unsafe_allow_html=True)
    custom = st.text_input("Enter a username/organization:", placeholder="e.g. facebook, google...")

    # Visual divider
    st.markdown('\n\n', unsafe_allow_html=True)

    # A typed name wins over the selectbox; "facebook" is the last resort.
    if custom.strip():
        username = custom.strip()
    elif selected_trending:
        username = selected_trending
    else:
        username = "facebook"  # Default fallback

    st.markdown('\n\n🗓️ Time Period\n\n', unsafe_allow_html=True)
    year_options = list(range(datetime.now().year, 2017, -1))
    selected_year = st.selectbox("Select Year:", options=year_options)

    st.markdown('\n\n⚙️ Display Options\n\n', unsafe_allow_html=True)
    show_models = st.checkbox("Show Models", value=True)
    show_datasets = st.checkbox("Show Datasets", value=True)
    show_spaces = st.checkbox("Show Spaces", value=True)

# Main Content
st.markdown('\n\n🤗 Hugging Face Contributions\n\n', unsafe_allow_html=True)

if username:
    # The name may be free text and is rendered with unsafe_allow_html=True,
    # so escape it wherever it is interpolated into markdown.
    safe_username = html.escape(username)

    header_col1, header_col2 = st.columns([1, 2])

    with header_col1:
        st.markdown('\n\n'
                    f'\n\n👤 {safe_username}\n\n'
                    f'\n\nAnalyzing contributions for {selected_year}\n\n'
                    '\n\nView Profile\n\n'
                    '\n\n', unsafe_allow_html=True)

    with header_col2:
        st.markdown('\n\n'
                    '\n\nAbout This Analysis\n\n'
                    f"\n\nThis dashboard analyzes {safe_username}'s contributions to Hugging Face in {selected_year}, including models, datasets, and spaces.\n\n"
                    '\n\n* Some metrics like follower growth are simulated for visualization purposes.\n\n'
                    '\n\n', unsafe_allow_html=True)

    with st.spinner(f"Fetching contribution data for {username}..."):
        overall_rank = None
        spaces_rank = None
        models_rank = None
        spaces_count = 0
        models_count = 0
        datasets_count = 0

        # Display contributor rank if in top 100
        if username in trending_accounts[:100]:
            overall_rank = trending_accounts.index(username) + 1

            st.markdown('\n\n'
                        f'\n\n🏆 Ranked #{overall_rank} in Top Contributors\n\n'
                        '\n\n', unsafe_allow_html=True)

            for position, (owner, count) in enumerate(top_owners_spaces, start=1):
                if owner == username:
                    spaces_rank, spaces_count = position, count
                    break

            for position, (owner, count) in enumerate(top_owners_models, start=1):
                if owner == username:
                    models_rank, models_count = position, count
                    break

            rank_chart = create_ranking_chart(username, overall_rank, spaces_rank, models_rank)
            if rank_chart:
                _show_figure(rank_chart)

        commits_by_type = {}
        commit_counts_by_type = {}

        types_to_fetch = [
            kind
            for kind, enabled in (("model", show_models), ("dataset", show_datasets), ("space", show_spaces))
            if enabled
        ]

        if not types_to_fetch:
            st.warning("Please select at least one content type to display (Models, Datasets, or Spaces)")
            st.stop()

        progress_container = st.container()
        progress_container.markdown('\n\nFetching Repository Data...\n\n', unsafe_allow_html=True)
        progress_bar = progress_container.progress(0)
        progress_per_type = 1.0 / len(types_to_fetch)

        for type_index, kind in enumerate(types_to_fetch):
            try:
                items = cached_list_items(username, kind)

                # These counts feed the stats card and the radar chart.
                if kind == "model":
                    models_count = len(items)
                elif kind == "dataset":
                    datasets_count = len(items)
                elif kind == "space":
                    spaces_count = len(items)

                repo_ids = [item.id for item in items]
                progress_container.info(f"Found {len(repo_ids)} {kind}s for {username}")

                def report_progress(fraction, _offset=type_index):
                    """Map this type's completed fraction onto the overall bar."""
                    progress_bar.progress(min(1.0, (_offset + fraction) * progress_per_type))

                kind_dates, kind_total = _collect_commits(
                    repo_ids, kind, username, selected_year, on_progress=report_progress
                )
                commits_by_type[kind] = kind_dates
                commit_counts_by_type[kind] = kind_total
            except Exception as e:
                st.warning(f"Error fetching {kind}s for {username}: {str(e)}")
                commits_by_type[kind] = []
                commit_counts_by_type[kind] = 0

        progress_bar.progress(1.0)
        progress_container.success("Data fetching complete!")
        time.sleep(0.5)  # Short pause for visual feedback
        progress_container.empty()  # Clear the progress indicators

        total_commits = sum(commit_counts_by_type.values())

        st.markdown('\n\nActivity Overview\n\n', unsafe_allow_html=True)

        profile_col1, profile_col2 = st.columns([1, 2])

        with profile_col1:
            st.markdown('\n\n'
                        '\n\nContribution Stats\n\n'
                        '\n\n'
                        f'Total Commits:{total_commits}\n\n'
                        '\n\n'
                        f'Models:{models_count}\n\n'
                        '\n\n'
                        f'Datasets:{datasets_count}\n\n'
                        '\n\n'
                        f'Spaces:{spaces_count}\n\n'
                        '\n\n', unsafe_allow_html=True)

            pie_chart = create_contribution_pie(
                commit_counts_by_type.get("model", 0),
                commit_counts_by_type.get("dataset", 0),
                commit_counts_by_type.get("space", 0),
            )
            if pie_chart:
                _show_figure(pie_chart)

        with profile_col2:
            radar_fig = create_contribution_radar(
                username, models_count, spaces_count, datasets_count, total_commits
            )
            _show_figure(radar_fig)

        # One row per commit; duplicates are kept so per-day counts are real.
        all_commits = []
        for commits in commits_by_type.values():
            all_commits.extend(commits)
        all_df = pd.DataFrame(all_commits, columns=["date"])

        st.markdown('\n\nContribution Calendar\n\n', unsafe_allow_html=True)
        if not all_df.empty:
            make_calendar_heatmap(all_df, "All Contributions", selected_year)
        else:
            st.info(f"No contributions found for {username} in {selected_year}")

        st.markdown('\n\nMonthly Activity\n\n', unsafe_allow_html=True)
        monthly_fig = create_monthly_activity(all_df, selected_year)
        if monthly_fig:
            _show_figure(monthly_fig)
        else:
            st.info(f"No activity data available for {username} in {selected_year}")

        st.markdown('\n\nGrowth Projection\n\n', unsafe_allow_html=True)
        st.markdown('\n\n'
                    '\n\n📊 This is a simulation based on contribution metrics - for visualization purposes only\n\n'
                    '\n\n', unsafe_allow_html=True)
        follower_chart = simulate_follower_data(username, spaces_count, models_count, total_commits)
        _show_figure(follower_chart)

        if total_commits > 0:
            st.markdown('\n\n📊 Analytics Summary\n\n', unsafe_allow_html=True)

            monthly_df = pd.DataFrame(all_commits, columns=["date"])
            monthly_df['date'] = pd.to_datetime(monthly_df['date'])
            monthly_df['month'] = monthly_df['date'].dt.month

            if not monthly_df.empty:
                month_counts = monthly_df['month'].value_counts()
                most_active_month = int(month_counts.idxmax())
                # Any year works here; only the month name is used.
                month_name = datetime(2020, most_active_month, 1).strftime('%B')

                st.markdown('\n\n'
                            f'\n\nActivity Analysis for {safe_username}\n\n'
                            f'\n\n📈 Total Activity: {total_commits} contributions in {selected_year}\n'
                            f'🗓️ Most Active Month: {month_name} with {month_counts.max()} contributions\n'
                            f'🧩 Repository Breakdown: {models_count} Models, {spaces_count} Spaces, {datasets_count} Datasets\n\n',
                            unsafe_allow_html=True)

                if overall_rank:
                    # NOTE(review): rank #1 is reported as "Top 99%"; the
                    # intended figure is probably the rank itself - confirm.
                    percentile = 100 - overall_rank

                    # NOTE(review): the statements between the heading and the
                    # badges were lost from the reviewed copy; they are rebuilt
                    # here as one string rendered with a single call.
                    badge_html = ('\n\n'
                                  '\n\nRanking Analysis\n\n'
                                  f'\n\n🏆 Overall Ranking: #{overall_rank} (Top {percentile}% of contributors)\n\n')

                    if spaces_rank and spaces_rank <= 10:
                        badge_html += f'🌟 Elite Spaces Contributor (#{spaces_rank})'
                    elif spaces_rank and spaces_rank <= 30:
                        badge_html += f'✨ Outstanding Spaces Contributor (#{spaces_rank})'

                    if models_rank and models_rank <= 10:
                        badge_html += f'🌟 Elite Models Contributor (#{models_rank})'
                    elif models_rank and models_rank <= 30:
                        badge_html += f'✨ Outstanding Models Contributor (#{models_rank})'

                    badge_html += '\n\n'
                    st.markdown(badge_html, unsafe_allow_html=True)

        st.markdown('\n\nDetailed Category Analysis\n\n', unsafe_allow_html=True)

        cols = st.columns(len(types_to_fetch)) if types_to_fetch else st.columns(1)

        category_icons = {"model": "🧠", "dataset": "📦", "space": "🚀"}

        for i, kind in enumerate(types_to_fetch):
            with cols[i]:
                try:
                    emoji = category_icons.get(kind, "📊")
                    label = kind.capitalize() + "s"

                    total = len(cached_list_items(username, kind))
                    commits = commits_by_type.get(kind, [])
                    commit_count = commit_counts_by_type.get(kind, 0)

                    st.markdown('\n\n'
                                f'\n\n{emoji} {label}\n\n'
                                '\n\n'
                                f'Total:{total}\n\n'
                                '\n\n'
                                f'Commits:{commit_count}\n\n'
                                '\n\n', unsafe_allow_html=True)

                    df_kind = pd.DataFrame(commits, columns=["date"])
                    if not df_kind.empty:
                        make_calendar_heatmap(df_kind, f"{label} Commits", selected_year)
                    else:
                        st.info(f"No {label.lower()} activity in {selected_year}")

                except Exception as e:
                    st.warning(f"Error processing {kind.capitalize()}s: {str(e)}")
                    st.markdown('\n\n'
                                '\n\n⚠️ Error\n\n'
                                f'\n\nCould not load {kind.capitalize()}s data\n\n'
                                '\n\n', unsafe_allow_html=True)

    # Footer
    st.markdown('\n\n', unsafe_allow_html=True)
    st.markdown('\n\nHugging Face Contributions Dashboard | Data fetched from Hugging Face API\n\n',
                unsafe_allow_html=True)
else:
    # If no username is selected, show welcome screen
    st.markdown('\n\n'
                '\n\nWelcome to Hugging Face Contributions Dashboard\n\n'
                '\n\nPlease select a contributor from the sidebar to view their activity.\n\n'
                '\n\n', unsafe_allow_html=True)