Ritvik committed on
Commit
b68ab8a
·
1 Parent(s): 7268fc2

Updated app

Browse files
Files changed (1) hide show
  1. app.py +193 -69
app.py CHANGED
@@ -5,33 +5,85 @@ import matplotlib.pyplot as plt
5
  import seaborn as sns
6
  from datetime import datetime
7
  from concurrent.futures import ThreadPoolExecutor, as_completed
 
 
8
 
9
  st.set_page_config(page_title="HF Contributions", layout="wide")
10
  api = HfApi()
11
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  # Function to fetch commits for a repository (optimized)
13
  def fetch_commits_for_repo(repo_id, repo_type, username, selected_year):
14
  try:
 
15
  # Skip private/gated repos upfront
16
- repo_info = api.repo_info(repo_id=repo_id, repo_type=repo_type)
17
  if repo_info.private or (hasattr(repo_info, 'gated') and repo_info.gated):
18
- return []
19
-
20
- commits = api.list_repo_commits(repo_id=repo_id, repo_type=repo_type)
21
- commit_dates = [
22
- pd.to_datetime(commit.created_at).tz_localize(None).date()
23
- for commit in commits
24
- if any(
25
- (isinstance(author, str) and author.lower() == username.lower()) or
26
- (isinstance(author, dict) and "user" in author and author["user"].lower() == username.lower())
27
- for author in commit.authors
28
- ) and pd.to_datetime(commit.created_at).year == selected_year
29
- ]
30
- return commit_dates
 
 
 
 
 
 
 
 
31
  except Exception:
32
- return [] # Silently skip inaccessible or errored repos
33
 
34
- # Function to get commit events for a user
 
35
  def get_commit_events(username, kind=None, selected_year=None):
36
  commit_dates = []
37
  items_with_type = []
@@ -39,54 +91,84 @@ def get_commit_events(username, kind=None, selected_year=None):
39
 
40
  for k in kinds:
41
  try:
42
- if k == "model":
43
- items = list(api.list_models(author=username))
44
- elif k == "dataset":
45
- items = list(api.list_datasets(author=username))
46
- elif k == "space":
47
- items = list(api.list_spaces(author=username))
48
- else:
49
- items = []
50
-
51
  items_with_type.extend((item, k) for item in items)
52
  repo_ids = [item.id for item in items]
53
 
54
- # Parallel fetch commits
55
- with ThreadPoolExecutor(max_workers=10) as executor:
56
- future_to_repo = {
57
- executor.submit(fetch_commits_for_repo, repo_id, k, username, selected_year): repo_id
58
- for repo_id in repo_ids
59
- }
60
- for future in as_completed(future_to_repo):
61
- commit_dates.extend(future.result())
 
 
 
 
 
62
  except Exception as e:
63
  st.warning(f"Error fetching {k}s for {username}: {str(e)}")
64
 
65
- return pd.DataFrame(commit_dates, columns=["date"]), items_with_type
 
 
 
 
 
66
 
67
- # Calendar heatmap function
68
- def make_calendar_heatmap(df, title, year, color_palette="Greens"):
69
  if df.empty:
70
  st.info(f"No {title.lower()} found for {year}.")
71
  return
 
 
72
  df["count"] = 1
73
- df = df.groupby("date").sum().reset_index()
74
  df["date"] = pd.to_datetime(df["date"])
 
 
75
  start = pd.Timestamp(f"{year}-01-01")
76
  end = pd.Timestamp(f"{year}-12-31")
77
  all_days = pd.date_range(start=start, end=end)
78
- heatmap_data = pd.DataFrame(index=all_days).assign(count=0)
79
- heatmap_data.loc[df.set_index("date").index, "count"] = df.set_index("date")["count"]
80
- heatmap_data["dow"] = heatmap_data.index.dayofweek
81
- heatmap_data["week"] = ((heatmap_data.index - start).days // 7)
82
- heatmap_data = heatmap_data.reset_index().rename(columns={"index": "date"})
 
 
 
 
 
 
 
83
  pivot = heatmap_data.pivot(index="dow", columns="week", values="count").fillna(0)
84
- month_labels = [d.strftime("%b") for d in pd.date_range(start, end, freq="MS")]
85
- month_positions = [((pd.Timestamp(f"{year}-{i + 1}-01") - start).days // 7) for i in range(12)]
 
 
 
 
 
 
 
 
 
 
 
86
  fig, ax = plt.subplots(figsize=(12, 1.2))
87
- sns.heatmap(pivot, ax=ax, cmap=color_palette, linewidths=0.5, linecolor="white", square=True, cbar=False,
88
- yticklabels=["M", "T", "W", "T", "F", "S", "S"])
89
- ax.set_title(f"{title} ({year})", fontsize=12, pad=10)
 
 
 
 
 
 
90
  ax.set_xlabel("")
91
  ax.set_ylabel("")
92
  ax.set_xticks(month_positions)
@@ -94,6 +176,7 @@ def make_calendar_heatmap(df, title, year, color_palette="Greens"):
94
  ax.set_yticklabels(ax.get_yticklabels(), rotation=0, fontsize=8)
95
  st.pyplot(fig)
96
 
 
97
  # Sidebar
98
  with st.sidebar:
99
  st.title("👤 Contributor")
@@ -113,23 +196,57 @@ with st.sidebar:
113
  st.title("🤗 Hugging Face Contributions")
114
  if username:
115
  with st.spinner("Fetching commit data..."):
116
- all_df, all_items = get_commit_events(username, selected_year=selected_year)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
117
  st.subheader(f"{username}'s Activity in {selected_year}")
118
- st.metric("Total Commits", len(all_df))
119
- make_calendar_heatmap(all_df, "All Commits", selected_year)
120
 
121
- # Updated Color Scheme Legend with five shades
122
- st.markdown("""
123
- <div style='text-align: center; margin-top: -10px; margin-bottom: 20px;'>
124
- <span style='font-size: 12px; margin-right: 10px;'>Less</span>
125
- <span style='display: inline-block; width: 15px; height: 15px; background-color: #f0f7f0; border: 1px solid #ccc;'></span>
126
- <span style='display: inline-block; width: 15px; height: 15px; background-color: #c6e0c6; border: 1px solid #ccc;'></span>
127
- <span style='display: inline-block; width: 15px; height: 15px; background-color: #77b577; border: 1px solid #ccc;'></span>
128
- <span style='display: inline-block; width: 15px; height: 15px; background-color: #2e6b2e; border: 1px solid #ccc;'></span>
129
- <span style='display: inline-block; width: 15px; height: 15px; background-color: #1a3c1a; border: 1px solid #ccc;'></span>
130
- <span style='font-size: 12px; margin-left: 10px;'>More</span>
131
- </div>
132
- """, unsafe_allow_html=True)
133
 
134
  # Metrics and heatmaps for each type
135
  col1, col2, col3 = st.columns(3)
@@ -139,11 +256,18 @@ if username:
139
  (col3, "space", "🚀", "Spaces")
140
  ]:
141
  with col:
142
- df_kind, _ = get_commit_events(username, kind=kind, selected_year=selected_year)
143
  try:
144
- total = len(list(getattr(api, f"list_{kind}s")(author=username)))
145
- except Exception:
146
- total = 0
147
- st.metric(f"{emoji} {label}", total)
148
- st.metric(f"Commits in {selected_year}", len(df_kind))
149
- make_calendar_heatmap(df_kind, f"{label} Commits", selected_year)
 
 
 
 
 
 
 
 
 
5
  import seaborn as sns
6
  from datetime import datetime
7
  from concurrent.futures import ThreadPoolExecutor, as_completed
8
+ from functools import lru_cache
9
+ import time
10
 
11
  st.set_page_config(page_title="HF Contributions", layout="wide")
12
  api = HfApi()
13
 
14
+
15
+ # Cache for API responses
16
@lru_cache(maxsize=1000)
def cached_repo_info(repo_id, repo_type):
    """Return ``api.repo_info`` for a repository, memoised per argument pair.

    Args:
        repo_id: Repository id forwarded to ``api.repo_info``.
        repo_type: Repository type forwarded to ``api.repo_info``.

    Returns:
        Whatever ``api.repo_info`` returns; repeated calls with the same
        arguments reuse the cached object (up to 1000 entries).
    """
    info = api.repo_info(repo_id=repo_id, repo_type=repo_type)
    return info
19
+
20
+
21
@lru_cache(maxsize=1000)
def cached_list_commits(repo_id, repo_type):
    """Return a repository's commits as a list, memoised per argument pair.

    Args:
        repo_id: Repository id forwarded to ``api.list_repo_commits``.
        repo_type: Repository type forwarded to ``api.list_repo_commits``.

    Returns:
        A list built from ``api.list_repo_commits``; repeated calls with the
        same arguments reuse the cached list (up to 1000 entries).
    """
    commits = api.list_repo_commits(repo_id=repo_id, repo_type=repo_type)
    # Materialise once so the cached value can be iterated repeatedly.
    return list(commits)
24
+
25
+
26
@lru_cache(maxsize=100)
def cached_list_items(username, kind):
    """Return the repositories of one kind authored by ``username``, memoised.

    Args:
        username: Author name passed as ``author=`` to the listing call.
        kind: One of ``"model"``, ``"dataset"`` or ``"space"``.

    Returns:
        A list built from the matching ``api.list_*`` call, or an empty list
        when ``kind`` is not one of the three recognised values.
    """
    # Method names are resolved lazily so an unknown kind never touches ``api``.
    lister_names = {
        "model": "list_models",
        "dataset": "list_datasets",
        "space": "list_spaces",
    }
    lister_name = lister_names.get(kind)
    if lister_name is None:
        return []
    lister = getattr(api, lister_name)
    return list(lister(author=username))
35
+
36
+
37
+ # Rate limiting
38
class RateLimiter:
    """Space out calls so that at most ``calls_per_second`` start per second.

    ``wait()`` is called from thread-pool workers, so the bookkeeping is
    guarded by a lock: each caller reserves the next free time slot while
    holding the lock and then sleeps until that slot *outside* the lock, which
    keeps concurrent callers from all observing the same stale timestamp.

    Attributes:
        calls_per_second: Maximum number of calls allowed to start per second.
        last_call: Monotonic-clock time of the most recently reserved slot.
    """

    def __init__(self, calls_per_second=10):
        # Local import keeps this block self-contained with respect to the
        # file's top-level import list.
        import threading

        self.calls_per_second = calls_per_second
        self.last_call = 0
        self._lock = threading.Lock()

    def wait(self):
        """Block until this caller's reserved slot arrives.

        Returns immediately when at least one full interval has already passed
        since the previously reserved slot.
        """
        interval = 1.0 / self.calls_per_second
        with self._lock:
            # Monotonic time is immune to wall-clock adjustments.
            now = time.monotonic()
            slot = max(now, self.last_call + interval)
            self.last_call = slot
        delay = slot - now
        if delay > 0:
            time.sleep(delay)


rate_limiter = RateLimiter()
52
+
53
+
54
  # Function to fetch commits for a repository (optimized)
55
  def fetch_commits_for_repo(repo_id, repo_type, username, selected_year):
56
  try:
57
+ rate_limiter.wait()
58
  # Skip private/gated repos upfront
59
+ repo_info = cached_repo_info(repo_id, repo_type)
60
  if repo_info.private or (hasattr(repo_info, 'gated') and repo_info.gated):
61
+ return [], []
62
+
63
+ # Get initial commit date
64
+ initial_commit_date = pd.to_datetime(repo_info.created_at).tz_localize(None).date()
65
+ commit_dates = []
66
+ commit_count = 0
67
+
68
+ # Add initial commit if it's from the selected year
69
+ if initial_commit_date.year == selected_year:
70
+ commit_dates.append(initial_commit_date)
71
+ commit_count += 1
72
+
73
+ # Get all commits
74
+ commits = cached_list_commits(repo_id, repo_type)
75
+ for commit in commits:
76
+ commit_date = pd.to_datetime(commit.created_at).tz_localize(None).date()
77
+ if commit_date.year == selected_year:
78
+ commit_dates.append(commit_date)
79
+ commit_count += 1
80
+
81
+ return commit_dates, commit_count
82
  except Exception:
83
+ return [], 0
84
 
85
+
86
+ # Function to get commit events for a user (optimized)
87
  def get_commit_events(username, kind=None, selected_year=None):
88
  commit_dates = []
89
  items_with_type = []
 
91
 
92
  for k in kinds:
93
  try:
94
+ items = cached_list_items(username, k)
 
 
 
 
 
 
 
 
95
  items_with_type.extend((item, k) for item in items)
96
  repo_ids = [item.id for item in items]
97
 
98
+ # Optimized parallel fetch with chunking
99
+ chunk_size = 5 # Process 5 repos at a time
100
+ for i in range(0, len(repo_ids), chunk_size):
101
+ chunk = repo_ids[i:i + chunk_size]
102
+ with ThreadPoolExecutor(max_workers=min(5, len(chunk))) as executor:
103
+ future_to_repo = {
104
+ executor.submit(fetch_commits_for_repo, repo_id, k, username, selected_year): repo_id
105
+ for repo_id in chunk
106
+ }
107
+ for future in as_completed(future_to_repo):
108
+ repo_commits, repo_count = future.result()
109
+ if repo_commits: # Only extend if we got commits
110
+ commit_dates.extend(repo_commits)
111
  except Exception as e:
112
  st.warning(f"Error fetching {k}s for {username}: {str(e)}")
113
 
114
+ # Create DataFrame with all commits
115
+ df = pd.DataFrame(commit_dates, columns=["date"])
116
+ if not df.empty:
117
+ df = df.drop_duplicates() # Remove any duplicate dates
118
+ return df, items_with_type
119
+
120
 
121
+ # Calendar heatmap function (optimized)
122
+ def make_calendar_heatmap(df, title, year):
123
  if df.empty:
124
  st.info(f"No {title.lower()} found for {year}.")
125
  return
126
+
127
+ # Optimize DataFrame operations
128
  df["count"] = 1
129
+ df = df.groupby("date", as_index=False).sum()
130
  df["date"] = pd.to_datetime(df["date"])
131
+
132
+ # Create date range more efficiently
133
  start = pd.Timestamp(f"{year}-01-01")
134
  end = pd.Timestamp(f"{year}-12-31")
135
  all_days = pd.date_range(start=start, end=end)
136
+
137
+ # Optimize DataFrame creation and merging
138
+ heatmap_data = pd.DataFrame({"date": all_days, "count": 0})
139
+ heatmap_data = heatmap_data.merge(df, on="date", how="left", suffixes=("", "_y"))
140
+ heatmap_data["count"] = heatmap_data["count_y"].fillna(0)
141
+ heatmap_data = heatmap_data.drop("count_y", axis=1)
142
+
143
+ # Calculate week and day of week more efficiently
144
+ heatmap_data["dow"] = heatmap_data["date"].dt.dayofweek
145
+ heatmap_data["week"] = (heatmap_data["date"] - start).dt.days // 7
146
+
147
+ # Create pivot table more efficiently
148
  pivot = heatmap_data.pivot(index="dow", columns="week", values="count").fillna(0)
149
+
150
+ # Optimize month labels calculation
151
+ month_labels = pd.date_range(start, end, freq="MS").strftime("%b")
152
+ month_positions = pd.date_range(start, end, freq="MS").map(lambda x: (x - start).days // 7)
153
+
154
+ # Create custom colormap with specific boundaries
155
+ from matplotlib.colors import ListedColormap, BoundaryNorm
156
+ colors = ['#ebedf0', '#9be9a8', '#40c463', '#30a14e', '#216e39'] # GitHub-style green colors
157
+ bounds = [0, 1, 3, 11, 31, float('inf')] # Boundaries for color transitions
158
+ cmap = ListedColormap(colors)
159
+ norm = BoundaryNorm(bounds, cmap.N)
160
+
161
+ # Create plot more efficiently
162
  fig, ax = plt.subplots(figsize=(12, 1.2))
163
+
164
+ # Convert pivot values to integers to ensure proper color mapping
165
+ pivot_int = pivot.astype(int)
166
+
167
+ # Create heatmap with explicit vmin and vmax
168
+ sns.heatmap(pivot_int, ax=ax, cmap=cmap, norm=norm, linewidths=0.5, linecolor="white",
169
+ square=True, cbar=False, yticklabels=["M", "T", "W", "T", "F", "S", "S"])
170
+
171
+ ax.set_title(f"{title}", fontsize=12, pad=10)
172
  ax.set_xlabel("")
173
  ax.set_ylabel("")
174
  ax.set_xticks(month_positions)
 
176
  ax.set_yticklabels(ax.get_yticklabels(), rotation=0, fontsize=8)
177
  st.pyplot(fig)
178
 
179
+
180
  # Sidebar
181
  with st.sidebar:
182
  st.title("👤 Contributor")
 
196
  st.title("🤗 Hugging Face Contributions")
197
  if username:
198
  with st.spinner("Fetching commit data..."):
199
+ # Create a dictionary to store commits by type
200
+ commits_by_type = {}
201
+ commit_counts_by_type = {}
202
+
203
+ # Fetch commits for each type separately
204
+ for kind in ["model", "dataset", "space"]:
205
+ try:
206
+ items = cached_list_items(username, kind)
207
+ repo_ids = [item.id for item in items]
208
+
209
+ # Process repos in chunks
210
+ chunk_size = 5
211
+ total_commits = 0
212
+ all_commit_dates = []
213
+
214
+ for i in range(0, len(repo_ids), chunk_size):
215
+ chunk = repo_ids[i:i + chunk_size]
216
+ with ThreadPoolExecutor(max_workers=min(5, len(chunk))) as executor:
217
+ future_to_repo = {
218
+ executor.submit(fetch_commits_for_repo, repo_id, kind, username, selected_year): repo_id
219
+ for repo_id in chunk
220
+ }
221
+ for future in as_completed(future_to_repo):
222
+ repo_commits, repo_count = future.result()
223
+ if repo_commits:
224
+ all_commit_dates.extend(repo_commits)
225
+ total_commits += repo_count
226
+
227
+ commits_by_type[kind] = all_commit_dates
228
+ commit_counts_by_type[kind] = total_commits
229
+
230
+ except Exception as e:
231
+ st.warning(f"Error fetching {kind}s for {username}: {str(e)}")
232
+ commits_by_type[kind] = []
233
+ commit_counts_by_type[kind] = 0
234
+
235
+ # Calculate total commits across all types
236
+ total_commits = sum(commit_counts_by_type.values())
237
+
238
  st.subheader(f"{username}'s Activity in {selected_year}")
239
+ st.metric("Total Commits", total_commits)
 
240
 
241
+ # Create DataFrame for all commits
242
+ all_commits = []
243
+ for commits in commits_by_type.values():
244
+ all_commits.extend(commits)
245
+ all_df = pd.DataFrame(all_commits, columns=["date"])
246
+ if not all_df.empty:
247
+ all_df = all_df.drop_duplicates() # Remove any duplicate dates
248
+
249
+ make_calendar_heatmap(all_df, "All Commits", selected_year)
 
 
 
250
 
251
  # Metrics and heatmaps for each type
252
  col1, col2, col3 = st.columns(3)
 
256
  (col3, "space", "🚀", "Spaces")
257
  ]:
258
  with col:
 
259
  try:
260
+ total = len(cached_list_items(username, kind))
261
+ commits = commits_by_type.get(kind, [])
262
+ commit_count = commit_counts_by_type.get(kind, 0)
263
+ df_kind = pd.DataFrame(commits, columns=["date"])
264
+ if not df_kind.empty:
265
+ df_kind = df_kind.drop_duplicates() # Remove any duplicate dates
266
+ st.metric(f"{emoji} {label}", total)
267
+ st.metric(f"Commits in {selected_year}", commit_count)
268
+ make_calendar_heatmap(df_kind, f"{label} Commits", selected_year)
269
+ except Exception as e:
270
+ st.warning(f"Error processing {label}: {str(e)}")
271
+ st.metric(f"{emoji} {label}", 0)
272
+ st.metric(f"Commits in {selected_year}", 0)
273
+ make_calendar_heatmap(pd.DataFrame(), f"{label} Commits", selected_year)