list-of-demos-test / demo_list.py
hysts's picture
hysts HF staff
Set colors for sleep time
cc8cf29
raw
history blame
6.14 kB
import datetime
import html
import operator
import pathlib

import pandas as pd
import tqdm.auto
import yaml
from huggingface_hub import HfApi
# Directory containing this module; `list.yaml` is opened relative to it.
repo_dir = pathlib.Path(__file__).parent
class DemoList:
COLUMN_INFO = [
['status', 'markdown'],
['hardware', 'markdown'],
['title', 'markdown'],
['owner', 'markdown'],
['arxiv', 'markdown'],
['github', 'markdown'],
['likes', 'number'],
['tags', 'str'],
['last_modified', 'str'],
['created', 'str'],
['sdk', 'markdown'],
['sdk_version', 'str'],
['suggested_hardware', 'markdown'],
['sleep_time', 'markdown'],
['replicas', 'markdown'],
]
TO_TIME_STR = {
-1: 'null',
300: '5 minutes',
600: '10 minutes',
900: '15 minutes',
1800: '30 minutes',
3600: '1 hour',
36000: '10 hours',
86400: '24 hours',
172800: '48 hours',
259200: '72 hours',
604800: '1 week',
}
def __init__(self):
self.api = HfApi()
self._raw_data = self.load_data()
self.df_raw = pd.DataFrame(self._raw_data)
self.df = self.prettify_df()
@property
def column_names(self):
return list(map(operator.itemgetter(0), self.COLUMN_INFO))
@property
def column_datatype(self):
return list(map(operator.itemgetter(1), self.COLUMN_INFO))
@staticmethod
def get_space_id(url: str) -> str:
return '/'.join(url.split('/')[-2:])
def load_data(self) -> list[dict]:
with open(repo_dir / 'list.yaml') as f:
data = yaml.safe_load(f)
res = []
for url in tqdm.auto.tqdm(list(data)):
space_id = self.get_space_id(url)
space_info = self.api.space_info(repo_id=space_id)
card = space_info.cardData
info = data[url]
for tag in ['arxiv', 'github', 'tags']:
if tag not in info:
info[tag] = []
info['url'] = url
info['owner'] = space_id.split('/')[0]
info['title'] = card['title']
info['sdk'] = card['sdk']
info['sdk_version'] = card.get('sdk_version', '')
info['likes'] = space_info.likes
info['last_modified'] = space_info.lastModified
info['status'] = space_info.runtime['stage']
info['suggested_hardware'] = card.get('suggested_hardware', '')
info['hardware'] = space_info.runtime['hardware']['current']
if info['hardware'] is None:
info['hardware'] = space_info.runtime['hardware']['requested']
if info['hardware'] == 'cpu-basic':
info['sleep_time'] = 172800
else:
info['sleep_time'] = space_info.runtime['gcTimeout'] or -1
resources = space_info.runtime['resources']
info['replicas'] = -1 if resources is None else resources[
'replicas']
res.append(info)
return res
def get_arxiv_link(self, links: list[str]) -> str:
links = [self.create_link(link.split('/')[-1], link) for link in links]
return '\n'.join(links)
def get_github_link(self, links: list[str]) -> str:
links = [self.create_link('github', link) for link in links]
return '\n'.join(links)
def get_tag_list(self, tags: list[str]) -> str:
return ', '.join(tags)
@staticmethod
def create_link(text: str, url: str) -> str:
return f'<a href={url} target="_blank">{text}</a>'
def to_div(self, text: str | None, category_name: str) -> str:
if text is None:
text = ''
class_name = f'{category_name}-{text.lower()}'
return f'<div class="{class_name}">{text}</div>'
@staticmethod
def format_timestamp(timestamp: str) -> str:
s = datetime.datetime.strptime(timestamp, '%Y-%m-%dT%H:%M:%S.000Z')
return s.strftime('%Y/%m/%d %H:%M:%S')
@staticmethod
def add_div_tag_to_replicas(replicas: int) -> str:
if replicas == -1:
return ''
if replicas == 1:
return '1'
return f'<div class="multiple-replicas">{replicas}</div>'
@staticmethod
def add_div_tag_to_sleep_time(sleep_time_s: str, hardware: str) -> str:
if hardware == 'cpu-basic':
return f'<div class="sleep-time-cpu-basic">{sleep_time_s}</div>'
s = sleep_time_s.replace(' ', '-')
return f'<div class="sleep-time-{s}">{sleep_time_s}</div>'
def prettify_df(self) -> pd.DataFrame:
new_rows = []
for _, row in self.df_raw.copy().iterrows():
new_row = {
'status':
self.to_div(row.status, 'status'),
'hardware':
self.to_div(row.hardware, 'hardware'),
'suggested_hardware':
self.to_div(row.suggested_hardware, 'hardware'),
'title':
self.create_link(row.title, row.url),
'owner':
self.create_link(row.owner,
f'https://huggingface.co/{row.owner}'),
'arxiv':
self.get_arxiv_link(row.arxiv),
'github':
self.get_github_link(row.github),
'likes':
row.likes,
'tags':
self.get_tag_list(row.tags),
'last_modified':
self.format_timestamp(row.last_modified),
'created':
self.format_timestamp(row.created),
'sdk':
self.to_div(row.sdk, 'sdk'),
'sdk_version':
row.sdk_version,
'sleep_time':
self.add_div_tag_to_sleep_time(
self.TO_TIME_STR[row.sleep_time], row.hardware),
'replicas':
self.add_div_tag_to_replicas(row.replicas),
}
new_rows.append(new_row)
df = pd.DataFrame(new_rows).loc[:, self.column_names]
return df