|
|
|
|
|
import functools as func |
|
import glob |
|
import re |
|
from os.path import basename, splitext |
|
|
|
import numpy as np |
|
import titlecase |
|
|
|
|
|
def title2anchor(name):
    """Convert a heading text into an anchor slug.

    The text is stripped and lower-cased, every character outside
    ``[a-zA-Z0-9]`` is replaced by ``-``, consecutive ``-`` are collapsed
    into one, and leading/trailing ``-`` are removed.

    Args:
        name (str): Heading text.

    Returns:
        str: The slug built from ``name``.
    """
    normalized = name.strip().lower()
    dashed = re.sub(r'[^a-zA-Z0-9]', '-', normalized)
    collapsed = re.sub(r'-+', '-', dashed)
    return collapsed.strip('-')
|
|
|
|
|
|
|
|
|
# Gather per-page statistics for every "*_models.md" file in the current
# working directory. Each entry appended to ``stats`` is a tuple
# ``(papers, configs, ckpts, statsmsg)``.
files = sorted(glob.glob('*_models.md'))

stats = []

for f in files:
    # This script emits non-ASCII text and the pages may contain it as well,
    # so pin UTF-8 instead of relying on the locale's default encoding.
    with open(f, 'r', encoding='utf-8') as content_file:
        content = content_file.read()

    # Drop the ">" (and the whitespace after it) that sits between a "## "
    # heading and the markdown link following it, then write the page back.
    expr = r'(^## \s*?.*?\s+?)>\s*?(\[.*?\]\(.*?\))'
    content = re.sub(expr, r'\1\2', content, flags=re.MULTILINE)
    with open(f, 'w', encoding='utf-8') as content_file:
        content_file.write(content)

    # The first line of the page with every '#' removed is used as its title.
    title = content.split('\n')[0].replace('#', '')

    # Collect (type, title) pairs: a "<!-- [TYPE] -->" marker whose TYPE does
    # not start with an excluded word, followed by a "title = {...}" entry.
    exclude_papertype = ['ABSTRACT', 'IMAGE']
    exclude_expr = ''.join(f'(?!{s})' for s in exclude_papertype)
    expr = rf'<!-- \[{exclude_expr}([A-Z]+?)\] -->'\
        r'\s*\n.*?\btitle\s*=\s*{(.*?)}'
    papers = {(papertype, titlecase.titlecase(paper.lower().strip()))
              for (papertype, paper) in re.findall(expr, content, re.DOTALL)}
    print(papers)

    # The lines are reversed so that a forward search starting at a
    # "title = {...}" entry reaches the closest "## " heading that precedes
    # the entry in the page.
    revcontent = '\n'.join(reversed(content.splitlines()))
    page = splitext(basename(f))[0]
    paperlinks = {}
    for _, p in papers:
        # Escape every regex metacharacter in the title, not only '\' and '?'.
        q = re.escape(p)
        heading = re.search(
            rf'\btitle\s*=\s*{{\s*{q}\s*}}.*?\n## (.*?)\s*[,;]?\s*\n',
            revcontent, re.DOTALL | re.IGNORECASE)
        if heading is None:
            # Fail with a message that names the page and the paper instead
            # of an AttributeError on ``None``.
            raise ValueError(
                f'{f}: no "## " heading found for paper title {p!r}')
        paper_link = title2anchor(heading.group(1))
        paperlinks[p] = f'[{p}]({page}.html#{paper_link})'
    # NOTE(review): whitespace inside this literal (list indentation) may have
    # been altered by extraction -- confirm against the rendered page.
    paperlist = '\n'.join(
        sorted(f' - [{t}] {paperlinks[x]}' for t, x in papers))

    # Distinct config file URLs referenced by the page.
    configs = {x.lower().strip()
               for x in re.findall(r'https.*configs/.*\.py', content)}

    # Distinct checkpoint URLs; only those containing "mmocr" are counted.
    ckpts = {x.lower().strip()
             for x in re.findall(r'https://download.*\.pth', content)
             if 'mmocr' in x}

    # Markdown section summarising this page.
    statsmsg = f"""
## [{title}]({f})

* 模型权重文件数量: {len(ckpts)}
* 配置文件数量: {len(configs)}
* 论文数量: {len(papers)}
{paperlist}

"""

    stats.append((papers, configs, ckpts, statsmsg))
|
|
|
# Merge the per-page sets collected in ``stats``. ``set().union(*...)`` also
# works when ``stats`` is empty, whereas ``functools.reduce`` without an
# initial value raises TypeError on an empty sequence.
allpapers = set().union(*(p for p, _, _, _ in stats))
allconfigs = set().union(*(c for _, c, _, _ in stats))
allckpts = set().union(*(c for _, _, c, _ in stats))
msglist = '\n'.join(x for _, _, _, x in stats)

# Count how many distinct papers fall under each paper type.
papertypes, papercounts = np.unique([t for t, _ in allpapers],
                                    return_counts=True)
# NOTE(review): whitespace inside this literal (list indentation) may have
# been altered by extraction -- confirm against the rendered page.
countstr = '\n'.join(
    [f' - {t}: {c}' for t, c in zip(papertypes, papercounts)])

# Overall summary followed by the per-page sections.
modelzoo = f"""
# 统计数据

* 模型权重文件数量: {len(allckpts)}
* 配置文件数量: {len(allconfigs)}
* 论文数量: {len(allpapers)}
{countstr}

{msglist}
"""

# The report contains non-ASCII text, so write it explicitly as UTF-8 rather
# than with the locale's default encoding.
with open('modelzoo.md', 'w', encoding='utf-8') as f:
    f.write(modelzoo)
|
|