File size: 4,204 Bytes
18c5186
5092824
 
 
 
76036fd
8235d81
 
6d5352f
5092824
8235d81
5092824
18c5186
 
4810e2a
 
 
 
 
 
18c5186
8235d81
5092824
6d5352f
8235d81
5092824
 
 
8235d81
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76036fd
5092824
6d5352f
5092824
 
 
4810e2a
5092824
 
 
18c5186
8235d81
76036fd
8235d81
5092824
8235d81
6d5352f
5092824
 
 
 
 
8235d81
 
 
 
 
 
2dc5d96
8235d81
5092824
 
8235d81
 
 
 
 
 
 
 
5092824
 
 
 
 
 
 
 
 
 
 
 
 
 
8235d81
76036fd
5092824
 
 
8235d81
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
import html
import re

import numpy as np
import pandas as pd


class PaperList:
    """Table of papers loaded from a CSV file and rendered as an HTML table.

    The methods below read these columns from the CSV: ``title``,
    ``authors``, ``arxiv``, ``github``, ``hf_paper``, ``hf_space``,
    ``hf_model``, ``hf_dataset``, ``n_linked_authors`` and ``n_authors``.
    Two derived columns are added at load time: ``title_lowercase`` and
    ``html_table_content`` (one pre-rendered ``<tr>`` per paper).
    """

    def __init__(self, csv_path: str = "papers.csv") -> None:
        """Load the CSV and pre-render one HTML row per paper.

        Args:
            csv_path: Path of the CSV file to load. Defaults to
                ``papers.csv``, the location that was previously hard-coded.
        """
        # Not read anywhere inside this class; kept for external users.
        self.organization_name = "ICML2023"
        self.table = pd.read_csv(csv_path)
        self._preprocess_table()

        self.table_header = """
            <tr>
                <td width="38%">Title</td>
                <td width="25%">Authors</td>
                <td width="5%">arXiv</td>
                <td width="5%">GitHub</td>
                <td width="7%">Paper pages</td>
                <td width="5%">Spaces</td>
                <td width="5%">Models</td>
                <td width="5%">Datasets</td>
                <td width="5%">Claimed</td>
            </tr>"""

    @staticmethod
    def _text_cell(value: object) -> str:
        """Return *value* as HTML-escaped text, or ``""`` when it is missing."""
        return "" if pd.isna(value) else html.escape(str(value))

    @staticmethod
    def _link_cell(url: object, label: str) -> str:
        """Return an anchor to *url* labelled *label*, or ``""`` when *url* is not a string."""
        # Missing CSV cells are not ``str`` instances (NaN/None), so they yield an empty cell.
        if not isinstance(url, str):
            return ""
        # Escape the URL so characters such as '"' or '&' cannot break out of the attribute.
        return f'<a href="{html.escape(url)}" target="_blank">{label}</a>'

    @staticmethod
    def _claimed_cell(n_linked_authors: float, n_authors: float) -> str:
        """Return the ``linked/total`` author summary, or ``""`` when the linked count is missing."""
        if np.isnan(n_linked_authors):
            return ""
        total = "" if np.isnan(n_authors) else int(n_authors)
        mark = "✅" if n_linked_authors > 0 else ""
        return f"{int(n_linked_authors)}/{total} {mark}"

    def _render_row(self, row) -> str:
        """Build the ``<tr>`` markup for one ``itertuples`` row of ``self.table``."""
        return f"""
                <tr>
                    <td>{self._text_cell(row.title)}</td>
                    <td>{self._text_cell(row.authors)}</td>
                    <td>{self._link_cell(row.arxiv, "arXiv")}</td>
                    <td>{self._link_cell(row.github, "GitHub")}</td>
                    <td>{self._link_cell(row.hf_paper, "Paper page")}</td>
                    <td>{self._link_cell(row.hf_space, "Space")}</td>
                    <td>{self._link_cell(row.hf_model, "Model")}</td>
                    <td>{self._link_cell(row.hf_dataset, "Dataset")}</td>
                    <td>{self._claimed_cell(row.n_linked_authors, row.n_authors)}</td>
                </tr>"""

    def _preprocess_table(self) -> None:
        """Add the ``title_lowercase`` and ``html_table_content`` columns to ``self.table``."""
        self.table["title_lowercase"] = self.table.title.str.lower()
        self.table["html_table_content"] = [self._render_row(row) for row in self.table.itertuples()]

    @staticmethod
    def _title_mask(titles: pd.Series, query: str, case_sensitive: bool) -> pd.Series:
        """Return a boolean mask of the titles that match *query*.

        A query that compiles as a regular expression is used as one, as
        before. A query that does not compile (for example ``"("``) is
        matched as a literal substring instead of raising. Missing titles
        never match.
        """
        try:
            re.compile(query)
        except re.error:
            use_regex = False
        else:
            use_regex = True
        # ``case=`` handles case-insensitivity without lowercasing the pattern,
        # which would corrupt regex escapes such as ``\S``.
        return titles.str.contains(query, case=case_sensitive, regex=use_regex, na=False)

    def render(self, search_query: str, case_sensitive: bool, filter_names: list[str]) -> tuple[str, str]:
        """Filter the table and render the remaining papers.

        Args:
            search_query: Text (or regular expression) searched for in the
                title; an empty string disables the search.
            case_sensitive: Whether the title search is case-sensitive.
            filter_names: Any of ``"arXiv"``, ``"GitHub"``, ``"Space"``,
                ``"Model"``, ``"Dataset"``; each one keeps only the papers
                whose corresponding column is populated.

        Returns:
            A ``(summary, html)`` pair: *summary* reads
            ``"<n papers> (<n claimed> claimed)"`` and *html* is the
            rendered ``<table>`` markup.
        """
        df = self.table
        if search_query:
            df = df[self._title_mask(df.title, search_query, case_sensitive)]
        df = self.filter_table(
            df,
            has_arxiv="arXiv" in filter_names,
            has_github="GitHub" in filter_names,
            has_hf_space="Space" in filter_names,
            has_hf_model="Model" in filter_names,
            has_hf_dataset="Dataset" in filter_names,
        )
        # NaN compares as False, so papers without a linked-author count are not claimed.
        n_claimed = int((df.n_linked_authors > 0).sum())
        return f"{len(df)} ({n_claimed} claimed)", self.to_html(df, self.table_header)

    @staticmethod
    def filter_table(
        df: pd.DataFrame,
        has_arxiv: bool,
        has_github: bool,
        has_hf_space: bool,
        has_hf_model: bool,
        has_hf_dataset: bool,
    ) -> pd.DataFrame:
        """Keep only the rows whose requested link columns are populated.

        Each ``has_*`` flag that is true drops the rows in which the
        matching column (``arxiv``, ``github``, ``hf_space``, ``hf_model``,
        ``hf_dataset``) is missing. Flags combine with AND.
        """
        required = (
            ("arxiv", has_arxiv),
            ("github", has_github),
            ("hf_space", has_hf_space),
            ("hf_model", has_hf_model),
            ("hf_dataset", has_hf_dataset),
        )
        for column, wanted in required:
            if wanted:
                df = df[df[column].notna()]
        return df

    @staticmethod
    def to_html(df: pd.DataFrame, table_header: str) -> str:
        """Wrap *table_header* and the pre-rendered rows of *df* in a ``<table>`` element."""
        table_data = "".join(df.html_table_content)
        return f"""
        <table>
            {table_header}
            {table_data}
        </table>"""