Spaces:
				
			
			
	
			
			
		Runtime error
		
	
	
	
			
			
	
	
	
	
		
		
		Runtime error
		
	
		Tristan Thrush
		
	committed on
		
		
					Commit 
							
							·
						
						1060543
	
1
								Parent(s):
							
							3ee9aeb
								
rolled back latest changes except requirement versions
Browse files
    	
        app.py
    CHANGED
    
    | @@ -130,21 +130,8 @@ dataframe = get_data_wrapper() | |
| 130 |  | 
| 131 | 
             
            st.markdown("# 🤗 Leaderboards")
         | 
| 132 |  | 
| 133 | 
            -
            query_params = st.experimental_get_query_params()
         | 
| 134 | 
            -
            if "first_query_params" not in st.session_state:
         | 
| 135 | 
            -
                st.session_state.first_query_params = query_params
         | 
| 136 | 
            -
            first_query_params = st.session_state.first_query_params
         | 
| 137 | 
            -
             | 
| 138 | 
            -
            default_task = first_query_params.get("task", [None])[0]
         | 
| 139 | 
            -
            default_only_verified = bool(int(first_query_params.get("only_verified", [0])[0]))
         | 
| 140 | 
            -
            print(default_only_verified)
         | 
| 141 | 
            -
            default_dataset = first_query_params.get("dataset", [None])[0]
         | 
| 142 | 
            -
            default_split = first_query_params.get("split", [None])[0]
         | 
| 143 | 
            -
            default_config = first_query_params.get("config", [None])[0]
         | 
| 144 | 
            -
             | 
| 145 | 
             
            only_verified_results = st.sidebar.checkbox(
         | 
| 146 | 
             
                "Filter for Verified Results",
         | 
| 147 | 
            -
                value=default_only_verified,
         | 
| 148 | 
             
                help="Select this checkbox if you want to see only results produced by the Hugging Face model evaluator, and no self-reported results."
         | 
| 149 | 
             
            )
         | 
| 150 |  | 
| @@ -152,12 +139,10 @@ selectable_tasks = list(set(dataframe.pipeline_tag)) | |
| 152 | 
             
            if None in selectable_tasks:
         | 
| 153 | 
             
                selectable_tasks.remove(None)
         | 
| 154 | 
             
            selectable_tasks.sort(key=lambda name: name.lower())
         | 
| 155 | 
            -
            selectable_tasks = ["-any-"] + selectable_tasks
         | 
| 156 |  | 
| 157 | 
             
            task = st.sidebar.selectbox(
         | 
| 158 | 
             
                "Task",
         | 
| 159 | 
            -
                selectable_tasks,
         | 
| 160 | 
            -
                index=(selectable_tasks).index(default_task) if default_task in selectable_tasks else 0,
         | 
| 161 | 
             
                help="Filter the selectable datasets by task. Leave as \"-any-\" to see all selectable datasets."
         | 
| 162 | 
             
            )
         | 
| 163 |  | 
| @@ -168,6 +153,16 @@ selectable_datasets = ["-any-"] + sorted(list(set(dataframe.dataset.tolist())), | |
| 168 | 
             
            if "" in selectable_datasets:
         | 
| 169 | 
             
                selectable_datasets.remove("")
         | 
| 170 |  | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 171 | 
             
            dataset = st.sidebar.selectbox(
         | 
| 172 | 
             
                "Dataset",
         | 
| 173 | 
             
                selectable_datasets,
         | 
| @@ -177,7 +172,7 @@ dataset = st.sidebar.selectbox( | |
| 177 |  | 
| 178 | 
             
            dataframe = dataframe[dataframe.only_verified == only_verified_results]
         | 
| 179 |  | 
| 180 | 
            -
            st.experimental_set_query_params(**{"dataset": [dataset] | 
| 181 |  | 
| 182 | 
             
            if dataset != "-any-":
         | 
| 183 | 
             
                dataset_df = dataframe[dataframe.dataset == dataset]
         | 
| @@ -194,7 +189,6 @@ if len(dataset_df) > 0: | |
| 194 | 
             
                    config = st.sidebar.selectbox(
         | 
| 195 | 
             
                        "Config",
         | 
| 196 | 
             
                        selectable_configs,
         | 
| 197 | 
            -
                        index=selectable_configs.index(default_config) if default_config in selectable_configs else 0,
         | 
| 198 | 
             
                        help="Filter the results on the current leaderboard by the dataset config. Self-reported results might not report the config, which is why \"-unspecified-\" is an option."
         | 
| 199 | 
             
                    )
         | 
| 200 | 
             
                    dataset_df = dataset_df[dataset_df.config == config]
         | 
| @@ -203,12 +197,8 @@ if len(dataset_df) > 0: | |
| 203 | 
             
                    split = st.sidebar.selectbox(
         | 
| 204 | 
             
                        "Split",
         | 
| 205 | 
             
                        selectable_splits,
         | 
| 206 | 
            -
                        index=selectable_splits.index(default_split) if default_split in selectable_splits else 0,
         | 
| 207 | 
             
                        help="Filter the results on the current leaderboard by the dataset split. Self-reported results might not report the split, which is why \"-unspecified-\" is an option."
         | 
| 208 | 
             
                    )
         | 
| 209 | 
            -
             | 
| 210 | 
            -
                    st.experimental_set_query_params(**{"dataset": [dataset], "only_verified": [int(only_verified_results)], "task": [task], "config": [config], "split": [split]})
         | 
| 211 | 
            -
             | 
| 212 | 
             
                    dataset_df = dataset_df[dataset_df.split == split]
         | 
| 213 |  | 
| 214 | 
             
                not_selectable_metrics = ["model_id", "dataset", "split", "config", "pipeline_tag", "only_verified"]
         | 
| @@ -292,4 +282,4 @@ if len(dataset_df) > 0: | |
| 292 | 
             
            else:
         | 
| 293 | 
             
                st.markdown(
         | 
| 294 | 
             
                    "No " + ("verified" if only_verified_results else "unverified") + " results to display. Try toggling the verified results filter."
         | 
| 295 | 
            -
                )
         | 
|  | |
| 130 |  | 
| 131 | 
             
            st.markdown("# 🤗 Leaderboards")
         | 
| 132 |  | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 133 | 
             
            only_verified_results = st.sidebar.checkbox(
         | 
| 134 | 
             
                "Filter for Verified Results",
         | 
|  | |
| 135 | 
             
                help="Select this checkbox if you want to see only results produced by the Hugging Face model evaluator, and no self-reported results."
         | 
| 136 | 
             
            )
         | 
| 137 |  | 
|  | |
| 139 | 
             
            if None in selectable_tasks:
         | 
| 140 | 
             
                selectable_tasks.remove(None)
         | 
| 141 | 
             
            selectable_tasks.sort(key=lambda name: name.lower())
         | 
|  | |
| 142 |  | 
| 143 | 
             
            task = st.sidebar.selectbox(
         | 
| 144 | 
             
                "Task",
         | 
| 145 | 
            +
                ["-any-"] + selectable_tasks,
         | 
|  | |
| 146 | 
             
                help="Filter the selectable datasets by task. Leave as \"-any-\" to see all selectable datasets."
         | 
| 147 | 
             
            )
         | 
| 148 |  | 
|  | |
| 153 | 
             
            if "" in selectable_datasets:
         | 
| 154 | 
             
                selectable_datasets.remove("")
         | 
| 155 |  | 
| 156 | 
            +
            query_params = st.experimental_get_query_params()
         | 
| 157 | 
            +
            if "first_query_params" not in st.session_state:
         | 
| 158 | 
            +
                st.session_state.first_query_params = query_params
         | 
| 159 | 
            +
            first_query_params = st.session_state.first_query_params
         | 
| 160 | 
            +
             | 
| 161 | 
            +
            default_dataset = None
         | 
| 162 | 
            +
            if "dataset" in first_query_params:
         | 
| 163 | 
            +
                if len(first_query_params["dataset"]) > 0 and first_query_params["dataset"][0] in selectable_datasets:
         | 
| 164 | 
            +
                    default_dataset = first_query_params["dataset"][0]
         | 
| 165 | 
            +
             | 
| 166 | 
             
            dataset = st.sidebar.selectbox(
         | 
| 167 | 
             
                "Dataset",
         | 
| 168 | 
             
                selectable_datasets,
         | 
|  | |
| 172 |  | 
| 173 | 
             
            dataframe = dataframe[dataframe.only_verified == only_verified_results]
         | 
| 174 |  | 
| 175 | 
            +
            st.experimental_set_query_params(**{"dataset": [dataset]})
         | 
| 176 |  | 
| 177 | 
             
            if dataset != "-any-":
         | 
| 178 | 
             
                dataset_df = dataframe[dataframe.dataset == dataset]
         | 
|  | |
| 189 | 
             
                    config = st.sidebar.selectbox(
         | 
| 190 | 
             
                        "Config",
         | 
| 191 | 
             
                        selectable_configs,
         | 
|  | |
| 192 | 
             
                        help="Filter the results on the current leaderboard by the dataset config. Self-reported results might not report the config, which is why \"-unspecified-\" is an option."
         | 
| 193 | 
             
                    )
         | 
| 194 | 
             
                    dataset_df = dataset_df[dataset_df.config == config]
         | 
|  | |
| 197 | 
             
                    split = st.sidebar.selectbox(
         | 
| 198 | 
             
                        "Split",
         | 
| 199 | 
             
                        selectable_splits,
         | 
|  | |
| 200 | 
             
                        help="Filter the results on the current leaderboard by the dataset split. Self-reported results might not report the split, which is why \"-unspecified-\" is an option."
         | 
| 201 | 
             
                    )
         | 
|  | |
|  | |
|  | |
| 202 | 
             
                    dataset_df = dataset_df[dataset_df.split == split]
         | 
| 203 |  | 
| 204 | 
             
                not_selectable_metrics = ["model_id", "dataset", "split", "config", "pipeline_tag", "only_verified"]
         | 
|  | |
| 282 | 
             
            else:
         | 
| 283 | 
             
                st.markdown(
         | 
| 284 | 
             
                    "No " + ("verified" if only_verified_results else "unverified") + " results to display. Try toggling the verified results filter."
         | 
| 285 | 
            +
                )
         | 
    	
        utils.py
    CHANGED
    
    | @@ -13,7 +13,6 @@ metric_ranges = { | |
| 13 | 
             
                "accuracy": (0,1),
         | 
| 14 | 
             
                "precision": (0,1),
         | 
| 15 | 
             
                "recall": (0,1),
         | 
| 16 | 
            -
                "f1": (0,1),
         | 
| 17 | 
             
                "macro f1": (0,1),
         | 
| 18 | 
             
                "micro f1": (0,1),
         | 
| 19 | 
             
                "pearson": (-1, 1),
         | 
|  | |
| 13 | 
             
                "accuracy": (0,1),
         | 
| 14 | 
             
                "precision": (0,1),
         | 
| 15 | 
             
                "recall": (0,1),
         | 
|  | |
| 16 | 
             
                "macro f1": (0,1),
         | 
| 17 | 
             
                "micro f1": (0,1),
         | 
| 18 | 
             
                "pearson": (-1, 1),
         | 
