Spaces:

librarian-bots
/

MetaRefine

Runtime error

App Files Files Community

davanstrien HF Staff committed on Jun 30, 2023

Commit

1771fc5

1 Parent(s): 8b71d33

remove diskcache import

Browse files

Files changed (1) hide show

app.py +35 -30

app.py CHANGED Viewed

@@ -1,36 +1,27 @@
-import os
 import copy
 from dataclasses import asdict, dataclass
 from functools import lru_cache
 from json import JSONDecodeError
 from typing import Any, Dict, List, Optional, Union
-from huggingface_hub.utils import GatedRepoError
-import gradio as gr
-from requests.exceptions import HTTPError
-import requests
-from diskcache import Cache
-from huggingface_hub import (
-    HfApi,
-    hf_hub_url,
-    list_repo_commits,
-    logging,
-    model_info,
-)
-from tqdm.auto import tqdm
-from tqdm.contrib.concurrent import thread_map
 import backoff
-from huggingface_hub.utils import EntryNotFoundError, disable_progress_bars
 import httpx
 import orjson
-import httpx
-from functools import lru_cache
 from httpx import Client
-from httpx_caching import CachingClient
-from httpx_caching import OneDayCacheHeuristic
-from cachetools import cached, TTLCache
-from datetime import timedelta
-from datetime import datetime
 cache = TTLCache(maxsize=500_000, ttl=timedelta(hours=24), timer=datetime.now)
@@ -414,6 +405,7 @@ def filter_search_results(
     min_score=None,
     min_model_card_length=None,
 ):  # TODO make code more intuitive
     results = thread_map(parse_single_result, results)
     for i, parsed_result in tqdm(enumerate(results)):
         # parsed_result = parse_single_result(result)
@@ -441,12 +433,12 @@ def filter_search_results(
 def sort_search_results(
     filtered_search_results,
-    first_sort="metadata_score",
-    second_sort="original_position",  # TODO expose these in results
 ):
     return sorted(
         list(filtered_search_results),
-        key=lambda x: (x[first_sort], x[second_sort]),
         reverse=True,
     )
@@ -527,22 +519,35 @@ def search_hub(query: str, min_score=None, min_model_card_length=None):
 with gr.Blocks() as demo:
-    with gr.Tab("Hub search with metadata quality filter"):
         gr.Markdown("#  &#129303; Hub model search with metadata quality filters")
         with gr.Row():
             with gr.Column():
                 query = gr.Textbox("x-ray", label="Search query")
             with gr.Column():
                 button = gr.Button("Search")
                 with gr.Row():
-                    # gr.Checkbox(False, label="Must have licence?")
                     mim_model_card_length = gr.Number(
                         None, label="Minimum model card length"
                     )
                     min_metadata_score = gr.Slider(0, label="Minimum metadata score")
         filter_results = gr.Markdown("Filter results vs original search")
         results_markdown = gr.Markdown("Search results")
         button.click(
             search_hub,
             [query, min_metadata_score, mim_model_card_length],

 import copy
+import os
 from dataclasses import asdict, dataclass
+from datetime import datetime, timedelta
 from functools import lru_cache
 from json import JSONDecodeError
 from typing import Any, Dict, List, Optional, Union
 import backoff
+import gradio as gr
 import httpx
 import orjson
+import requests
+from cachetools import TTLCache, cached
 from httpx import Client
+from httpx_caching import CachingClient, OneDayCacheHeuristic
+# from diskcache import Cache
+from huggingface_hub import (HfApi, hf_hub_url, list_repo_commits, logging,
+                             model_info)
+from huggingface_hub.utils import (EntryNotFoundError, GatedRepoError,
+                                   disable_progress_bars)
+from requests.exceptions import HTTPError
+from tqdm.auto import tqdm
+from tqdm.contrib.concurrent import thread_map
 cache = TTLCache(maxsize=500_000, ttl=timedelta(hours=24), timer=datetime.now)
     min_score=None,
     min_model_card_length=None,
 ):  # TODO make code more intuitive
+    # TODO setup filters as separate functions and chain results
     results = thread_map(parse_single_result, results)
     for i, parsed_result in tqdm(enumerate(results)):
         # parsed_result = parse_single_result(result)
 def sort_search_results(
     filtered_search_results,
+    first_sort_key="metadata_score",
+    second_sort_key="original_position",  # TODO expose these in results
 ):
     return sorted(
         list(filtered_search_results),
+        key=lambda x: (x[first_sort_key], x[second_sort_key]),
         reverse=True,
     )
 with gr.Blocks() as demo:
+    with gr.Tab("Hub Search with metadata quality filter"):
         gr.Markdown("#  &#129303; Hub model search with metadata quality filters")
+        gr.Markdown(
+            """This search tool relies on the full-text search API.
+                Your search is passed to this API and the returned models are assessed for metadata quality. See the next tab in the app for more info on how this is calculated.
+                If you don't specify any minimum requirements you will get back your results with metadata quality info
+                for each result. The results are ordered by:
+                - Metadata quality i.e. a model with 80% metadata quality will rank higher than one with 75%
+                - Original search order i.e. if two models have the same metadata quality the one that appeared first in the original search will rank higher.
+                If there is interest in this app I will expose more options for filtering and sorting results.
+                    """
+        )
         with gr.Row():
             with gr.Column():
                 query = gr.Textbox("x-ray", label="Search query")
             with gr.Column():
                 button = gr.Button("Search")
                 with gr.Row():
+                    # literal_search = gr.Checkbox(False, label="Literal_search")
+                    # TODO add option for exact matching i.e. phrase matching
+                    # gr.Checkbox(False, label="Must have license?")
                     mim_model_card_length = gr.Number(
                         None, label="Minimum model card length"
                     )
                     min_metadata_score = gr.Slider(0, label="Minimum metadata score")
         filter_results = gr.Markdown("Filter results vs original search")
         results_markdown = gr.Markdown("Search results")
         button.click(
             search_hub,
             [query, min_metadata_score, mim_model_card_length],