Spaces:
Runtime error
Runtime error
Commit
·
1771fc5
1
Parent(s):
8b71d33
remove diskcache import
Browse files
app.py
CHANGED
@@ -1,36 +1,27 @@
|
|
1 |
-
import os
|
2 |
import copy
|
|
|
3 |
from dataclasses import asdict, dataclass
|
|
|
4 |
from functools import lru_cache
|
5 |
from json import JSONDecodeError
|
6 |
from typing import Any, Dict, List, Optional, Union
|
7 |
-
|
8 |
-
import gradio as gr
|
9 |
-
from requests.exceptions import HTTPError
|
10 |
-
import requests
|
11 |
-
from diskcache import Cache
|
12 |
-
from huggingface_hub import (
|
13 |
-
HfApi,
|
14 |
-
hf_hub_url,
|
15 |
-
list_repo_commits,
|
16 |
-
logging,
|
17 |
-
model_info,
|
18 |
-
)
|
19 |
-
from tqdm.auto import tqdm
|
20 |
-
from tqdm.contrib.concurrent import thread_map
|
21 |
import backoff
|
22 |
-
|
23 |
import httpx
|
24 |
import orjson
|
25 |
-
import
|
26 |
-
from
|
27 |
from httpx import Client
|
28 |
-
from httpx_caching import CachingClient
|
29 |
-
from
|
30 |
-
|
31 |
-
|
32 |
-
from
|
33 |
-
|
|
|
|
|
|
|
34 |
|
35 |
cache = TTLCache(maxsize=500_000, ttl=timedelta(hours=24), timer=datetime.now)
|
36 |
|
@@ -414,6 +405,7 @@ def filter_search_results(
|
|
414 |
min_score=None,
|
415 |
min_model_card_length=None,
|
416 |
): # TODO make code more intuitive
|
|
|
417 |
results = thread_map(parse_single_result, results)
|
418 |
for i, parsed_result in tqdm(enumerate(results)):
|
419 |
# parsed_result = parse_single_result(result)
|
@@ -441,12 +433,12 @@ def filter_search_results(
|
|
441 |
|
442 |
def sort_search_results(
|
443 |
filtered_search_results,
|
444 |
-
|
445 |
-
|
446 |
):
|
447 |
return sorted(
|
448 |
list(filtered_search_results),
|
449 |
-
key=lambda x: (x[
|
450 |
reverse=True,
|
451 |
)
|
452 |
|
@@ -527,22 +519,35 @@ def search_hub(query: str, min_score=None, min_model_card_length=None):
|
|
527 |
|
528 |
|
529 |
with gr.Blocks() as demo:
|
530 |
-
with gr.Tab("Hub
|
531 |
gr.Markdown("# 🤗 Hub model search with metadata quality filters")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
532 |
with gr.Row():
|
533 |
with gr.Column():
|
534 |
query = gr.Textbox("x-ray", label="Search query")
|
535 |
with gr.Column():
|
536 |
button = gr.Button("Search")
|
537 |
with gr.Row():
|
538 |
-
# gr.Checkbox(False, label="
|
|
|
|
|
539 |
mim_model_card_length = gr.Number(
|
540 |
None, label="Minimum model card length"
|
541 |
)
|
542 |
min_metadata_score = gr.Slider(0, label="Minimum metadata score")
|
543 |
filter_results = gr.Markdown("Filter results vs original search")
|
544 |
results_markdown = gr.Markdown("Search results")
|
545 |
-
|
546 |
button.click(
|
547 |
search_hub,
|
548 |
[query, min_metadata_score, mim_model_card_length],
|
|
|
|
|
1 |
import copy
|
2 |
+
import os
|
3 |
from dataclasses import asdict, dataclass
|
4 |
+
from datetime import datetime, timedelta
|
5 |
from functools import lru_cache
|
6 |
from json import JSONDecodeError
|
7 |
from typing import Any, Dict, List, Optional, Union
|
8 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
9 |
import backoff
|
10 |
+
import gradio as gr
|
11 |
import httpx
|
12 |
import orjson
|
13 |
+
import requests
|
14 |
+
from cachetools import TTLCache, cached
|
15 |
from httpx import Client
|
16 |
+
from httpx_caching import CachingClient, OneDayCacheHeuristic
|
17 |
+
# from diskcache import Cache
|
18 |
+
from huggingface_hub import (HfApi, hf_hub_url, list_repo_commits, logging,
|
19 |
+
model_info)
|
20 |
+
from huggingface_hub.utils import (EntryNotFoundError, GatedRepoError,
|
21 |
+
disable_progress_bars)
|
22 |
+
from requests.exceptions import HTTPError
|
23 |
+
from tqdm.auto import tqdm
|
24 |
+
from tqdm.contrib.concurrent import thread_map
|
25 |
|
26 |
cache = TTLCache(maxsize=500_000, ttl=timedelta(hours=24), timer=datetime.now)
|
27 |
|
|
|
405 |
min_score=None,
|
406 |
min_model_card_length=None,
|
407 |
): # TODO make code more intuitive
|
408 |
+
# TODO set up filters as separate functions and chain results
|
409 |
results = thread_map(parse_single_result, results)
|
410 |
for i, parsed_result in tqdm(enumerate(results)):
|
411 |
# parsed_result = parse_single_result(result)
|
|
|
433 |
|
434 |
def sort_search_results(
|
435 |
filtered_search_results,
|
436 |
+
first_sort_key="metadata_score",
|
437 |
+
second_sort_key="original_position", # TODO expose these in results
|
438 |
):
|
439 |
return sorted(
|
440 |
list(filtered_search_results),
|
441 |
+
key=lambda x: (x[first_sort_key], x[second_sort_key]),
|
442 |
reverse=True,
|
443 |
)
|
444 |
|
|
|
519 |
|
520 |
|
521 |
with gr.Blocks() as demo:
|
522 |
+
with gr.Tab("Hub Search with metadata quality filter"):
|
523 |
gr.Markdown("# 🤗 Hub model search with metadata quality filters")
|
524 |
+
gr.Markdown(
|
525 |
+
"""This search tool relies on the full-text search API.
|
526 |
+
Your search is passed to this API and the returned models are assessed for metadata quality. See the next tab in the app for more info on how this is calculated.
|
527 |
+
If you don't specify any minimum requirements you will get back your results with metadata quality info
|
528 |
+
for each result. The results are ordered by:
|
529 |
+
|
530 |
+
- Metadata quality i.e. a model with 80% metadata quality will rank higher than one with 75%
|
531 |
+
- Original search order i.e. if two models have the same metadata quality the one that appeared first in the original search will rank higher.
|
532 |
+
|
533 |
+
If there is interest in this app I will expose more options for filtering and sorting results.
|
534 |
+
"""
|
535 |
+
)
|
536 |
with gr.Row():
|
537 |
with gr.Column():
|
538 |
query = gr.Textbox("x-ray", label="Search query")
|
539 |
with gr.Column():
|
540 |
button = gr.Button("Search")
|
541 |
with gr.Row():
|
542 |
+
# literal_search = gr.Checkbox(False, label="Literal_search")
|
543 |
+
# TODO add option for exact matching i.e. phrase matching
|
544 |
+
# gr.Checkbox(False, label="Must have license?")
|
545 |
mim_model_card_length = gr.Number(
|
546 |
None, label="Minimum model card length"
|
547 |
)
|
548 |
min_metadata_score = gr.Slider(0, label="Minimum metadata score")
|
549 |
filter_results = gr.Markdown("Filter results vs original search")
|
550 |
results_markdown = gr.Markdown("Search results")
|
|
|
551 |
button.click(
|
552 |
search_hub,
|
553 |
[query, min_metadata_score, mim_model_card_length],
|