davanstrien HF staff committed on
Commit
1771fc5
·
1 Parent(s): 8b71d33

remove diskcache import

Browse files
Files changed (1) hide show
  1. app.py +35 -30
app.py CHANGED
@@ -1,36 +1,27 @@
1
- import os
2
  import copy
 
3
  from dataclasses import asdict, dataclass
 
4
  from functools import lru_cache
5
  from json import JSONDecodeError
6
  from typing import Any, Dict, List, Optional, Union
7
- from huggingface_hub.utils import GatedRepoError
8
- import gradio as gr
9
- from requests.exceptions import HTTPError
10
- import requests
11
- from diskcache import Cache
12
- from huggingface_hub import (
13
- HfApi,
14
- hf_hub_url,
15
- list_repo_commits,
16
- logging,
17
- model_info,
18
- )
19
- from tqdm.auto import tqdm
20
- from tqdm.contrib.concurrent import thread_map
21
  import backoff
22
- from huggingface_hub.utils import EntryNotFoundError, disable_progress_bars
23
  import httpx
24
  import orjson
25
- import httpx
26
- from functools import lru_cache
27
  from httpx import Client
28
- from httpx_caching import CachingClient
29
- from httpx_caching import OneDayCacheHeuristic
30
-
31
- from cachetools import cached, TTLCache
32
- from datetime import timedelta
33
- from datetime import datetime
 
 
 
34
 
35
  cache = TTLCache(maxsize=500_000, ttl=timedelta(hours=24), timer=datetime.now)
36
 
@@ -414,6 +405,7 @@ def filter_search_results(
414
  min_score=None,
415
  min_model_card_length=None,
416
  ): # TODO make code more intuitive
 
417
  results = thread_map(parse_single_result, results)
418
  for i, parsed_result in tqdm(enumerate(results)):
419
  # parsed_result = parse_single_result(result)
@@ -441,12 +433,12 @@ def filter_search_results(
441
 
442
  def sort_search_results(
443
  filtered_search_results,
444
- first_sort="metadata_score",
445
- second_sort="original_position", # TODO expose these in results
446
  ):
447
  return sorted(
448
  list(filtered_search_results),
449
- key=lambda x: (x[first_sort], x[second_sort]),
450
  reverse=True,
451
  )
452
 
@@ -527,22 +519,35 @@ def search_hub(query: str, min_score=None, min_model_card_length=None):
527
 
528
 
529
  with gr.Blocks() as demo:
530
- with gr.Tab("Hub search with metadata quality filter"):
531
  gr.Markdown("# 🤗 Hub model search with metadata quality filters")
 
 
 
 
 
 
 
 
 
 
 
 
532
  with gr.Row():
533
  with gr.Column():
534
  query = gr.Textbox("x-ray", label="Search query")
535
  with gr.Column():
536
  button = gr.Button("Search")
537
  with gr.Row():
538
- # gr.Checkbox(False, label="Must have licence?")
 
 
539
  mim_model_card_length = gr.Number(
540
  None, label="Minimum model card length"
541
  )
542
  min_metadata_score = gr.Slider(0, label="Minimum metadata score")
543
  filter_results = gr.Markdown("Filter results vs original search")
544
  results_markdown = gr.Markdown("Search results")
545
-
546
  button.click(
547
  search_hub,
548
  [query, min_metadata_score, mim_model_card_length],
 
 
1
  import copy
2
+ import os
3
  from dataclasses import asdict, dataclass
4
+ from datetime import datetime, timedelta
5
  from functools import lru_cache
6
  from json import JSONDecodeError
7
  from typing import Any, Dict, List, Optional, Union
8
+
 
 
 
 
 
 
 
 
 
 
 
 
 
9
  import backoff
10
+ import gradio as gr
11
  import httpx
12
  import orjson
13
+ import requests
14
+ from cachetools import TTLCache, cached
15
  from httpx import Client
16
+ from httpx_caching import CachingClient, OneDayCacheHeuristic
17
+ # from diskcache import Cache
18
+ from huggingface_hub import (HfApi, hf_hub_url, list_repo_commits, logging,
19
+ model_info)
20
+ from huggingface_hub.utils import (EntryNotFoundError, GatedRepoError,
21
+ disable_progress_bars)
22
+ from requests.exceptions import HTTPError
23
+ from tqdm.auto import tqdm
24
+ from tqdm.contrib.concurrent import thread_map
25
 
26
  cache = TTLCache(maxsize=500_000, ttl=timedelta(hours=24), timer=datetime.now)
27
 
 
405
  min_score=None,
406
  min_model_card_length=None,
407
  ): # TODO make code more intuitive
408
+ # TODO setup filters as separate functions and chain results
409
  results = thread_map(parse_single_result, results)
410
  for i, parsed_result in tqdm(enumerate(results)):
411
  # parsed_result = parse_single_result(result)
 
433
 
434
  def sort_search_results(
435
  filtered_search_results,
436
+ first_sort_key="metadata_score",
437
+ second_sort_key="original_position", # TODO expose these in results
438
  ):
439
  return sorted(
440
  list(filtered_search_results),
441
+ key=lambda x: (x[first_sort_key], x[second_sort_key]),
442
  reverse=True,
443
  )
444
 
 
519
 
520
 
521
  with gr.Blocks() as demo:
522
+ with gr.Tab("Hub Search with metadata quality filter"):
523
  gr.Markdown("# 🤗 Hub model search with metadata quality filters")
524
+ gr.Markdown(
525
+ """This search tool relies on the full-text search API.
526
+ Your search is passed to this API and the returned models are assessed for metadata quality. See the next tab in the app for more info on how this is calculated.
527
+ If you don't specify any minimum requirements you will get back your results with metadata quality info
528
+ for each result. The results are ordered by:
529
+
530
+ - Metadata quality i.e. a model with 80% metadata quality will rank higher than one with 75%
531
+ - Original search order i.e. if two models have the same metadata quality the one that appeared first in the original search will rank higher.
532
+
533
+ If there is interest in this app I will expose more options for filtering and sorting results.
534
+ """
535
+ )
536
  with gr.Row():
537
  with gr.Column():
538
  query = gr.Textbox("x-ray", label="Search query")
539
  with gr.Column():
540
  button = gr.Button("Search")
541
  with gr.Row():
542
+ # literal_search = gr.Checkbox(False, label="Literal_search")
543
+ # TODO add option for exact matching i.e. phrase matching
544
+ # gr.Checkbox(False, label="Must have license?")
545
  mim_model_card_length = gr.Number(
546
  None, label="Minimum model card length"
547
  )
548
  min_metadata_score = gr.Slider(0, label="Minimum metadata score")
549
  filter_results = gr.Markdown("Filter results vs original search")
550
  results_markdown = gr.Markdown("Search results")
 
551
  button.click(
552
  search_hub,
553
  [query, min_metadata_score, mim_model_card_length],