Spaces:
Runtime error
Runtime error
Commit
·
cd41390
1
Parent(s):
5cc903d
async version with more caching
Browse files- app.py +21 -9
- requirements.txt +2 -0
app.py
CHANGED
@@ -3,23 +3,27 @@ import copy
|
|
3 |
import os
|
4 |
from dataclasses import asdict, dataclass
|
5 |
from datetime import datetime, timedelta
|
|
|
6 |
from json import JSONDecodeError
|
7 |
from typing import Any, Dict, List, Optional, Union
|
|
|
8 |
import gradio as gr
|
9 |
import httpx
|
10 |
import orjson
|
|
|
11 |
from cashews import NOT_NONE, cache
|
12 |
-
from httpx import AsyncClient
|
13 |
from huggingface_hub import hf_hub_url, logging
|
14 |
from huggingface_hub.utils import disable_progress_bars
|
15 |
from rich import print
|
16 |
from tqdm.auto import tqdm
|
17 |
-
from httpx import Client
|
18 |
-
from datetime import datetime, timedelta
|
19 |
|
20 |
-
|
21 |
-
|
22 |
-
)
|
|
|
|
|
|
|
23 |
|
24 |
|
25 |
disable_progress_bars()
|
@@ -58,6 +62,7 @@ async def _try_load_model_card(hub_id, client=None):
|
|
58 |
length = None
|
59 |
return card_text, length
|
60 |
|
|
|
61 |
def _try_parse_card_data(hub_json_data):
|
62 |
data = {}
|
63 |
keys = ["license", "language", "datasets"]
|
@@ -72,7 +77,7 @@ def _try_parse_card_data(hub_json_data):
|
|
72 |
return data
|
73 |
|
74 |
|
75 |
-
@dataclass
|
76 |
class ModelMetadata:
|
77 |
hub_id: str
|
78 |
tags: Optional[List[str]]
|
@@ -89,7 +94,7 @@ class ModelMetadata:
|
|
89 |
created_at: Optional[datetime] = None
|
90 |
|
91 |
@classmethod
|
92 |
-
@cache(ttl=
|
93 |
async def from_hub(cls, hub_id, client=None):
|
94 |
try:
|
95 |
if not client:
|
@@ -224,6 +229,7 @@ ALL_PIPELINES = {
|
|
224 |
}
|
225 |
|
226 |
|
|
|
227 |
def generate_task_scores_dict():
|
228 |
task_scores = {}
|
229 |
for task in ALL_PIPELINES:
|
@@ -262,6 +268,7 @@ def generate_task_scores_dict():
|
|
262 |
return task_scores
|
263 |
|
264 |
|
|
|
265 |
def generate_common_scores():
|
266 |
GENERIC_SCORES = copy.deepcopy(COMMON_SCORES)
|
267 |
GENERIC_SCORES["_max_score"] = sum(
|
@@ -274,6 +281,7 @@ SCORES = generate_task_scores_dict()
|
|
274 |
GENERIC_SCORES = generate_common_scores()
|
275 |
|
276 |
|
|
|
277 |
def _basic_check(data: Optional[ModelMetadata]):
|
278 |
score = 0
|
279 |
if data is None:
|
@@ -334,7 +342,7 @@ def create_query_url(query, skip=0):
|
|
334 |
return f"https://huggingface.co/api/search/full-text?q={query}&limit=100&skip={skip}&type=model"
|
335 |
|
336 |
|
337 |
-
def get_results(query,sync_client=None) -> Dict[Any, Any]:
|
338 |
if not sync_client:
|
339 |
sync_client = Client(http2=True, headers=headers)
|
340 |
url = create_query_url(query)
|
@@ -461,6 +469,7 @@ def create_markdown(results): # TODO move to separate file
|
|
461 |
rows.append(row)
|
462 |
return "\n".join(rows)
|
463 |
|
|
|
464 |
async def get_result_card_snippet(result, query=None, client=None):
|
465 |
if not client:
|
466 |
client = AsyncClient(http2=True, headers=headers)
|
@@ -472,6 +481,7 @@ async def get_result_card_snippet(result, query=None, client=None):
|
|
472 |
result["text"] = "Could not load model card"
|
473 |
return result
|
474 |
|
|
|
475 |
@cache(ttl=timedelta(hours=3), condition=NOT_NONE)
|
476 |
async def get_result_card_snippets(results, query=None, client=None):
|
477 |
if not client:
|
@@ -483,8 +493,10 @@ async def get_result_card_snippets(results, query=None, client=None):
|
|
483 |
results = await asyncio.gather(*result_snippets)
|
484 |
return results
|
485 |
|
|
|
486 |
sync_client = Client(http2=True, headers=headers)
|
487 |
|
|
|
488 |
def _search_hub(
|
489 |
query: str,
|
490 |
min_score: Optional[int] = None,
|
|
|
3 |
import os
|
4 |
from dataclasses import asdict, dataclass
|
5 |
from datetime import datetime, timedelta
|
6 |
+
from functools import lru_cache
|
7 |
from json import JSONDecodeError
|
8 |
from typing import Any, Dict, List, Optional, Union
|
9 |
+
|
10 |
import gradio as gr
|
11 |
import httpx
|
12 |
import orjson
|
13 |
+
from cachetools import TTLCache, cached
|
14 |
from cashews import NOT_NONE, cache
|
15 |
+
from httpx import AsyncClient, Client
|
16 |
from huggingface_hub import hf_hub_url, logging
|
17 |
from huggingface_hub.utils import disable_progress_bars
|
18 |
from rich import print
|
19 |
from tqdm.auto import tqdm
|
|
|
|
|
20 |
|
21 |
+
CACHE_EXPIRY_TIME = timedelta(hours=3)
|
22 |
+
|
23 |
+
sync_cache = TTLCache(maxsize=200_000, ttl=CACHE_EXPIRY_TIME, timer=datetime.now)
|
24 |
+
|
25 |
+
|
26 |
+
cache.setup("mem://")
|
27 |
|
28 |
|
29 |
disable_progress_bars()
|
|
|
62 |
length = None
|
63 |
return card_text, length
|
64 |
|
65 |
+
|
66 |
def _try_parse_card_data(hub_json_data):
|
67 |
data = {}
|
68 |
keys = ["license", "language", "datasets"]
|
|
|
77 |
return data
|
78 |
|
79 |
|
80 |
+
@dataclass(eq=False)
|
81 |
class ModelMetadata:
|
82 |
hub_id: str
|
83 |
tags: Optional[List[str]]
|
|
|
94 |
created_at: Optional[datetime] = None
|
95 |
|
96 |
@classmethod
|
97 |
+
@cache(ttl=CACHE_EXPIRY_TIME, condition=NOT_NONE)
|
98 |
async def from_hub(cls, hub_id, client=None):
|
99 |
try:
|
100 |
if not client:
|
|
|
229 |
}
|
230 |
|
231 |
|
232 |
+
@lru_cache()
|
233 |
def generate_task_scores_dict():
|
234 |
task_scores = {}
|
235 |
for task in ALL_PIPELINES:
|
|
|
268 |
return task_scores
|
269 |
|
270 |
|
271 |
+
@lru_cache()
|
272 |
def generate_common_scores():
|
273 |
GENERIC_SCORES = copy.deepcopy(COMMON_SCORES)
|
274 |
GENERIC_SCORES["_max_score"] = sum(
|
|
|
281 |
GENERIC_SCORES = generate_common_scores()
|
282 |
|
283 |
|
284 |
+
@cached(sync_cache)
|
285 |
def _basic_check(data: Optional[ModelMetadata]):
|
286 |
score = 0
|
287 |
if data is None:
|
|
|
342 |
return f"https://huggingface.co/api/search/full-text?q={query}&limit=100&skip={skip}&type=model"
|
343 |
|
344 |
|
345 |
+
def get_results(query, sync_client=None) -> Dict[Any, Any]:
|
346 |
if not sync_client:
|
347 |
sync_client = Client(http2=True, headers=headers)
|
348 |
url = create_query_url(query)
|
|
|
469 |
rows.append(row)
|
470 |
return "\n".join(rows)
|
471 |
|
472 |
+
|
473 |
async def get_result_card_snippet(result, query=None, client=None):
|
474 |
if not client:
|
475 |
client = AsyncClient(http2=True, headers=headers)
|
|
|
481 |
result["text"] = "Could not load model card"
|
482 |
return result
|
483 |
|
484 |
+
|
485 |
@cache(ttl=timedelta(hours=3), condition=NOT_NONE)
|
486 |
async def get_result_card_snippets(results, query=None, client=None):
|
487 |
if not client:
|
|
|
493 |
results = await asyncio.gather(*result_snippets)
|
494 |
return results
|
495 |
|
496 |
+
|
497 |
sync_client = Client(http2=True, headers=headers)
|
498 |
|
499 |
+
|
500 |
def _search_hub(
|
501 |
query: str,
|
502 |
min_score: Optional[int] = None,
|
requirements.txt
CHANGED
@@ -30,6 +30,8 @@ attrs==23.1.0
|
|
30 |
# jsonschema
|
31 |
backcall==0.2.0
|
32 |
# via ipython
|
|
|
|
|
33 |
cashews==6.2.0
|
34 |
# via -r requirements.in
|
35 |
certifi==2023.5.7
|
|
|
30 |
# jsonschema
|
31 |
backcall==0.2.0
|
32 |
# via ipython
|
33 |
+
cachetools==5.3.1
|
34 |
+
# via -r requirements.in
|
35 |
cashews==6.2.0
|
36 |
# via -r requirements.in
|
37 |
certifi==2023.5.7
|