MoizK committed on
Commit
d67650c
·
verified ·
1 Parent(s): 67898d2

Update download_assets.py

Browse files
Files changed (1) hide show
  1. download_assets.py +14 -15
download_assets.py CHANGED
@@ -3,16 +3,15 @@ import os
3
 
4
  def download_assets():
5
  """Download necessary assets from Hugging Face Hub."""
6
- # ensure local dirs exist
7
  os.makedirs("data", exist_ok=True)
8
  os.makedirs("vectorstore/db_faiss", exist_ok=True)
9
 
10
  repo_id = "MoizK/mindmedic-assets"
11
  repo_type = "dataset"
12
- # allow either env var name
13
  token = os.getenv("HUGGINGFACE_HUB_TOKEN") or os.getenv("HUGGINGFACE_API_TOKEN")
14
 
15
- # list of PDF filenames at the repo root
16
  pdf_files = [
17
  "71763-gale-encyclopedia-of-medicine.-vol.-1.-2nd-ed.pdf",
18
  "Depression-NIM-2024.pdf",
@@ -28,29 +27,29 @@ def download_assets():
28
  path = hf_hub_download(
29
  repo_id=repo_id,
30
  repo_type=repo_type,
31
- filename=fname,
32
- local_dir="data",
33
- token=token,
34
  )
35
- print(f"✅ Downloaded {fname} → {path}")
36
  except Exception as e:
37
- print(f"⚠️ Failed to download {fname}: {e}")
38
 
39
- # Now fetch the FAISS index files from the nested path
40
  index_files = ["index.faiss", "index.pkl"]
41
  for fname in index_files:
42
- remote_path = f"vectorstore/db_faiss/{fname}"
43
  try:
44
  path = hf_hub_download(
45
  repo_id=repo_id,
46
  repo_type=repo_type,
47
- filename=remote_path, # this is where they actually live in the repo
48
- local_dir=".", # so that ./vectorstore/db_faiss/... is created
49
- token=token,
 
50
  )
51
- print(f"✅ Downloaded {remote_path} → {path}")
52
  except Exception as e:
53
- print(f"⚠️ Failed to download {remote_path}: {e}")
54
 
55
  if __name__ == "__main__":
56
  download_assets()
 
3
 
4
  def download_assets():
5
  """Download necessary assets from Hugging Face Hub."""
6
+ # Prepare your local folders
7
  os.makedirs("data", exist_ok=True)
8
  os.makedirs("vectorstore/db_faiss", exist_ok=True)
9
 
10
  repo_id = "MoizK/mindmedic-assets"
11
  repo_type = "dataset"
 
12
  token = os.getenv("HUGGINGFACE_HUB_TOKEN") or os.getenv("HUGGINGFACE_API_TOKEN")
13
 
14
+ # -- PDFs (these live at the repo root) --
15
  pdf_files = [
16
  "71763-gale-encyclopedia-of-medicine.-vol.-1.-2nd-ed.pdf",
17
  "Depression-NIM-2024.pdf",
 
27
  path = hf_hub_download(
28
  repo_id=repo_id,
29
  repo_type=repo_type,
30
+ filename=fname, # root-level file
31
+ local_dir="data", # save under ./data/
32
+ use_auth_token=token,
33
  )
34
+ print(f"✅ Downloaded PDF {fname} → {path}")
35
  except Exception as e:
36
+ print(f"⚠️ Failed to download PDF {fname}: {e}")
37
 
38
+ # -- FAISS index files (nested under vectorstore/db_faiss in the repo) --
39
  index_files = ["index.faiss", "index.pkl"]
40
  for fname in index_files:
 
41
  try:
42
  path = hf_hub_download(
43
  repo_id=repo_id,
44
  repo_type=repo_type,
45
+ subfolder="vectorstore/db_faiss", # look in this folder in the repo
46
+ filename=fname, # just the filename here
47
+ local_dir="vectorstore/db_faiss", # save under ./vectorstore/db_faiss/
48
+ use_auth_token=token,
49
  )
50
+ print(f"✅ Downloaded index file {fname} → {path}")
51
  except Exception as e:
52
+ print(f"⚠️ Failed to download index file {fname}: {e}")
53
 
54
  if __name__ == "__main__":
55
  download_assets()