seungheondoh committed on
Commit
3988329
·
1 Parent(s): 9a6e5af
Files changed (3) hide show
  1. .gitignore +2 -0
  2. script/fetch_data.py +14 -27
  3. script/upload_model.py +2 -2
.gitignore ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ LLM4Music/
2
+ repo/
script/fetch_data.py CHANGED
@@ -4,41 +4,28 @@ import multiprocessing
4
  from functools import partial
5
  import time
6
 
7
- save_dir = "./dataset"
8
- urls = [
9
- "https://huggingface.co/datasets/seungheondoh/cmd-moisesdb-metadata/resolve/main/moisesdb.tar.gz",
10
- "https://huggingface.co/datasets/seungheondoh/cmd-musicnet-metadata/resolve/main/musicnet.tar.gz",
11
- "https://huggingface.co/datasets/seungheondoh/cmd-medleydb-metadata/resolve/main/medleydb.tar.gz",
12
- "https://huggingface.co/datasets/seungheondoh/cmd-fma-metadata/resolve/main/fma.tar.gz",
13
- "https://huggingface.co/datasets/seungheondoh/cmd-mtg_jamendo-metadata/resolve/main/mtg_jamendo/0.tar.gz",
14
- "https://huggingface.co/datasets/seungheondoh/cmd-mtg_jamendo-metadata/resolve/main/mtg_jamendo/1.tar.gz",
15
- "https://huggingface.co/datasets/seungheondoh/cmd-mtg_jamendo-metadata/resolve/main/mtg_jamendo/2.tar.gz",
16
- "https://huggingface.co/datasets/seungheondoh/cmd-mtg_jamendo-metadata/resolve/main/mtg_jamendo/3.tar.gz",
17
- "https://huggingface.co/datasets/seungheondoh/cmd-mtg_jamendo-metadata/resolve/main/mtg_jamendo/4.tar.gz",
18
- "https://huggingface.co/datasets/seungheondoh/cmd-mtg_jamendo-metadata/resolve/main/mtg_jamendo/5.tar.gz",
19
- "https://huggingface.co/datasets/seungheondoh/cmd-mtg_jamendo-metadata/resolve/main/mtg_jamendo/6.tar.gz",
20
- "https://huggingface.co/datasets/seungheondoh/cmd-mtg_jamendo-metadata/resolve/main/mtg_jamendo/7.tar.gz",
21
- "https://huggingface.co/datasets/seungheondoh/cmd-mtg_jamendo-metadata/resolve/main/mtg_jamendo/8.tar.gz",
22
- "https://huggingface.co/datasets/seungheondoh/cmd-mtg_jamendo-metadata/resolve/main/mtg_jamendo/9.tar.gz",
23
- ]
24
 
25
- def download_file(url):
26
- wget.download(url)
27
-
28
- def unzip_file(file_path):
29
- with tarfile.open(file_path, 'r:gz') as tar:
 
 
 
 
 
30
  tar.extractall(path=save_dir)
31
-
32
  if __name__ == "__main__":
33
  os.makedirs(save_dir, exist_ok=True)
34
  # Start timing
35
  start_time = time.time()
36
  num_processes = min(multiprocessing.cpu_count(), len(urls))
37
  with multiprocessing.Pool(processes=num_processes) as pool:
38
- pool.map(download_file, urls)
39
-
40
- with multiprocessing.Pool(processes=num_processes) as pool:
41
- pool.map(unzip_file, [f for f in os.listdir(save_dir) if f.endswith('.tar.gz')])
42
  # Calculate and display total time
43
  end_time = time.time()
44
  elapsed = end_time - start_time
 
4
  from functools import partial
5
  import time
6
 
7
+ save_dir = "/workspace/seungheon/dataset"
8
+ os.makedirs(save_dir, exist_ok=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
 
10
+ urls = []
11
+ db_config = {"fma": 34, "mtg_jamendo": 134, "medleydb": 100, "moisesdb": 8, "musicnet": 21}
12
+ for db_name, num_files in db_config.items():
13
+ for i in range(num_files):
14
+ urls.append(f"https://huggingface.co/datasets/seungheondoh/cmd-audio-dump/resolve/main/{db_name}{i}.tar.gz")
15
+ def download_and_unzip(url):
16
+ # Download file
17
+ filename = wget.download(url)
18
+ # Unzip file
19
+ with tarfile.open(filename, 'r:gz') as tar:
20
  tar.extractall(path=save_dir)
21
+
22
  if __name__ == "__main__":
23
  os.makedirs(save_dir, exist_ok=True)
24
  # Start timing
25
  start_time = time.time()
26
  num_processes = min(multiprocessing.cpu_count(), len(urls))
27
  with multiprocessing.Pool(processes=num_processes) as pool:
28
+ pool.map(download_and_unzip, urls)
 
 
 
29
  # Calculate and display total time
30
  end_time = time.time()
31
  elapsed = end_time - start_time
script/upload_model.py CHANGED
@@ -5,8 +5,8 @@ from huggingface_hub import HfApi
5
  # Initialize Hugging Face API
6
  api = HfApi()
7
  # Define paths and filenames
8
- source_dir = "/workspace/logs/llm4music_diffusion/full_channel_concat"
9
- output_filename = "full_channel_concat.tar.gz"
10
  print(f"Creating tar.gz archive from {source_dir}...")
11
  # Create tar.gz archive
12
  with tarfile.open(output_filename, "w:gz") as tar:
 
5
  # Initialize Hugging Face API
6
  api = HfApi()
7
  # Define paths and filenames
8
+ source_dir = "/workspace/logs/codebooks/kmeans/stable_vae_16384"
9
+ output_filename = "vq_codebook.tar.gz"
10
  print(f"Creating tar.gz archive from {source_dir}...")
11
  # Create tar.gz archive
12
  with tarfile.open(output_filename, "w:gz") as tar: