seungheondoh
committed on
Commit
·
3988329
1
Parent(s):
9a6e5af
update
Browse files
- .gitignore +2 -0
- script/fetch_data.py +14 -27
- script/upload_model.py +2 -2
.gitignore
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
LLM4Music/
|
2 |
+
repo/
|
script/fetch_data.py
CHANGED
@@ -4,41 +4,28 @@ import multiprocessing
|
|
4 |
from functools import partial
|
5 |
import time
|
6 |
|
7 |
-
save_dir = "
|
8 |
-
|
9 |
-
"https://huggingface.co/datasets/seungheondoh/cmd-moisesdb-metadata/resolve/main/moisesdb.tar.gz",
|
10 |
-
"https://huggingface.co/datasets/seungheondoh/cmd-musicnet-metadata/resolve/main/musicnet.tar.gz",
|
11 |
-
"https://huggingface.co/datasets/seungheondoh/cmd-medleydb-metadata/resolve/main/medleydb.tar.gz",
|
12 |
-
"https://huggingface.co/datasets/seungheondoh/cmd-fma-metadata/resolve/main/fma.tar.gz",
|
13 |
-
"https://huggingface.co/datasets/seungheondoh/cmd-mtg_jamendo-metadata/resolve/main/mtg_jamendo/0.tar.gz",
|
14 |
-
"https://huggingface.co/datasets/seungheondoh/cmd-mtg_jamendo-metadata/resolve/main/mtg_jamendo/1.tar.gz",
|
15 |
-
"https://huggingface.co/datasets/seungheondoh/cmd-mtg_jamendo-metadata/resolve/main/mtg_jamendo/2.tar.gz",
|
16 |
-
"https://huggingface.co/datasets/seungheondoh/cmd-mtg_jamendo-metadata/resolve/main/mtg_jamendo/3.tar.gz",
|
17 |
-
"https://huggingface.co/datasets/seungheondoh/cmd-mtg_jamendo-metadata/resolve/main/mtg_jamendo/4.tar.gz",
|
18 |
-
"https://huggingface.co/datasets/seungheondoh/cmd-mtg_jamendo-metadata/resolve/main/mtg_jamendo/5.tar.gz",
|
19 |
-
"https://huggingface.co/datasets/seungheondoh/cmd-mtg_jamendo-metadata/resolve/main/mtg_jamendo/6.tar.gz",
|
20 |
-
"https://huggingface.co/datasets/seungheondoh/cmd-mtg_jamendo-metadata/resolve/main/mtg_jamendo/7.tar.gz",
|
21 |
-
"https://huggingface.co/datasets/seungheondoh/cmd-mtg_jamendo-metadata/resolve/main/mtg_jamendo/8.tar.gz",
|
22 |
-
"https://huggingface.co/datasets/seungheondoh/cmd-mtg_jamendo-metadata/resolve/main/mtg_jamendo/9.tar.gz",
|
23 |
-
]
|
24 |
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
|
|
|
|
|
|
|
|
|
|
30 |
tar.extractall(path=save_dir)
|
31 |
-
|
32 |
if __name__ == "__main__":
|
33 |
os.makedirs(save_dir, exist_ok=True)
|
34 |
# Start timing
|
35 |
start_time = time.time()
|
36 |
num_processes = min(multiprocessing.cpu_count(), len(urls))
|
37 |
with multiprocessing.Pool(processes=num_processes) as pool:
|
38 |
-
pool.map(
|
39 |
-
|
40 |
-
with multiprocessing.Pool(processes=num_processes) as pool:
|
41 |
-
pool.map(unzip_file, [f for f in os.listdir(save_dir) if f.endswith('.tar.gz')])
|
42 |
# Calculate and display total time
|
43 |
end_time = time.time()
|
44 |
elapsed = end_time - start_time
|
|
|
4 |
from functools import partial
|
5 |
import time
|
6 |
|
7 |
+
save_dir = "/workspace/seungheon/dataset"
|
8 |
+
os.makedirs(save_dir, exist_ok=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
9 |
|
10 |
+
urls = []
|
11 |
+
db_config = {"fma": 34, "mtg_jamendo": 134, "medleydb": 100, "moisesdb": 8, "musicnet": 21}
|
12 |
+
for db_name, num_files in db_config.items():
|
13 |
+
for i in range(num_files):
|
14 |
+
urls.append(f"https://huggingface.co/datasets/seungheondoh/cmd-audio-dump/resolve/main/{db_name}{i}.tar.gz")
|
15 |
+
def download_and_unzip(url):
|
16 |
+
# Download file
|
17 |
+
filename = wget.download(url)
|
18 |
+
# Unzip file
|
19 |
+
with tarfile.open(filename, 'r:gz') as tar:
|
20 |
tar.extractall(path=save_dir)
|
21 |
+
|
22 |
if __name__ == "__main__":
|
23 |
os.makedirs(save_dir, exist_ok=True)
|
24 |
# Start timing
|
25 |
start_time = time.time()
|
26 |
num_processes = min(multiprocessing.cpu_count(), len(urls))
|
27 |
with multiprocessing.Pool(processes=num_processes) as pool:
|
28 |
+
pool.map(download_and_unzip, urls)
|
|
|
|
|
|
|
29 |
# Calculate and display total time
|
30 |
end_time = time.time()
|
31 |
elapsed = end_time - start_time
|
script/upload_model.py
CHANGED
@@ -5,8 +5,8 @@ from huggingface_hub import HfApi
|
|
5 |
# Initialize Hugging Face API
|
6 |
api = HfApi()
|
7 |
# Define paths and filenames
|
8 |
-
source_dir = "/workspace/logs/
|
9 |
-
output_filename = "
|
10 |
print(f"Creating tar.gz archive from {source_dir}...")
|
11 |
# Create tar.gz archive
|
12 |
with tarfile.open(output_filename, "w:gz") as tar:
|
|
|
5 |
# Initialize Hugging Face API
|
6 |
api = HfApi()
|
7 |
# Define paths and filenames
|
8 |
+
source_dir = "/workspace/logs/codebooks/kmeans/stable_vae_16384"
|
9 |
+
output_filename = "vq_codebook.tar.gz"
|
10 |
print(f"Creating tar.gz archive from {source_dir}...")
|
11 |
# Create tar.gz archive
|
12 |
with tarfile.open(output_filename, "w:gz") as tar:
|