seungheondoh
committed on
Commit
·
3988329
1
Parent(s):
9a6e5af
update
Browse files
- .gitignore +2 -0
- script/fetch_data.py +14 -27
- script/upload_model.py +2 -2
.gitignore
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
LLM4Music/
|
| 2 |
+
repo/
|
script/fetch_data.py
CHANGED
|
@@ -4,41 +4,28 @@ import multiprocessing
|
|
| 4 |
from functools import partial
|
| 5 |
import time
|
| 6 |
|
| 7 |
-
save_dir = "
|
| 8 |
-
|
| 9 |
-
"https://huggingface.co/datasets/seungheondoh/cmd-moisesdb-metadata/resolve/main/moisesdb.tar.gz",
|
| 10 |
-
"https://huggingface.co/datasets/seungheondoh/cmd-musicnet-metadata/resolve/main/musicnet.tar.gz",
|
| 11 |
-
"https://huggingface.co/datasets/seungheondoh/cmd-medleydb-metadata/resolve/main/medleydb.tar.gz",
|
| 12 |
-
"https://huggingface.co/datasets/seungheondoh/cmd-fma-metadata/resolve/main/fma.tar.gz",
|
| 13 |
-
"https://huggingface.co/datasets/seungheondoh/cmd-mtg_jamendo-metadata/resolve/main/mtg_jamendo/0.tar.gz",
|
| 14 |
-
"https://huggingface.co/datasets/seungheondoh/cmd-mtg_jamendo-metadata/resolve/main/mtg_jamendo/1.tar.gz",
|
| 15 |
-
"https://huggingface.co/datasets/seungheondoh/cmd-mtg_jamendo-metadata/resolve/main/mtg_jamendo/2.tar.gz",
|
| 16 |
-
"https://huggingface.co/datasets/seungheondoh/cmd-mtg_jamendo-metadata/resolve/main/mtg_jamendo/3.tar.gz",
|
| 17 |
-
"https://huggingface.co/datasets/seungheondoh/cmd-mtg_jamendo-metadata/resolve/main/mtg_jamendo/4.tar.gz",
|
| 18 |
-
"https://huggingface.co/datasets/seungheondoh/cmd-mtg_jamendo-metadata/resolve/main/mtg_jamendo/5.tar.gz",
|
| 19 |
-
"https://huggingface.co/datasets/seungheondoh/cmd-mtg_jamendo-metadata/resolve/main/mtg_jamendo/6.tar.gz",
|
| 20 |
-
"https://huggingface.co/datasets/seungheondoh/cmd-mtg_jamendo-metadata/resolve/main/mtg_jamendo/7.tar.gz",
|
| 21 |
-
"https://huggingface.co/datasets/seungheondoh/cmd-mtg_jamendo-metadata/resolve/main/mtg_jamendo/8.tar.gz",
|
| 22 |
-
"https://huggingface.co/datasets/seungheondoh/cmd-mtg_jamendo-metadata/resolve/main/mtg_jamendo/9.tar.gz",
|
| 23 |
-
]
|
| 24 |
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 30 |
tar.extractall(path=save_dir)
|
| 31 |
-
|
| 32 |
if __name__ == "__main__":
|
| 33 |
os.makedirs(save_dir, exist_ok=True)
|
| 34 |
# Start timing
|
| 35 |
start_time = time.time()
|
| 36 |
num_processes = min(multiprocessing.cpu_count(), len(urls))
|
| 37 |
with multiprocessing.Pool(processes=num_processes) as pool:
|
| 38 |
-
pool.map(
|
| 39 |
-
|
| 40 |
-
with multiprocessing.Pool(processes=num_processes) as pool:
|
| 41 |
-
pool.map(unzip_file, [f for f in os.listdir(save_dir) if f.endswith('.tar.gz')])
|
| 42 |
# Calculate and display total time
|
| 43 |
end_time = time.time()
|
| 44 |
elapsed = end_time - start_time
|
|
|
|
| 4 |
from functools import partial
|
| 5 |
import time
|
| 6 |
|
| 7 |
+
save_dir = "/workspace/seungheon/dataset"
|
| 8 |
+
os.makedirs(save_dir, exist_ok=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
|
| 10 |
+
urls = []
|
| 11 |
+
db_config = {"fma": 34, "mtg_jamendo": 134, "medleydb": 100, "moisesdb": 8, "musicnet": 21}
|
| 12 |
+
for db_name, num_files in db_config.items():
|
| 13 |
+
for i in range(num_files):
|
| 14 |
+
urls.append(f"https://huggingface.co/datasets/seungheondoh/cmd-audio-dump/resolve/main/{db_name}{i}.tar.gz")
|
| 15 |
+
def download_and_unzip(url):
|
| 16 |
+
# Download file
|
| 17 |
+
filename = wget.download(url)
|
| 18 |
+
# Unzip file
|
| 19 |
+
with tarfile.open(filename, 'r:gz') as tar:
|
| 20 |
tar.extractall(path=save_dir)
|
| 21 |
+
|
| 22 |
if __name__ == "__main__":
|
| 23 |
os.makedirs(save_dir, exist_ok=True)
|
| 24 |
# Start timing
|
| 25 |
start_time = time.time()
|
| 26 |
num_processes = min(multiprocessing.cpu_count(), len(urls))
|
| 27 |
with multiprocessing.Pool(processes=num_processes) as pool:
|
| 28 |
+
pool.map(download_and_unzip, urls)
|
|
|
|
|
|
|
|
|
|
| 29 |
# Calculate and display total time
|
| 30 |
end_time = time.time()
|
| 31 |
elapsed = end_time - start_time
|
script/upload_model.py
CHANGED
|
@@ -5,8 +5,8 @@ from huggingface_hub import HfApi
|
|
| 5 |
# Initialize Hugging Face API
|
| 6 |
api = HfApi()
|
| 7 |
# Define paths and filenames
|
| 8 |
-
source_dir = "/workspace/logs/
|
| 9 |
-
output_filename = "
|
| 10 |
print(f"Creating tar.gz archive from {source_dir}...")
|
| 11 |
# Create tar.gz archive
|
| 12 |
with tarfile.open(output_filename, "w:gz") as tar:
|
|
|
|
| 5 |
# Initialize Hugging Face API
|
| 6 |
api = HfApi()
|
| 7 |
# Define paths and filenames
|
| 8 |
+
source_dir = "/workspace/logs/codebooks/kmeans/stable_vae_16384"
|
| 9 |
+
output_filename = "vq_codebook.tar.gz"
|
| 10 |
print(f"Creating tar.gz archive from {source_dir}...")
|
| 11 |
# Create tar.gz archive
|
| 12 |
with tarfile.open(output_filename, "w:gz") as tar:
|