update

Files changed (4) hide show

script/clone_repo.sh +9 -0
script/fetch_data.py +45 -0
script/fetch_data.sh +0 -77
script/virtualenv.sh +0 -19

script/clone_repo.sh ADDED Viewed

	@@ -0,0 +1,9 @@

+HF_TOKEN="your_token_here"
+TARGET_REPO=""
+REPO_NAME=""
+huggingface-cli login --token $HF_TOKEN
+git lfs install
+git clone $TARGET_REPO
+cd $REPO_NAME
+source .venv/bin/activate
+uv pip install -e .

script/fetch_data.py ADDED Viewed

	@@ -0,0 +1,45 @@

+import wget
+import os
+import multiprocessing
+from functools import partial
+import time
+save_dir = "./dataset"
+urls = [
+    "https://huggingface.co/datasets/seungheondoh/cmd-moisesdb-metadata/resolve/main/moisesdb.tar.gz",
+    "https://huggingface.co/datasets/seungheondoh/cmd-musicnet-metadata/resolve/main/musicnet.tar.gz",
+    "https://huggingface.co/datasets/seungheondoh/cmd-medleydb-metadata/resolve/main/medleydb.tar.gz",
+    "https://huggingface.co/datasets/seungheondoh/cmd-fma-metadata/resolve/main/fma.tar.gz",
+    "https://huggingface.co/datasets/seungheondoh/cmd-mtg_jamendo-metadata/resolve/main/mtg_jamendo/0.tar.gz",
+    "https://huggingface.co/datasets/seungheondoh/cmd-mtg_jamendo-metadata/resolve/main/mtg_jamendo/1.tar.gz",
+    "https://huggingface.co/datasets/seungheondoh/cmd-mtg_jamendo-metadata/resolve/main/mtg_jamendo/2.tar.gz",
+    "https://huggingface.co/datasets/seungheondoh/cmd-mtg_jamendo-metadata/resolve/main/mtg_jamendo/3.tar.gz",
+    "https://huggingface.co/datasets/seungheondoh/cmd-mtg_jamendo-metadata/resolve/main/mtg_jamendo/4.tar.gz",
+    "https://huggingface.co/datasets/seungheondoh/cmd-mtg_jamendo-metadata/resolve/main/mtg_jamendo/5.tar.gz",
+    "https://huggingface.co/datasets/seungheondoh/cmd-mtg_jamendo-metadata/resolve/main/mtg_jamendo/6.tar.gz",
+    "https://huggingface.co/datasets/seungheondoh/cmd-mtg_jamendo-metadata/resolve/main/mtg_jamendo/7.tar.gz",
+    "https://huggingface.co/datasets/seungheondoh/cmd-mtg_jamendo-metadata/resolve/main/mtg_jamendo/8.tar.gz",
+    "https://huggingface.co/datasets/seungheondoh/cmd-mtg_jamendo-metadata/resolve/main/mtg_jamendo/9.tar.gz",
+]
+def download_file(url):
+    wget.download(url)
+def unzip_file(file_path):
+    with tarfile.open(file_path, 'r:gz') as tar:
+        tar.extractall(path=save_dir)
+if __name__ == "__main__":
+    os.makedirs(save_dir, exist_ok=True)
+    # Start timing
+    start_time = time.time()
+    num_processes = min(multiprocessing.cpu_count(), len(urls))
+    with multiprocessing.Pool(processes=num_processes) as pool:
+        pool.map(download_file, urls)
+    with multiprocessing.Pool(processes=num_processes) as pool:
+        pool.map(unzip_file, [f for f in os.listdir(save_dir) if f.endswith('.tar.gz')])
+    # Calculate and display total time
+    end_time = time.time()
+    elapsed = end_time - start_time
+    print(f"\nTotal download time: {int(elapsed // 60)} minutes and {int(elapsed % 60)} seconds")

script/fetch_data.sh DELETED Viewed

@@ -1,77 +0,0 @@
-HF_TOKEN="your_token"
-DATADIR="/workspace/seungheon/dataset"
-mkdir -p $DATADIR
-huggingface-cli login --token $HF_TOKEN
-git lfs install
-# Start timing
-start_time=$(date +%s)
-echo "Cloning FMA dataset..."
-git clone https://huggingface.co/datasets/seungheondoh/cmd-fma-metadata $DATADIR/fma
-echo "Cloning MusicNet dataset..."
-git clone https://huggingface.co/datasets/seungheondoh/cmd-musicnet-metadata $DATADIR/musicnet
-echo "Cloning MedleyDB dataset..."
-git clone https://huggingface.co/datasets/seungheondoh/cmd-medleydb-metadata $DATADIR/medleydb
-echo "Cloning MoisesDB dataset..."
-git clone https://huggingface.co/datasets/seungheondoh/cmd-moisesdb-metadata $DATADIR/moisesdb
-echo "Cloning MTG-Jamendo dataset..."
-git clone https://huggingface.co/datasets/seungheondoh/cmd-mtg_jamendo-metadata $DATADIR/mtg_jamendo
-# Calculate and display total time
-end_time=$(date +%s)
-elapsed=$((end_time - start_time))
-echo "Total download time: $((elapsed / 60)) minutes and $((elapsed % 60)) seconds"
-# Extract FMA dataset
-cd $DATADIR/fma
-echo "Extracting FMA dataset..."
-start_time=$(date +%s)
-tar -xzf fma.tar.gz -C audio/
-end_time=$(date +%s)
-elapsed=$((end_time - start_time))
-echo "FMA dataset extraction complete in $((elapsed / 60)) minutes and $((elapsed % 60)) seconds"
-# Extract MusicNet dataset
-cd $DATADIR/musicnet
-echo "Extracting MusicNet dataset..."
-start_time=$(date +%s)
-tar -xzf musicnet.tar.gz -C audio/
-end_time=$(date +%s)
-elapsed=$((end_time - start_time))
-echo "MusicNet dataset extraction complete in $((elapsed / 60)) minutes and $((elapsed % 60)) seconds"
-# Extract MedleyDB dataset
-cd $DATADIR/medleydb
-echo "Extracting MedleyDB dataset..."
-start_time=$(date +%s)
-tar -xzf medleydb.tar.gz -C audio/
-end_time=$(date +%s)
-elapsed=$((end_time - start_time))
-echo "MedleyDB dataset extraction complete in $((elapsed / 60)) minutes and $((elapsed % 60)) seconds"
-# Extract MoisesDB dataset
-cd $DATADIR/moisesdb
-echo "Extracting MoisesDB dataset..."
-start_time=$(date +%s)
-tar -xzf moisesdb.tar.gz -C audio/
-end_time=$(date +%s)
-elapsed=$((end_time - start_time))
-echo "MoisesDB dataset extraction complete in $((elapsed / 60)) minutes and $((elapsed % 60)) seconds"
-# Extract MTG-Jamendo dataset
-cd $DATADIR/mtg_jamendo
-echo "Extracting MTG-Jamendo dataset..."
-start_time=$(date +%s)
-for file in *.tar.gz; do
-    echo "Extracting $file..."
-    tar -xzf "$file" -C audio/
-    echo "$file extraction complete"
-done
-end_time=$(date +%s)
-elapsed=$((end_time - start_time))
-echo "MTG-Jamendo dataset extraction complete in $((elapsed / 60)) minutes and $((elapsed % 60)) seconds"

script/virtualenv.sh CHANGED Viewed

@@ -1,9 +1,5 @@
 #!/bin/bash
-# Cloud server setup script for ML environment
-# Installs: Python venv, uv, PyTorch 2.6.0, flash-attn, and Hugging Face CLI
 set -e  # Exit immediately if a command exits with a non-zero status
 # Update system packages
 echo "Updating system packages..."
 sudo apt update && sudo apt upgrade -y
@@ -82,27 +78,12 @@ echo "Installing PyTorch 2.6.0..."
 echo "Installing PyTorch with CUDA support (${PYTORCH_CUDA})..."
 uv pip install torch==2.6.0 --index-url https://download.pytorch.org/whl/${PYTORCH_CUDA}
-# Install flash-attention
-echo "Installing flash-attention..."
-if [ "$CUDA_AVAILABLE" -eq 1 ]; then
-    # Install required dependencies for flash-attention
-    uv pip install packaging ninja
-    uv pip install flash-attn --no-build-isolation
-else
-    echo "Warning: flash-attention requires CUDA. Skipping installation."
-fi
 # Install Hugging Face CLI and tools
 echo "Installing Hugging Face CLI and tools..."
 uv pip install huggingface_hub
 # Verify installation
 echo "Verifying installation..."
 python -c "import torch; print('PyTorch version:', torch.__version__); print('CUDA available:', torch.cuda.is_available())"
-# Try importing flash-attention if CUDA is available
-if [ "$CUDA_AVAILABLE" -eq 1 ]; then
-    python -c "import importlib.util; print('flash-attn available:', importlib.util.find_spec('flash_attn') is not None)"
-fi
 # Check for Hugging Face CLI
 python -c "import huggingface_hub; print('Hugging Face Hub version:', huggingface_hub.__version__)"
 echo "============================================================"

 #!/bin/bash
 set -e  # Exit immediately if a command exits with a non-zero status
 # Update system packages
 echo "Updating system packages..."
 sudo apt update && sudo apt upgrade -y
 echo "Installing PyTorch with CUDA support (${PYTORCH_CUDA})..."
 uv pip install torch==2.6.0 --index-url https://download.pytorch.org/whl/${PYTORCH_CUDA}
 # Install Hugging Face CLI and tools
 echo "Installing Hugging Face CLI and tools..."
 uv pip install huggingface_hub
 # Verify installation
 echo "Verifying installation..."
 python -c "import torch; print('PyTorch version:', torch.__version__); print('CUDA available:', torch.cuda.is_available())"
 # Check for Hugging Face CLI
 python -c "import huggingface_hub; print('Hugging Face Hub version:', huggingface_hub.__version__)"
 echo "============================================================"