seungheondoh committed on
Commit
c4683a3
·
1 Parent(s): 790468b
script/clone_repo.sh ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
# Log in to Hugging Face, clone TARGET_REPO, enter the checkout, activate its
# virtual environment and install the project in editable mode.
#
# The three settings below may be edited here or supplied through the
# environment; the literals are the placeholders this script shipped with.
HF_TOKEN="${HF_TOKEN:-your_token_here}"
TARGET_REPO="${TARGET_REPO:-}"
REPO_NAME="${REPO_NAME:-}"

# Without a repository URL `git clone` has nothing to do; stop early.
# `return` covers the case where this file is sourced, `exit` the case where
# it is executed.
if [ -z "$TARGET_REPO" ]; then
    echo "TARGET_REPO is not set; edit this script or export TARGET_REPO." >&2
    return 1 2>/dev/null || exit 1
fi

# An empty REPO_NAME would make `cd` jump to $HOME. Fall back to the directory
# name `git clone` derives from the URL (last path component, minus ".git").
if [ -z "$REPO_NAME" ]; then
    REPO_NAME="$(basename "$TARGET_REPO" .git)"
fi

# Chain with && so a failed step stops the sequence instead of, for example,
# running `uv pip install -e .` in the wrong directory after a failed clone.
# Expansions are quoted so values containing spaces or glob characters survive.
huggingface-cli login --token "$HF_TOKEN" &&
    git lfs install &&
    git clone "$TARGET_REPO" &&
    cd "$REPO_NAME" &&
    source .venv/bin/activate &&
    uv pip install -e .
script/fetch_data.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import wget
2
+ import os
3
+ import multiprocessing
4
+ from functools import partial
5
+ import time
6
+
7
+ save_dir = "./dataset"
8
+ urls = [
9
+ "https://huggingface.co/datasets/seungheondoh/cmd-moisesdb-metadata/resolve/main/moisesdb.tar.gz",
10
+ "https://huggingface.co/datasets/seungheondoh/cmd-musicnet-metadata/resolve/main/musicnet.tar.gz",
11
+ "https://huggingface.co/datasets/seungheondoh/cmd-medleydb-metadata/resolve/main/medleydb.tar.gz",
12
+ "https://huggingface.co/datasets/seungheondoh/cmd-fma-metadata/resolve/main/fma.tar.gz",
13
+ "https://huggingface.co/datasets/seungheondoh/cmd-mtg_jamendo-metadata/resolve/main/mtg_jamendo/0.tar.gz",
14
+ "https://huggingface.co/datasets/seungheondoh/cmd-mtg_jamendo-metadata/resolve/main/mtg_jamendo/1.tar.gz",
15
+ "https://huggingface.co/datasets/seungheondoh/cmd-mtg_jamendo-metadata/resolve/main/mtg_jamendo/2.tar.gz",
16
+ "https://huggingface.co/datasets/seungheondoh/cmd-mtg_jamendo-metadata/resolve/main/mtg_jamendo/3.tar.gz",
17
+ "https://huggingface.co/datasets/seungheondoh/cmd-mtg_jamendo-metadata/resolve/main/mtg_jamendo/4.tar.gz",
18
+ "https://huggingface.co/datasets/seungheondoh/cmd-mtg_jamendo-metadata/resolve/main/mtg_jamendo/5.tar.gz",
19
+ "https://huggingface.co/datasets/seungheondoh/cmd-mtg_jamendo-metadata/resolve/main/mtg_jamendo/6.tar.gz",
20
+ "https://huggingface.co/datasets/seungheondoh/cmd-mtg_jamendo-metadata/resolve/main/mtg_jamendo/7.tar.gz",
21
+ "https://huggingface.co/datasets/seungheondoh/cmd-mtg_jamendo-metadata/resolve/main/mtg_jamendo/8.tar.gz",
22
+ "https://huggingface.co/datasets/seungheondoh/cmd-mtg_jamendo-metadata/resolve/main/mtg_jamendo/9.tar.gz",
23
+ ]
24
+
25
def download_file(url):
    """Download the file at ``url`` into the module-level ``save_dir``.

    Args:
        url: Address of the archive to fetch.
    """
    # Passing an existing directory as ``out`` makes wget keep the remote file
    # name and place the file in that directory. Without it the archive lands
    # in the current working directory, where the extraction step never looks.
    wget.download(url, out=save_dir)


def unzip_file(file_path):
    """Extract a gzip-compressed tar archive into the module-level ``save_dir``.

    Args:
        file_path: Path to the ``.tar.gz`` archive, absolute or relative to
            the current working directory.
    """
    # Imported here because the file's import block does not provide tarfile.
    import tarfile

    with tarfile.open(file_path, 'r:gz') as tar:
        tar.extractall(path=save_dir)


if __name__ == "__main__":
    os.makedirs(save_dir, exist_ok=True)
    # Start timing
    start_time = time.time()
    # Pool() rejects a process count of 0, so never go below one worker.
    num_processes = max(1, min(multiprocessing.cpu_count(), len(urls)))
    with multiprocessing.Pool(processes=num_processes) as pool:
        pool.map(download_file, urls)

    # os.listdir() yields bare file names; join them with save_dir so the
    # archives can be opened regardless of the current working directory.
    archives = sorted(
        os.path.join(save_dir, name)
        for name in os.listdir(save_dir)
        if name.endswith('.tar.gz')
    )
    with multiprocessing.Pool(processes=num_processes) as pool:
        pool.map(unzip_file, archives)
    # Calculate and display total time (covers both download and extraction)
    end_time = time.time()
    elapsed = end_time - start_time
    print(f"\nTotal download time: {int(elapsed // 60)} minutes and {int(elapsed % 60)} seconds")
script/fetch_data.sh DELETED
@@ -1,77 +0,0 @@
1
- HF_TOKEN="your_token"
2
- DATADIR="/workspace/seungheon/dataset"
3
- mkdir -p $DATADIR
4
- huggingface-cli login --token $HF_TOKEN
5
- git lfs install
6
-
7
- # Start timing
8
- start_time=$(date +%s)
9
-
10
- echo "Cloning FMA dataset..."
11
- git clone https://huggingface.co/datasets/seungheondoh/cmd-fma-metadata $DATADIR/fma
12
-
13
- echo "Cloning MusicNet dataset..."
14
- git clone https://huggingface.co/datasets/seungheondoh/cmd-musicnet-metadata $DATADIR/musicnet
15
-
16
- echo "Cloning MedleyDB dataset..."
17
- git clone https://huggingface.co/datasets/seungheondoh/cmd-medleydb-metadata $DATADIR/medleydb
18
-
19
- echo "Cloning MoisesDB dataset..."
20
- git clone https://huggingface.co/datasets/seungheondoh/cmd-moisesdb-metadata $DATADIR/moisesdb
21
-
22
- echo "Cloning MTG-Jamendo dataset..."
23
- git clone https://huggingface.co/datasets/seungheondoh/cmd-mtg_jamendo-metadata $DATADIR/mtg_jamendo
24
-
25
- # Calculate and display total time
26
- end_time=$(date +%s)
27
- elapsed=$((end_time - start_time))
28
- echo "Total download time: $((elapsed / 60)) minutes and $((elapsed % 60)) seconds"
29
-
30
- # Extract FMA dataset
31
- cd $DATADIR/fma
32
- echo "Extracting FMA dataset..."
33
- start_time=$(date +%s)
34
- tar -xzf fma.tar.gz -C audio/
35
- end_time=$(date +%s)
36
- elapsed=$((end_time - start_time))
37
- echo "FMA dataset extraction complete in $((elapsed / 60)) minutes and $((elapsed % 60)) seconds"
38
-
39
- # Extract MusicNet dataset
40
- cd $DATADIR/musicnet
41
- echo "Extracting MusicNet dataset..."
42
- start_time=$(date +%s)
43
- tar -xzf musicnet.tar.gz -C audio/
44
- end_time=$(date +%s)
45
- elapsed=$((end_time - start_time))
46
- echo "MusicNet dataset extraction complete in $((elapsed / 60)) minutes and $((elapsed % 60)) seconds"
47
-
48
- # Extract MedleyDB dataset
49
- cd $DATADIR/medleydb
50
- echo "Extracting MedleyDB dataset..."
51
- start_time=$(date +%s)
52
- tar -xzf medleydb.tar.gz -C audio/
53
- end_time=$(date +%s)
54
- elapsed=$((end_time - start_time))
55
- echo "MedleyDB dataset extraction complete in $((elapsed / 60)) minutes and $((elapsed % 60)) seconds"
56
-
57
- # Extract MoisesDB dataset
58
- cd $DATADIR/moisesdb
59
- echo "Extracting MoisesDB dataset..."
60
- start_time=$(date +%s)
61
- tar -xzf moisesdb.tar.gz -C audio/
62
- end_time=$(date +%s)
63
- elapsed=$((end_time - start_time))
64
- echo "MoisesDB dataset extraction complete in $((elapsed / 60)) minutes and $((elapsed % 60)) seconds"
65
-
66
- # Extract MTG-Jamendo dataset
67
- cd $DATADIR/mtg_jamendo
68
- echo "Extracting MTG-Jamendo dataset..."
69
- start_time=$(date +%s)
70
- for file in *.tar.gz; do
71
- echo "Extracting $file..."
72
- tar -xzf "$file" -C audio/
73
- echo "$file extraction complete"
74
- done
75
- end_time=$(date +%s)
76
- elapsed=$((end_time - start_time))
77
- echo "MTG-Jamendo dataset extraction complete in $((elapsed / 60)) minutes and $((elapsed % 60)) seconds"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
script/virtualenv.sh CHANGED
@@ -1,9 +1,5 @@
1
  #!/bin/bash
2
- # Cloud server setup script for ML environment
3
- # Installs: Python venv, uv, PyTorch 2.6.0, flash-attn, and Hugging Face CLI
4
-
5
  set -e # Exit immediately if a command exits with a non-zero status
6
-
7
  # Update system packages
8
  echo "Updating system packages..."
9
  sudo apt update && sudo apt upgrade -y
@@ -82,27 +78,12 @@ echo "Installing PyTorch 2.6.0..."
82
  echo "Installing PyTorch with CUDA support (${PYTORCH_CUDA})..."
83
  uv pip install torch==2.6.0 --index-url https://download.pytorch.org/whl/${PYTORCH_CUDA}
84
 
85
- # Install flash-attention
86
- echo "Installing flash-attention..."
87
- if [ "$CUDA_AVAILABLE" -eq 1 ]; then
88
- # Install required dependencies for flash-attention
89
- uv pip install packaging ninja
90
- uv pip install flash-attn --no-build-isolation
91
- else
92
- echo "Warning: flash-attention requires CUDA. Skipping installation."
93
- fi
94
-
95
  # Install Hugging Face CLI and tools
96
  echo "Installing Hugging Face CLI and tools..."
97
  uv pip install huggingface_hub
98
  # Verify installation
99
  echo "Verifying installation..."
100
  python -c "import torch; print('PyTorch version:', torch.__version__); print('CUDA available:', torch.cuda.is_available())"
101
-
102
- # Try importing flash-attention if CUDA is available
103
- if [ "$CUDA_AVAILABLE" -eq 1 ]; then
104
- python -c "import importlib.util; print('flash-attn available:', importlib.util.find_spec('flash_attn') is not None)"
105
- fi
106
  # Check for Hugging Face CLI
107
  python -c "import huggingface_hub; print('Hugging Face Hub version:', huggingface_hub.__version__)"
108
  echo "============================================================"
 
1
  #!/bin/bash
 
 
 
2
  set -e # Exit immediately if a command exits with a non-zero status
 
3
  # Update system packages
4
  echo "Updating system packages..."
5
  sudo apt update && sudo apt upgrade -y
 
78
  echo "Installing PyTorch with CUDA support (${PYTORCH_CUDA})..."
79
  uv pip install torch==2.6.0 --index-url https://download.pytorch.org/whl/${PYTORCH_CUDA}
80
 
 
 
 
 
 
 
 
 
 
 
81
  # Install Hugging Face CLI and tools
82
  echo "Installing Hugging Face CLI and tools..."
83
  uv pip install huggingface_hub
84
  # Verify installation
85
  echo "Verifying installation..."
86
  python -c "import torch; print('PyTorch version:', torch.__version__); print('CUDA available:', torch.cuda.is_available())"
 
 
 
 
 
87
  # Check for Hugging Face CLI
88
  python -c "import huggingface_hub; print('Hugging Face Hub version:', huggingface_hub.__version__)"
89
  echo "============================================================"