GraceYi committed
Commit 4c1eec7 · verified · Parent(s): fe8d011

Upload 80 files

This view is limited to 50 files because the commit contains too many changes.
Files changed (50)
  1. .gitattributes +24 -0
  2. JEN-1-COMPOSER-pytorch-main/JEN-1-COMPOSER-pytorch-main/JEN1-Composer.jpg +0 -0
  3. JEN-1-COMPOSER-pytorch-main/JEN-1-COMPOSER-pytorch-main/Jen1-Composer-2.png +0 -0
  4. JEN-1-COMPOSER-pytorch-main/JEN-1-COMPOSER-pytorch-main/README.md +99 -0
  5. JEN-1-COMPOSER-pytorch-main/JEN-1-COMPOSER-pytorch-main/__pycache__/trainer.cpython-311.pyc +0 -0
  6. JEN-1-COMPOSER-pytorch-main/JEN-1-COMPOSER-pytorch-main/data/audios/audio1.mp3 +0 -0
  7. JEN-1-COMPOSER-pytorch-main/JEN-1-COMPOSER-pytorch-main/data/audios/audio2.mp3 +0 -0
  8. JEN-1-COMPOSER-pytorch-main/JEN-1-COMPOSER-pytorch-main/data/audios/audio3.mp3 +0 -0
  9. JEN-1-COMPOSER-pytorch-main/JEN-1-COMPOSER-pytorch-main/data/audios/audio4.mp3 +0 -0
  10. JEN-1-COMPOSER-pytorch-main/JEN-1-COMPOSER-pytorch-main/data/audios/audio5.mp3 +0 -0
  11. JEN-1-COMPOSER-pytorch-main/JEN-1-COMPOSER-pytorch-main/data/audios/audio6.mp3 +0 -0
  12. JEN-1-COMPOSER-pytorch-main/JEN-1-COMPOSER-pytorch-main/data/audios/htdemucs/audio1/bass.wav +3 -0
  13. JEN-1-COMPOSER-pytorch-main/JEN-1-COMPOSER-pytorch-main/data/audios/htdemucs/audio1/drums.wav +3 -0
  14. JEN-1-COMPOSER-pytorch-main/JEN-1-COMPOSER-pytorch-main/data/audios/htdemucs/audio1/other.wav +3 -0
  15. JEN-1-COMPOSER-pytorch-main/JEN-1-COMPOSER-pytorch-main/data/audios/htdemucs/audio1/vocals.wav +3 -0
  16. JEN-1-COMPOSER-pytorch-main/JEN-1-COMPOSER-pytorch-main/data/audios/htdemucs/audio2/bass.wav +3 -0
  17. JEN-1-COMPOSER-pytorch-main/JEN-1-COMPOSER-pytorch-main/data/audios/htdemucs/audio2/drums.wav +3 -0
  18. JEN-1-COMPOSER-pytorch-main/JEN-1-COMPOSER-pytorch-main/data/audios/htdemucs/audio2/other.wav +3 -0
  19. JEN-1-COMPOSER-pytorch-main/JEN-1-COMPOSER-pytorch-main/data/audios/htdemucs/audio2/vocals.wav +3 -0
  20. JEN-1-COMPOSER-pytorch-main/JEN-1-COMPOSER-pytorch-main/data/audios/htdemucs/audio3/bass.wav +3 -0
  21. JEN-1-COMPOSER-pytorch-main/JEN-1-COMPOSER-pytorch-main/data/audios/htdemucs/audio3/drums.wav +3 -0
  22. JEN-1-COMPOSER-pytorch-main/JEN-1-COMPOSER-pytorch-main/data/audios/htdemucs/audio3/other.wav +3 -0
  23. JEN-1-COMPOSER-pytorch-main/JEN-1-COMPOSER-pytorch-main/data/audios/htdemucs/audio3/vocals.wav +3 -0
  24. JEN-1-COMPOSER-pytorch-main/JEN-1-COMPOSER-pytorch-main/data/audios/htdemucs/audio4/bass.wav +3 -0
  25. JEN-1-COMPOSER-pytorch-main/JEN-1-COMPOSER-pytorch-main/data/audios/htdemucs/audio4/drums.wav +3 -0
  26. JEN-1-COMPOSER-pytorch-main/JEN-1-COMPOSER-pytorch-main/data/audios/htdemucs/audio4/other.wav +3 -0
  27. JEN-1-COMPOSER-pytorch-main/JEN-1-COMPOSER-pytorch-main/data/audios/htdemucs/audio4/vocals.wav +3 -0
  28. JEN-1-COMPOSER-pytorch-main/JEN-1-COMPOSER-pytorch-main/data/audios/htdemucs/audio5/bass.wav +3 -0
  29. JEN-1-COMPOSER-pytorch-main/JEN-1-COMPOSER-pytorch-main/data/audios/htdemucs/audio5/drums.wav +3 -0
  30. JEN-1-COMPOSER-pytorch-main/JEN-1-COMPOSER-pytorch-main/data/audios/htdemucs/audio5/other.wav +3 -0
  31. JEN-1-COMPOSER-pytorch-main/JEN-1-COMPOSER-pytorch-main/data/audios/htdemucs/audio5/vocals.wav +3 -0
  32. JEN-1-COMPOSER-pytorch-main/JEN-1-COMPOSER-pytorch-main/data/audios/htdemucs/audio6/bass.wav +3 -0
  33. JEN-1-COMPOSER-pytorch-main/JEN-1-COMPOSER-pytorch-main/data/audios/htdemucs/audio6/drums.wav +3 -0
  34. JEN-1-COMPOSER-pytorch-main/JEN-1-COMPOSER-pytorch-main/data/audios/htdemucs/audio6/other.wav +3 -0
  35. JEN-1-COMPOSER-pytorch-main/JEN-1-COMPOSER-pytorch-main/data/audios/htdemucs/audio6/vocals.wav +3 -0
  36. JEN-1-COMPOSER-pytorch-main/JEN-1-COMPOSER-pytorch-main/data/metadata/audio1.json +1 -0
  37. JEN-1-COMPOSER-pytorch-main/JEN-1-COMPOSER-pytorch-main/data/metadata/audio2.json +1 -0
  38. JEN-1-COMPOSER-pytorch-main/JEN-1-COMPOSER-pytorch-main/data/metadata/audio3.json +1 -0
  39. JEN-1-COMPOSER-pytorch-main/JEN-1-COMPOSER-pytorch-main/data/metadata/audio4.json +1 -0
  40. JEN-1-COMPOSER-pytorch-main/JEN-1-COMPOSER-pytorch-main/data/metadata/audio5.json +1 -0
  41. JEN-1-COMPOSER-pytorch-main/JEN-1-COMPOSER-pytorch-main/data/metadata/audio6.json +1 -0
  42. JEN-1-COMPOSER-pytorch-main/JEN-1-COMPOSER-pytorch-main/dataset/__pycache__/audio_processor.cpython-38.pyc +0 -0
  43. JEN-1-COMPOSER-pytorch-main/JEN-1-COMPOSER-pytorch-main/dataset/__pycache__/dataloader.cpython-311.pyc +0 -0
  44. JEN-1-COMPOSER-pytorch-main/JEN-1-COMPOSER-pytorch-main/dataset/__pycache__/dataloader.cpython-38.pyc +0 -0
  45. JEN-1-COMPOSER-pytorch-main/JEN-1-COMPOSER-pytorch-main/dataset/dataloader.py +191 -0
  46. JEN-1-COMPOSER-pytorch-main/JEN-1-COMPOSER-pytorch-main/dataset/demix.py +51 -0
  47. JEN-1-COMPOSER-pytorch-main/JEN-1-COMPOSER-pytorch-main/generation.py +212 -0
  48. JEN-1-COMPOSER-pytorch-main/JEN-1-COMPOSER-pytorch-main/jen1/__pycache__/conditioners.cpython-311.pyc +0 -0
  49. JEN-1-COMPOSER-pytorch-main/JEN-1-COMPOSER-pytorch-main/jen1/__pycache__/conditioners.cpython-38.pyc +0 -0
  50. JEN-1-COMPOSER-pytorch-main/JEN-1-COMPOSER-pytorch-main/jen1/__pycache__/noise_schedule.cpython-38.pyc +0 -0
.gitattributes CHANGED
@@ -33,3 +33,27 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+JEN-1-COMPOSER-pytorch-main/JEN-1-COMPOSER-pytorch-main/data/audios/htdemucs/audio1/bass.wav filter=lfs diff=lfs merge=lfs -text
+JEN-1-COMPOSER-pytorch-main/JEN-1-COMPOSER-pytorch-main/data/audios/htdemucs/audio1/drums.wav filter=lfs diff=lfs merge=lfs -text
+JEN-1-COMPOSER-pytorch-main/JEN-1-COMPOSER-pytorch-main/data/audios/htdemucs/audio1/other.wav filter=lfs diff=lfs merge=lfs -text
+JEN-1-COMPOSER-pytorch-main/JEN-1-COMPOSER-pytorch-main/data/audios/htdemucs/audio1/vocals.wav filter=lfs diff=lfs merge=lfs -text
+JEN-1-COMPOSER-pytorch-main/JEN-1-COMPOSER-pytorch-main/data/audios/htdemucs/audio2/bass.wav filter=lfs diff=lfs merge=lfs -text
+JEN-1-COMPOSER-pytorch-main/JEN-1-COMPOSER-pytorch-main/data/audios/htdemucs/audio2/drums.wav filter=lfs diff=lfs merge=lfs -text
+JEN-1-COMPOSER-pytorch-main/JEN-1-COMPOSER-pytorch-main/data/audios/htdemucs/audio2/other.wav filter=lfs diff=lfs merge=lfs -text
+JEN-1-COMPOSER-pytorch-main/JEN-1-COMPOSER-pytorch-main/data/audios/htdemucs/audio2/vocals.wav filter=lfs diff=lfs merge=lfs -text
+JEN-1-COMPOSER-pytorch-main/JEN-1-COMPOSER-pytorch-main/data/audios/htdemucs/audio3/bass.wav filter=lfs diff=lfs merge=lfs -text
+JEN-1-COMPOSER-pytorch-main/JEN-1-COMPOSER-pytorch-main/data/audios/htdemucs/audio3/drums.wav filter=lfs diff=lfs merge=lfs -text
+JEN-1-COMPOSER-pytorch-main/JEN-1-COMPOSER-pytorch-main/data/audios/htdemucs/audio3/other.wav filter=lfs diff=lfs merge=lfs -text
+JEN-1-COMPOSER-pytorch-main/JEN-1-COMPOSER-pytorch-main/data/audios/htdemucs/audio3/vocals.wav filter=lfs diff=lfs merge=lfs -text
+JEN-1-COMPOSER-pytorch-main/JEN-1-COMPOSER-pytorch-main/data/audios/htdemucs/audio4/bass.wav filter=lfs diff=lfs merge=lfs -text
+JEN-1-COMPOSER-pytorch-main/JEN-1-COMPOSER-pytorch-main/data/audios/htdemucs/audio4/drums.wav filter=lfs diff=lfs merge=lfs -text
+JEN-1-COMPOSER-pytorch-main/JEN-1-COMPOSER-pytorch-main/data/audios/htdemucs/audio4/other.wav filter=lfs diff=lfs merge=lfs -text
+JEN-1-COMPOSER-pytorch-main/JEN-1-COMPOSER-pytorch-main/data/audios/htdemucs/audio4/vocals.wav filter=lfs diff=lfs merge=lfs -text
+JEN-1-COMPOSER-pytorch-main/JEN-1-COMPOSER-pytorch-main/data/audios/htdemucs/audio5/bass.wav filter=lfs diff=lfs merge=lfs -text
+JEN-1-COMPOSER-pytorch-main/JEN-1-COMPOSER-pytorch-main/data/audios/htdemucs/audio5/drums.wav filter=lfs diff=lfs merge=lfs -text
+JEN-1-COMPOSER-pytorch-main/JEN-1-COMPOSER-pytorch-main/data/audios/htdemucs/audio5/other.wav filter=lfs diff=lfs merge=lfs -text
+JEN-1-COMPOSER-pytorch-main/JEN-1-COMPOSER-pytorch-main/data/audios/htdemucs/audio5/vocals.wav filter=lfs diff=lfs merge=lfs -text
+JEN-1-COMPOSER-pytorch-main/JEN-1-COMPOSER-pytorch-main/data/audios/htdemucs/audio6/bass.wav filter=lfs diff=lfs merge=lfs -text
+JEN-1-COMPOSER-pytorch-main/JEN-1-COMPOSER-pytorch-main/data/audios/htdemucs/audio6/drums.wav filter=lfs diff=lfs merge=lfs -text
+JEN-1-COMPOSER-pytorch-main/JEN-1-COMPOSER-pytorch-main/data/audios/htdemucs/audio6/other.wav filter=lfs diff=lfs merge=lfs -text
+JEN-1-COMPOSER-pytorch-main/JEN-1-COMPOSER-pytorch-main/data/audios/htdemucs/audio6/vocals.wav filter=lfs diff=lfs merge=lfs -text
JEN-1-COMPOSER-pytorch-main/JEN-1-COMPOSER-pytorch-main/JEN1-Composer.jpg ADDED
JEN-1-COMPOSER-pytorch-main/JEN-1-COMPOSER-pytorch-main/Jen1-Composer-2.png ADDED
JEN-1-COMPOSER-pytorch-main/JEN-1-COMPOSER-pytorch-main/README.md ADDED
@@ -0,0 +1,99 @@
+# JEN-1-COMPOSER-pytorch (WIP)
+Unofficial implementation of [JEN-1 Composer: A Unified Framework for High-Fidelity Multi-Track Music Generation](https://arxiv.org/abs/2310.19180).
+
+![JEN-1](https://github.com/0417keito/JEN-1-COMPOSER-pytorch/blob/main/JEN1-Composer.jpg)
+![JEN-1-fig2](https://github.com/0417keito/JEN-1-COMPOSER-pytorch/blob/main/Jen1-Composer-2.png)
+
+## README
+
+## 📖 Quick Index
+* [💻 Installation](#-installation)
+* [🐍 Usage](#-usage)
+* [🧠 TODO](#-todo)
+* [🚀 Demo](#-demo)
+* [🙏 Appreciation](#-appreciation)
+* [⭐️ Show Your Support](#️-show-your-support)
+* [🙆 Welcome Contributions](#-welcome-contributions)
+
+## 💻 Installation
+```commandline
+git clone https://github.com/0417keito/JEN-1-pytorch.git
+cd JEN-1-pytorch
+pip install -r requirements.txt
+```
+
+## 🐍 Usage
+### Sampling
+```python
+import torch
+from generation import Jen1
+
+ckpt_path = 'your ckpt path'
+jen1 = Jen1(ckpt_path)
+
+prompt = 'a beautiful song'
+samples = jen1.generate(prompt)
+```
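
`generate` returns an audio tensor that still has to be written to disk; `generation.py` in this commit ships a `save_audio_tensor` helper for exactly that. A minimal sketch (assuming, as `save_audio_tensor` does, a `(batch, channels, samples)` float tensor at 48 kHz):

```python
import torchaudio

# `samples` comes from jen1.generate(prompt) above; a (batch, channels, samples)
# float tensor at 48 kHz is assumed here.
audio = samples.detach().cpu()
if audio.ndim == 3:
    audio = audio.squeeze(0)  # drop the batch dimension for a single clip
torchaudio.save('sample.wav', audio, 48000)
```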
+
+### Training
+```commandline
+torchrun train.py
+```
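
For Composer-style multi-track training, the dataloader additionally expects demixed stems under `audios/htdemucs/<song>/{bass,drums,other,vocals}.wav`. `dataset/demix.py` in this commit generates them with Demucs; a hedged invocation sketch (the dataset path below is an assumption, point it at your own audio directory):

```python
from pathlib import Path
from dataset.demix import demix, find_audio_files

audio_dir = Path('data/audios')         # hypothetical location of your .mp3 files
paths = find_audio_files(audio_dir)     # recursively collects *.mp3
demix(paths, audio_dir, device='cuda')  # writes audios/htdemucs/<song>/*.wav
```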
+
+### Dataset format
+JSON format. The name of the JSON file must be the same as that of the target music file.
+```json
+{"prompt": "a beautiful song"}
+```
+How should the dataset_dir be laid out?
+```
+dataset_dir
+├── audios
+|   ├── music1.wav
+|   ├── music2.wav
+|   .......
+|   ├── music{n}.wav
+|
+├── metadata
+|   ├── music1.json
+|   ├── music2.json
+|   ......
+|   ├── music{n}.json
+|
+```
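
The metadata files can be generated with a few lines of Python; this is an illustrative sketch (the directory name and prompt text are assumptions, not part of the repository):

```python
import json
from pathlib import Path

dataset_dir = Path('dataset_dir')               # assumed layout from the tree above
metadata_dir = dataset_dir / 'metadata'
metadata_dir.mkdir(parents=True, exist_ok=True)

for audio in (dataset_dir / 'audios').glob('*.wav'):
    # One JSON per track, named after the audio file: music1.wav -> music1.json
    with open(metadata_dir / f'{audio.stem}.json', 'w') as f:
        json.dump({'prompt': 'a beautiful song'}, f)
```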
+
+### About config
+Please see [config.py](https://github.com/0417keito/JEN-1-pytorch/blob/main/utils/config.py) and [conditioner_config.py](https://github.com/0417keito/JEN-1-pytorch/blob/main/utils/conditioner_config.py).
+
+## 🧠 TODO
+- [ ] Extension to [JEN-1-Composer](https://arxiv.org/abs/2310.19180)
+- [ ] Extension to music generation with singing voice
+- [ ] Adaptation of the Consistency Model
+- [ ] The paper uses a Diffusion Autoencoder, but since I did not have much compute, I used Encodec instead. If resources allow, I will implement the Diffusion Autoencoder.
+
+## 🚀 Demo
+Coming soon!
+
+## 🙏 Appreciation
+[Dr Adam Fils](https://github.com/adamfils) - for support and for bringing this to my attention.
+
+## ⭐️ Show Your Support
+If you find this repo interesting and useful, give us a ⭐️ on GitHub! It encourages us to keep improving the model and adding exciting features.
+Please report any deficiencies via an issue.
+
+## 🙆 Welcome Contributions
+Contributions are always welcome.
+
+## Citations
+```bibtex
+@misc{2310.19180,
+  Author = {Yao Yao and Peike Li and Boyu Chen and Alex Wang},
+  Title = {JEN-1 Composer: A Unified Framework for High-Fidelity Multi-Track Music Generation},
+  Year = {2023},
+  Eprint = {arXiv:2310.19180},
+}
+```
JEN-1-COMPOSER-pytorch-main/JEN-1-COMPOSER-pytorch-main/__pycache__/trainer.cpython-311.pyc ADDED
Binary file (25.2 kB).

JEN-1-COMPOSER-pytorch-main/JEN-1-COMPOSER-pytorch-main/data/audios/audio1.mp3 ADDED
Binary file (481 kB).

JEN-1-COMPOSER-pytorch-main/JEN-1-COMPOSER-pytorch-main/data/audios/audio2.mp3 ADDED
Binary file (481 kB).

JEN-1-COMPOSER-pytorch-main/JEN-1-COMPOSER-pytorch-main/data/audios/audio3.mp3 ADDED
Binary file (481 kB).

JEN-1-COMPOSER-pytorch-main/JEN-1-COMPOSER-pytorch-main/data/audios/audio4.mp3 ADDED
Binary file (481 kB).

JEN-1-COMPOSER-pytorch-main/JEN-1-COMPOSER-pytorch-main/data/audios/audio5.mp3 ADDED
Binary file (481 kB).

JEN-1-COMPOSER-pytorch-main/JEN-1-COMPOSER-pytorch-main/data/audios/audio6.mp3 ADDED
Binary file (481 kB).
 
JEN-1-COMPOSER-pytorch-main/JEN-1-COMPOSER-pytorch-main/data/audios/htdemucs/audio1/bass.wav ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3f6bc7ca834b5899a1f35e0294213ab0b74a305c718639af75b5d5f079cd7ca7
+size 5292044
JEN-1-COMPOSER-pytorch-main/JEN-1-COMPOSER-pytorch-main/data/audios/htdemucs/audio1/drums.wav ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1e0a30f4be76598d557f5eb3e4c616f104d8b6624484de64ef040acf790d2380
+size 5292044
JEN-1-COMPOSER-pytorch-main/JEN-1-COMPOSER-pytorch-main/data/audios/htdemucs/audio1/other.wav ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:39447a94082b91122e123e93edaa7fae768e796d6092eb1471f67e38a30d5253
+size 5292044
JEN-1-COMPOSER-pytorch-main/JEN-1-COMPOSER-pytorch-main/data/audios/htdemucs/audio1/vocals.wav ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d3c51fb6ffd95e13cc866c499037f84a972985decb9cdd93c184fa27cd464458
+size 5292044
JEN-1-COMPOSER-pytorch-main/JEN-1-COMPOSER-pytorch-main/data/audios/htdemucs/audio2/bass.wav ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b8fb9f3d14c2f7a9c6792fe686fafc9a9be9e8169ab5efd481bd274156af2f18
+size 5292044
JEN-1-COMPOSER-pytorch-main/JEN-1-COMPOSER-pytorch-main/data/audios/htdemucs/audio2/drums.wav ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:aad842cce67224f86c8bd3aa8e6cc673467b3f02dc39883be66a662b3c34363d
+size 5292044
JEN-1-COMPOSER-pytorch-main/JEN-1-COMPOSER-pytorch-main/data/audios/htdemucs/audio2/other.wav ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c4d1bee50855e4c06252e665300a9ff02b1aa1c66822bb60da0119ec950b0299
+size 5292044
JEN-1-COMPOSER-pytorch-main/JEN-1-COMPOSER-pytorch-main/data/audios/htdemucs/audio2/vocals.wav ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ca8c87f0b2569e52d499601463b4f0f394bad3e0c8002acca3e0aad25bc356f8
+size 5292044
JEN-1-COMPOSER-pytorch-main/JEN-1-COMPOSER-pytorch-main/data/audios/htdemucs/audio3/bass.wav ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2bf1d2592748b07e4a8553aae79eacb25b1d19c50f2ec7b658c9a224138e18d8
+size 5292044
JEN-1-COMPOSER-pytorch-main/JEN-1-COMPOSER-pytorch-main/data/audios/htdemucs/audio3/drums.wav ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:df8950751972beb6fc0f628a6c53e230f0eb785e68d9b3f8c8ea62895aaa40c5
+size 5292044
JEN-1-COMPOSER-pytorch-main/JEN-1-COMPOSER-pytorch-main/data/audios/htdemucs/audio3/other.wav ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c003b116a0f3e56addfb7de9b3fe5e60c0a70e8d0d1f4af28b7e4bebd0639c2e
+size 5292044
JEN-1-COMPOSER-pytorch-main/JEN-1-COMPOSER-pytorch-main/data/audios/htdemucs/audio3/vocals.wav ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b9c62024fc680006609ee8c01c0bf8092287b7f8d435a979cf200eeb3de3b10f
+size 5292044
JEN-1-COMPOSER-pytorch-main/JEN-1-COMPOSER-pytorch-main/data/audios/htdemucs/audio4/bass.wav ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:93dee392485d0c3d3f9ca3cd312dae648a5840135cf272df631a0c815fbd0449
+size 5292044
JEN-1-COMPOSER-pytorch-main/JEN-1-COMPOSER-pytorch-main/data/audios/htdemucs/audio4/drums.wav ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ed3e0f48e874df199d8f95392504dde16d727da2396bb3427704bd7ec105c5a1
+size 5292044
JEN-1-COMPOSER-pytorch-main/JEN-1-COMPOSER-pytorch-main/data/audios/htdemucs/audio4/other.wav ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1660aa65126295133d2fe1cc1ba12643c2679e13ecfa864e5e2b308c10a3812d
+size 5292044
JEN-1-COMPOSER-pytorch-main/JEN-1-COMPOSER-pytorch-main/data/audios/htdemucs/audio4/vocals.wav ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e986bb445f169d4391a56fcebc5f55e8c39bcff752f61ccbf477161b303a1f69
+size 5292044
JEN-1-COMPOSER-pytorch-main/JEN-1-COMPOSER-pytorch-main/data/audios/htdemucs/audio5/bass.wav ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b99f55edf4947722b0e0ed6b76099827f21eae020edac341d20f798758b615c5
+size 5292044
JEN-1-COMPOSER-pytorch-main/JEN-1-COMPOSER-pytorch-main/data/audios/htdemucs/audio5/drums.wav ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:eefadf55c12881632f35cf171e522117cc7e19c18683ece2887d2f9d5f79cb3a
+size 5292044
JEN-1-COMPOSER-pytorch-main/JEN-1-COMPOSER-pytorch-main/data/audios/htdemucs/audio5/other.wav ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a1da07f1d5c02b66d7d9091cce8bd8e5ddbc3aea1bd1f3bcb1eea8ff36abbd60
+size 5292044
JEN-1-COMPOSER-pytorch-main/JEN-1-COMPOSER-pytorch-main/data/audios/htdemucs/audio5/vocals.wav ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6b1411e7c0e128bc457873e011de7896f191f0bdee89c789e73a3d8c67df75f6
+size 5292044
JEN-1-COMPOSER-pytorch-main/JEN-1-COMPOSER-pytorch-main/data/audios/htdemucs/audio6/bass.wav ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8dcafd9ecc1a257897feacd719ce27359c48fd0115d6593d1b747ec2a32613c1
+size 5292044
JEN-1-COMPOSER-pytorch-main/JEN-1-COMPOSER-pytorch-main/data/audios/htdemucs/audio6/drums.wav ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0cf62a180ea5d3939eec617a6dfa71ec8cc4a8cfe9fe90d282faf2aa2b698113
+size 5292044
JEN-1-COMPOSER-pytorch-main/JEN-1-COMPOSER-pytorch-main/data/audios/htdemucs/audio6/other.wav ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bca9d5f51d1118a5c48228fd13825479e386bd3eedb929cdc3b5f71bb5db5f25
+size 5292044
JEN-1-COMPOSER-pytorch-main/JEN-1-COMPOSER-pytorch-main/data/audios/htdemucs/audio6/vocals.wav ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5cc00cd8b79e898899f6d24a4216c7ccae407760bd2750746f652e07df8c2233
+size 5292044
JEN-1-COMPOSER-pytorch-main/JEN-1-COMPOSER-pytorch-main/data/metadata/audio1.json ADDED
@@ -0,0 +1 @@
+{"prompt": "a beautiful song"}
JEN-1-COMPOSER-pytorch-main/JEN-1-COMPOSER-pytorch-main/data/metadata/audio2.json ADDED
@@ -0,0 +1 @@
+{"prompt": "a beautiful song"}
JEN-1-COMPOSER-pytorch-main/JEN-1-COMPOSER-pytorch-main/data/metadata/audio3.json ADDED
@@ -0,0 +1 @@
+{"prompt": "a beautiful song"}
JEN-1-COMPOSER-pytorch-main/JEN-1-COMPOSER-pytorch-main/data/metadata/audio4.json ADDED
@@ -0,0 +1 @@
+{"prompt": "a beautiful song"}
JEN-1-COMPOSER-pytorch-main/JEN-1-COMPOSER-pytorch-main/data/metadata/audio5.json ADDED
@@ -0,0 +1 @@
+{"prompt": "a beautiful song"}
JEN-1-COMPOSER-pytorch-main/JEN-1-COMPOSER-pytorch-main/data/metadata/audio6.json ADDED
@@ -0,0 +1 @@
+{"prompt": "a beautiful song"}
JEN-1-COMPOSER-pytorch-main/JEN-1-COMPOSER-pytorch-main/dataset/__pycache__/audio_processor.cpython-38.pyc ADDED
Binary file (3.84 kB).

JEN-1-COMPOSER-pytorch-main/JEN-1-COMPOSER-pytorch-main/dataset/__pycache__/dataloader.cpython-311.pyc ADDED
Binary file (14.8 kB).

JEN-1-COMPOSER-pytorch-main/JEN-1-COMPOSER-pytorch-main/dataset/__pycache__/dataloader.cpython-38.pyc ADDED
Binary file (6.55 kB).
 
JEN-1-COMPOSER-pytorch-main/JEN-1-COMPOSER-pytorch-main/dataset/dataloader.py ADDED
@@ -0,0 +1,191 @@
+import os
+import time
+import math
+import random
+import json
+import torch
+import torchaudio
+from torch.utils.data import Dataset, DataLoader, random_split
+
+from encodec import EncodecModel
+from encodec.utils import convert_audio
+
+
+class MusicDataset(Dataset):
+    def __init__(self, dataset_dir, sr, channels, min_duration, max_duration,
+                 sample_duration, aug_shift, device, composer=False):
+        super().__init__()
+        self.dataset_dir = dataset_dir
+        self.sr = sr
+        self.channels = channels
+        self.min_duration = min_duration
+        self.max_duration = max_duration
+        self.sample_duration = sample_duration
+        self.aug_shift = aug_shift
+        self.device = device
+        self.model = EncodecModel.encodec_model_48khz().to(device=self.device)
+        self.audio_files_dir = f'{dataset_dir}/audios'
+        self.metadatas_dir = f'{dataset_dir}/metadata'
+        self.composer = composer
+        if composer:
+            self.demix_dir = f'{self.audio_files_dir}/htdemucs'
+        self.init_dataset()
+
+    def get_duration_sec(self, file):
+        wav, sr = torchaudio.load(file)
+        duration_sec = wav.shape[-1] / sr
+        return duration_sec
+
+    def filter(self, audio_files, durations):
+        # durations come from get_duration_sec and are already in seconds
+        keep = []
+        self.audio_files = []
+        for i in range(len(audio_files)):
+            filepath = audio_files[i]
+            if durations[i] < self.min_duration:
+                continue
+            if durations[i] >= self.max_duration:
+                continue
+            keep.append(i)
+            self.audio_files.append(filepath)
+        self.durations = [durations[i] for i in keep]  # in (s)
+        duration_tensor = torch.tensor(self.durations)
+        self.cumsum = torch.cumsum(duration_tensor, dim=0)  # in (s)
+
+    def init_dataset(self):
+        audio_files = os.listdir(self.audio_files_dir)
+        audio_files = [f'{self.audio_files_dir}/{file}' for file in audio_files
+                       if file.endswith('.wav') or file.endswith('.mp3')]
+        durations = [self.get_duration_sec(file) for file in audio_files]
+        self.filter(audio_files=audio_files, durations=durations)
+
+    def get_index_offset(self, item):
+        half_interval = self.sample_duration // 2
+        shift = random.randint(-half_interval, half_interval) if self.aug_shift else 0
+        offset = item * self.sample_duration + shift
+        midpoint = offset + half_interval
+        assert 0 <= midpoint < self.cumsum[-1], f'Midpoint {midpoint} of item beyond total length {self.cumsum[-1]}'
+        index = torch.searchsorted(self.cumsum, midpoint)
+        start, end = self.cumsum[index - 1] if index > 0 else 0.0, self.cumsum[index]
+        assert start <= midpoint <= end, f'Midpoint {midpoint} not inside interval [{start}, {end}] for index {index}'
+        if offset > end - self.sample_duration:
+            offset = max(start, offset - half_interval)
+        elif offset < start:
+            offset = min(end - self.sample_duration, offset + half_interval)
+        assert start <= offset <= end - self.sample_duration, f'Offset {offset} not in [{start}, {end}] for index {index}'
+        offset = offset - start
+        return index, offset
+
+    def get_song_chunk(self, index, offset):
+        audio_file_path = self.audio_files[index]
+        song_name = os.path.splitext(os.path.basename(audio_file_path))[0]
+        wav, sr = torchaudio.load(audio_file_path)
+
+        start_sample = int(offset * sr)
+        end_sample = start_sample + int(self.sample_duration * sr)
+        chunk = wav[:, start_sample:end_sample]
+        if self.composer:
+            demix_chunks = {}
+            demix_file_dict = {'bass': f'{self.demix_dir}/{song_name}/bass.wav',
+                               'drums': f'{self.demix_dir}/{song_name}/drums.wav',
+                               'other': f'{self.demix_dir}/{song_name}/other.wav'}
+            for key, value in demix_file_dict.items():
+                demix_chunk, demix_sr = torchaudio.load(value)
+                start_sample = int(offset * demix_sr)
+                end_sample = start_sample + int(self.sample_duration * demix_sr)
+                demix_chunks[key] = {'demix_chunk': demix_chunk[:, start_sample:end_sample],
+                                     'demix_sr': demix_sr}
+            return chunk, sr, demix_chunks
+
+        return chunk, sr
+
+    def __len__(self):
+        return len(self.durations)
+
+    def __getitem__(self, item):
+        index, offset = self.get_index_offset(item)
+        if self.composer:
+            chunk, sr, demix_chunks = self.get_song_chunk(index, offset)
+        else:
+            chunk, sr = self.get_song_chunk(index, offset)
+        song_name = os.path.splitext(os.path.basename(self.audio_files[index]))[0]
+        if os.path.exists(f'{self.metadatas_dir}/{song_name}.json'):
+            with open(f'{self.metadatas_dir}/{song_name}.json', 'r') as file:
+                metadata = json.load(file)
+        chunk = convert_audio(chunk, sr, self.model.sample_rate, self.model.channels)
+        chunk = chunk.unsqueeze(0).to(device=self.device)
+        with torch.no_grad():
+            encoded_frames = self.model.encode(chunk)
+        chunk = chunk.mean(0, keepdim=True)
+        codes = torch.cat([encoded[0] for encoded in encoded_frames], dim=-1)
+        codes = codes.transpose(0, 1)
+        emb = self.model.quantizer.decode(codes)
+        emb = emb.to(self.device)
+
+        demix_embs = None
+        if self.composer:
+            demix_embs = {}
+            for key, value in demix_chunks.items():
+                demix_chunk = value['demix_chunk']
+                demix_sr = value['demix_sr']
+                demix_chunk = convert_audio(demix_chunk, demix_sr, self.model.sample_rate, self.model.channels)
+                demix_chunk = demix_chunk.unsqueeze(0).to(device=self.device)
+                with torch.no_grad():
+                    demix_encoded_frames = self.model.encode(demix_chunk)
+                demix_chunk = demix_chunk.mean(0, keepdim=True)
+                demix_codes = torch.cat([encoded[0] for encoded in demix_encoded_frames], dim=-1)
+                demix_codes = demix_codes.transpose(0, 1)
+                demix_emb = self.model.quantizer.decode(demix_codes)
+                demix_emb = demix_emb.to(self.device)
+                demix_embs[key] = demix_emb
+
+        return chunk, metadata, emb, demix_embs
+
+
+def collate(batch):
+    device = 'cuda' if torch.cuda.is_available() else 'cpu'  # currently unused
+    audio, data, emb, demix_embs = zip(*batch)
+    audio = torch.cat(audio, dim=0)
+    emb = torch.cat(emb, dim=0)
+    demix_embs_dict = None
+    metadata = [d for d in data]
+    if demix_embs[0] is not None:  # composer mode: batch the per-stem embeddings
+        keys = demix_embs[0].keys()
+        tensors_dict = {key: [] for key in keys}
+        for d in demix_embs:
+            for key in keys:
+                tensors_dict[key].append(d[key])
+        demix_embs_dict = {key: torch.cat(tensors_dict[key], dim=0) for key in keys}
+    return (emb, metadata, demix_embs_dict)
+
+
+def get_dataloader(dataset_folder, batch_size: int = 50, shuffle: bool = True):
+    # NOTE: MusicDataset has further required arguments (sr, channels, ...);
+    # get_dataloaders below forwards them explicitly.
+    dataset = MusicDataset(dataset_folder)
+    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=shuffle, collate_fn=collate)
+    return dataloader
+
+
+def get_dataloaders(dataset_dir, sr, channels, min_duration, max_duration, sample_duration,
+                    aug_shift, batch_size: int = 50, shuffle: bool = True, split_ratio=0.8, device='cpu',
+                    composer=False):
+    if not isinstance(dataset_dir, tuple):
+        dataset = MusicDataset(dataset_dir=dataset_dir, sr=sr, channels=channels,
+                               min_duration=min_duration, max_duration=max_duration, sample_duration=sample_duration,
+                               aug_shift=aug_shift, device=device, composer=composer)
+        # Split the dataset into train and validation
+        train_size = int(split_ratio * len(dataset))
+        val_size = len(dataset) - train_size
+        train_dataset, val_dataset = random_split(dataset, [train_size, val_size])
+    else:
+        train_dir, valid_dir = dataset_dir
+        train_dataset = MusicDataset(dataset_dir=train_dir, sr=sr, channels=channels,
+                                     min_duration=min_duration, max_duration=max_duration, sample_duration=sample_duration,
+                                     aug_shift=aug_shift, device=device, composer=composer)
+        val_dataset = MusicDataset(dataset_dir=valid_dir, sr=sr, channels=channels,
+                                   min_duration=min_duration, max_duration=max_duration, sample_duration=sample_duration,
+                                   aug_shift=aug_shift, device=device, composer=composer)
+    train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=shuffle, collate_fn=collate, drop_last=True)
+    val_dataloader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, collate_fn=collate, drop_last=True)
+
+    return train_dataloader, val_dataloader
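
A hedged usage sketch for `get_dataloaders` (every value below is an illustrative assumption, not a repository default):

```python
from dataset.dataloader import get_dataloaders

train_loader, val_loader = get_dataloaders(
    dataset_dir='data',                 # contains audios/ and metadata/ (see README)
    sr=48000, channels=2,
    min_duration=10, max_duration=600,  # keep tracks between 10 s and 10 min
    sample_duration=30,                 # 30 s training chunks
    aug_shift=True, batch_size=4, device='cuda',
    composer=True,                      # also loads the htdemucs stem embeddings
)

for emb, metadata, demix_embs in train_loader:
    # emb: Encodec embeddings of the mix; demix_embs: dict of per-stem embeddings
    break
```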
JEN-1-COMPOSER-pytorch-main/JEN-1-COMPOSER-pytorch-main/dataset/demix.py ADDED
@@ -0,0 +1,51 @@
+import sys
+import subprocess
+import torch
+
+from pathlib import Path
+from typing import List, Union
+
+
+def demix(paths: List[Path], demix_dir: Path, device: Union[str, torch.device]):
+    """Demixes the audio file into its sources."""
+    todos = []
+    demix_paths = []
+    for path in paths:
+        out_dir = demix_dir / 'htdemucs' / path.stem
+        demix_paths.append(out_dir)
+        if out_dir.is_dir():
+            if (
+                (out_dir / 'bass.wav').is_file() and
+                (out_dir / 'drums.wav').is_file() and
+                (out_dir / 'other.wav').is_file() and
+                (out_dir / 'vocals.wav').is_file()
+            ):
+                continue
+        todos.append(path)
+
+    existing = len(paths) - len(todos)
+    print(f'=> Found {existing} tracks already demixed, {len(todos)} to demix.')
+
+    if todos:
+        subprocess.run(
+            [
+                sys.executable, '-m', 'demucs.separate',
+                '--out', demix_dir.as_posix(),
+                '--name', 'htdemucs',
+                '--device', str(device),
+                *[path.as_posix() for path in todos],
+            ],
+            check=True,
+        )
+
+    return demix_paths
+
+
+def find_audio_files(directory: Path):
+    return list(directory.rglob('*.mp3'))
+
+
+if __name__ == '__main__':
+    dataset_dir = '/home/keito/data/audios'
+    audio_dir = Path(dataset_dir)
+    audio_paths = find_audio_files(audio_dir)
+    demixed_paths = demix(audio_paths, audio_dir, 'cuda')
+    print('demixed_paths', demixed_paths)
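
Note that `find_audio_files` only collects `*.mp3`, while the dataloader also accepts `.wav`; a variant covering both could look like this (an illustrative sketch, not part of the repository):

```python
from pathlib import Path

def find_audio_files_any(directory: Path, exts=('.mp3', '.wav')):
    # Recursively collect every file whose suffix matches one of `exts`.
    return [p for p in directory.rglob('*') if p.suffix.lower() in exts]
```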
JEN-1-COMPOSER-pytorch-main/JEN-1-COMPOSER-pytorch-main/generation.py ADDED
@@ -0,0 +1,212 @@
+import torch
+import torchaudio
+import random
+import numpy as np
+import math
+
+from utils.script_util import create_multi_conditioner, load_checkpoint
+from utils.config import Config
+
+from encodec import EncodecModel
+from encodec.utils import convert_audio
+
+from jen1.diffusion.gdm.gdm import GaussianDiffusion
+from jen1.model.model import UNetCFG1d
+from jen1.diffusion.gdm.noise_schedule import get_beta_schedule
+
+
+class Jen1():
+    def __init__(self,
+                 ckpt_path,
+                 device='cuda' if torch.cuda.is_available() else 'cpu',
+                 sample_rate=48000,
+                 cross_attn_cond_ids=['prompt'],
+                 global_cond_ids=[],
+                 input_concat_ids=['masked_input', 'mask']):
+        self.ckpt_path = ckpt_path
+        self.device = device
+        self.sample_rate = sample_rate
+        self.config = Config
+        self.conditioner = create_multi_conditioner(self.config.conditioner_config)
+        self.cross_attn_cond_ids = cross_attn_cond_ids
+        self.global_cond_ids = global_cond_ids
+        self.input_concat_ids = input_concat_ids
+
+        self.audio_encoder = EncodecModel.encodec_model_48khz()
+
+    def get_model_and_diffusion(self, steps, use_gdm):
+        # NOTE: only the Gaussian diffusion path (use_gdm=True) builds a
+        # diffusion object here; the variational path only selects its config.
+        if use_gdm:
+            diffusion_config = self.config.diffusion_config.gaussian_diffusion
+        else:
+            diffusion_config = self.config.diffusion_config.variational_diffusion
+        model_config = self.config.model_config
+
+        if use_gdm:
+            betas, alphas = get_beta_schedule(diffusion_config.noise_schedule, diffusion_config.steps)
+            betas = betas.to(self.device).to(torch.float32)
+            if alphas is not None:
+                alphas = alphas.to(self.device).to(torch.float32)
+            diffusion = GaussianDiffusion(steps=diffusion_config.steps, betas=betas, alphas=alphas,
+                                          objective=diffusion_config.objective, loss_type=diffusion_config.loss_type,
+                                          device=self.device, cfg_dropout_proba=diffusion_config.cfg_dropout_proba,
+                                          embedding_scale=diffusion_config.embedding_scale,
+                                          batch_cfg=diffusion_config.batch_cfg, scale_cfg=diffusion_config.scale_cfg,
+                                          sampling_timesteps=steps, use_fp16=False)
+
+        config_dict = {k: v for k, v in model_config.__dict__.items() if not k.startswith('__') and not callable(v)}
+        context_embedding_features = config_dict.pop('context_embedding_features', None)
+        context_embedding_max_length = config_dict.pop('context_embedding_max_length', None)
+
+        model = UNetCFG1d(context_embedding_features=context_embedding_features,
+                          context_embedding_max_length=context_embedding_max_length,
+                          **config_dict).to(self.device)
+
+        #model, _, _, _ = load_checkpoint(self.ckpt_path, model)
+        model.eval()
+        diffusion.eval()
+
+        return diffusion, model
+
+    def generate(self, prompt, seed=-1, steps=100, batch_size=1, seconds=30, use_gdm=True,
+                 task='text_guided', init_audio=None, init_audio_sr=None, inpainting_scope=None):
+
+        seed = seed if seed != -1 else np.random.randint(0, 2**32 - 1)
+        torch.manual_seed(seed)
+        self.batch_size = batch_size
+
+        diffusion, model = self.get_model_and_diffusion(steps, use_gdm)
+
+        if init_audio is not None and init_audio.dim() != 3:
+            init_audio = init_audio.repeat(batch_size, 1, 1)
+
+        flag = False
+        sample_length = seconds * self.sample_rate
+        if init_audio is None:
+            flag = True
+            shape = (batch_size, self.audio_encoder.channels, sample_length)
+            init_audio = torch.zeros(shape)
+            init_audio_sr = self.sample_rate
+
+        init_audio = convert_audio(init_audio, init_audio_sr, self.sample_rate, self.audio_encoder.channels)
+
+        if task == 'text_guided':
+            mask = self.get_mask(sample_length, 0, seconds, batch_size)
+            causal = False
+        elif task == 'music_inpaint':
+            mask = self.get_mask(sample_length, inpainting_scope[0], inpainting_scope[1], batch_size)
+            causal = False
+        elif task == 'music_cont':
+            cont_length = sample_length - init_audio.size(2)
+            cont_start = init_audio.size(2)
+            mask = self.get_mask(sample_length, cont_start / self.sample_rate, seconds, batch_size)
+            # create the continuation noise on the same device as init_audio so torch.cat works
+            cont_audio = torch.randn(batch_size, self.audio_encoder.channels, cont_length, device=init_audio.device)
+            cont_audio = cont_audio * mask[:, :, cont_start:]
+            init_audio = torch.cat([init_audio, cont_audio], dim=2)
+            causal = True
+
+        with torch.no_grad():
+            init_emb = self.get_emb(init_audio).to(self.device)
+        emb_shape = init_emb.shape
+        mask = mask.to(self.device)
+
+        mask = torch.nn.functional.interpolate(mask, size=(emb_shape[2]))
+        masked_emb = init_emb * mask
+        if flag:
+            init_emb = None
+        batch_metadata = [{'prompt': prompt} for _ in range(batch_size)]
+        conditioning = self.conditioner(batch_metadata, self.device)
+        conditioning['masked_input'] = masked_emb
+        conditioning['mask'] = mask
+        conditioning = self.get_conditioning(conditioning)
+
+        sample_embs = diffusion.sample(model, emb_shape, conditioning, causal, init_data=init_emb)
+        sample_embs = sample_embs.to('cpu')
+        samples = self.audio_encoder.decoder(sample_embs)
+
+        return samples
+
+    def get_mask(self, sample_size, start, end, batch_size):
+        masks = []
+        maskstart = math.floor(start * self.sample_rate)
+        maskend = math.ceil(end * self.sample_rate)
+        mask = torch.ones((1, 1, sample_size))
+        mask[:, :, maskstart:maskend] = 0
+        masks.append(mask)
+        mask = torch.concat(masks * batch_size, dim=0)
+
+        return mask
+
+    def get_emb(self, audio):
+        encoded_frames = self.audio_encoder.encode(audio)
+        codes = torch.cat([encoded[0] for encoded in encoded_frames], dim=-1)
+        codes = codes.transpose(0, 1)
+        emb = self.audio_encoder.quantizer.decode(codes)
+        return emb
+
+    def get_conditioning(self, cond):
+        cross_attention_input = None
+        cross_attention_masks = None
+        global_cond = None
+        input_concat_cond = None
+
+        if len(self.cross_attn_cond_ids) > 0:
+            # Concatenate all cross-attention inputs over the sequence dimension
+            # Assumes that the cross-attention inputs are of shape (batch, seq, channels)
+            cross_attention_input = torch.cat([cond[key][0] for key in self.cross_attn_cond_ids], dim=1)
+            cross_attention_masks = torch.cat([cond[key][1] for key in self.cross_attn_cond_ids], dim=1)
+
+        if len(self.global_cond_ids) > 0:
+            # Concatenate all global conditioning inputs over the channel dimension
+            # Assumes that the global conditioning inputs are of shape (batch, channels)
+            global_cond = torch.cat([cond[key][0] for key in self.global_cond_ids], dim=-1)
+            if len(global_cond.shape) == 3:
+                global_cond = global_cond.squeeze(1)
+
+        if len(self.input_concat_ids) > 0:
+            concated_tensors = []
+            for key in self.input_concat_ids:
+                tensor = cond[key][0]
+                if tensor.ndim == 2:
+                    tensor = tensor.unsqueeze(0)
+                tensor = tensor.expand(self.batch_size, -1, -1)
+                concated_tensors.append(tensor)
+            # Concatenate all input concat conditioning inputs over the channel dimension
+            # Assumes that the input concat conditioning inputs are of shape (batch, channels, seq)
+            #input_concat_cond = torch.cat([cond[key][0] for key in self.input_concat_ids], dim=1)
+            # For some reason, the batch component is removed. I don't know why.
+            input_concat_cond = torch.cat(concated_tensors, dim=1)
+
+        return {
+            "cross_attn_cond": cross_attention_input,
+            "cross_attn_masks": cross_attention_masks,
+            "global_cond": global_cond,
+            "input_concat_cond": input_concat_cond
+        }
+
+
+def save_audio_tensor(audio_tensor: torch.Tensor, file_path: str, sample_rate: int = 48000):
+    """
+    Saves an audio tensor to a file.
+    Params:
+        audio_tensor: torch.Tensor, the audio data to save.
+        file_path: str, the path to the file where the audio will be saved.
+        sample_rate: int, the sample rate of the audio data.
+    Returns:
+        None
+    """
+    print(f'Saving audio to {file_path}')
+    # Detach the tensor from the graph before saving
+    audio_tensor = audio_tensor.detach()
+    print(f'audio_tensor.shape: {audio_tensor.shape}')
+    if audio_tensor.ndim == 3:
+        audio_tensor = audio_tensor.squeeze(0)  # Remove the batch dimension
+    # Use torchaudio to save the tensor as an audio file
+    torchaudio.save(file_path, audio_tensor, sample_rate)
+    print(f'Saved audio to {file_path}')
+
+
+if __name__ == '__main__':
+    jen1 = Jen1(ckpt_path=None)
+    prompt = 'a beautiful song'
+    samples = jen1.generate(prompt=prompt)
+    save_audio_tensor(samples, 'samples.wav')
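
A hedged usage sketch for the inpainting path (the checkpoint path, input file, and scope values are assumptions; `inpainting_scope` is the `(start_sec, end_sec)` window that `get_mask` zeroes out for regeneration):

```python
import torchaudio
from generation import Jen1, save_audio_tensor

jen1 = Jen1(ckpt_path='your ckpt path')       # hypothetical checkpoint path

init_audio, sr = torchaudio.load('clip.wav')  # hypothetical input clip
samples = jen1.generate('a beautiful song', task='music_inpaint',
                        init_audio=init_audio, init_audio_sr=sr,
                        inpainting_scope=(10, 20))  # regenerate seconds 10-20
save_audio_tensor(samples, 'inpainted.wav')
```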
JEN-1-COMPOSER-pytorch-main/JEN-1-COMPOSER-pytorch-main/jen1/__pycache__/conditioners.cpython-311.pyc ADDED
Binary file (12.4 kB).

JEN-1-COMPOSER-pytorch-main/JEN-1-COMPOSER-pytorch-main/jen1/__pycache__/conditioners.cpython-38.pyc ADDED
Binary file (6.53 kB).

JEN-1-COMPOSER-pytorch-main/JEN-1-COMPOSER-pytorch-main/jen1/__pycache__/noise_schedule.cpython-38.pyc ADDED
Binary file (1.11 kB).