Update README.md
Browse files
README.md
CHANGED
@@ -7,7 +7,6 @@ language:
|
|
7 |
|
8 |
<center><h3>IndexTTS2: A Breakthrough in Emotionally Expressive and Duration-Controlled Auto-Regressive Zero-Shot Text-to-Speech</h3></center>
|
9 |
|
10 |
-
[](assets/IndexTTS2_banner.png)
|
11 |
|
12 |
<div align="center">
|
13 |
<a href='https://arxiv.org/abs/2506.21619'>
|
@@ -35,3 +34,41 @@ language:
|
|
35 |
<img src='https://img.shields.io/badge/ModelScope-Model-purple?logo=modelscope'/>
|
36 |
</a>
|
37 |
</div>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7 |
|
8 |
<center><h3>IndexTTS2: A Breakthrough in Emotionally Expressive and Duration-Controlled Auto-Regressive Zero-Shot Text-to-Speech</h3></center>
|
9 |
|
|
|
10 |
|
11 |
<div align="center">
|
12 |
<a href='https://arxiv.org/abs/2506.21619'>
|
|
|
34 |
<img src='https://img.shields.io/badge/ModelScope-Model-purple?logo=modelscope'/>
|
35 |
</a>
|
36 |
</div>
|
37 |
+
|
38 |
+
|
39 |
+
## Acknowledgements
|
40 |
+
1. [tortoise-tts](https://github.com/neonbjb/tortoise-tts)
|
41 |
+
2. [XTTSv2](https://github.com/coqui-ai/TTS)
|
42 |
+
3. [BigVGAN](https://github.com/NVIDIA/BigVGAN)
|
43 |
+
4. [wenet](https://github.com/wenet-e2e/wenet/tree/main)
|
44 |
+
5. [icefall](https://github.com/k2-fsa/icefall)
|
45 |
+
6. [maskgct](https://github.com/open-mmlab/Amphion/tree/main/models/tts/maskgct)
|
46 |
+
7. [seed-vc](https://github.com/Plachtaa/seed-vc)
|
47 |
+
|
48 |
+
|
49 |
+
## 📚 Citation
|
50 |
+
|
51 |
+
🌟 If you find our work helpful, please leave us a star and cite our paper.
|
52 |
+
|
53 |
+
|
54 |
+
IndexTTS2
|
55 |
+
```
|
56 |
+
@article{zhou2025indextts2,
|
57 |
+
title={IndexTTS2: A Breakthrough in Emotionally Expressive and Duration-Controlled Auto-Regressive Zero-Shot Text-to-Speech},
|
58 |
+
author={Siyi Zhou and Yiquan Zhou and Yi He and Xun Zhou and Jinchao Wang and Wei Deng and Jingchen Shu},
|
59 |
+
journal={arXiv preprint arXiv:2506.21619},
|
60 |
+
year={2025}
|
61 |
+
}
|
62 |
+
```
|
63 |
+
|
64 |
+
IndexTTS
|
65 |
+
```
|
66 |
+
@article{deng2025indextts,
|
67 |
+
title={IndexTTS: An Industrial-Level Controllable and Efficient Zero-Shot Text-To-Speech System},
|
68 |
+
author={Wei Deng and Siyi Zhou and Jingchen Shu and Jinchao Wang and Lu Wang},
|
69 |
+
journal={arXiv preprint arXiv:2502.05512},
|
70 |
+
year={2025},
|
71 |
+
doi={10.48550/arXiv.2502.05512},
|
72 |
+
url={https://arxiv.org/abs/2502.05512}
|
73 |
+
}
|
74 |
+
```
|