CongBang committed
Commit 051f5c6 · verified · 1 Parent(s): 1ae22a3

Upload 86 files

This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50)
  1. .gitattributes +11 -0
  2. Test-F5/00000.wav +3 -0
  3. Test-F5/app.py +45 -0
  4. Test-F5/checkpoints/vocos-mel-24khz/config.yaml +24 -0
  5. Test-F5/checkpoints/vocos-mel-24khz/pytorch_model.bin +3 -0
  6. Test-F5/ckpts/viVoice/model_last.pt +3 -0
  7. Test-F5/data/viVoice/duration.json +0 -0
  8. Test-F5/data/viVoice/metadata.csv +3 -0
  9. Test-F5/data/viVoice/new_vocab.txt +149 -0
  10. Test-F5/data/viVoice/raw.arrow +3 -0
  11. Test-F5/data/viVoice/vocab.txt +2571 -0
  12. Test-F5/gradio_output/gen_20250510_001635.wav +3 -0
  13. Test-F5/gradio_output/gen_20250510_001804.wav +3 -0
  14. Test-F5/gradio_output/gen_20250510_002157.wav +3 -0
  15. Test-F5/ref.wav +0 -0
  16. Test-F5/ref2.wav +3 -0
  17. Test-F5/src/f5_tts.egg-info/PKG-INFO +156 -0
  18. Test-F5/src/f5_tts.egg-info/SOURCES.txt +88 -0
  19. Test-F5/src/f5_tts.egg-info/dependency_links.txt +1 -0
  20. Test-F5/src/f5_tts.egg-info/entry_points.txt +5 -0
  21. Test-F5/src/f5_tts.egg-info/requires.txt +36 -0
  22. Test-F5/src/f5_tts.egg-info/top_level.txt +1 -0
  23. Test-F5/src/f5_tts/__pycache__/api.cpython-310.pyc +0 -0
  24. Test-F5/src/f5_tts/api.py +165 -0
  25. Test-F5/src/f5_tts/configs/E2TTS_Base.yaml +49 -0
  26. Test-F5/src/f5_tts/configs/E2TTS_Small.yaml +49 -0
  27. Test-F5/src/f5_tts/configs/F5TTS_Base.yaml +52 -0
  28. Test-F5/src/f5_tts/configs/F5TTS_Small.yaml +52 -0
  29. Test-F5/src/f5_tts/configs/F5TTS_v1_Base.yaml +53 -0
  30. Test-F5/src/f5_tts/eval/README.md +52 -0
  31. Test-F5/src/f5_tts/eval/ecapa_tdnn.py +330 -0
  32. Test-F5/src/f5_tts/eval/eval_infer_batch.py +202 -0
  33. Test-F5/src/f5_tts/eval/eval_infer_batch.sh +18 -0
  34. Test-F5/src/f5_tts/eval/eval_librispeech_test_clean.py +90 -0
  35. Test-F5/src/f5_tts/eval/eval_seedtts_testset.py +89 -0
  36. Test-F5/src/f5_tts/eval/eval_utmos.py +42 -0
  37. Test-F5/src/f5_tts/eval/utils_eval.py +418 -0
  38. Test-F5/src/f5_tts/infer/README.md +154 -0
  39. Test-F5/src/f5_tts/infer/SHARED.md +174 -0
  40. Test-F5/src/f5_tts/infer/__pycache__/infer_cli.cpython-310.pyc +0 -0
  41. Test-F5/src/f5_tts/infer/__pycache__/utils_infer.cpython-310.pyc +0 -0
  42. Test-F5/src/f5_tts/infer/examples/basic/basic.toml +11 -0
  43. Test-F5/src/f5_tts/infer/examples/basic/basic_ref_en.wav +3 -0
  44. Test-F5/src/f5_tts/infer/examples/basic/basic_ref_zh.wav +3 -0
  45. Test-F5/src/f5_tts/infer/examples/multi/country.flac +3 -0
  46. Test-F5/src/f5_tts/infer/examples/multi/main.flac +3 -0
  47. Test-F5/src/f5_tts/infer/examples/multi/story.toml +20 -0
  48. Test-F5/src/f5_tts/infer/examples/multi/story.txt +1 -0
  49. Test-F5/src/f5_tts/infer/examples/multi/town.flac +3 -0
  50. Test-F5/src/f5_tts/infer/examples/vocab.txt +2545 -0
.gitattributes CHANGED
@@ -33,3 +33,14 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ Test-F5/00000.wav filter=lfs diff=lfs merge=lfs -text
+ Test-F5/data/viVoice/metadata.csv filter=lfs diff=lfs merge=lfs -text
+ Test-F5/gradio_output/gen_20250510_001635.wav filter=lfs diff=lfs merge=lfs -text
+ Test-F5/gradio_output/gen_20250510_001804.wav filter=lfs diff=lfs merge=lfs -text
+ Test-F5/gradio_output/gen_20250510_002157.wav filter=lfs diff=lfs merge=lfs -text
+ Test-F5/ref2.wav filter=lfs diff=lfs merge=lfs -text
+ Test-F5/src/f5_tts/infer/examples/basic/basic_ref_en.wav filter=lfs diff=lfs merge=lfs -text
+ Test-F5/src/f5_tts/infer/examples/basic/basic_ref_zh.wav filter=lfs diff=lfs merge=lfs -text
+ Test-F5/src/f5_tts/infer/examples/multi/country.flac filter=lfs diff=lfs merge=lfs -text
+ Test-F5/src/f5_tts/infer/examples/multi/main.flac filter=lfs diff=lfs merge=lfs -text
+ Test-F5/src/f5_tts/infer/examples/multi/town.flac filter=lfs diff=lfs merge=lfs -text
Test-F5/00000.wav ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:587ace17332ee4a998dfe5ec37517438beca6610b668d8625414645bce253fdf
+ size 166484
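Every binary file in this commit is checked in as a Git LFS pointer like the one above: the repository stores only the spec version, a sha256 object id, and the true byte size, while the blob itself lives in LFS storage. A minimal sketch of reading those three fields (hypothetical helper, not part of this repo):

```python
# Sketch: parse a Git LFS pointer file (like Test-F5/00000.wav above)
# into its three fields. Hypothetical helper, not part of this commit.
def parse_lfs_pointer(text: str) -> dict:
    fields = dict(line.split(" ", 1) for line in text.strip().splitlines())
    return {
        "version": fields["version"],            # pointer spec URL
        "oid": fields["oid"].split(":", 1)[1],   # sha256 hex digest of the blob
        "size": int(fields["size"]),             # real file size in bytes
    }

pointer = """version https://git-lfs.github.com/spec/v1
oid sha256:587ace17332ee4a998dfe5ec37517438beca6610b668d8625414645bce253fdf
size 166484"""
print(parse_lfs_pointer(pointer)["size"])  # 166484
```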
Test-F5/app.py ADDED
@@ -0,0 +1,45 @@
+ import gradio as gr
+ import subprocess
+ import os
+ from datetime import datetime
+
+ def run_infer_cli(ref_audio, ref_text, gen_text):
+     timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+     output_dir = "gradio_output"
+     output_path = f"{output_dir}/gen_{timestamp}.wav"
+
+     os.makedirs(output_dir, exist_ok=True)
+
+     cmd = [
+         "python", "src/f5_tts/infer/infer_cli.py",
+         "--model", "F5TTS_Base",
+         "--ref_audio", ref_audio,
+         "--ref_text", ref_text,
+         "--gen_text", gen_text,
+         "--speed", "1.0",
+         "--vocoder_name", "vocos",
+         "--vocab_file", "data/viVoice/vocab.txt",
+         "--ckpt_file", "ckpts/viVoice/model_last.pt",
+         "--output_dir", output_dir,
+         "--output_file", f"gen_{timestamp}.wav"
+     ]
+
+     try:
+         subprocess.run(cmd, check=True)
+         return output_path
+     except subprocess.CalledProcessError as e:
+         return f"Error running infer_cli.py: {e}"
+
+ with gr.Blocks() as demo:
+     gr.Markdown("## 🔉 Generate speech with F5-TTS (using infer_cli.py)")
+
+     ref_audio = gr.Audio(label="📁 Reference voice (.wav)", type="filepath")
+     ref_text = gr.Textbox(label="📝 Reference text", lines=2)
+     gen_text = gr.Textbox(label="📄 Text to synthesize", lines=3)
+
+     output_audio = gr.Audio(label="🎧 Result", type="filepath")
+     run_btn = gr.Button("🚀 Generate speech")
+
+     run_btn.click(run_infer_cli, inputs=[ref_audio, ref_text, gen_text], outputs=output_audio)
+
+ demo.launch()
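One caveat with the handler above: on failure it returns an error string, but its output component is gr.Audio, which expects a file path. A common alternative (not what this commit does) is to raise gr.Error so the message surfaces in the UI; a sketch:

```python
import subprocess
import gradio as gr

# Sketch of a stricter error path for run_infer_cli above; cmd and
# output_path would be built exactly as in app.py. Alternative pattern,
# not part of this commit.
def run_checked(cmd: list, output_path: str) -> str:
    try:
        subprocess.run(cmd, check=True)
    except subprocess.CalledProcessError as e:
        raise gr.Error(f"infer_cli.py failed: {e}")  # shown as an error in the UI
    return output_path
```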
Test-F5/checkpoints/vocos-mel-24khz/config.yaml ADDED
@@ -0,0 +1,24 @@
+ feature_extractor:
+   class_path: vocos.feature_extractors.MelSpectrogramFeatures
+   init_args:
+     sample_rate: 24000
+     n_fft: 1024
+     hop_length: 256
+     n_mels: 100
+     padding: center
+
+ backbone:
+   class_path: vocos.models.VocosBackbone
+   init_args:
+     input_channels: 100
+     dim: 512
+     intermediate_dim: 1536
+     num_layers: 8
+
+ head:
+   class_path: vocos.heads.ISTFTHead
+   init_args:
+     dim: 512
+     n_fft: 1024
+     hop_length: 256
+     padding: center
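These settings fix the mel/audio correspondence used throughout the pipeline: a 256-sample hop at 24 kHz means one mel frame per ~10.67 ms, i.e. 93.75 frames per second. A quick sanity check in plain arithmetic (not project code):

```python
# Frame-rate arithmetic implied by the vocos-mel-24khz config above.
sample_rate = 24_000   # Hz
hop_length = 256       # samples advanced per mel frame

frames_per_second = sample_rate / hop_length     # 93.75
ms_per_frame = 1000 * hop_length / sample_rate   # ~10.67 ms

# e.g. a ~13.3 s generated wav (like the 640,556-byte 24 kHz 16-bit mono
# files under gradio_output/ below) spans roughly 13.3 * 93.75 ≈ 1250 frames.
print(frames_per_second, ms_per_frame)
```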
Test-F5/checkpoints/vocos-mel-24khz/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:97ec976ad1fd67a33ab2682d29c0ac7df85234fae875aefcc5fb215681a91b2a
+ size 54365991
Test-F5/ckpts/viVoice/model_last.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f42c5ffaaa067852ad6e058dc9442dd87c2e839fe9da46a886e8405c711ac51c
+ size 5394403084
Test-F5/data/viVoice/duration.json ADDED
The diff for this file is too large to render. See raw diff
 
Test-F5/data/viVoice/metadata.csv ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e153cc6924d1614d85df359fae201c5f220ed54c48d68ef141a79617076f0b48
+ size 104821968
Test-F5/data/viVoice/new_vocab.txt ADDED
@@ -0,0 +1,149 @@
+
+ !
+ "
+ $
+ %
+ &
+ '
+ *
+ +
+ ,
+ -
+ .
+ 0
+ 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+ 8
+ 9
+ :
+ ?
+ a
+ b
+ c
+ d
+ e
+ f
+ g
+ h
+ i
+ j
+ k
+ l
+ m
+ n
+ o
+ p
+ q
+ r
+ s
+ t
+ u
+ v
+ w
+ x
+ y
+ z
+ «
+ °
+ µ
+ ·
+ ¹
+ »
+ à
+ á
+ â
+ ã
+ ä
+ å
+ ç
+ è
+ é
+ ê
+ ë
+ ì
+ í
+ î
+ ñ
+ ò
+ ó
+ ô
+ õ
+ ö
+ ù
+ ú
+ û
+ ü
+ ý
+ ă
+ đ
+ ĩ
+ ō
+ ũ
+ ū
+ ơ
+ ư
+ в
+ и
+ л
+ о
+ р
+ т
+ [entries 96–110: not rendered in this view]
+ ế
+ [entries 112–149: not rendered in this view]
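If new_vocab.txt is the set of symbols present in the viVoice data but absent from the pretrained vocabulary, which is what the fine-tuning steps in the PKG-INFO below suggest but is an assumption here, a list like it can be recomputed as a set difference. A generic sketch (paths taken from this commit and SOURCES.txt; the metadata column layout is assumed):

```python
# Generic sketch: derive the symbols a dataset needs beyond a pretrained
# vocab. The repo's own script for this appears to be
# check_vocab_pretrained.py (per SOURCES.txt) and may work differently.
def read_vocab(path: str) -> set:
    with open(path, encoding="utf-8") as f:
        return {line.rstrip("\n") for line in f}

pretrained = read_vocab("data/Emilia_ZH_EN_pinyin/vocab.txt")

dataset_chars = set()
with open("data/viVoice/metadata.csv", encoding="utf-8") as f:
    for line in f:
        _, text = line.rstrip("\n").split("|", 1)  # assumed audio|text layout
        dataset_chars.update(text.lower())

missing = sorted(dataset_chars - pretrained)  # candidates for new_vocab.txt
```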
Test-F5/data/viVoice/raw.arrow ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e892f8076f71e436f21772b1360da23d5d64dbe9f6d4f8bb67116711a248ed54
+ size 398665480
Test-F5/data/viVoice/vocab.txt ADDED
@@ -0,0 +1,2571 @@
+ [2,571 vocabulary entries: a leading space token; ASCII punctuation, digits, and upper/lower-case Latin letters; the full tone-numbered pinyin syllable inventory (a1 … zuo4); extended Latin, Greek, Cyrillic, Hebrew, and Arabic characters and combining diacritics; and CJK entries (not rendered in this view), ending with 𠮶]
Test-F5/gradio_output/gen_20250510_001635.wav ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:cc6bed861a1eec348edd7cb0e9641f48298171a89bf7420dada36c89286cc7ba
+ size 640556
Test-F5/gradio_output/gen_20250510_001804.wav ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:743527a1b87425245c02ad1e1bfd9a9359b56cebe8bccb6fa62cb23ea8a19ef7
+ size 640556
Test-F5/gradio_output/gen_20250510_002157.wav ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5a70b8e5ae2a47a71804a7eaab12d4784b0497c7c5f04d8d57aee1ccc0f2be67
+ size 708140
Test-F5/ref.wav ADDED
Binary file (90.5 kB)
Test-F5/ref2.wav ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7dd35d18bd991a3438ad705900f8cb9ffb653813866e3a86b2cea59042efc5ee
+ size 233942
Test-F5/src/f5_tts.egg-info/PKG-INFO ADDED
@@ -0,0 +1,156 @@
+ Metadata-Version: 2.4
+ Name: f5-tts
+ Version: 1.0.1
+ Summary: F5-TTS: A Fairytaler that Fakes Fluent and Faithful Speech with Flow Matching
+ License: MIT License
+ Project-URL: Homepage, https://github.com/SWivid/F5-TTS
+ Classifier: License :: OSI Approved :: MIT License
+ Classifier: Operating System :: OS Independent
+ Classifier: Programming Language :: Python :: 3
+ Description-Content-Type: text/markdown
+ License-File: LICENSE
+ Requires-Dist: accelerate>=0.33.0
+ Requires-Dist: bitsandbytes>0.37.0; platform_machine != "arm64" and platform_system != "Darwin"
+ Requires-Dist: cached_path
+ Requires-Dist: click
+ Requires-Dist: datasets
+ Requires-Dist: ema_pytorch>=0.5.2
+ Requires-Dist: gradio>=3.45.2
+ Requires-Dist: hydra-core>=1.3.0
+ Requires-Dist: jieba
+ Requires-Dist: librosa
+ Requires-Dist: matplotlib
+ Requires-Dist: numpy<=1.26.4
+ Requires-Dist: pydub
+ Requires-Dist: pypinyin
+ Requires-Dist: safetensors
+ Requires-Dist: soundfile
+ Requires-Dist: tomli
+ Requires-Dist: torch>=2.0.0
+ Requires-Dist: torchaudio>=2.0.0
+ Requires-Dist: torchdiffeq
+ Requires-Dist: tqdm>=4.65.0
+ Requires-Dist: transformers
+ Requires-Dist: transformers_stream_generator
+ Requires-Dist: vocos
+ Requires-Dist: wandb
+ Requires-Dist: x_transformers>=1.31.14
+ Provides-Extra: eval
+ Requires-Dist: faster_whisper==0.10.1; extra == "eval"
+ Requires-Dist: funasr; extra == "eval"
+ Requires-Dist: jiwer; extra == "eval"
+ Requires-Dist: modelscope; extra == "eval"
+ Requires-Dist: zhconv; extra == "eval"
+ Requires-Dist: zhon; extra == "eval"
+ Dynamic: license-file
+
+ # F5-TTS-Vietnamese
+ ![F5-TTS Architecture](tests/f5-tts.png)
+
+ A fine-tuning pipeline for training a Vietnamese speech synthesis model using the F5-TTS architecture.
+
+ Try the demo at: https://huggingface.co/spaces/hynt/F5-TTS-Vietnamese-100h
+
+ ## Tips for training
+ - 100 hours of data is generally sufficient to train a Vietnamese text-to-speech model for specific voices. However, to achieve optimal voice-cloning performance across a wide range of speakers, a larger dataset is recommended. I fine-tuned an F5-TTS model on approximately 1000 hours of data, which resulted in excellent voice-cloning performance.
+ - Having many speaker hours with highly accurate transcriptions is crucial; the more, the better. This helps the model generalize to unseen speakers, yielding a lower WER after training and fewer hallucinations.
+
+ ## Tips for inference
+ - Choose reference audios that are clear, have minimal interruptions, and are shorter than 10 seconds; this improves the synthesis results.
+ - If the reference-audio text is not provided, whisper-large-v3-turbo is used to transcribe it by default. Vietnamese may not be accurately recognized in some cases, which can degrade synthesis quality.
+ - To synthesize speech from a long text paragraph, it is recommended to replace the chunking function in **src/f5_tts/infer/utils_infer.py** with the modified chunk_text function below:
+
+ ```python
+ def chunk_text(text, max_chars=135):
+     sentences = [s.strip() for s in text.split('. ') if s.strip()]
+     i = 0
+     while i < len(sentences):
+         if len(sentences[i].split()) < 4:
+             if i == 0:
+                 # Merge with the next sentence
+                 sentences[i + 1] = sentences[i] + ', ' + sentences[i + 1]
+                 del sentences[i]
+             else:
+                 # Merge with the previous sentence
+                 sentences[i - 1] = sentences[i - 1] + ', ' + sentences[i]
+                 del sentences[i]
+                 i -= 1
+         else:
+             i += 1
+
+     final_sentences = []
+     for sentence in sentences:
+         parts = [p.strip() for p in sentence.split(', ')]
+         buffer = []
+         for part in parts:
+             buffer.append(part)
+             total_words = sum(len(p.split()) for p in buffer)
+             if total_words > 20:
+                 # Split into separate chunks
+                 long_part = ', '.join(buffer)
+                 final_sentences.append(long_part)
+                 buffer = []
+         if buffer:
+             final_sentences.append(', '.join(buffer))
+
+     if len(final_sentences[-1].split()) < 4 and len(final_sentences) >= 2:
+         final_sentences[-2] = final_sentences[-2] + ", " + final_sentences[-1]
+         final_sentences = final_sentences[0:-1]
+
+     return final_sentences
+ ```
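For a sense of what this replacement does, here is a small driver (hypothetical input text; chunk_text as defined above): short sentences are merged into a neighbour first, then comma-separated clause runs longer than 20 words are flushed as their own chunk.

```python
# Assumes chunk_text from the block above is in scope.
text = (
    "This opening sentence keeps going with several clauses, separated by "
    "commas, so that it exceeds the twenty word budget for one chunk. "
    "Short one. And here is a normal closing sentence."
)
for i, chunk in enumerate(chunk_text(text)):
    print(i, chunk)
```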
+
+ ## Installation
+
+ ### Create a separate environment if needed
+
+ ```bash
+ # Create a python 3.10 conda env (you could also use virtualenv)
+ conda create -n f5-tts python=3.10
+ conda activate f5-tts
+ ```
+
+ ### Install PyTorch
+
+ > ```bash
+ > # Install pytorch with your CUDA version, e.g.
+ > pip install torch==2.4.0+cu124 torchaudio==2.4.0+cu124 --extra-index-url https://download.pytorch.org/whl/cu124
+ > ```
+
+ ### Install the f5-tts module
+
+ > ```bash
+ > cd F5-TTS-Vietnamese
+ > pip install -e .
+ > ```
+
+ ### Install sox and ffmpeg
+
+ > ```bash
+ > sudo apt-get update
+ > sudo apt-get install sox ffmpeg
+ > ```
+
+ ## Fine-tuning pipeline
+
+ Steps:
+
+ - Prepare `audio_name` and corresponding transcriptions
+ - Add missing vocabulary from your dataset to the pretrained model
+ - Expand the model's embedding to support the updated vocabulary
+ - Perform feature extraction
+ - Fine-tune the model
+
+ ```bash
+ bash fine_tuning.sh
+ ```
+
+ ### Inference
+
+ ```bash
+ bash infer.sh
+ ```
+
+ ### References
+
+ - Original F5-TTS repository: [https://github.com/SWivid/F5-TTS](https://github.com/SWivid/F5-TTS)
Test-F5/src/f5_tts.egg-info/SOURCES.txt ADDED
@@ -0,0 +1,88 @@
+ .gitignore
+ .gitmodules
+ Dockerfile
+ LICENSE
+ README.md
+ check_vocab_pretrained.py
+ convert_sr.py
+ extend_embedding_pretrained.py
+ fine_tuning.sh
+ infer.sh
+ prepare_metadata.py
+ pyproject.toml
+ ref.wav
+ ref2.wav
+ data/Emilia_ZH_EN_pinyin/vocab.txt
+ src/f5_tts/api.py
+ src/f5_tts/socket_client.py
+ src/f5_tts/socket_server.py
+ src/f5_tts.egg-info/PKG-INFO
+ src/f5_tts.egg-info/SOURCES.txt
+ src/f5_tts.egg-info/dependency_links.txt
+ src/f5_tts.egg-info/entry_points.txt
+ src/f5_tts.egg-info/requires.txt
+ src/f5_tts.egg-info/top_level.txt
+ src/f5_tts/__pycache__/api.cpython-310.pyc
+ src/f5_tts/configs/E2TTS_Base.yaml
+ src/f5_tts/configs/E2TTS_Small.yaml
+ src/f5_tts/configs/F5TTS_Base.yaml
+ src/f5_tts/configs/F5TTS_Small.yaml
+ src/f5_tts/configs/F5TTS_v1_Base.yaml
+ src/f5_tts/eval/README.md
+ src/f5_tts/eval/ecapa_tdnn.py
+ src/f5_tts/eval/eval_infer_batch.py
+ src/f5_tts/eval/eval_infer_batch.sh
+ src/f5_tts/eval/eval_librispeech_test_clean.py
+ src/f5_tts/eval/eval_seedtts_testset.py
+ src/f5_tts/eval/eval_utmos.py
+ src/f5_tts/eval/utils_eval.py
+ src/f5_tts/infer/README.md
+ src/f5_tts/infer/SHARED.md
+ src/f5_tts/infer/infer_cli.py
+ src/f5_tts/infer/infer_gradio.py
+ src/f5_tts/infer/speech_edit.py
+ src/f5_tts/infer/utils_infer.py
+ src/f5_tts/infer/__pycache__/infer_cli.cpython-310.pyc
+ src/f5_tts/infer/__pycache__/utils_infer.cpython-310.pyc
+ src/f5_tts/infer/examples/vocab.txt
+ src/f5_tts/infer/examples/basic/basic.toml
+ src/f5_tts/infer/examples/basic/basic_ref_en.wav
+ src/f5_tts/infer/examples/basic/basic_ref_zh.wav
+ src/f5_tts/infer/examples/multi/country.flac
+ src/f5_tts/infer/examples/multi/main.flac
+ src/f5_tts/infer/examples/multi/story.toml
+ src/f5_tts/infer/examples/multi/story.txt
+ src/f5_tts/infer/examples/multi/town.flac
+ src/f5_tts/model/__init__.py
+ src/f5_tts/model/cfm.py
+ src/f5_tts/model/dataset.py
+ src/f5_tts/model/modules.py
+ src/f5_tts/model/trainer.py
+ src/f5_tts/model/utils.py
+ src/f5_tts/model/__pycache__/__init__.cpython-310.pyc
+ src/f5_tts/model/__pycache__/cfm.cpython-310.pyc
+ src/f5_tts/model/__pycache__/dataset.cpython-310.pyc
+ src/f5_tts/model/__pycache__/modules.cpython-310.pyc
+ src/f5_tts/model/__pycache__/trainer.cpython-310.pyc
+ src/f5_tts/model/__pycache__/utils.cpython-310.pyc
+ src/f5_tts/model/backbones/README.md
+ src/f5_tts/model/backbones/dit.py
+ src/f5_tts/model/backbones/mmdit.py
+ src/f5_tts/model/backbones/unett.py
+ src/f5_tts/model/backbones/__pycache__/dit.cpython-310.pyc
+ src/f5_tts/model/backbones/__pycache__/mmdit.cpython-310.pyc
+ src/f5_tts/model/backbones/__pycache__/unett.cpython-310.pyc
+ src/f5_tts/scripts/count_max_epoch.py
+ src/f5_tts/scripts/count_params_gflops.py
+ src/f5_tts/train/README.md
+ src/f5_tts/train/finetune_cli.py
+ src/f5_tts/train/finetune_gradio.py
+ src/f5_tts/train/train.py
+ src/f5_tts/train/__pycache__/finetune_gradio.cpython-310.pyc
+ src/f5_tts/train/datasets/prepare_csv_wavs.py
+ src/f5_tts/train/datasets/prepare_emilia.py
+ src/f5_tts/train/datasets/prepare_libritts.py
+ src/f5_tts/train/datasets/prepare_ljspeech.py
+ src/f5_tts/train/datasets/prepare_wenetspeech4tts.py
+ tests/f5-tts.png
+ tests/infer_cli_basic.wav
Test-F5/src/f5_tts.egg-info/dependency_links.txt ADDED
@@ -0,0 +1 @@
+
Test-F5/src/f5_tts.egg-info/entry_points.txt ADDED
@@ -0,0 +1,5 @@
+ [console_scripts]
+ f5-tts_finetune-cli = f5_tts.train.finetune_cli:main
+ f5-tts_finetune-gradio = f5_tts.train.finetune_gradio:main
+ f5-tts_infer-cli = f5_tts.infer.infer_cli:main
+ f5-tts_infer-gradio = f5_tts.infer.infer_gradio:main
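After pip install -e ., these four commands land on PATH; the same mapping can also be inspected programmatically with the standard library (a sketch):

```python
# Sketch: list the console scripts registered by the f5-tts package
# (requires the package to be installed; Python 3.10+ entry-points API).
from importlib.metadata import entry_points

for ep in entry_points(group="console_scripts"):
    if ep.name.startswith("f5-tts"):
        print(ep.name, "->", ep.value)
# e.g. f5-tts_infer-cli -> f5_tts.infer.infer_cli:main
```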
Test-F5/src/f5_tts.egg-info/requires.txt ADDED
@@ -0,0 +1,36 @@
+ accelerate>=0.33.0
+ cached_path
+ click
+ datasets
+ ema_pytorch>=0.5.2
+ gradio>=3.45.2
+ hydra-core>=1.3.0
+ jieba
+ librosa
+ matplotlib
+ numpy<=1.26.4
+ pydub
+ pypinyin
+ safetensors
+ soundfile
+ tomli
+ torch>=2.0.0
+ torchaudio>=2.0.0
+ torchdiffeq
+ tqdm>=4.65.0
+ transformers
+ transformers_stream_generator
+ vocos
+ wandb
+ x_transformers>=1.31.14
+
+ [:platform_machine != "arm64" and platform_system != "Darwin"]
+ bitsandbytes>0.37.0
+
+ [eval]
+ faster_whisper==0.10.1
+ funasr
+ jiwer
+ modelscope
+ zhconv
+ zhon
Test-F5/src/f5_tts.egg-info/top_level.txt ADDED
@@ -0,0 +1 @@
+ f5_tts
Test-F5/src/f5_tts/__pycache__/api.cpython-310.pyc ADDED
Binary file (3.97 kB)
Test-F5/src/f5_tts/api.py ADDED
@@ -0,0 +1,165 @@
+ import random
+ import sys
+ from importlib.resources import files
+
+ import soundfile as sf
+ import tqdm
+ from cached_path import cached_path
+ from omegaconf import OmegaConf
+
+ from f5_tts.infer.utils_infer import (
+     load_model,
+     load_vocoder,
+     transcribe,
+     preprocess_ref_audio_text,
+     infer_process,
+     remove_silence_for_generated_wav,
+     save_spectrogram,
+ )
+ from f5_tts.model import DiT, UNetT  # noqa: F401. used for config
+ from f5_tts.model.utils import seed_everything
+
+
+ class F5TTS:
+     def __init__(
+         self,
+         model="F5TTS_v1_Base",
+         ckpt_file="",
+         vocab_file="",
+         ode_method="euler",
+         use_ema=True,
+         vocoder_local_path=None,
+         device=None,
+         hf_cache_dir=None,
+     ):
+         model_cfg = OmegaConf.load(str(files("f5_tts").joinpath(f"configs/{model}.yaml")))
+         model_cls = globals()[model_cfg.model.backbone]
+         model_arc = model_cfg.model.arch
+
+         self.mel_spec_type = model_cfg.model.mel_spec.mel_spec_type
+         self.target_sample_rate = model_cfg.model.mel_spec.target_sample_rate
+
+         self.ode_method = ode_method
+         self.use_ema = use_ema
+
+         if device is not None:
+             self.device = device
+         else:
+             import torch
+
+             self.device = (
+                 "cuda"
+                 if torch.cuda.is_available()
+                 else "xpu"
+                 if torch.xpu.is_available()
+                 else "mps"
+                 if torch.backends.mps.is_available()
+                 else "cpu"
+             )
+
+         # Load models
+         self.vocoder = load_vocoder(
+             self.mel_spec_type, vocoder_local_path is not None, vocoder_local_path, self.device, hf_cache_dir
+         )
+
+         repo_name, ckpt_step, ckpt_type = "F5-TTS", 1250000, "safetensors"
+
+         # override for previous models
+         if model == "F5TTS_Base":
+             if self.mel_spec_type == "vocos":
+                 ckpt_step = 1200000
+             elif self.mel_spec_type == "bigvgan":
+                 model = "F5TTS_Base_bigvgan"
+                 ckpt_type = "pt"
+         elif model == "E2TTS_Base":
+             repo_name = "E2-TTS"
+             ckpt_step = 1200000
+         else:
+             raise ValueError(f"Unknown model type: {model}")
+
+         if not ckpt_file:
+             ckpt_file = str(
+                 cached_path(f"hf://SWivid/{repo_name}/{model}/model_{ckpt_step}.{ckpt_type}", cache_dir=hf_cache_dir)
+             )
+         self.ema_model = load_model(
+             model_cls, model_arc, ckpt_file, self.mel_spec_type, vocab_file, self.ode_method, self.use_ema, self.device
+         )
+
+     def transcribe(self, ref_audio, language=None):
+         return transcribe(ref_audio, language)
+
+     def export_wav(self, wav, file_wave, remove_silence=False):
+         sf.write(file_wave, wav, self.target_sample_rate)
+
+         if remove_silence:
+             remove_silence_for_generated_wav(file_wave)
+
+     def export_spectrogram(self, spec, file_spec):
+         save_spectrogram(spec, file_spec)
+
+     def infer(
+         self,
+         ref_file,
+         ref_text,
+         gen_text,
+         show_info=print,
+         progress=tqdm,
+         target_rms=0.1,
+         cross_fade_duration=0.15,
+         sway_sampling_coef=-1,
+         cfg_strength=2,
+         nfe_step=32,
+         speed=1.0,
+         fix_duration=None,
+         remove_silence=False,
+         file_wave=None,
+         file_spec=None,
+         seed=None,
+     ):
+         if seed is None:
+             seed = random.randint(0, sys.maxsize)
+         self.seed = seed  # keep the seed used, whether supplied or random
+         seed_everything(self.seed)
+
+         ref_file, ref_text = preprocess_ref_audio_text(ref_file, ref_text, device=self.device)
+
+         wav, sr, spec = infer_process(
+             ref_file,
+             ref_text,
+             gen_text,
+             self.ema_model,
+             self.vocoder,
+             self.mel_spec_type,
+             show_info=show_info,
+             progress=progress,
+             target_rms=target_rms,
+             cross_fade_duration=cross_fade_duration,
+             nfe_step=nfe_step,
+             cfg_strength=cfg_strength,
+             sway_sampling_coef=sway_sampling_coef,
+             speed=speed,
+             fix_duration=fix_duration,
+             device=self.device,
+         )
+
+         if file_wave is not None:
+             self.export_wav(wav, file_wave, remove_silence)
+
+         if file_spec is not None:
+             self.export_spectrogram(spec, file_spec)
+
+         return wav, sr, spec
+
+
+ if __name__ == "__main__":
+     f5tts = F5TTS()
+
+     wav, sr, spec = f5tts.infer(
+         ref_file=str(files("f5_tts").joinpath("infer/examples/basic/basic_ref_en.wav")),
+         ref_text="some call me nature, others call me mother nature.",
+         gen_text="""I don't really care what you call me. I've been a silent spectator, watching species evolve, empires rise and fall. But always remember, I am mighty and enduring. Respect me and I'll nurture you; ignore me and you shall face the consequences.""",
+         file_wave=str(files("f5_tts").joinpath("../../tests/api_out.wav")),
+         file_spec=str(files("f5_tts").joinpath("../../tests/api_out.png")),
+         seed=None,
+     )
+
+     print("seed :", f5tts.seed)
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ hydra:
2
+ run:
3
+ dir: ckpts/${model.name}_${model.mel_spec.mel_spec_type}_${model.tokenizer}_${datasets.name}/${now:%Y-%m-%d}/${now:%H-%M-%S}
4
+
5
+ datasets:
6
+ name: Emilia_ZH_EN # dataset name
7
+ batch_size_per_gpu: 38400 # 8 GPUs, 8 * 38400 = 307200
8
+ batch_size_type: frame # frame | sample
9
+ max_samples: 64 # max sequences per batch if use frame-wise batch_size. we set 32 for small models, 64 for base models
10
+ num_workers: 16
11
+
12
+ optim:
13
+ epochs: 11
14
+ learning_rate: 7.5e-5
15
+ num_warmup_updates: 20000 # warmup updates
16
+ grad_accumulation_steps: 1 # note: updates = steps / grad_accumulation_steps
17
+ max_grad_norm: 1.0 # gradient clipping
18
+ bnb_optimizer: False # use bnb 8bit AdamW optimizer or not
19
+
20
+ model:
21
+ name: E2TTS_Base
22
+ tokenizer: pinyin
23
+ tokenizer_path: null # if 'custom' tokenizer, define the path want to use (should be vocab.txt)
24
+ backbone: UNetT
25
+ arch:
26
+ dim: 1024
27
+ depth: 24
28
+ heads: 16
29
+ ff_mult: 4
30
+ text_mask_padding: False
31
+ pe_attn_head: 1
32
+ mel_spec:
33
+ target_sample_rate: 24000
34
+ n_mel_channels: 100
35
+ hop_length: 256
36
+ win_length: 1024
37
+ n_fft: 1024
38
+ mel_spec_type: vocos # vocos | bigvgan
39
+ vocoder:
40
+ is_local: False # use local offline ckpt or not
41
+ local_path: null # local vocoder path
42
+
43
+ ckpts:
44
+ logger: wandb # wandb | tensorboard | null
45
+ log_samples: True # infer random sample per save checkpoint. wip, normal to fail with extra long samples
46
+ save_per_updates: 50000 # save checkpoint per updates
47
+ keep_last_n_checkpoints: -1 # -1 to keep all, 0 to not save intermediate, > 0 to keep last N checkpoints
48
+ last_per_updates: 5000 # save last checkpoint per updates
49
+ save_dir: ckpts/${model.name}_${model.mel_spec.mel_spec_type}_${model.tokenizer}_${datasets.name}
Test-F5/src/f5_tts/configs/E2TTS_Small.yaml ADDED
@@ -0,0 +1,49 @@
+ hydra:
+   run:
+     dir: ckpts/${model.name}_${model.mel_spec.mel_spec_type}_${model.tokenizer}_${datasets.name}/${now:%Y-%m-%d}/${now:%H-%M-%S}
+
+ datasets:
+   name: Emilia_ZH_EN
+   batch_size_per_gpu: 38400 # 8 GPUs, 8 * 38400 = 307200
+   batch_size_type: frame # frame | sample
+   max_samples: 64 # max sequences per batch if use frame-wise batch_size. we set 32 for small models, 64 for base models
+   num_workers: 16
+
+ optim:
+   epochs: 11
+   learning_rate: 7.5e-5
+   num_warmup_updates: 20000 # warmup updates
+   grad_accumulation_steps: 1 # note: updates = steps / grad_accumulation_steps
+   max_grad_norm: 1.0
+   bnb_optimizer: False
+
+ model:
+   name: E2TTS_Small
+   tokenizer: pinyin
+   tokenizer_path: null # if 'custom' tokenizer, define the path you want to use (should be vocab.txt)
+   backbone: UNetT
+   arch:
+     dim: 768
+     depth: 20
+     heads: 12
+     ff_mult: 4
+     text_mask_padding: False
+     pe_attn_head: 1
+   mel_spec:
+     target_sample_rate: 24000
+     n_mel_channels: 100
+     hop_length: 256
+     win_length: 1024
+     n_fft: 1024
+     mel_spec_type: vocos # vocos | bigvgan
+   vocoder:
+     is_local: False # use local offline ckpt or not
+     local_path: null # local vocoder path
+
+ ckpts:
+   logger: wandb # wandb | tensorboard | null
+   log_samples: True # infer random sample per save checkpoint. wip, normal to fail with extra long samples
+   save_per_updates: 50000 # save checkpoint per updates
+   keep_last_n_checkpoints: -1 # -1 to keep all, 0 to not save intermediate, > 0 to keep last N checkpoints
+   last_per_updates: 5000 # save last checkpoint per updates
+   save_dir: ckpts/${model.name}_${model.mel_spec.mel_spec_type}_${model.tokenizer}_${datasets.name}
Test-F5/src/f5_tts/configs/F5TTS_Base.yaml ADDED
@@ -0,0 +1,52 @@
+ hydra:
+   run:
+     dir: ckpts/${model.name}_${model.mel_spec.mel_spec_type}_${model.tokenizer}_${datasets.name}/${now:%Y-%m-%d}/${now:%H-%M-%S}
+
+ datasets:
+   name: your_training_dataset # dataset name
+   batch_size_per_gpu: 38400 # 8 GPUs, 8 * 38400 = 307200
+   batch_size_type: frame # frame | sample
+   max_samples: 64 # max sequences per batch if use frame-wise batch_size. we set 32 for small models, 64 for base models
+   num_workers: 16
+
+ optim:
+   epochs: 11
+   learning_rate: 7.5e-5
+   num_warmup_updates: 20000 # warmup updates
+   grad_accumulation_steps: 1 # note: updates = steps / grad_accumulation_steps
+   max_grad_norm: 1.0 # gradient clipping
+   bnb_optimizer: False # use bnb 8bit AdamW optimizer or not
+
+ model:
+   name: F5TTS_Base # model name
+   tokenizer: char # tokenizer type
+   tokenizer_path: null # if 'custom' tokenizer, define the path you want to use (should be vocab.txt)
+   backbone: DiT
+   arch:
+     dim: 1024
+     depth: 22
+     heads: 16
+     ff_mult: 2
+     text_dim: 512
+     text_mask_padding: False
+     conv_layers: 4
+     pe_attn_head: 1
+     checkpoint_activations: False # recompute activations and save memory for extra compute
+   mel_spec:
+     target_sample_rate: 24000
+     n_mel_channels: 100
+     hop_length: 256
+     win_length: 1024
+     n_fft: 1024
+     mel_spec_type: vocos # vocos | bigvgan
+   vocoder:
+     is_local: False # use local offline ckpt or not
+     local_path: null # local vocoder path
+
+ ckpts:
+   logger: tensorboard # wandb | tensorboard | null
+   log_samples: True # infer random sample per save checkpoint. wip, normal to fail with extra long samples
+   save_per_updates: 50000 # save checkpoint per updates
+   keep_last_n_checkpoints: -1 # -1 to keep all, 0 to not save intermediate, > 0 to keep last N checkpoints
+   last_per_updates: 5000 # save last checkpoint per updates
+   save_dir: ckpts/${model.name}_${model.mel_spec.mel_spec_type}_${model.tokenizer}_${datasets.name}
Test-F5/src/f5_tts/configs/F5TTS_Small.yaml ADDED
@@ -0,0 +1,52 @@
+ hydra:
+   run:
+     dir: ckpts/${model.name}_${model.mel_spec.mel_spec_type}_${model.tokenizer}_${datasets.name}/${now:%Y-%m-%d}/${now:%H-%M-%S}
+
+ datasets:
+   name: Emilia_ZH_EN
+   batch_size_per_gpu: 38400 # 8 GPUs, 8 * 38400 = 307200
+   batch_size_type: frame # frame | sample
+   max_samples: 64 # max sequences per batch if use frame-wise batch_size. we set 32 for small models, 64 for base models
+   num_workers: 16
+
+ optim:
+   epochs: 11
+   learning_rate: 7.5e-5
+   num_warmup_updates: 20000 # warmup updates
+   grad_accumulation_steps: 1 # note: updates = steps / grad_accumulation_steps
+   max_grad_norm: 1.0 # gradient clipping
+   bnb_optimizer: False # use bnb 8bit AdamW optimizer or not
+
+ model:
+   name: F5TTS_Small
+   tokenizer: pinyin
+   tokenizer_path: null # if 'custom' tokenizer, define the path you want to use (should be vocab.txt)
+   backbone: DiT
+   arch:
+     dim: 768
+     depth: 18
+     heads: 12
+     ff_mult: 2
+     text_dim: 512
+     text_mask_padding: False
+     conv_layers: 4
+     pe_attn_head: 1
+     checkpoint_activations: False # recompute activations and save memory for extra compute
+   mel_spec:
+     target_sample_rate: 24000
+     n_mel_channels: 100
+     hop_length: 256
+     win_length: 1024
+     n_fft: 1024
+     mel_spec_type: vocos # vocos | bigvgan
+   vocoder:
+     is_local: False # use local offline ckpt or not
+     local_path: null # local vocoder path
+
+ ckpts:
+   logger: wandb # wandb | tensorboard | null
+   log_samples: True # infer random sample per save checkpoint. wip, normal to fail with extra long samples
+   save_per_updates: 50000 # save checkpoint per updates
+   keep_last_n_checkpoints: -1 # -1 to keep all, 0 to not save intermediate, > 0 to keep last N checkpoints
+   last_per_updates: 5000 # save last checkpoint per updates
+   save_dir: ckpts/${model.name}_${model.mel_spec.mel_spec_type}_${model.tokenizer}_${datasets.name}
Test-F5/src/f5_tts/configs/F5TTS_v1_Base.yaml ADDED
@@ -0,0 +1,53 @@
+ hydra:
+   run:
+     dir: ckpts/${model.name}_${model.mel_spec.mel_spec_type}_${model.tokenizer}_${datasets.name}/${now:%Y-%m-%d}/${now:%H-%M-%S}
+
+ datasets:
+   name: Emilia_ZH_EN # dataset name
+   batch_size_per_gpu: 38400 # 8 GPUs, 8 * 38400 = 307200
+   batch_size_type: frame # frame | sample
+   max_samples: 64 # max sequences per batch if use frame-wise batch_size. we set 32 for small models, 64 for base models
+   num_workers: 16
+
+ optim:
+   epochs: 11
+   learning_rate: 7.5e-5
+   num_warmup_updates: 20000 # warmup updates
+   grad_accumulation_steps: 1 # note: updates = steps / grad_accumulation_steps
+   max_grad_norm: 1.0 # gradient clipping
+   bnb_optimizer: False # use bnb 8bit AdamW optimizer or not
+
+ model:
+   name: F5TTS_v1_Base # model name
+   tokenizer: pinyin # tokenizer type
+   tokenizer_path: null # if 'custom' tokenizer, define the path you want to use (should be vocab.txt)
+   backbone: DiT
+   arch:
+     dim: 1024
+     depth: 22
+     heads: 16
+     ff_mult: 2
+     text_dim: 512
+     text_mask_padding: True
+     qk_norm: null # null | rms_norm
+     conv_layers: 4
+     pe_attn_head: null
+     checkpoint_activations: False # recompute activations and save memory for extra compute
+   mel_spec:
+     target_sample_rate: 24000
+     n_mel_channels: 100
+     hop_length: 256
+     win_length: 1024
+     n_fft: 1024
+     mel_spec_type: vocos # vocos | bigvgan
+   vocoder:
+     is_local: False # use local offline ckpt or not
+     local_path: null # local vocoder path
+
+ ckpts:
+   logger: wandb # wandb | tensorboard | null
+   log_samples: True # infer random sample per save checkpoint. wip, normal to fail with extra long samples
+   save_per_updates: 50000 # save checkpoint per updates
+   keep_last_n_checkpoints: -1 # -1 to keep all, 0 to not save intermediate, > 0 to keep last N checkpoints
+   last_per_updates: 5000 # save last checkpoint per updates
+   save_dir: ckpts/${model.name}_${model.mel_spec.mel_spec_type}_${model.tokenizer}_${datasets.name}
Test-F5/src/f5_tts/eval/README.md ADDED
@@ -0,0 +1,52 @@
+
+ # Evaluation
+
+ Install packages for evaluation:
+
+ ```bash
+ pip install -e .[eval]
+ ```
+
+ ## Generating Samples for Evaluation
+
+ ### Prepare Test Datasets
+
+ 1. *Seed-TTS testset*: Download from [seed-tts-eval](https://github.com/BytedanceSpeech/seed-tts-eval).
+ 2. *LibriSpeech test-clean*: Download from [OpenSLR](http://www.openslr.org/12/).
+ 3. Unzip the downloaded datasets and place them in the `data/` directory.
+ 4. Update the path to the *LibriSpeech test-clean* data in `src/f5_tts/eval/eval_infer_batch.py`.
+ 5. Our filtered LibriSpeech-PC 4-10s subset is provided at `data/librispeech_pc_test_clean_cross_sentence.lst`.
+
+ ### Batch Inference for Test Set
+
+ To run batch inference for evaluations, execute the following commands:
+
+ ```bash
+ # batch inference for evaluations
+ accelerate config # if not set before
+ bash src/f5_tts/eval/eval_infer_batch.sh
+ ```
+
+ ## Objective Evaluation on Generated Results
+
+ ### Download Evaluation Model Checkpoints
+
+ 1. Chinese ASR Model: [Paraformer-zh](https://huggingface.co/funasr/paraformer-zh)
+ 2. English ASR Model: [Faster-Whisper](https://huggingface.co/Systran/faster-whisper-large-v3)
+ 3. WavLM Model: Download from [Google Drive](https://drive.google.com/file/d/1-aE1NfzpRCLxA4GUxX9ITI3F9LlbtEGP/view).
+
+ Then update the checkpoint paths in the evaluation scripts below to wherever you placed these models.
+
+ ### Objective Evaluation
+
+ Update the path to your batch-inference results, then run the WER / SIM / UTMOS evaluations:
+ ```bash
+ # Evaluation [WER] for Seed-TTS test [ZH] set
+ python src/f5_tts/eval/eval_seedtts_testset.py --eval_task wer --lang zh --gen_wav_dir <GEN_WAV_DIR> --gpu_nums 8
+
+ # Evaluation [SIM] for LibriSpeech-PC test-clean (cross-sentence)
+ python src/f5_tts/eval/eval_librispeech_test_clean.py --eval_task sim --gen_wav_dir <GEN_WAV_DIR> --librispeech_test_clean_path <TEST_CLEAN_PATH>
+
+ # Evaluation [UTMOS]. --ext: Audio extension
+ python src/f5_tts/eval/eval_utmos.py --audio_dir <WAV_DIR> --ext wav
+ ```
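+
+ For reference, below is a minimal sketch of the text normalization that `run_asr_wer` in `src/f5_tts/eval/utils_eval.py` applies before scoring; the function name `normalized_wer` is illustrative and not part of the codebase:
+
+ ```python
+ import string
+
+ from jiwer import compute_measures
+ from zhon.hanzi import punctuation as zh_punctuation
+
+ punctuation_all = zh_punctuation + string.punctuation
+
+
+ def normalized_wer(truth: str, hypo: str, lang: str) -> float:
+     # strip Chinese and ASCII punctuation before scoring
+     for x in punctuation_all:
+         truth = truth.replace(x, "")
+         hypo = hypo.replace(x, "")
+     truth = truth.replace("  ", " ")
+     hypo = hypo.replace("  ", " ")
+     if lang == "zh":  # Chinese is scored at character level
+         truth = " ".join(truth)
+         hypo = " ".join(hypo)
+     else:  # English is scored case-insensitively
+         truth = truth.lower()
+         hypo = hypo.lower()
+     return compute_measures(truth, hypo)["wer"]
+ ```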
Test-F5/src/f5_tts/eval/ecapa_tdnn.py ADDED
@@ -0,0 +1,330 @@
+ # just for speaker similarity evaluation, third-party code
+
+ # From https://github.com/microsoft/UniSpeech/blob/main/downstreams/speaker_verification/models/
+ # part of the code is borrowed from https://github.com/lawlict/ECAPA-TDNN
+
+ import os
+ import torch
+ import torch.nn as nn
+ import torch.nn.functional as F
+
+
+ """ Res2Conv1d + BatchNorm1d + ReLU
+ """
+
+
+ class Res2Conv1dReluBn(nn.Module):
+     """
+     in_channels == out_channels == channels
+     """
+
+     def __init__(self, channels, kernel_size=1, stride=1, padding=0, dilation=1, bias=True, scale=4):
+         super().__init__()
+         assert channels % scale == 0, "{} % {} != 0".format(channels, scale)
+         self.scale = scale
+         self.width = channels // scale
+         self.nums = scale if scale == 1 else scale - 1
+
+         self.convs = []
+         self.bns = []
+         for i in range(self.nums):
+             self.convs.append(nn.Conv1d(self.width, self.width, kernel_size, stride, padding, dilation, bias=bias))
+             self.bns.append(nn.BatchNorm1d(self.width))
+         self.convs = nn.ModuleList(self.convs)
+         self.bns = nn.ModuleList(self.bns)
+
+     def forward(self, x):
+         out = []
+         spx = torch.split(x, self.width, 1)
+         for i in range(self.nums):
+             if i == 0:
+                 sp = spx[i]
+             else:
+                 sp = sp + spx[i]
+             # Order: conv -> relu -> bn
+             sp = self.convs[i](sp)
+             sp = self.bns[i](F.relu(sp))
+             out.append(sp)
+         if self.scale != 1:
+             out.append(spx[self.nums])
+         out = torch.cat(out, dim=1)
+
+         return out
+
+
+ """ Conv1d + BatchNorm1d + ReLU
+ """
+
+
+ class Conv1dReluBn(nn.Module):
+     def __init__(self, in_channels, out_channels, kernel_size=1, stride=1, padding=0, dilation=1, bias=True):
+         super().__init__()
+         self.conv = nn.Conv1d(in_channels, out_channels, kernel_size, stride, padding, dilation, bias=bias)
+         self.bn = nn.BatchNorm1d(out_channels)
+
+     def forward(self, x):
+         return self.bn(F.relu(self.conv(x)))
+
+
+ """ The SE connection of 1D case.
+ """
+
+
+ class SE_Connect(nn.Module):
+     def __init__(self, channels, se_bottleneck_dim=128):
+         super().__init__()
+         self.linear1 = nn.Linear(channels, se_bottleneck_dim)
+         self.linear2 = nn.Linear(se_bottleneck_dim, channels)
+
+     def forward(self, x):
+         out = x.mean(dim=2)
+         out = F.relu(self.linear1(out))
+         out = torch.sigmoid(self.linear2(out))
+         out = x * out.unsqueeze(2)
+
+         return out
+
+
+ """ SE-Res2Block of the ECAPA-TDNN architecture.
+ """
+
+ # def SE_Res2Block(channels, kernel_size, stride, padding, dilation, scale):
+ #     return nn.Sequential(
+ #         Conv1dReluBn(channels, 512, kernel_size=1, stride=1, padding=0),
+ #         Res2Conv1dReluBn(512, kernel_size, stride, padding, dilation, scale=scale),
+ #         Conv1dReluBn(512, channels, kernel_size=1, stride=1, padding=0),
+ #         SE_Connect(channels)
+ #     )
+
+
+ class SE_Res2Block(nn.Module):
+     def __init__(self, in_channels, out_channels, kernel_size, stride, padding, dilation, scale, se_bottleneck_dim):
+         super().__init__()
+         self.Conv1dReluBn1 = Conv1dReluBn(in_channels, out_channels, kernel_size=1, stride=1, padding=0)
+         self.Res2Conv1dReluBn = Res2Conv1dReluBn(out_channels, kernel_size, stride, padding, dilation, scale=scale)
+         self.Conv1dReluBn2 = Conv1dReluBn(out_channels, out_channels, kernel_size=1, stride=1, padding=0)
+         self.SE_Connect = SE_Connect(out_channels, se_bottleneck_dim)
+
+         self.shortcut = None
+         if in_channels != out_channels:
+             self.shortcut = nn.Conv1d(
+                 in_channels=in_channels,
+                 out_channels=out_channels,
+                 kernel_size=1,
+             )
+
+     def forward(self, x):
+         residual = x
+         if self.shortcut:
+             residual = self.shortcut(x)
+
+         x = self.Conv1dReluBn1(x)
+         x = self.Res2Conv1dReluBn(x)
+         x = self.Conv1dReluBn2(x)
+         x = self.SE_Connect(x)
+
+         return x + residual
+
+
+ """ Attentive weighted mean and standard deviation pooling.
+ """
+
+
+ class AttentiveStatsPool(nn.Module):
+     def __init__(self, in_dim, attention_channels=128, global_context_att=False):
+         super().__init__()
+         self.global_context_att = global_context_att
+
+         # Use Conv1d with stride == 1 rather than Linear, then we don't need to transpose inputs.
+         if global_context_att:
+             self.linear1 = nn.Conv1d(in_dim * 3, attention_channels, kernel_size=1)  # equals W and b in the paper
+         else:
+             self.linear1 = nn.Conv1d(in_dim, attention_channels, kernel_size=1)  # equals W and b in the paper
+         self.linear2 = nn.Conv1d(attention_channels, in_dim, kernel_size=1)  # equals V and k in the paper
+
+     def forward(self, x):
+         if self.global_context_att:
+             context_mean = torch.mean(x, dim=-1, keepdim=True).expand_as(x)
+             context_std = torch.sqrt(torch.var(x, dim=-1, keepdim=True) + 1e-10).expand_as(x)
+             x_in = torch.cat((x, context_mean, context_std), dim=1)
+         else:
+             x_in = x
+
+         # DON'T use ReLU here! In experiments, I find ReLU hard to converge.
+         alpha = torch.tanh(self.linear1(x_in))
+         # alpha = F.relu(self.linear1(x_in))
+         alpha = torch.softmax(self.linear2(alpha), dim=2)
+         mean = torch.sum(alpha * x, dim=2)
+         residuals = torch.sum(alpha * (x**2), dim=2) - mean**2
+         std = torch.sqrt(residuals.clamp(min=1e-9))
+         return torch.cat([mean, std], dim=1)
+
+
+ class ECAPA_TDNN(nn.Module):
+     def __init__(
+         self,
+         feat_dim=80,
+         channels=512,
+         emb_dim=192,
+         global_context_att=False,
+         feat_type="wavlm_large",
+         sr=16000,
+         feature_selection="hidden_states",
+         update_extract=False,
+         config_path=None,
+     ):
+         super().__init__()
+
+         self.feat_type = feat_type
+         self.feature_selection = feature_selection
+         self.update_extract = update_extract
+         self.sr = sr
+
+         torch.hub._validate_not_a_forked_repo = lambda a, b, c: True
+         try:
+             local_s3prl_path = os.path.expanduser("~/.cache/torch/hub/s3prl_s3prl_main")
+             self.feature_extract = torch.hub.load(local_s3prl_path, feat_type, source="local", config_path=config_path)
+         except:  # noqa: E722
+             self.feature_extract = torch.hub.load("s3prl/s3prl", feat_type)
+
+         if len(self.feature_extract.model.encoder.layers) == 24 and hasattr(
+             self.feature_extract.model.encoder.layers[23].self_attn, "fp32_attention"
+         ):
+             self.feature_extract.model.encoder.layers[23].self_attn.fp32_attention = False
+         if len(self.feature_extract.model.encoder.layers) == 24 and hasattr(
+             self.feature_extract.model.encoder.layers[11].self_attn, "fp32_attention"
+         ):
+             self.feature_extract.model.encoder.layers[11].self_attn.fp32_attention = False
+
+         self.feat_num = self.get_feat_num()
+         self.feature_weight = nn.Parameter(torch.zeros(self.feat_num))
+
+         if feat_type != "fbank" and feat_type != "mfcc":
+             freeze_list = ["final_proj", "label_embs_concat", "mask_emb", "project_q", "quantizer"]
+             for name, param in self.feature_extract.named_parameters():
+                 for freeze_val in freeze_list:
+                     if freeze_val in name:
+                         param.requires_grad = False
+                         break
+
+         if not self.update_extract:
+             for param in self.feature_extract.parameters():
+                 param.requires_grad = False
+
+         self.instance_norm = nn.InstanceNorm1d(feat_dim)
+         # self.channels = [channels] * 4 + [channels * 3]
+         self.channels = [channels] * 4 + [1536]
+
+         self.layer1 = Conv1dReluBn(feat_dim, self.channels[0], kernel_size=5, padding=2)
+         self.layer2 = SE_Res2Block(
+             self.channels[0],
+             self.channels[1],
+             kernel_size=3,
+             stride=1,
+             padding=2,
+             dilation=2,
+             scale=8,
+             se_bottleneck_dim=128,
+         )
+         self.layer3 = SE_Res2Block(
+             self.channels[1],
+             self.channels[2],
+             kernel_size=3,
+             stride=1,
+             padding=3,
+             dilation=3,
+             scale=8,
+             se_bottleneck_dim=128,
+         )
+         self.layer4 = SE_Res2Block(
+             self.channels[2],
+             self.channels[3],
+             kernel_size=3,
+             stride=1,
+             padding=4,
+             dilation=4,
+             scale=8,
+             se_bottleneck_dim=128,
+         )
+
+         # self.conv = nn.Conv1d(self.channels[-1], self.channels[-1], kernel_size=1)
+         cat_channels = channels * 3
+         self.conv = nn.Conv1d(cat_channels, self.channels[-1], kernel_size=1)
+         self.pooling = AttentiveStatsPool(
+             self.channels[-1], attention_channels=128, global_context_att=global_context_att
+         )
+         self.bn = nn.BatchNorm1d(self.channels[-1] * 2)
+         self.linear = nn.Linear(self.channels[-1] * 2, emb_dim)
+
+     def get_feat_num(self):
+         self.feature_extract.eval()
+         wav = [torch.randn(self.sr).to(next(self.feature_extract.parameters()).device)]
+         with torch.no_grad():
+             features = self.feature_extract(wav)
+         select_feature = features[self.feature_selection]
+         if isinstance(select_feature, (list, tuple)):
+             return len(select_feature)
+         else:
+             return 1
+
+     def get_feat(self, x):
+         if self.update_extract:
+             x = self.feature_extract([sample for sample in x])
+         else:
+             with torch.no_grad():
+                 if self.feat_type == "fbank" or self.feat_type == "mfcc":
+                     x = self.feature_extract(x) + 1e-6  # B x feat_dim x time_len
+                 else:
+                     x = self.feature_extract([sample for sample in x])
+
+         if self.feat_type == "fbank":
+             x = x.log()
+
+         if self.feat_type != "fbank" and self.feat_type != "mfcc":
+             x = x[self.feature_selection]
+             if isinstance(x, (list, tuple)):
+                 x = torch.stack(x, dim=0)
+             else:
+                 x = x.unsqueeze(0)
+             norm_weights = F.softmax(self.feature_weight, dim=-1).unsqueeze(-1).unsqueeze(-1).unsqueeze(-1)
+             x = (norm_weights * x).sum(dim=0)
+             x = torch.transpose(x, 1, 2) + 1e-6
+
+         x = self.instance_norm(x)
+         return x
+
+     def forward(self, x):
+         x = self.get_feat(x)
+
+         out1 = self.layer1(x)
+         out2 = self.layer2(out1)
+         out3 = self.layer3(out2)
+         out4 = self.layer4(out3)
+
+         out = torch.cat([out2, out3, out4], dim=1)
+         out = F.relu(self.conv(out))
+         out = self.bn(self.pooling(out))
+         out = self.linear(out)
+
+         return out
+
+
+ def ECAPA_TDNN_SMALL(
+     feat_dim,
+     emb_dim=256,
+     feat_type="wavlm_large",
+     sr=16000,
+     feature_selection="hidden_states",
+     update_extract=False,
+     config_path=None,
+ ):
+     return ECAPA_TDNN(
+         feat_dim=feat_dim,
+         channels=512,
+         emb_dim=emb_dim,
+         feat_type=feat_type,
+         sr=sr,
+         feature_selection=feature_selection,
+         update_extract=update_extract,
+         config_path=config_path,
+     )
Test-F5/src/f5_tts/eval/eval_infer_batch.py ADDED
@@ -0,0 +1,202 @@
+ import os
+ import sys
+
+ sys.path.append(os.getcwd())
+
+ import argparse
+ import time
+ from importlib.resources import files
+
+ import torch
+ import torchaudio
+ from accelerate import Accelerator
+ from omegaconf import OmegaConf
+ from tqdm import tqdm
+
+ from f5_tts.eval.utils_eval import (
+     get_inference_prompt,
+     get_librispeech_test_clean_metainfo,
+     get_seedtts_testset_metainfo,
+ )
+ from f5_tts.infer.utils_infer import load_checkpoint, load_vocoder
+ from f5_tts.model import CFM, DiT, UNetT  # noqa: F401. used for config
+ from f5_tts.model.utils import get_tokenizer
+
+ accelerator = Accelerator()
+ device = f"cuda:{accelerator.process_index}"
+
+
+ use_ema = True
+ target_rms = 0.1
+
+
+ rel_path = str(files("f5_tts").joinpath("../../"))
+
+
+ def main():
+     parser = argparse.ArgumentParser(description="batch inference")
+
+     parser.add_argument("-s", "--seed", default=None, type=int)
+     parser.add_argument("-n", "--expname", required=True)
+     parser.add_argument("-c", "--ckptstep", default=1250000, type=int)
+
+     parser.add_argument("-nfe", "--nfestep", default=32, type=int)
+     parser.add_argument("-o", "--odemethod", default="euler")
+     parser.add_argument("-ss", "--swaysampling", default=-1, type=float)
+
+     parser.add_argument("-t", "--testset", required=True)
+
+     args = parser.parse_args()
+
+     seed = args.seed
+     exp_name = args.expname
+     ckpt_step = args.ckptstep
+
+     nfe_step = args.nfestep
+     ode_method = args.odemethod
+     sway_sampling_coef = args.swaysampling
+
+     testset = args.testset
+
+     infer_batch_size = 1  # max frames. 1 for ddp single inference (recommended)
+     cfg_strength = 2.0
+     speed = 1.0
+     use_truth_duration = False
+     no_ref_audio = False
+
+     model_cfg = OmegaConf.load(str(files("f5_tts").joinpath(f"configs/{exp_name}.yaml")))
+     model_cls = globals()[model_cfg.model.backbone]
+     model_arc = model_cfg.model.arch
+
+     dataset_name = model_cfg.datasets.name
+     tokenizer = model_cfg.model.tokenizer
+
+     mel_spec_type = model_cfg.model.mel_spec.mel_spec_type
+     target_sample_rate = model_cfg.model.mel_spec.target_sample_rate
+     n_mel_channels = model_cfg.model.mel_spec.n_mel_channels
+     hop_length = model_cfg.model.mel_spec.hop_length
+     win_length = model_cfg.model.mel_spec.win_length
+     n_fft = model_cfg.model.mel_spec.n_fft
+
+     if testset == "ls_pc_test_clean":
+         metalst = rel_path + "/data/librispeech_pc_test_clean_cross_sentence.lst"
+         librispeech_test_clean_path = "<SOME_PATH>/LibriSpeech/test-clean"  # test-clean path
+         metainfo = get_librispeech_test_clean_metainfo(metalst, librispeech_test_clean_path)
+
+     elif testset == "seedtts_test_zh":
+         metalst = rel_path + "/data/seedtts_testset/zh/meta.lst"
+         metainfo = get_seedtts_testset_metainfo(metalst)
+
+     elif testset == "seedtts_test_en":
+         metalst = rel_path + "/data/seedtts_testset/en/meta.lst"
+         metainfo = get_seedtts_testset_metainfo(metalst)
+
+     else:
+         raise ValueError(f"Unknown testset: {testset}")
+
+     # path to save generated wavs
+     output_dir = (
+         f"{rel_path}/"
+         f"results/{exp_name}_{ckpt_step}/{testset}/"
+         f"seed{seed}_{ode_method}_nfe{nfe_step}_{mel_spec_type}"
+         f"{f'_ss{sway_sampling_coef}' if sway_sampling_coef else ''}"
+         f"_cfg{cfg_strength}_speed{speed}"
+         f"{'_gt-dur' if use_truth_duration else ''}"
+         f"{'_no-ref-audio' if no_ref_audio else ''}"
+     )
+
+     # -------------------------------------------------#
+
+     prompts_all = get_inference_prompt(
+         metainfo,
+         speed=speed,
+         tokenizer=tokenizer,
+         target_sample_rate=target_sample_rate,
+         n_mel_channels=n_mel_channels,
+         hop_length=hop_length,
+         mel_spec_type=mel_spec_type,
+         target_rms=target_rms,
+         use_truth_duration=use_truth_duration,
+         infer_batch_size=infer_batch_size,
+     )
+
+     # Vocoder model
+     local = False
+     if mel_spec_type == "vocos":
+         vocoder_local_path = "../checkpoints/charactr/vocos-mel-24khz"
+     elif mel_spec_type == "bigvgan":
+         vocoder_local_path = "../checkpoints/bigvgan_v2_24khz_100band_256x"
+     vocoder = load_vocoder(vocoder_name=mel_spec_type, is_local=local, local_path=vocoder_local_path)
+
+     # Tokenizer
+     vocab_char_map, vocab_size = get_tokenizer(dataset_name, tokenizer)
+
+     # Model
+     model = CFM(
+         transformer=model_cls(**model_arc, text_num_embeds=vocab_size, mel_dim=n_mel_channels),
+         mel_spec_kwargs=dict(
+             n_fft=n_fft,
+             hop_length=hop_length,
+             win_length=win_length,
+             n_mel_channels=n_mel_channels,
+             target_sample_rate=target_sample_rate,
+             mel_spec_type=mel_spec_type,
+         ),
+         odeint_kwargs=dict(
+             method=ode_method,
+         ),
+         vocab_char_map=vocab_char_map,
+     ).to(device)
+
+     ckpt_path = rel_path + f"/ckpts/{exp_name}/model_{ckpt_step}.pt"
+     if not os.path.exists(ckpt_path):
+         print("Loading from self-organized training checkpoints rather than released pretrained.")
+         ckpt_path = rel_path + f"/{model_cfg.ckpts.save_dir}/model_{ckpt_step}.pt"
+     dtype = torch.float32 if mel_spec_type == "bigvgan" else None
+     model = load_checkpoint(model, ckpt_path, device, dtype=dtype, use_ema=use_ema)
+
+     if not os.path.exists(output_dir) and accelerator.is_main_process:
+         os.makedirs(output_dir)
+
+     # start batch inference
+     accelerator.wait_for_everyone()
+     start = time.time()
+
+     with accelerator.split_between_processes(prompts_all) as prompts:
+         for prompt in tqdm(prompts, disable=not accelerator.is_local_main_process):
+             utts, ref_rms_list, ref_mels, ref_mel_lens, total_mel_lens, final_text_list = prompt
+             ref_mels = ref_mels.to(device)
+             ref_mel_lens = torch.tensor(ref_mel_lens, dtype=torch.long).to(device)
+             total_mel_lens = torch.tensor(total_mel_lens, dtype=torch.long).to(device)
+
+             # Inference
+             with torch.inference_mode():
+                 generated, _ = model.sample(
+                     cond=ref_mels,
+                     text=final_text_list,
+                     duration=total_mel_lens,
+                     lens=ref_mel_lens,
+                     steps=nfe_step,
+                     cfg_strength=cfg_strength,
+                     sway_sampling_coef=sway_sampling_coef,
+                     no_ref_audio=no_ref_audio,
+                     seed=seed,
+                 )
+                 # Final result
+                 for i, gen in enumerate(generated):
+                     gen = gen[ref_mel_lens[i] : total_mel_lens[i], :].unsqueeze(0)
+                     gen_mel_spec = gen.permute(0, 2, 1).to(torch.float32)
+                     if mel_spec_type == "vocos":
+                         generated_wave = vocoder.decode(gen_mel_spec).cpu()
+                     elif mel_spec_type == "bigvgan":
+                         generated_wave = vocoder(gen_mel_spec).squeeze(0).cpu()
+
+                     if ref_rms_list[i] < target_rms:
+                         generated_wave = generated_wave * ref_rms_list[i] / target_rms
+                     torchaudio.save(f"{output_dir}/{utts[i]}.wav", generated_wave, target_sample_rate)
+
+     accelerator.wait_for_everyone()
+     if accelerator.is_main_process:
+         timediff = time.time() - start
+         print(f"Done batch inference in {timediff / 60 :.2f} minutes.")
+
+
+ if __name__ == "__main__":
+     main()
Test-F5/src/f5_tts/eval/eval_infer_batch.sh ADDED
@@ -0,0 +1,18 @@
+ #!/bin/bash
+
+ # e.g. F5-TTS, 16 NFE
+ accelerate launch src/f5_tts/eval/eval_infer_batch.py -s 0 -n "F5TTS_v1_Base" -t "seedtts_test_zh" -nfe 16
+ accelerate launch src/f5_tts/eval/eval_infer_batch.py -s 0 -n "F5TTS_v1_Base" -t "seedtts_test_en" -nfe 16
+ accelerate launch src/f5_tts/eval/eval_infer_batch.py -s 0 -n "F5TTS_v1_Base" -t "ls_pc_test_clean" -nfe 16
+
+ # e.g. Vanilla E2 TTS, 32 NFE
+ accelerate launch src/f5_tts/eval/eval_infer_batch.py -s 0 -n "E2TTS_Base" -c 1200000 -t "seedtts_test_zh" -o "midpoint" -ss 0
+ accelerate launch src/f5_tts/eval/eval_infer_batch.py -s 0 -n "E2TTS_Base" -c 1200000 -t "seedtts_test_en" -o "midpoint" -ss 0
+ accelerate launch src/f5_tts/eval/eval_infer_batch.py -s 0 -n "E2TTS_Base" -c 1200000 -t "ls_pc_test_clean" -o "midpoint" -ss 0
+
+ # e.g. evaluate F5-TTS 16 NFE result on Seed-TTS test-zh
+ python src/f5_tts/eval/eval_seedtts_testset.py -e wer -l zh --gen_wav_dir results/F5TTS_v1_Base_1250000/seedtts_test_zh/seed0_euler_nfe32_vocos_ss-1_cfg2.0_speed1.0 --gpu_nums 8
+ python src/f5_tts/eval/eval_seedtts_testset.py -e sim -l zh --gen_wav_dir results/F5TTS_v1_Base_1250000/seedtts_test_zh/seed0_euler_nfe32_vocos_ss-1_cfg2.0_speed1.0 --gpu_nums 8
+ python src/f5_tts/eval/eval_utmos.py --audio_dir results/F5TTS_v1_Base_1250000/seedtts_test_zh/seed0_euler_nfe32_vocos_ss-1_cfg2.0_speed1.0
+
+ # etc.
Test-F5/src/f5_tts/eval/eval_librispeech_test_clean.py ADDED
@@ -0,0 +1,90 @@
+ # Evaluate with Librispeech test-clean, ~3s prompt to generate 4-10s audio (the way of valle/voicebox evaluation)
+
+ import argparse
+ import json
+ import os
+ import sys
+
+ sys.path.append(os.getcwd())
+
+ import multiprocessing as mp
+ from importlib.resources import files
+
+ import numpy as np
+ from f5_tts.eval.utils_eval import (
+     get_librispeech_test,
+     run_asr_wer,
+     run_sim,
+ )
+
+ rel_path = str(files("f5_tts").joinpath("../../"))
+
+
+ def get_args():
+     parser = argparse.ArgumentParser()
+     parser.add_argument("-e", "--eval_task", type=str, default="wer", choices=["sim", "wer"])
+     parser.add_argument("-l", "--lang", type=str, default="en")
+     parser.add_argument("-g", "--gen_wav_dir", type=str, required=True)
+     parser.add_argument("-p", "--librispeech_test_clean_path", type=str, required=True)
+     parser.add_argument("-n", "--gpu_nums", type=int, default=8, help="Number of GPUs to use")
+     parser.add_argument("--local", action="store_true", help="Use local custom checkpoint directory")
+     return parser.parse_args()
+
+
+ def main():
+     args = get_args()
+     eval_task = args.eval_task
+     lang = args.lang
+     librispeech_test_clean_path = args.librispeech_test_clean_path  # test-clean path
+     gen_wav_dir = args.gen_wav_dir
+     metalst = rel_path + "/data/librispeech_pc_test_clean_cross_sentence.lst"
+
+     gpus = list(range(args.gpu_nums))
+     test_set = get_librispeech_test(metalst, gen_wav_dir, gpus, librispeech_test_clean_path)
+
+     ## In LibriSpeech, some speakers utilized varying voice characteristics for different characters in the book,
+     ## leading to a low similarity for the ground truth in some cases.
+     # test_set = get_librispeech_test(metalst, gen_wav_dir, gpus, librispeech_test_clean_path, eval_ground_truth = True)  # eval ground truth
+
+     local = args.local
+     if local:  # use local custom checkpoint dir
+         asr_ckpt_dir = "../checkpoints/Systran/faster-whisper-large-v3"
+     else:
+         asr_ckpt_dir = ""  # auto download to cache dir
+     wavlm_ckpt_dir = "../checkpoints/UniSpeech/wavlm_large_finetune.pth"
+
+     # --------------------------------------------------------------------------
+
+     full_results = []
+     metrics = []
+
+     if eval_task == "wer":
+         with mp.Pool(processes=len(gpus)) as pool:
+             args = [(rank, lang, sub_test_set, asr_ckpt_dir) for (rank, sub_test_set) in test_set]
+             results = pool.map(run_asr_wer, args)
+             for r in results:
+                 full_results.extend(r)
+     elif eval_task == "sim":
+         with mp.Pool(processes=len(gpus)) as pool:
+             args = [(rank, sub_test_set, wavlm_ckpt_dir) for (rank, sub_test_set) in test_set]
+             results = pool.map(run_sim, args)
+             for r in results:
+                 full_results.extend(r)
+     else:
+         raise ValueError(f"Unknown metric type: {eval_task}")
+
+     result_path = f"{gen_wav_dir}/_{eval_task}_results.jsonl"
+     with open(result_path, "w") as f:
+         for line in full_results:
+             metrics.append(line[eval_task])
+             f.write(json.dumps(line, ensure_ascii=False) + "\n")
+         metric = round(np.mean(metrics), 5)
+         f.write(f"\n{eval_task.upper()}: {metric}\n")
+
+     print(f"\nTotal {len(metrics)} samples")
+     print(f"{eval_task.upper()}: {metric}")
+     print(f"{eval_task.upper()} results saved to {result_path}")
+
+
+ if __name__ == "__main__":
+     main()
Test-F5/src/f5_tts/eval/eval_seedtts_testset.py ADDED
@@ -0,0 +1,89 @@
+ # Evaluate with Seed-TTS testset
+
+ import argparse
+ import json
+ import os
+ import sys
+
+ sys.path.append(os.getcwd())
+
+ import multiprocessing as mp
+ from importlib.resources import files
+
+ import numpy as np
+ from f5_tts.eval.utils_eval import (
+     get_seed_tts_test,
+     run_asr_wer,
+     run_sim,
+ )
+
+ rel_path = str(files("f5_tts").joinpath("../../"))
+
+
+ def get_args():
+     parser = argparse.ArgumentParser()
+     parser.add_argument("-e", "--eval_task", type=str, default="wer", choices=["sim", "wer"])
+     parser.add_argument("-l", "--lang", type=str, default="en", choices=["zh", "en"])
+     parser.add_argument("-g", "--gen_wav_dir", type=str, required=True)
+     parser.add_argument("-n", "--gpu_nums", type=int, default=8, help="Number of GPUs to use")
+     parser.add_argument("--local", action="store_true", help="Use local custom checkpoint directory")
+     return parser.parse_args()
+
+
+ def main():
+     args = get_args()
+     eval_task = args.eval_task
+     lang = args.lang
+     gen_wav_dir = args.gen_wav_dir
+     metalst = rel_path + f"/data/seedtts_testset/{lang}/meta.lst"  # seed-tts testset
+
+     # NOTE: paraformer-zh results differ slightly with the number of GPUs, because the batch size changes
+     # (e.g. zh WER 1.254 appears to be the result of a 4-worker run of wer_seed_tts)
+     gpus = list(range(args.gpu_nums))
+     test_set = get_seed_tts_test(metalst, gen_wav_dir, gpus)
+
+     local = args.local
+     if local:  # use local custom checkpoint dir
+         if lang == "zh":
+             asr_ckpt_dir = "../checkpoints/funasr"  # paraformer-zh dir under funasr
+         elif lang == "en":
+             asr_ckpt_dir = "../checkpoints/Systran/faster-whisper-large-v3"
+     else:
+         asr_ckpt_dir = ""  # auto download to cache dir
+     wavlm_ckpt_dir = "../checkpoints/UniSpeech/wavlm_large_finetune.pth"
+
+     # --------------------------------------------------------------------------
+
+     full_results = []
+     metrics = []
+
+     if eval_task == "wer":
+         with mp.Pool(processes=len(gpus)) as pool:
+             args = [(rank, lang, sub_test_set, asr_ckpt_dir) for (rank, sub_test_set) in test_set]
+             results = pool.map(run_asr_wer, args)
+             for r in results:
+                 full_results.extend(r)
+     elif eval_task == "sim":
+         with mp.Pool(processes=len(gpus)) as pool:
+             args = [(rank, sub_test_set, wavlm_ckpt_dir) for (rank, sub_test_set) in test_set]
+             results = pool.map(run_sim, args)
+             for r in results:
+                 full_results.extend(r)
+     else:
+         raise ValueError(f"Unknown metric type: {eval_task}")
+
+     result_path = f"{gen_wav_dir}/_{eval_task}_results.jsonl"
+     with open(result_path, "w") as f:
+         for line in full_results:
+             metrics.append(line[eval_task])
+             f.write(json.dumps(line, ensure_ascii=False) + "\n")
+         metric = round(np.mean(metrics), 5)
+         f.write(f"\n{eval_task.upper()}: {metric}\n")
+
+     print(f"\nTotal {len(metrics)} samples")
+     print(f"{eval_task.upper()}: {metric}")
+     print(f"{eval_task.upper()} results saved to {result_path}")
+
+
+ if __name__ == "__main__":
+     main()
Test-F5/src/f5_tts/eval/eval_utmos.py ADDED
@@ -0,0 +1,42 @@
+ import argparse
+ import json
+ from pathlib import Path
+
+ import librosa
+ import torch
+ from tqdm import tqdm
+
+
+ def main():
+     parser = argparse.ArgumentParser(description="UTMOS Evaluation")
+     parser.add_argument("--audio_dir", type=str, required=True, help="Audio file path.")
+     parser.add_argument("--ext", type=str, default="wav", help="Audio extension.")
+     args = parser.parse_args()
+
+     device = "cuda" if torch.cuda.is_available() else "xpu" if torch.xpu.is_available() else "cpu"
+
+     predictor = torch.hub.load("tarepan/SpeechMOS:v1.2.0", "utmos22_strong", trust_repo=True)
+     predictor = predictor.to(device)
+
+     audio_paths = list(Path(args.audio_dir).rglob(f"*.{args.ext}"))
+     utmos_score = 0
+
+     utmos_result_path = Path(args.audio_dir) / "_utmos_results.jsonl"
+     with open(utmos_result_path, "w", encoding="utf-8") as f:
+         for audio_path in tqdm(audio_paths, desc="Processing"):
+             wav, sr = librosa.load(audio_path, sr=None, mono=True)
+             wav_tensor = torch.from_numpy(wav).to(device).unsqueeze(0)
+             score = predictor(wav_tensor, sr)
+             line = {}
+             line["wav"], line["utmos"] = str(audio_path.stem), score.item()
+             utmos_score += score.item()
+             f.write(json.dumps(line, ensure_ascii=False) + "\n")
+         avg_score = utmos_score / len(audio_paths) if len(audio_paths) > 0 else 0
+         f.write(f"\nUTMOS: {avg_score:.4f}\n")
+
+     print(f"UTMOS: {avg_score:.4f}")
+     print(f"UTMOS results saved to {utmos_result_path}")
+
+
+ if __name__ == "__main__":
+     main()
Test-F5/src/f5_tts/eval/utils_eval.py ADDED
@@ -0,0 +1,418 @@
+ import math
+ import os
+ import random
+ import string
+ from pathlib import Path
+
+ import torch
+ import torch.nn.functional as F
+ import torchaudio
+ from tqdm import tqdm
+
+ from f5_tts.eval.ecapa_tdnn import ECAPA_TDNN_SMALL
+ from f5_tts.model.modules import MelSpec
+ from f5_tts.model.utils import convert_char_to_pinyin
+
+
+ # seedtts testset metainfo: utt, prompt_text, prompt_wav, gt_text, gt_wav
+ def get_seedtts_testset_metainfo(metalst):
+     f = open(metalst)
+     lines = f.readlines()
+     f.close()
+     metainfo = []
+     for line in lines:
+         if len(line.strip().split("|")) == 5:
+             utt, prompt_text, prompt_wav, gt_text, gt_wav = line.strip().split("|")
+         elif len(line.strip().split("|")) == 4:
+             utt, prompt_text, prompt_wav, gt_text = line.strip().split("|")
+             gt_wav = os.path.join(os.path.dirname(metalst), "wavs", utt + ".wav")
+         if not os.path.isabs(prompt_wav):
+             prompt_wav = os.path.join(os.path.dirname(metalst), prompt_wav)
+         metainfo.append((utt, prompt_text, prompt_wav, gt_text, gt_wav))
+     return metainfo
+
+
+ # librispeech test-clean metainfo: gen_utt, ref_txt, ref_wav, gen_txt, gen_wav
+ def get_librispeech_test_clean_metainfo(metalst, librispeech_test_clean_path):
+     f = open(metalst)
+     lines = f.readlines()
+     f.close()
+     metainfo = []
+     for line in lines:
+         ref_utt, ref_dur, ref_txt, gen_utt, gen_dur, gen_txt = line.strip().split("\t")
+
+         # ref_txt = ref_txt[0] + ref_txt[1:].lower() + '.'  # if use librispeech test-clean (no-pc)
+         ref_spk_id, ref_chaptr_id, _ = ref_utt.split("-")
+         ref_wav = os.path.join(librispeech_test_clean_path, ref_spk_id, ref_chaptr_id, ref_utt + ".flac")
+
+         # gen_txt = gen_txt[0] + gen_txt[1:].lower() + '.'  # if use librispeech test-clean (no-pc)
+         gen_spk_id, gen_chaptr_id, _ = gen_utt.split("-")
+         gen_wav = os.path.join(librispeech_test_clean_path, gen_spk_id, gen_chaptr_id, gen_utt + ".flac")
+
+         metainfo.append((gen_utt, ref_txt, ref_wav, " " + gen_txt, gen_wav))
+
+     return metainfo
+
+
+ # padded to max length mel batch
+ def padded_mel_batch(ref_mels):
+     max_mel_length = torch.LongTensor([mel.shape[-1] for mel in ref_mels]).amax()
+     padded_ref_mels = []
+     for mel in ref_mels:
+         padded_ref_mel = F.pad(mel, (0, max_mel_length - mel.shape[-1]), value=0)
+         padded_ref_mels.append(padded_ref_mel)
+     padded_ref_mels = torch.stack(padded_ref_mels)
+     padded_ref_mels = padded_ref_mels.permute(0, 2, 1)
+     return padded_ref_mels
+
+
+ # get prompts from metainfo containing: utt, prompt_text, prompt_wav, gt_text, gt_wav
+
+
+ def get_inference_prompt(
+     metainfo,
+     speed=1.0,
+     tokenizer="pinyin",
+     polyphone=True,
+     target_sample_rate=24000,
+     n_fft=1024,
+     win_length=1024,
+     n_mel_channels=100,
+     hop_length=256,
+     mel_spec_type="vocos",
+     target_rms=0.1,
+     use_truth_duration=False,
+     infer_batch_size=1,
+     num_buckets=200,
+     min_secs=3,
+     max_secs=40,
+ ):
+     prompts_all = []
+
+     min_tokens = min_secs * target_sample_rate // hop_length
+     max_tokens = max_secs * target_sample_rate // hop_length
+
+     batch_accum = [0] * num_buckets
+     utts, ref_rms_list, ref_mels, ref_mel_lens, total_mel_lens, final_text_list = (
+         [[] for _ in range(num_buckets)] for _ in range(6)
+     )
+
+     mel_spectrogram = MelSpec(
+         n_fft=n_fft,
+         hop_length=hop_length,
+         win_length=win_length,
+         n_mel_channels=n_mel_channels,
+         target_sample_rate=target_sample_rate,
+         mel_spec_type=mel_spec_type,
+     )
+
+     for utt, prompt_text, prompt_wav, gt_text, gt_wav in tqdm(metainfo, desc="Processing prompts..."):
+         # Audio
+         ref_audio, ref_sr = torchaudio.load(prompt_wav)
+         ref_rms = torch.sqrt(torch.mean(torch.square(ref_audio)))
+         if ref_rms < target_rms:
+             ref_audio = ref_audio * target_rms / ref_rms
+         assert ref_audio.shape[-1] > 5000, f"Empty prompt wav: {prompt_wav}, or torchaudio backend issue."
+         if ref_sr != target_sample_rate:
+             resampler = torchaudio.transforms.Resample(ref_sr, target_sample_rate)
+             ref_audio = resampler(ref_audio)
+
+         # Text
+         if len(prompt_text[-1].encode("utf-8")) == 1:
+             prompt_text = prompt_text + " "
+         text = [prompt_text + gt_text]
+         if tokenizer == "pinyin":
+             text_list = convert_char_to_pinyin(text, polyphone=polyphone)
+         else:
+             text_list = text
+
+         # Duration, mel frame length
+         ref_mel_len = ref_audio.shape[-1] // hop_length
+         if use_truth_duration:
+             gt_audio, gt_sr = torchaudio.load(gt_wav)
+             if gt_sr != target_sample_rate:
+                 resampler = torchaudio.transforms.Resample(gt_sr, target_sample_rate)
+                 gt_audio = resampler(gt_audio)
+             total_mel_len = ref_mel_len + int(gt_audio.shape[-1] / hop_length / speed)
+
+             # # test vocoder resynthesis
+             # ref_audio = gt_audio
+         else:
+             ref_text_len = len(prompt_text.encode("utf-8"))
+             gen_text_len = len(gt_text.encode("utf-8"))
+             total_mel_len = ref_mel_len + int(ref_mel_len / ref_text_len * gen_text_len / speed)
+
+         # to mel spectrogram
+         ref_mel = mel_spectrogram(ref_audio)
+         ref_mel = ref_mel.squeeze(0)
+
+         # deal with batch
+         assert infer_batch_size > 0, "infer_batch_size should be greater than 0."
+         assert (
+             min_tokens <= total_mel_len <= max_tokens
+         ), f"Audio {utt} has duration {total_mel_len*hop_length//target_sample_rate}s out of range [{min_secs}, {max_secs}]."
+         bucket_i = math.floor((total_mel_len - min_tokens) / (max_tokens - min_tokens + 1) * num_buckets)
+
+         utts[bucket_i].append(utt)
+         ref_rms_list[bucket_i].append(ref_rms)
+         ref_mels[bucket_i].append(ref_mel)
+         ref_mel_lens[bucket_i].append(ref_mel_len)
+         total_mel_lens[bucket_i].append(total_mel_len)
+         final_text_list[bucket_i].extend(text_list)
+
+         batch_accum[bucket_i] += total_mel_len
+
+         if batch_accum[bucket_i] >= infer_batch_size:
+             # print(f"\n{len(ref_mels[bucket_i][0][0])}\n{ref_mel_lens[bucket_i]}\n{total_mel_lens[bucket_i]}")
+             prompts_all.append(
+                 (
+                     utts[bucket_i],
+                     ref_rms_list[bucket_i],
+                     padded_mel_batch(ref_mels[bucket_i]),
+                     ref_mel_lens[bucket_i],
+                     total_mel_lens[bucket_i],
+                     final_text_list[bucket_i],
+                 )
+             )
+             batch_accum[bucket_i] = 0
+             (
+                 utts[bucket_i],
+                 ref_rms_list[bucket_i],
+                 ref_mels[bucket_i],
+                 ref_mel_lens[bucket_i],
+                 total_mel_lens[bucket_i],
+                 final_text_list[bucket_i],
+             ) = [], [], [], [], [], []
+
+     # add residual
+     for bucket_i, bucket_frames in enumerate(batch_accum):
+         if bucket_frames > 0:
+             prompts_all.append(
+                 (
+                     utts[bucket_i],
+                     ref_rms_list[bucket_i],
+                     padded_mel_batch(ref_mels[bucket_i]),
+                     ref_mel_lens[bucket_i],
+                     total_mel_lens[bucket_i],
+                     final_text_list[bucket_i],
+                 )
+             )
+     # shuffle, so that the last workers are not left with only the short, easy samples
+     random.seed(666)
+     random.shuffle(prompts_all)
+
+     return prompts_all
+
+
+ # get wav_res_ref_text of seed-tts test metalst
+ # https://github.com/BytedanceSpeech/seed-tts-eval
+
+
+ def get_seed_tts_test(metalst, gen_wav_dir, gpus):
+     f = open(metalst)
+     lines = f.readlines()
+     f.close()
+
+     test_set_ = []
+     for line in tqdm(lines):
+         if len(line.strip().split("|")) == 5:
+             utt, prompt_text, prompt_wav, gt_text, gt_wav = line.strip().split("|")
+         elif len(line.strip().split("|")) == 4:
+             utt, prompt_text, prompt_wav, gt_text = line.strip().split("|")
+
+         if not os.path.exists(os.path.join(gen_wav_dir, utt + ".wav")):
+             continue
+         gen_wav = os.path.join(gen_wav_dir, utt + ".wav")
+         if not os.path.isabs(prompt_wav):
+             prompt_wav = os.path.join(os.path.dirname(metalst), prompt_wav)
+
+         test_set_.append((gen_wav, prompt_wav, gt_text))
+
+     num_jobs = len(gpus)
+     if num_jobs == 1:
+         return [(gpus[0], test_set_)]
+
+     wav_per_job = len(test_set_) // num_jobs + 1
+     test_set = []
+     for i in range(num_jobs):
+         test_set.append((gpus[i], test_set_[i * wav_per_job : (i + 1) * wav_per_job]))
+
+     return test_set
+
+
+ # get librispeech test-clean cross sentence test
+
+
+ def get_librispeech_test(metalst, gen_wav_dir, gpus, librispeech_test_clean_path, eval_ground_truth=False):
+     f = open(metalst)
+     lines = f.readlines()
+     f.close()
+
+     test_set_ = []
+     for line in tqdm(lines):
+         ref_utt, ref_dur, ref_txt, gen_utt, gen_dur, gen_txt = line.strip().split("\t")
+
+         if eval_ground_truth:
+             gen_spk_id, gen_chaptr_id, _ = gen_utt.split("-")
+             gen_wav = os.path.join(librispeech_test_clean_path, gen_spk_id, gen_chaptr_id, gen_utt + ".flac")
+         else:
+             if not os.path.exists(os.path.join(gen_wav_dir, gen_utt + ".wav")):
+                 raise FileNotFoundError(f"Generated wav not found: {gen_utt}")
+             gen_wav = os.path.join(gen_wav_dir, gen_utt + ".wav")
+
+         ref_spk_id, ref_chaptr_id, _ = ref_utt.split("-")
+         ref_wav = os.path.join(librispeech_test_clean_path, ref_spk_id, ref_chaptr_id, ref_utt + ".flac")
+
+         test_set_.append((gen_wav, ref_wav, gen_txt))
+
+     num_jobs = len(gpus)
+     if num_jobs == 1:
+         return [(gpus[0], test_set_)]
+
+     wav_per_job = len(test_set_) // num_jobs + 1
+     test_set = []
+     for i in range(num_jobs):
+         test_set.append((gpus[i], test_set_[i * wav_per_job : (i + 1) * wav_per_job]))
+
+     return test_set
+
+
+ # load asr model
+
+
+ def load_asr_model(lang, ckpt_dir=""):
+     if lang == "zh":
+         from funasr import AutoModel
+
+         model = AutoModel(
+             model=os.path.join(ckpt_dir, "paraformer-zh"),
+             # vad_model = os.path.join(ckpt_dir, "fsmn-vad"),
+             # punc_model = os.path.join(ckpt_dir, "ct-punc"),
+             # spk_model = os.path.join(ckpt_dir, "cam++"),
+             disable_update=True,
+         )  # following seed-tts setting
+     elif lang == "en":
+         from faster_whisper import WhisperModel
+
+         model_size = "large-v3" if ckpt_dir == "" else ckpt_dir
+         model = WhisperModel(model_size, device="cuda", compute_type="float16")
+     return model
+
+
+ # WER Evaluation, the way Seed-TTS does
+
+
+ def run_asr_wer(args):
+     rank, lang, test_set, ckpt_dir = args
+
+     if lang == "zh":
+         import zhconv
+
+         torch.cuda.set_device(rank)
+     elif lang == "en":
+         os.environ["CUDA_VISIBLE_DEVICES"] = str(rank)
+     else:
+         raise NotImplementedError(
+             "lang support only 'zh' (funasr paraformer-zh), 'en' (faster-whisper-large-v3), for now."
+         )
+
+     asr_model = load_asr_model(lang, ckpt_dir=ckpt_dir)
+
+     from zhon.hanzi import punctuation
+
+     punctuation_all = punctuation + string.punctuation
+     wer_results = []
+
+     from jiwer import compute_measures
+
+     for gen_wav, prompt_wav, truth in tqdm(test_set):
+         if lang == "zh":
+             res = asr_model.generate(input=gen_wav, batch_size_s=300, disable_pbar=True)
+             hypo = res[0]["text"]
+             hypo = zhconv.convert(hypo, "zh-cn")
+         elif lang == "en":
+             segments, _ = asr_model.transcribe(gen_wav, beam_size=5, language="en")
+             hypo = ""
+             for segment in segments:
+                 hypo = hypo + " " + segment.text
+
+         raw_truth = truth
+         raw_hypo = hypo
+
+         for x in punctuation_all:
+             truth = truth.replace(x, "")
+             hypo = hypo.replace(x, "")
+
+         # collapse double spaces left behind by punctuation removal
+         truth = truth.replace("  ", " ")
+         hypo = hypo.replace("  ", " ")
+
+         if lang == "zh":
+             truth = " ".join([x for x in truth])
+             hypo = " ".join([x for x in hypo])
+         elif lang == "en":
+             truth = truth.lower()
+             hypo = hypo.lower()
+
+         measures = compute_measures(truth, hypo)
+         wer = measures["wer"]
+
+         # ref_list = truth.split(" ")
+         # subs = measures["substitutions"] / len(ref_list)
+         # dele = measures["deletions"] / len(ref_list)
+         # inse = measures["insertions"] / len(ref_list)
+
+         wer_results.append(
+             {
+                 "wav": Path(gen_wav).stem,
+                 "truth": raw_truth,
+                 "hypo": raw_hypo,
+                 "wer": wer,
+             }
+         )
+
+     return wer_results
+
+
+ # SIM Evaluation
+
+
+ def run_sim(args):
+     rank, test_set, ckpt_dir = args
+     device = f"cuda:{rank}"
+
+     model = ECAPA_TDNN_SMALL(feat_dim=1024, feat_type="wavlm_large", config_path=None)
+     state_dict = torch.load(ckpt_dir, weights_only=True, map_location=lambda storage, loc: storage)
+     model.load_state_dict(state_dict["model"], strict=False)
+
+     use_gpu = True if torch.cuda.is_available() else False
+     if use_gpu:
+         model = model.cuda(device)
+     model.eval()
+
+     sim_results = []
+     for gen_wav, prompt_wav, truth in tqdm(test_set):
+         wav1, sr1 = torchaudio.load(gen_wav)
+         wav2, sr2 = torchaudio.load(prompt_wav)
+
+         resample1 = torchaudio.transforms.Resample(orig_freq=sr1, new_freq=16000)
+         resample2 = torchaudio.transforms.Resample(orig_freq=sr2, new_freq=16000)
+         wav1 = resample1(wav1)
+         wav2 = resample2(wav2)
+
+         if use_gpu:
+             wav1 = wav1.cuda(device)
+             wav2 = wav2.cuda(device)
+         with torch.no_grad():
+             emb1 = model(wav1)
+             emb2 = model(wav2)
+
+         sim = F.cosine_similarity(emb1, emb2)[0].item()
+         # print(f"VSim score between two audios: {sim:.4f} (-1.0, 1.0).")
+         sim_results.append(
+             {
+                 "wav": Path(gen_wav).stem,
+                 "sim": sim,
+             }
+         )
+
+     return sim_results
Test-F5/src/f5_tts/infer/README.md ADDED
@@ -0,0 +1,154 @@
1
+ # Inference
2
+
3
+ The pretrained model checkpoints are available at [🤗 Hugging Face](https://huggingface.co/SWivid/F5-TTS) and [🤖 Model Scope](https://www.modelscope.cn/models/SWivid/F5-TTS_Emilia-ZH-EN), and will be automatically downloaded when running inference scripts.
4
+
5
+ **More checkpoints, contributed by the community and supporting more languages, can be found in [SHARED.md](SHARED.md).**
6
+
7
+ A single generation currently supports up to **30s**, which is the **total length** including both prompt and output audio. However, you can provide `infer_cli` and `infer_gradio` with longer text, and chunked generation will be performed automatically. Long reference audio will be **clipped to ~15s**.
8
+
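+ For reference, this chunking is done by a helper in `utils_infer.py`; a minimal sketch of using it directly (the sample text and printout are illustrative):
+
+ ```python
+ from f5_tts.infer.utils_infer import chunk_text
+
+ long_text = "A passage far longer than one batch can hold. " * 40
+ for i, batch in enumerate(chunk_text(long_text, max_chars=135)):
+     print(i, len(batch))  # each chunk is synthesized separately
+ ```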
9
+ To avoid possible inference failures, make sure you have read through the following instructions.
10
+
11
+ - Use reference audio of <15s and leave some silence (e.g. 1s) at the end. Otherwise there is a risk of truncating in the middle of a word, leading to suboptimal generation.
12
+ - Uppercase letters will be uttered letter by letter, so use lowercase letters for normal words.
13
+ - Add some spaces (blank: " ") or punctuation (e.g. "," ".") to explicitly introduce pauses.
14
+ - Preprocess numbers into Chinese characters if you want them read in Chinese; otherwise they will be read in English.
15
+ - If the generation output is blank (pure silence), check that ffmpeg is installed (various tutorials are available online: blogs, videos, etc.).
16
+ - Try turning off `use_ema` if using an early-stage finetuned checkpoint (one trained for only a few updates).
17
+
18
+
19
+ ## Gradio App
20
+
21
+ Currently supported features:
22
+
23
+ - Basic TTS with Chunk Inference
24
+ - Multi-Style / Multi-Speaker Generation
25
+ - Voice Chat powered by Qwen2.5-3B-Instruct
26
+ - [Custom inference with more language support](src/f5_tts/infer/SHARED.md)
27
+
28
+ The CLI command `f5-tts_infer-gradio` is equivalent to `python src/f5_tts/infer/infer_gradio.py`, which launches a Gradio app (web interface) for inference.
29
+
30
+ The script will load model checkpoints from Hugging Face. You can also manually download files and update the path passed to `load_model()` in `infer_gradio.py`. Only the TTS model is loaded at first; the ASR model is loaded to do transcription if `ref_text` is not provided, and the LLM model is loaded if Voice Chat is used.
31
+
32
+ More flag options:
33
+
34
+ ```bash
35
+ # Automatically launch the interface in the default web browser
36
+ f5-tts_infer-gradio --inbrowser
37
+
38
+ # Set the root path of the application, if it's not served from the root ("/") of the domain
39
+ # For example, if the application is served at "https://example.com/myapp"
40
+ f5-tts_infer-gradio --root_path "/myapp"
41
+ ```
42
+
43
+ It can also be used as a component in a larger application:
44
+ ```python
45
+ import gradio as gr
46
+ from f5_tts.infer.infer_gradio import app
47
+
48
+ with gr.Blocks() as main_app:
49
+ gr.Markdown("# This is an example of using F5-TTS within a bigger Gradio app")
50
+
51
+ # ... other Gradio components
52
+
53
+ app.render()
54
+
55
+ main_app.launch()
56
+ ```
57
+
58
+
59
+ ## CLI Inference
60
+
61
+ The CLI command `f5-tts_infer-cli` is equivalent to `python src/f5_tts/infer/infer_cli.py`, a command-line tool for inference.
62
+
63
+ The script will load model checkpoints from Hugging Face. You can also manually download files and use `--ckpt_file` to specify the model you want to load, or update the path directly in `infer_cli.py`.
64
+
65
+ To change the vocabulary, use `--vocab_file` to provide your own `vocab.txt` file.
66
+
67
+ Basically, you can run inference with flags:
68
+ ```bash
69
+ # Leaving --ref_text "" will have the ASR model transcribe the reference (extra GPU memory usage)
70
+ f5-tts_infer-cli \
71
+ --model F5TTS_v1_Base \
72
+ --ref_audio "ref_audio.wav" \
73
+ --ref_text "The content, subtitle or transcription of reference audio." \
74
+ --gen_text "Some text you want TTS model generate for you."
75
+
76
+ # Use BigVGAN as vocoder. Currently only supports F5TTS_Base.
77
+ f5-tts_infer-cli --model F5TTS_Base --vocoder_name bigvgan --load_vocoder_from_local
78
+
79
+ # Use a custom checkpoint path, e.g.
80
+ f5-tts_infer-cli --ckpt_file ckpts/F5TTS_v1_Base/model_1250000.safetensors
81
+
82
+ # More instructions
83
+ f5-tts_infer-cli --help
84
+ ```
85
+
86
+ A `.toml` file enables more flexible usage.
87
+
88
+ ```bash
89
+ f5-tts_infer-cli -c custom.toml
90
+ ```
91
+
92
+ For example, you can use a `.toml` file to pass in variables; refer to `src/f5_tts/infer/examples/basic/basic.toml`:
93
+
94
+ ```toml
95
+ # F5TTS_v1_Base | E2TTS_Base
96
+ model = "F5TTS_v1_Base"
97
+ ref_audio = "infer/examples/basic/basic_ref_en.wav"
98
+ # If empty "", the reference audio is transcribed automatically.
99
+ ref_text = "Some call me nature, others call me mother nature."
100
+ gen_text = "I don't really care what you call me. I've been a silent spectator, watching species evolve, empires rise and fall. But always remember, I am mighty and enduring."
101
+ # File with text to generate. Ignores the text above.
102
+ gen_file = ""
103
+ remove_silence = false
104
+ output_dir = "tests"
105
+ ```
106
+
107
+ You can also leverage a `.toml` file for multi-style generation; refer to `src/f5_tts/infer/examples/multi/story.toml`.
108
+
109
+ ```toml
110
+ # F5TTS_v1_Base | E2TTS_Base
111
+ model = "F5TTS_v1_Base"
112
+ ref_audio = "infer/examples/multi/main.flac"
113
+ # If empty "", the reference audio is transcribed automatically.
114
+ ref_text = ""
115
+ gen_text = ""
116
+ # File with text to generate. Ignores the text above.
117
+ gen_file = "infer/examples/multi/story.txt"
118
+ remove_silence = true
119
+ output_dir = "tests"
120
+
121
+ [voices.town]
122
+ ref_audio = "infer/examples/multi/town.flac"
123
+ ref_text = ""
124
+
125
+ [voices.country]
126
+ ref_audio = "infer/examples/multi/country.flac"
127
+ ref_text = ""
128
+ ```
129
+ Mark the text with `[main]` `[town]` `[country]` tags wherever you want to switch voices, as in the short example below; refer to `src/f5_tts/infer/examples/multi/story.txt`.
130
+
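+ For instance, a marked generation text could look like the following (an illustrative snippet; each bracketed tag must match a voice defined in the `.toml` above):
+
+ ```
+ [main] The Town Mouse spoke first. [town] "You must come and stay with me!" [main] But the Country Mouse replied, [country] "I prefer my quiet fields."
+ ```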
131
+ ## Socket Real-time Service
132
+
133
+ Real-time voice output with chunked streaming:
134
+
135
+ ```bash
136
+ # Start socket server
137
+ python src/f5_tts/socket_server.py
138
+
139
+ # If PyAudio is not installed
140
+ sudo apt-get install portaudio19-dev
141
+ pip install pyaudio
142
+
143
+ # Communicate with socket client
144
+ python src/f5_tts/socket_client.py
145
+ ```
146
+
147
+ ## Speech Editing
148
+
149
+ To test speech editing capabilities, use the following command:
150
+
151
+ ```bash
152
+ python src/f5_tts/infer/speech_edit.py
153
+ ```
154
+
Test-F5/src/f5_tts/infer/SHARED.md ADDED
@@ -0,0 +1,174 @@
1
+ <!-- omit in toc -->
2
+ # Shared Model Cards
3
+
4
+ <!-- omit in toc -->
5
+ ### **Prerequisites of using**
6
+ - This document serves as a quick lookup table for community training/finetuning results, with support for various languages.
7
+ - The models in this repository are open source and based on voluntary contributions from their creators.
8
+ - Use of the models must respect the respective creators; the convenience they bring comes from the creators' efforts.
9
+
10
+ <!-- omit in toc -->
11
+ ### **Welcome to share here**
12
+ - Have a pretrained/finetuned result: a model checkpoint (preferably pruned to facilitate inference, i.e. keeping only `ema_model_state_dict`; see the sketch after this list) and the corresponding vocab file (for tokenization).
13
+ - Host a public [huggingface model repository](https://huggingface.co/new) and upload the model-related files.
14
+ - Make a pull request adding a model card to the current page, i.e. `src/f5_tts/infer/SHARED.md`.
15
+
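+ A minimal pruning sketch for the first point above (assuming the standard F5-TTS training checkpoint layout; filenames are placeholders):
+
+ ```python
+ import torch
+
+ # Training checkpoints also carry optimizer/scheduler states; keep only the
+ # EMA weights needed for inference before uploading.
+ ckpt = torch.load("model_last.pt", map_location="cpu")
+ torch.save({"ema_model_state_dict": ckpt["ema_model_state_dict"]}, "model_pruned.pt")
+ ```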
16
+ <!-- omit in toc -->
17
+ ### Supported Languages
18
+ - [Multilingual](#multilingual)
19
+ - [F5-TTS v1 v0 Base @ zh \& en @ F5-TTS](#f5-tts-v1-v0-base--zh--en--f5-tts)
20
+ - [English](#english)
21
+ - [Finnish](#finnish)
22
+ - [F5-TTS Base @ fi @ AsmoKoskinen](#f5-tts-base--fi--asmokoskinen)
23
+ - [French](#french)
24
+ - [F5-TTS Base @ fr @ RASPIAUDIO](#f5-tts-base--fr--raspiaudio)
25
+ - [Hindi](#hindi)
26
+ - [F5-TTS Small @ hi @ SPRINGLab](#f5-tts-small--hi--springlab)
27
+ - [Italian](#italian)
28
+ - [F5-TTS Base @ it @ alien79](#f5-tts-base--it--alien79)
29
+ - [Japanese](#japanese)
30
+ - [F5-TTS Base @ ja @ Jmica](#f5-tts-base--ja--jmica)
31
+ - [Mandarin](#mandarin)
32
+ - [Russian](#russian)
33
+ - [F5-TTS Base @ ru @ HotDro4illa](#f5-tts-base--ru--hotdro4illa)
34
+ - [Spanish](#spanish)
35
+ - [F5-TTS Base @ es @ jpgallegoar](#f5-tts-base--es--jpgallegoar)
36
+
37
+
38
+ ## Multilingual
39
+
40
+ #### F5-TTS v1 v0 Base @ zh & en @ F5-TTS
41
+ |Model|🤗Hugging Face|Data (Hours)|Model License|
42
+ |:---:|:------------:|:-----------:|:-------------:|
43
+ |F5-TTS v1 Base|[ckpt & vocab](https://huggingface.co/SWivid/F5-TTS/tree/main/F5TTS_v1_Base)|[Emilia 95K zh&en](https://huggingface.co/datasets/amphion/Emilia-Dataset/tree/fc71e07)|cc-by-nc-4.0|
44
+
45
+ ```bash
46
+ Model: hf://SWivid/F5-TTS/F5TTS_v1_Base/model_1250000.safetensors
47
+ Vocab: hf://SWivid/F5-TTS/F5TTS_v1_Base/vocab.txt
48
+ Config: {"dim": 1024, "depth": 22, "heads": 16, "ff_mult": 2, "text_dim": 512, "conv_layers": 4}
49
+ ```
50
+
51
+ |Model|🤗Hugging Face|Data (Hours)|Model License|
52
+ |:---:|:------------:|:-----------:|:-------------:|
53
+ |F5-TTS Base|[ckpt & vocab](https://huggingface.co/SWivid/F5-TTS/tree/main/F5TTS_Base)|[Emilia 95K zh&en](https://huggingface.co/datasets/amphion/Emilia-Dataset/tree/fc71e07)|cc-by-nc-4.0|
54
+
55
+ ```bash
56
+ Model: hf://SWivid/F5-TTS/F5TTS_Base/model_1200000.safetensors
57
+ Vocab: hf://SWivid/F5-TTS/F5TTS_Base/vocab.txt
58
+ Config: {"dim": 1024, "depth": 22, "heads": 16, "ff_mult": 2, "text_dim": 512, "text_mask_padding": False, "conv_layers": 4, "pe_attn_head": 1}
59
+ ```
60
+
61
+ *Other info, e.g. author info, GitHub repo, links to sampled results, usage instructions, tutorials (blog, video, etc.) ...*
62
+
63
+
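+ A shared checkpoint can be loaded by its `hf://` Model/Vocab paths through the repo's Python API, roughly as in this sketch (keyword names may differ slightly across versions; the reference files are placeholders):
+
+ ```python
+ from cached_path import cached_path
+ from f5_tts.api import F5TTS
+
+ tts = F5TTS(
+     ckpt_file=str(cached_path("hf://SWivid/F5-TTS/F5TTS_v1_Base/model_1250000.safetensors")),
+     vocab_file=str(cached_path("hf://SWivid/F5-TTS/F5TTS_v1_Base/vocab.txt")),
+ )
+ wav, sr, _ = tts.infer(
+     ref_file="ref_audio.wav",
+     ref_text="The content of the reference audio.",
+     gen_text="Hello from a community checkpoint.",
+     file_wave="out.wav",
+ )
+ ```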
64
+ ## English
65
+
66
+
67
+ ## Finnish
68
+
69
+ #### F5-TTS Base @ fi @ AsmoKoskinen
70
+ |Model|🤗Hugging Face|Data|Model License|
71
+ |:---:|:------------:|:-----------:|:-------------:|
72
+ |F5-TTS Base|[ckpt & vocab](https://huggingface.co/AsmoKoskinen/F5-TTS_Finnish_Model)|[Common Voice](https://huggingface.co/datasets/mozilla-foundation/common_voice_17_0), [Vox Populi](https://huggingface.co/datasets/facebook/voxpopuli)|cc-by-nc-4.0|
73
+
74
+ ```bash
75
+ Model: hf://AsmoKoskinen/F5-TTS_Finnish_Model/model_common_voice_fi_vox_populi_fi_20241206.safetensors
76
+ Vocab: hf://AsmoKoskinen/F5-TTS_Finnish_Model/vocab.txt
77
+ Config: {"dim": 1024, "depth": 22, "heads": 16, "ff_mult": 2, "text_dim": 512, "text_mask_padding": False, "conv_layers": 4, "pe_attn_head": 1}
78
+ ```
79
+
80
+
81
+ ## French
82
+
83
+ #### F5-TTS Base @ fr @ RASPIAUDIO
84
+ |Model|🤗Hugging Face|Data (Hours)|Model License|
85
+ |:---:|:------------:|:-----------:|:-------------:|
86
+ |F5-TTS Base|[ckpt & vocab](https://huggingface.co/RASPIAUDIO/F5-French-MixedSpeakers-reduced)|[LibriVox](https://librivox.org/)|cc-by-nc-4.0|
87
+
88
+ ```bash
89
+ Model: hf://RASPIAUDIO/F5-French-MixedSpeakers-reduced/model_last_reduced.pt
90
+ Vocab: hf://RASPIAUDIO/F5-French-MixedSpeakers-reduced/vocab.txt
91
+ Config: {"dim": 1024, "depth": 22, "heads": 16, "ff_mult": 2, "text_dim": 512, "text_mask_padding": False, "conv_layers": 4, "pe_attn_head": 1}
92
+ ```
93
+
94
+ - [Online Inference with Hugging Face Space](https://huggingface.co/spaces/RASPIAUDIO/f5-tts_french).
95
+ - [Tutorial video to train a new language model](https://www.youtube.com/watch?v=UO4usaOojys).
96
+ - [Discussion about this training can be found here](https://github.com/SWivid/F5-TTS/issues/434).
97
+
98
+
99
+ ## Hindi
100
+
101
+ #### F5-TTS Small @ hi @ SPRINGLab
102
+ |Model|🤗Hugging Face|Data (Hours)|Model License|
103
+ |:---:|:------------:|:-----------:|:-------------:|
104
+ |F5-TTS Small|[ckpt & vocab](https://huggingface.co/SPRINGLab/F5-Hindi-24KHz)|[IndicTTS Hi](https://huggingface.co/datasets/SPRINGLab/IndicTTS-Hindi) & [IndicVoices-R Hi](https://huggingface.co/datasets/SPRINGLab/IndicVoices-R_Hindi) |cc-by-4.0|
105
+
106
+ ```bash
107
+ Model: hf://SPRINGLab/F5-Hindi-24KHz/model_2500000.safetensors
108
+ Vocab: hf://SPRINGLab/F5-Hindi-24KHz/vocab.txt
109
+ Config: {"dim": 768, "depth": 18, "heads": 12, "ff_mult": 2, "text_dim": 512, "text_mask_padding": False, "conv_layers": 4, "pe_attn_head": 1}
110
+ ```
111
+
112
+ - Authors: SPRING Lab, Indian Institute of Technology, Madras
113
+ - Website: https://asr.iitm.ac.in/
114
+
115
+
116
+ ## Italian
117
+
118
+ #### F5-TTS Base @ it @ alien79
119
+ |Model|🤗Hugging Face|Data|Model License|
120
+ |:---:|:------------:|:-----------:|:-------------:|
121
+ |F5-TTS Base|[ckpt & vocab](https://huggingface.co/alien79/F5-TTS-italian)|[ylacombe/cml-tts](https://huggingface.co/datasets/ylacombe/cml-tts) |cc-by-nc-4.0|
122
+
123
+ ```bash
124
+ Model: hf://alien79/F5-TTS-italian/model_159600.safetensors
125
+ Vocab: hf://alien79/F5-TTS-italian/vocab.txt
126
+ Config: {"dim": 1024, "depth": 22, "heads": 16, "ff_mult": 2, "text_dim": 512, "text_mask_padding": False, "conv_layers": 4, "pe_attn_head": 1}
127
+ ```
128
+
129
+ - Trained by [Mithril Man](https://github.com/MithrilMan)
130
+ - Model details on [hf project home](https://huggingface.co/alien79/F5-TTS-italian)
131
+ - Open to collaborations to further improve the model
132
+
133
+
134
+ ## Japanese
135
+
136
+ #### F5-TTS Base @ ja @ Jmica
137
+ |Model|🤗Hugging Face|Data (Hours)|Model License|
138
+ |:---:|:------------:|:-----------:|:-------------:|
139
+ |F5-TTS Base|[ckpt & vocab](https://huggingface.co/Jmica/F5TTS/tree/main/JA_25498980)|[Emilia 1.7k JA](https://huggingface.co/datasets/amphion/Emilia-Dataset/tree/fc71e07) & [Galgame Dataset 5.4k](https://huggingface.co/datasets/OOPPEENN/Galgame_Dataset)|cc-by-nc-4.0|
140
+
141
+ ```bash
142
+ Model: hf://Jmica/F5TTS/JA_25498980/model_25498980.pt
143
+ Vocab: hf://Jmica/F5TTS/JA_25498980/vocab_updated.txt
144
+ Config: {"dim": 1024, "depth": 22, "heads": 16, "ff_mult": 2, "text_dim": 512, "text_mask_padding": False, "conv_layers": 4, "pe_attn_head": 1}
145
+ ```
146
+
147
+
148
+ ## Mandarin
149
+
150
+
151
+ ## Russian
152
+
153
+ #### F5-TTS Base @ ru @ HotDro4illa
154
+ |Model|🤗Hugging Face|Data (Hours)|Model License|
155
+ |:---:|:------------:|:-----------:|:-------------:|
156
+ |F5-TTS Base|[ckpt & vocab](https://huggingface.co/hotstone228/F5-TTS-Russian)|[Common voice](https://huggingface.co/datasets/mozilla-foundation/common_voice_17_0)|cc-by-nc-4.0|
157
+
158
+ ```bash
159
+ Model: hf://hotstone228/F5-TTS-Russian/model_last.safetensors
160
+ Vocab: hf://hotstone228/F5-TTS-Russian/vocab.txt
161
+ Config: {"dim": 1024, "depth": 22, "heads": 16, "ff_mult": 2, "text_dim": 512, "text_mask_padding": False, "conv_layers": 4, "pe_attn_head": 1}
162
+ ```
163
+ - Finetuned by [HotDro4illa](https://github.com/HotDro4illa)
164
+ - Any improvements are welcome
165
+
166
+
167
+ ## Spanish
168
+
169
+ #### F5-TTS Base @ es @ jpgallegoar
170
+ |Model|🤗Hugging Face|Data (Hours)|Model License|
171
+ |:---:|:------------:|:-----------:|:-------------:|
172
+ |F5-TTS Base|[ckpt & vocab](https://huggingface.co/jpgallegoar/F5-Spanish)|[Voxpopuli](https://huggingface.co/datasets/facebook/voxpopuli) & Crowdsourced & TEDx, 218 hours|cc0-1.0|
173
+
174
+ - @jpgallegoar's [GitHub repo](https://github.com/jpgallegoar/Spanish-F5) provides a Jupyter Notebook and Gradio usage for the Spanish model.
Test-F5/src/f5_tts/infer/__pycache__/infer_cli.cpython-310.pyc ADDED
Binary file (7.19 kB).
 
Test-F5/src/f5_tts/infer/__pycache__/utils_infer.cpython-310.pyc ADDED
Binary file (12.9 kB).
 
Test-F5/src/f5_tts/infer/examples/basic/basic.toml ADDED
@@ -0,0 +1,11 @@
1
+ # F5TTS_v1_Base | E2TTS_Base
2
+ model = "F5TTS_v1_Base"
3
+ ref_audio = "infer/examples/basic/basic_ref_en.wav"
4
+ # If empty "", the reference audio is transcribed automatically.
5
+ ref_text = "Some call me nature, others call me mother nature."
6
+ gen_text = "I don't really care what you call me. I've been a silent spectator, watching species evolve, empires rise and fall. But always remember, I am mighty and enduring."
7
+ # File with text to generate. Ignores the text above.
8
+ gen_file = ""
9
+ remove_silence = false
10
+ output_dir = "tests"
11
+ output_file = "infer_cli_basic.wav"
Test-F5/src/f5_tts/infer/examples/basic/basic_ref_en.wav ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b0e22048e72414fcc1e6b6342e47a774d748a195ed34e4a5b3fcf416707f2b71
3
+ size 256018
Test-F5/src/f5_tts/infer/examples/basic/basic_ref_zh.wav ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:96724a113240d1f82c6ded1334122f0176b96c9226ccd3c919e625bcfd2a3ede
3
+ size 324558
Test-F5/src/f5_tts/infer/examples/multi/country.flac ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bb15708b4b3875e37beec46591a5d89e1a9a63fdad3b8fe4a5c8738f4f554400
3
+ size 180321
Test-F5/src/f5_tts/infer/examples/multi/main.flac ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4abb1107771ce7e14926fde879b959dde6db6e572476b98684f04e45e978ab19
3
+ size 279219
Test-F5/src/f5_tts/infer/examples/multi/story.toml ADDED
@@ -0,0 +1,20 @@
1
+ # F5TTS_v1_Base | E2TTS_Base
2
+ model = "F5TTS_v1_Base"
3
+ ref_audio = "infer/examples/multi/main.flac"
4
+ # If an empty "", transcribes the reference audio automatically.
5
+ ref_text = ""
6
+ gen_text = ""
7
+ # File with text to generate. Ignores the text above.
8
+ gen_file = "infer/examples/multi/story.txt"
9
+ remove_silence = true
10
+ output_dir = "tests"
11
+ output_file = "infer_cli_story.wav"
12
+
13
+ [voices.town]
14
+ ref_audio = "infer/examples/multi/town.flac"
15
+ ref_text = ""
16
+
17
+ [voices.country]
18
+ ref_audio = "infer/examples/multi/country.flac"
19
+ ref_text = ""
20
+
Test-F5/src/f5_tts/infer/examples/multi/story.txt ADDED
@@ -0,0 +1 @@
1
+ A Town Mouse and a Country Mouse were acquaintances, and the Country Mouse one day invited his friend to come and see him at his home in the fields. The Town Mouse came, and they sat down to a dinner of barleycorns and roots, the latter of which had a distinctly earthy flavour. The fare was not much to the taste of the guest, and presently he broke out with [town] “My poor dear friend, you live here no better than the ants. Now, you should just see how I fare! My larder is a regular horn of plenty. You must come and stay with me, and I promise you you shall live on the fat of the land.” [main] So when he returned to town he took the Country Mouse with him, and showed him into a larder containing flour and oatmeal and figs and honey and dates. The Country Mouse had never seen anything like it, and sat down to enjoy the luxuries his friend provided: but before they had well begun, the door of the larder opened and someone came in. The two Mice scampered off and hid themselves in a narrow and exceedingly uncomfortable hole. Presently, when all was quiet, they ventured out again; but someone else came in, and off they scuttled again. This was too much for the visitor. [country] “Goodbye,” [main] said he, [country] “I’m off. You live in the lap of luxury, I can see, but you are surrounded by dangers; whereas at home I can enjoy my simple dinner of roots and corn in peace.”
Test-F5/src/f5_tts/infer/examples/multi/town.flac ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e7d069b8ebd5180c3b30fde5d378f0a1ddac96722d62cf43537efc3c3f3a3ce8
3
+ size 229383
Test-F5/src/f5_tts/infer/examples/vocab.txt ADDED
@@ -0,0 +1,2545 @@
1
+
2
+ !
3
+ "
4
+ #
5
+ $
6
+ %
7
+ &
8
+ '
9
+ (
10
+ )
11
+ *
12
+ +
13
+ ,
14
+ -
15
+ .
16
+ /
17
+ 0
18
+ 1
19
+ 2
20
+ 3
21
+ 4
22
+ 5
23
+ 6
24
+ 7
25
+ 8
26
+ 9
27
+ :
28
+ ;
29
+ =
30
+ >
31
+ ?
32
+ @
33
+ A
34
+ B
35
+ C
36
+ D
37
+ E
38
+ F
39
+ G
40
+ H
41
+ I
42
+ J
43
+ K
44
+ L
45
+ M
46
+ N
47
+ O
48
+ P
49
+ Q
50
+ R
51
+ S
52
+ T
53
+ U
54
+ V
55
+ W
56
+ X
57
+ Y
58
+ Z
59
+ [
60
+ \
61
+ ]
62
+ _
63
+ a
64
+ a1
65
+ ai1
66
+ ai2
67
+ ai3
68
+ ai4
69
+ an1
70
+ an3
71
+ an4
72
+ ang1
73
+ ang2
74
+ ang4
75
+ ao1
76
+ ao2
77
+ ao3
78
+ ao4
79
+ b
80
+ ba
81
+ ba1
82
+ ba2
83
+ ba3
84
+ ba4
85
+ bai1
86
+ bai2
87
+ bai3
88
+ bai4
89
+ ban1
90
+ ban2
91
+ ban3
92
+ ban4
93
+ bang1
94
+ bang2
95
+ bang3
96
+ bang4
97
+ bao1
98
+ bao2
99
+ bao3
100
+ bao4
101
+ bei
102
+ bei1
103
+ bei2
104
+ bei3
105
+ bei4
106
+ ben1
107
+ ben2
108
+ ben3
109
+ ben4
110
+ beng
111
+ beng1
112
+ beng2
113
+ beng3
114
+ beng4
115
+ bi1
116
+ bi2
117
+ bi3
118
+ bi4
119
+ bian1
120
+ bian2
121
+ bian3
122
+ bian4
123
+ biao1
124
+ biao2
125
+ biao3
126
+ bie1
127
+ bie2
128
+ bie3
129
+ bie4
130
+ bin1
131
+ bin4
132
+ bing1
133
+ bing2
134
+ bing3
135
+ bing4
136
+ bo
137
+ bo1
138
+ bo2
139
+ bo3
140
+ bo4
141
+ bu2
142
+ bu3
143
+ bu4
144
+ c
145
+ ca1
146
+ cai1
147
+ cai2
148
+ cai3
149
+ cai4
150
+ can1
151
+ can2
152
+ can3
153
+ can4
154
+ cang1
155
+ cang2
156
+ cao1
157
+ cao2
158
+ cao3
159
+ ce4
160
+ cen1
161
+ cen2
162
+ ceng1
163
+ ceng2
164
+ ceng4
165
+ cha1
166
+ cha2
167
+ cha3
168
+ cha4
169
+ chai1
170
+ chai2
171
+ chan1
172
+ chan2
173
+ chan3
174
+ chan4
175
+ chang1
176
+ chang2
177
+ chang3
178
+ chang4
179
+ chao1
180
+ chao2
181
+ chao3
182
+ che1
183
+ che2
184
+ che3
185
+ che4
186
+ chen1
187
+ chen2
188
+ chen3
189
+ chen4
190
+ cheng1
191
+ cheng2
192
+ cheng3
193
+ cheng4
194
+ chi1
195
+ chi2
196
+ chi3
197
+ chi4
198
+ chong1
199
+ chong2
200
+ chong3
201
+ chong4
202
+ chou1
203
+ chou2
204
+ chou3
205
+ chou4
206
+ chu1
207
+ chu2
208
+ chu3
209
+ chu4
210
+ chua1
211
+ chuai1
212
+ chuai2
213
+ chuai3
214
+ chuai4
215
+ chuan1
216
+ chuan2
217
+ chuan3
218
+ chuan4
219
+ chuang1
220
+ chuang2
221
+ chuang3
222
+ chuang4
223
+ chui1
224
+ chui2
225
+ chun1
226
+ chun2
227
+ chun3
228
+ chuo1
229
+ chuo4
230
+ ci1
231
+ ci2
232
+ ci3
233
+ ci4
234
+ cong1
235
+ cong2
236
+ cou4
237
+ cu1
238
+ cu4
239
+ cuan1
240
+ cuan2
241
+ cuan4
242
+ cui1
243
+ cui3
244
+ cui4
245
+ cun1
246
+ cun2
247
+ cun4
248
+ cuo1
249
+ cuo2
250
+ cuo4
251
+ d
252
+ da
253
+ da1
254
+ da2
255
+ da3
256
+ da4
257
+ dai1
258
+ dai2
259
+ dai3
260
+ dai4
261
+ dan1
262
+ dan2
263
+ dan3
264
+ dan4
265
+ dang1
266
+ dang2
267
+ dang3
268
+ dang4
269
+ dao1
270
+ dao2
271
+ dao3
272
+ dao4
273
+ de
274
+ de1
275
+ de2
276
+ dei3
277
+ den4
278
+ deng1
279
+ deng2
280
+ deng3
281
+ deng4
282
+ di1
283
+ di2
284
+ di3
285
+ di4
286
+ dia3
287
+ dian1
288
+ dian2
289
+ dian3
290
+ dian4
291
+ diao1
292
+ diao3
293
+ diao4
294
+ die1
295
+ die2
296
+ die4
297
+ ding1
298
+ ding2
299
+ ding3
300
+ ding4
301
+ diu1
302
+ dong1
303
+ dong3
304
+ dong4
305
+ dou1
306
+ dou2
307
+ dou3
308
+ dou4
309
+ du1
310
+ du2
311
+ du3
312
+ du4
313
+ duan1
314
+ duan2
315
+ duan3
316
+ duan4
317
+ dui1
318
+ dui4
319
+ dun1
320
+ dun3
321
+ dun4
322
+ duo1
323
+ duo2
324
+ duo3
325
+ duo4
326
+ e
327
+ e1
328
+ e2
329
+ e3
330
+ e4
331
+ ei2
332
+ en1
333
+ en4
334
+ er
335
+ er2
336
+ er3
337
+ er4
338
+ f
339
+ fa1
340
+ fa2
341
+ fa3
342
+ fa4
343
+ fan1
344
+ fan2
345
+ fan3
346
+ fan4
347
+ fang1
348
+ fang2
349
+ fang3
350
+ fang4
351
+ fei1
352
+ fei2
353
+ fei3
354
+ fei4
355
+ fen1
356
+ fen2
357
+ fen3
358
+ fen4
359
+ feng1
360
+ feng2
361
+ feng3
362
+ feng4
363
+ fo2
364
+ fou2
365
+ fou3
366
+ fu1
367
+ fu2
368
+ fu3
369
+ fu4
370
+ g
371
+ ga1
372
+ ga2
373
+ ga3
374
+ ga4
375
+ gai1
376
+ gai2
377
+ gai3
378
+ gai4
379
+ gan1
380
+ gan2
381
+ gan3
382
+ gan4
383
+ gang1
384
+ gang2
385
+ gang3
386
+ gang4
387
+ gao1
388
+ gao2
389
+ gao3
390
+ gao4
391
+ ge1
392
+ ge2
393
+ ge3
394
+ ge4
395
+ gei2
396
+ gei3
397
+ gen1
398
+ gen2
399
+ gen3
400
+ gen4
401
+ geng1
402
+ geng3
403
+ geng4
404
+ gong1
405
+ gong3
406
+ gong4
407
+ gou1
408
+ gou2
409
+ gou3
410
+ gou4
411
+ gu
412
+ gu1
413
+ gu2
414
+ gu3
415
+ gu4
416
+ gua1
417
+ gua2
418
+ gua3
419
+ gua4
420
+ guai1
421
+ guai2
422
+ guai3
423
+ guai4
424
+ guan1
425
+ guan2
426
+ guan3
427
+ guan4
428
+ guang1
429
+ guang2
430
+ guang3
431
+ guang4
432
+ gui1
433
+ gui2
434
+ gui3
435
+ gui4
436
+ gun3
437
+ gun4
438
+ guo1
439
+ guo2
440
+ guo3
441
+ guo4
442
+ h
443
+ ha1
444
+ ha2
445
+ ha3
446
+ hai1
447
+ hai2
448
+ hai3
449
+ hai4
450
+ han1
451
+ han2
452
+ han3
453
+ han4
454
+ hang1
455
+ hang2
456
+ hang4
457
+ hao1
458
+ hao2
459
+ hao3
460
+ hao4
461
+ he1
462
+ he2
463
+ he4
464
+ hei1
465
+ hen2
466
+ hen3
467
+ hen4
468
+ heng1
469
+ heng2
470
+ heng4
471
+ hong1
472
+ hong2
473
+ hong3
474
+ hong4
475
+ hou1
476
+ hou2
477
+ hou3
478
+ hou4
479
+ hu1
480
+ hu2
481
+ hu3
482
+ hu4
483
+ hua1
484
+ hua2
485
+ hua4
486
+ huai2
487
+ huai4
488
+ huan1
489
+ huan2
490
+ huan3
491
+ huan4
492
+ huang1
493
+ huang2
494
+ huang3
495
+ huang4
496
+ hui1
497
+ hui2
498
+ hui3
499
+ hui4
500
+ hun1
501
+ hun2
502
+ hun4
503
+ huo
504
+ huo1
505
+ huo2
506
+ huo3
507
+ huo4
508
+ i
509
+ j
510
+ ji1
511
+ ji2
512
+ ji3
513
+ ji4
514
+ jia
515
+ jia1
516
+ jia2
517
+ jia3
518
+ jia4
519
+ jian1
520
+ jian2
521
+ jian3
522
+ jian4
523
+ jiang1
524
+ jiang2
525
+ jiang3
526
+ jiang4
527
+ jiao1
528
+ jiao2
529
+ jiao3
530
+ jiao4
531
+ jie1
532
+ jie2
533
+ jie3
534
+ jie4
535
+ jin1
536
+ jin2
537
+ jin3
538
+ jin4
539
+ jing1
540
+ jing2
541
+ jing3
542
+ jing4
543
+ jiong3
544
+ jiu1
545
+ jiu2
546
+ jiu3
547
+ jiu4
548
+ ju1
549
+ ju2
550
+ ju3
551
+ ju4
552
+ juan1
553
+ juan2
554
+ juan3
555
+ juan4
556
+ jue1
557
+ jue2
558
+ jue4
559
+ jun1
560
+ jun4
561
+ k
562
+ ka1
563
+ ka2
564
+ ka3
565
+ kai1
566
+ kai2
567
+ kai3
568
+ kai4
569
+ kan1
570
+ kan2
571
+ kan3
572
+ kan4
573
+ kang1
574
+ kang2
575
+ kang4
576
+ kao1
577
+ kao2
578
+ kao3
579
+ kao4
580
+ ke1
581
+ ke2
582
+ ke3
583
+ ke4
584
+ ken3
585
+ keng1
586
+ kong1
587
+ kong3
588
+ kong4
589
+ kou1
590
+ kou2
591
+ kou3
592
+ kou4
593
+ ku1
594
+ ku2
595
+ ku3
596
+ ku4
597
+ kua1
598
+ kua3
599
+ kua4
600
+ kuai3
601
+ kuai4
602
+ kuan1
603
+ kuan2
604
+ kuan3
605
+ kuang1
606
+ kuang2
607
+ kuang4
608
+ kui1
609
+ kui2
610
+ kui3
611
+ kui4
612
+ kun1
613
+ kun3
614
+ kun4
615
+ kuo4
616
+ l
617
+ la
618
+ la1
619
+ la2
620
+ la3
621
+ la4
622
+ lai2
623
+ lai4
624
+ lan2
625
+ lan3
626
+ lan4
627
+ lang1
628
+ lang2
629
+ lang3
630
+ lang4
631
+ lao1
632
+ lao2
633
+ lao3
634
+ lao4
635
+ le
636
+ le1
637
+ le4
638
+ lei
639
+ lei1
640
+ lei2
641
+ lei3
642
+ lei4
643
+ leng1
644
+ leng2
645
+ leng3
646
+ leng4
647
+ li
648
+ li1
649
+ li2
650
+ li3
651
+ li4
652
+ lia3
653
+ lian2
654
+ lian3
655
+ lian4
656
+ liang2
657
+ liang3
658
+ liang4
659
+ liao1
660
+ liao2
661
+ liao3
662
+ liao4
663
+ lie1
664
+ lie2
665
+ lie3
666
+ lie4
667
+ lin1
668
+ lin2
669
+ lin3
670
+ lin4
671
+ ling2
672
+ ling3
673
+ ling4
674
+ liu1
675
+ liu2
676
+ liu3
677
+ liu4
678
+ long1
679
+ long2
680
+ long3
681
+ long4
682
+ lou1
683
+ lou2
684
+ lou3
685
+ lou4
686
+ lu1
687
+ lu2
688
+ lu3
689
+ lu4
690
+ luan2
691
+ luan3
692
+ luan4
693
+ lun1
694
+ lun2
695
+ lun4
696
+ luo1
697
+ luo2
698
+ luo3
699
+ luo4
700
+ lv2
701
+ lv3
702
+ lv4
703
+ lve3
704
+ lve4
705
+ m
706
+ ma
707
+ ma1
708
+ ma2
709
+ ma3
710
+ ma4
711
+ mai2
712
+ mai3
713
+ mai4
714
+ man1
715
+ man2
716
+ man3
717
+ man4
718
+ mang2
719
+ mang3
720
+ mao1
721
+ mao2
722
+ mao3
723
+ mao4
724
+ me
725
+ mei2
726
+ mei3
727
+ mei4
728
+ men
729
+ men1
730
+ men2
731
+ men4
732
+ meng
733
+ meng1
734
+ meng2
735
+ meng3
736
+ meng4
737
+ mi1
738
+ mi2
739
+ mi3
740
+ mi4
741
+ mian2
742
+ mian3
743
+ mian4
744
+ miao1
745
+ miao2
746
+ miao3
747
+ miao4
748
+ mie1
749
+ mie4
750
+ min2
751
+ min3
752
+ ming2
753
+ ming3
754
+ ming4
755
+ miu4
756
+ mo1
757
+ mo2
758
+ mo3
759
+ mo4
760
+ mou1
761
+ mou2
762
+ mou3
763
+ mu2
764
+ mu3
765
+ mu4
766
+ n
767
+ n2
768
+ na1
769
+ na2
770
+ na3
771
+ na4
772
+ nai2
773
+ nai3
774
+ nai4
775
+ nan1
776
+ nan2
777
+ nan3
778
+ nan4
779
+ nang1
780
+ nang2
781
+ nang3
782
+ nao1
783
+ nao2
784
+ nao3
785
+ nao4
786
+ ne
787
+ ne2
788
+ ne4
789
+ nei3
790
+ nei4
791
+ nen4
792
+ neng2
793
+ ni1
794
+ ni2
795
+ ni3
796
+ ni4
797
+ nian1
798
+ nian2
799
+ nian3
800
+ nian4
801
+ niang2
802
+ niang4
803
+ niao2
804
+ niao3
805
+ niao4
806
+ nie1
807
+ nie4
808
+ nin2
809
+ ning2
810
+ ning3
811
+ ning4
812
+ niu1
813
+ niu2
814
+ niu3
815
+ niu4
816
+ nong2
817
+ nong4
818
+ nou4
819
+ nu2
820
+ nu3
821
+ nu4
822
+ nuan3
823
+ nuo2
824
+ nuo4
825
+ nv2
826
+ nv3
827
+ nve4
828
+ o
829
+ o1
830
+ o2
831
+ ou1
832
+ ou2
833
+ ou3
834
+ ou4
835
+ p
836
+ pa1
837
+ pa2
838
+ pa4
839
+ pai1
840
+ pai2
841
+ pai3
842
+ pai4
843
+ pan1
844
+ pan2
845
+ pan4
846
+ pang1
847
+ pang2
848
+ pang4
849
+ pao1
850
+ pao2
851
+ pao3
852
+ pao4
853
+ pei1
854
+ pei2
855
+ pei4
856
+ pen1
857
+ pen2
858
+ pen4
859
+ peng1
860
+ peng2
861
+ peng3
862
+ peng4
863
+ pi1
864
+ pi2
865
+ pi3
866
+ pi4
867
+ pian1
868
+ pian2
869
+ pian4
870
+ piao1
871
+ piao2
872
+ piao3
873
+ piao4
874
+ pie1
875
+ pie2
876
+ pie3
877
+ pin1
878
+ pin2
879
+ pin3
880
+ pin4
881
+ ping1
882
+ ping2
883
+ po1
884
+ po2
885
+ po3
886
+ po4
887
+ pou1
888
+ pu1
889
+ pu2
890
+ pu3
891
+ pu4
892
+ q
893
+ qi1
894
+ qi2
895
+ qi3
896
+ qi4
897
+ qia1
898
+ qia3
899
+ qia4
900
+ qian1
901
+ qian2
902
+ qian3
903
+ qian4
904
+ qiang1
905
+ qiang2
906
+ qiang3
907
+ qiang4
908
+ qiao1
909
+ qiao2
910
+ qiao3
911
+ qiao4
912
+ qie1
913
+ qie2
914
+ qie3
915
+ qie4
916
+ qin1
917
+ qin2
918
+ qin3
919
+ qin4
920
+ qing1
921
+ qing2
922
+ qing3
923
+ qing4
924
+ qiong1
925
+ qiong2
926
+ qiu1
927
+ qiu2
928
+ qiu3
929
+ qu1
930
+ qu2
931
+ qu3
932
+ qu4
933
+ quan1
934
+ quan2
935
+ quan3
936
+ quan4
937
+ que1
938
+ que2
939
+ que4
940
+ qun2
941
+ r
942
+ ran2
943
+ ran3
944
+ rang1
945
+ rang2
946
+ rang3
947
+ rang4
948
+ rao2
949
+ rao3
950
+ rao4
951
+ re2
952
+ re3
953
+ re4
954
+ ren2
955
+ ren3
956
+ ren4
957
+ reng1
958
+ reng2
959
+ ri4
960
+ rong1
961
+ rong2
962
+ rong3
963
+ rou2
964
+ rou4
965
+ ru2
966
+ ru3
967
+ ru4
968
+ ruan2
969
+ ruan3
970
+ rui3
971
+ rui4
972
+ run4
973
+ ruo4
974
+ s
975
+ sa1
976
+ sa2
977
+ sa3
978
+ sa4
979
+ sai1
980
+ sai4
981
+ san1
982
+ san2
983
+ san3
984
+ san4
985
+ sang1
986
+ sang3
987
+ sang4
988
+ sao1
989
+ sao2
990
+ sao3
991
+ sao4
992
+ se4
993
+ sen1
994
+ seng1
995
+ sha1
996
+ sha2
997
+ sha3
998
+ sha4
999
+ shai1
1000
+ shai2
1001
+ shai3
1002
+ shai4
1003
+ shan1
1004
+ shan3
1005
+ shan4
1006
+ shang
1007
+ shang1
1008
+ shang3
1009
+ shang4
1010
+ shao1
1011
+ shao2
1012
+ shao3
1013
+ shao4
1014
+ she1
1015
+ she2
1016
+ she3
1017
+ she4
1018
+ shei2
1019
+ shen1
1020
+ shen2
1021
+ shen3
1022
+ shen4
1023
+ sheng1
1024
+ sheng2
1025
+ sheng3
1026
+ sheng4
1027
+ shi
1028
+ shi1
1029
+ shi2
1030
+ shi3
1031
+ shi4
1032
+ shou1
1033
+ shou2
1034
+ shou3
1035
+ shou4
1036
+ shu1
1037
+ shu2
1038
+ shu3
1039
+ shu4
1040
+ shua1
1041
+ shua2
1042
+ shua3
1043
+ shua4
1044
+ shuai1
1045
+ shuai3
1046
+ shuai4
1047
+ shuan1
1048
+ shuan4
1049
+ shuang1
1050
+ shuang3
1051
+ shui2
1052
+ shui3
1053
+ shui4
1054
+ shun3
1055
+ shun4
1056
+ shuo1
1057
+ shuo4
1058
+ si1
1059
+ si2
1060
+ si3
1061
+ si4
1062
+ song1
1063
+ song3
1064
+ song4
1065
+ sou1
1066
+ sou3
1067
+ sou4
1068
+ su1
1069
+ su2
1070
+ su4
1071
+ suan1
1072
+ suan4
1073
+ sui1
1074
+ sui2
1075
+ sui3
1076
+ sui4
1077
+ sun1
1078
+ sun3
1079
+ suo
1080
+ suo1
1081
+ suo2
1082
+ suo3
1083
+ t
1084
+ ta1
1085
+ ta2
1086
+ ta3
1087
+ ta4
1088
+ tai1
1089
+ tai2
1090
+ tai4
1091
+ tan1
1092
+ tan2
1093
+ tan3
1094
+ tan4
1095
+ tang1
1096
+ tang2
1097
+ tang3
1098
+ tang4
1099
+ tao1
1100
+ tao2
1101
+ tao3
1102
+ tao4
1103
+ te4
1104
+ teng2
1105
+ ti1
1106
+ ti2
1107
+ ti3
1108
+ ti4
1109
+ tian1
1110
+ tian2
1111
+ tian3
1112
+ tiao1
1113
+ tiao2
1114
+ tiao3
1115
+ tiao4
1116
+ tie1
1117
+ tie2
1118
+ tie3
1119
+ tie4
1120
+ ting1
1121
+ ting2
1122
+ ting3
1123
+ tong1
1124
+ tong2
1125
+ tong3
1126
+ tong4
1127
+ tou
1128
+ tou1
1129
+ tou2
1130
+ tou4
1131
+ tu1
1132
+ tu2
1133
+ tu3
1134
+ tu4
1135
+ tuan1
1136
+ tuan2
1137
+ tui1
1138
+ tui2
1139
+ tui3
1140
+ tui4
1141
+ tun1
1142
+ tun2
1143
+ tun4
1144
+ tuo1
1145
+ tuo2
1146
+ tuo3
1147
+ tuo4
1148
+ u
1149
+ v
1150
+ w
1151
+ wa
1152
+ wa1
1153
+ wa2
1154
+ wa3
1155
+ wa4
1156
+ wai1
1157
+ wai3
1158
+ wai4
1159
+ wan1
1160
+ wan2
1161
+ wan3
1162
+ wan4
1163
+ wang1
1164
+ wang2
1165
+ wang3
1166
+ wang4
1167
+ wei1
1168
+ wei2
1169
+ wei3
1170
+ wei4
1171
+ wen1
1172
+ wen2
1173
+ wen3
1174
+ wen4
1175
+ weng1
1176
+ weng4
1177
+ wo1
1178
+ wo2
1179
+ wo3
1180
+ wo4
1181
+ wu1
1182
+ wu2
1183
+ wu3
1184
+ wu4
1185
+ x
1186
+ xi1
1187
+ xi2
1188
+ xi3
1189
+ xi4
1190
+ xia1
1191
+ xia2
1192
+ xia4
1193
+ xian1
1194
+ xian2
1195
+ xian3
1196
+ xian4
1197
+ xiang1
1198
+ xiang2
1199
+ xiang3
1200
+ xiang4
1201
+ xiao1
1202
+ xiao2
1203
+ xiao3
1204
+ xiao4
1205
+ xie1
1206
+ xie2
1207
+ xie3
1208
+ xie4
1209
+ xin1
1210
+ xin2
1211
+ xin4
1212
+ xing1
1213
+ xing2
1214
+ xing3
1215
+ xing4
1216
+ xiong1
1217
+ xiong2
1218
+ xiu1
1219
+ xiu3
1220
+ xiu4
1221
+ xu
1222
+ xu1
1223
+ xu2
1224
+ xu3
1225
+ xu4
1226
+ xuan1
1227
+ xuan2
1228
+ xuan3
1229
+ xuan4
1230
+ xue1
1231
+ xue2
1232
+ xue3
1233
+ xue4
1234
+ xun1
1235
+ xun2
1236
+ xun4
1237
+ y
1238
+ ya
1239
+ ya1
1240
+ ya2
1241
+ ya3
1242
+ ya4
1243
+ yan1
1244
+ yan2
1245
+ yan3
1246
+ yan4
1247
+ yang1
1248
+ yang2
1249
+ yang3
1250
+ yang4
1251
+ yao1
1252
+ yao2
1253
+ yao3
1254
+ yao4
1255
+ ye1
1256
+ ye2
1257
+ ye3
1258
+ ye4
1259
+ yi
1260
+ yi1
1261
+ yi2
1262
+ yi3
1263
+ yi4
1264
+ yin1
1265
+ yin2
1266
+ yin3
1267
+ yin4
1268
+ ying1
1269
+ ying2
1270
+ ying3
1271
+ ying4
1272
+ yo1
1273
+ yong1
1274
+ yong2
1275
+ yong3
1276
+ yong4
1277
+ you1
1278
+ you2
1279
+ you3
1280
+ you4
1281
+ yu1
1282
+ yu2
1283
+ yu3
1284
+ yu4
1285
+ yuan1
1286
+ yuan2
1287
+ yuan3
1288
+ yuan4
1289
+ yue1
1290
+ yue4
1291
+ yun1
1292
+ yun2
1293
+ yun3
1294
+ yun4
1295
+ z
1296
+ za1
1297
+ za2
1298
+ za3
1299
+ zai1
1300
+ zai3
1301
+ zai4
1302
+ zan1
1303
+ zan2
1304
+ zan3
1305
+ zan4
1306
+ zang1
1307
+ zang4
1308
+ zao1
1309
+ zao2
1310
+ zao3
1311
+ zao4
1312
+ ze2
1313
+ ze4
1314
+ zei2
1315
+ zen3
1316
+ zeng1
1317
+ zeng4
1318
+ zha1
1319
+ zha2
1320
+ zha3
1321
+ zha4
1322
+ zhai1
1323
+ zhai2
1324
+ zhai3
1325
+ zhai4
1326
+ zhan1
1327
+ zhan2
1328
+ zhan3
1329
+ zhan4
1330
+ zhang1
1331
+ zhang2
1332
+ zhang3
1333
+ zhang4
1334
+ zhao1
1335
+ zhao2
1336
+ zhao3
1337
+ zhao4
1338
+ zhe
1339
+ zhe1
1340
+ zhe2
1341
+ zhe3
1342
+ zhe4
1343
+ zhen1
1344
+ zhen2
1345
+ zhen3
1346
+ zhen4
1347
+ zheng1
1348
+ zheng2
1349
+ zheng3
1350
+ zheng4
1351
+ zhi1
1352
+ zhi2
1353
+ zhi3
1354
+ zhi4
1355
+ zhong1
1356
+ zhong2
1357
+ zhong3
1358
+ zhong4
1359
+ zhou1
1360
+ zhou2
1361
+ zhou3
1362
+ zhou4
1363
+ zhu1
1364
+ zhu2
1365
+ zhu3
1366
+ zhu4
1367
+ zhua1
1368
+ zhua2
1369
+ zhua3
1370
+ zhuai1
1371
+ zhuai3
1372
+ zhuai4
1373
+ zhuan1
1374
+ zhuan2
1375
+ zhuan3
1376
+ zhuan4
1377
+ zhuang1
1378
+ zhuang4
1379
+ zhui1
1380
+ zhui4
1381
+ zhun1
1382
+ zhun2
1383
+ zhun3
1384
+ zhuo1
1385
+ zhuo2
1386
+ zi
1387
+ zi1
1388
+ zi2
1389
+ zi3
1390
+ zi4
1391
+ zong1
1392
+ zong2
1393
+ zong3
1394
+ zong4
1395
+ zou1
1396
+ zou2
1397
+ zou3
1398
+ zou4
1399
+ zu1
1400
+ zu2
1401
+ zu3
1402
+ zuan1
1403
+ zuan3
1404
+ zuan4
1405
+ zui2
1406
+ zui3
1407
+ zui4
1408
+ zun1
1409
+ zuo
1410
+ zuo1
1411
+ zuo2
1412
+ zuo3
1413
+ zuo4
1414
+ {
1415
+ ~
1416
+ ¡
1417
+ ¢
1418
+ £
1419
+ ¥
1420
+ §
1421
+ ¨
1422
+ ©
1423
+ «
1424
+ ®
1425
+ ¯
1426
+ °
1427
+ ±
1428
+ ²
1429
+ ³
1430
+ ´
1431
+ µ
1432
+ ·
1433
+ ¹
1434
+ º
1435
+ »
1436
+ ¼
1437
+ ½
1438
+ ¾
1439
+ ¿
1440
+ À
1441
+ Á
1442
+ Â
1443
+ Ã
1444
+ Ä
1445
+ Å
1446
+ Æ
1447
+ Ç
1448
+ È
1449
+ É
1450
+ Ê
1451
+ Í
1452
+ Î
1453
+ Ñ
1454
+ Ó
1455
+ Ö
1456
+ ×
1457
+ Ø
1458
+ Ú
1459
+ Ü
1460
+ Ý
1461
+ Þ
1462
+ ß
1463
+ à
1464
+ á
1465
+ â
1466
+ ã
1467
+ ä
1468
+ å
1469
+ æ
1470
+ ç
1471
+ è
1472
+ é
1473
+ ê
1474
+ ë
1475
+ ì
1476
+ í
1477
+ î
1478
+ ï
1479
+ ð
1480
+ ñ
1481
+ ò
1482
+ ó
1483
+ ô
1484
+ õ
1485
+ ö
1486
+ ø
1487
+ ù
1488
+ ú
1489
+ û
1490
+ ü
1491
+ ý
1492
+ Ā
1493
+ ā
1494
+ ă
1495
+ ą
1496
+ ć
1497
+ Č
1498
+ č
1499
+ Đ
1500
+ đ
1501
+ ē
1502
+ ė
1503
+ ę
1504
+ ě
1505
+ ĝ
1506
+ ğ
1507
+ ħ
1508
+ ī
1509
+ į
1510
+ İ
1511
+ ı
1512
+ Ł
1513
+ ł
1514
+ ń
1515
+ ņ
1516
+ ň
1517
+ ŋ
1518
+ Ō
1519
+ ō
1520
+ ő
1521
+ œ
1522
+ ř
1523
+ Ś
1524
+ ś
1525
+ Ş
1526
+ ş
1527
+ Š
1528
+ š
1529
+ Ť
1530
+ ť
1531
+ ũ
1532
+ ū
1533
+ ź
1534
+ Ż
1535
+ ż
1536
+ Ž
1537
+ ž
1538
+ ơ
1539
+ ư
1540
+ ǎ
1541
+ ǐ
1542
+ ǒ
1543
+ ǔ
1544
+ ǚ
1545
+ ș
1546
+ ț
1547
+ ɑ
1548
+ ɔ
1549
+ ɕ
1550
+ ə
1551
+ ɛ
1552
+ ɜ
1553
+ ɡ
1554
+ ɣ
1555
+ ɪ
1556
+ ɫ
1557
+ ɴ
1558
+ ɹ
1559
+ ɾ
1560
+ ʃ
1561
+ ʊ
1562
+ ʌ
1563
+ ʒ
1564
+ ʔ
1565
+ ʰ
1566
+ ʷ
1567
+ ʻ
1568
+ ʾ
1569
+ ʿ
1570
+ ˈ
1571
+ ː
1572
+ ˙
1573
+ ˜
1574
+ ˢ
1575
+ ́
1576
+ ̅
1577
+ Α
1578
+ Β
1579
+ Δ
1580
+ Ε
1581
+ Θ
1582
+ Κ
1583
+ Λ
1584
+ Μ
1585
+ Ξ
1586
+ Π
1587
+ Σ
1588
+ Τ
1589
+ Φ
1590
+ Χ
1591
+ Ψ
1592
+ Ω
1593
+ ά
1594
+ έ
1595
+ ή
1596
+ ί
1597
+ α
1598
+ β
1599
+ γ
1600
+ δ
1601
+ ε
1602
+ ζ
1603
+ η
1604
+ θ
1605
+ ι
1606
+ κ
1607
+ λ
1608
+ μ
1609
+ ν
1610
+ ξ
1611
+ ο
1612
+ π
1613
+ ρ
1614
+ ς
1615
+ σ
1616
+ τ
1617
+ υ
1618
+ φ
1619
+ χ
1620
+ ψ
1621
+ ω
1622
+ ϊ
1623
+ ό
1624
+ ύ
1625
+ ώ
1626
+ ϕ
1627
+ ϵ
1628
+ Ё
1629
+ А
1630
+ Б
1631
+ В
1632
+ Г
1633
+ Д
1634
+ Е
1635
+ Ж
1636
+ З
1637
+ И
1638
+ Й
1639
+ К
1640
+ Л
1641
+ М
1642
+ Н
1643
+ О
1644
+ П
1645
+ Р
1646
+ С
1647
+ Т
1648
+ У
1649
+ Ф
1650
+ Х
1651
+ Ц
1652
+ Ч
1653
+ Ш
1654
+ Щ
1655
+ Ы
1656
+ Ь
1657
+ Э
1658
+ Ю
1659
+ Я
1660
+ а
1661
+ б
1662
+ в
1663
+ г
1664
+ д
1665
+ е
1666
+ ж
1667
+ з
1668
+ и
1669
+ й
1670
+ к
1671
+ л
1672
+ м
1673
+ н
1674
+ о
1675
+ п
1676
+ р
1677
+ с
1678
+ т
1679
+ у
1680
+ ф
1681
+ х
1682
+ ц
1683
+ ч
1684
+ ш
1685
+ щ
1686
+ ъ
1687
+ ы
1688
+ ь
1689
+ э
1690
+ ю
1691
+ я
1692
+ ё
1693
+ і
1694
+ ְ
1695
+ ִ
1696
+ ֵ
1697
+ ֶ
1698
+ ַ
1699
+ ָ
1700
+ ֹ
1701
+ ּ
1702
+ ־
1703
+ ׁ
1704
+ א
1705
+ ב
1706
+ ג
1707
+ ד
1708
+ ה
1709
+ ו
1710
+ ז
1711
+ ח
1712
+ ט
1713
+ י
1714
+ כ
1715
+ ל
1716
+ ם
1717
+ מ
1718
+ ן
1719
+ נ
1720
+ ס
1721
+ ע
1722
+ פ
1723
+ ק
1724
+ ר
1725
+ ש
1726
+ ת
1727
+ أ
1728
+ ب
1729
+ ة
1730
+ ت
1731
+ ج
1732
+ ح
1733
+ د
1734
+ ر
1735
+ ز
1736
+ س
1737
+ ص
1738
+ ط
1739
+ ع
1740
+ ق
1741
+ ك
1742
+ ل
1743
+ م
1744
+ ن
1745
+ ه
1746
+ و
1747
+ ي
1748
+ َ
1749
+ ُ
1750
+ ِ
1751
+ ْ
1752
+
1753
+
1754
+
1755
+
1756
+
1757
+
1758
+
1759
+
1760
+
1761
+
1762
+
1763
+
1764
+
1765
+
1766
+
1767
+
1768
+
1769
+
1770
+
1771
+
1772
+
1773
+
1774
+
1775
+
1776
+
1777
+
1778
+
1779
+
1780
+
1781
+
1782
+
1783
+
1784
+
1785
+
1786
+
1787
+
1788
+
1789
+
1790
+
1791
+
1792
+
1793
+
1794
+
1795
+
1796
+
1797
+
1798
+
1799
+
1800
+ ế
1801
+
1802
+
1803
+
1804
+
1805
+
1806
+
1807
+
1808
+
1809
+
1810
+
1811
+
1812
+
1813
+
1814
+
1815
+
1816
+
1817
+
1818
+
1819
+
1820
+
1821
+
1822
+
1823
+
1824
+
1825
+
1826
+
1827
+
1828
+
1829
+
1830
+ ���
1831
+
1832
+
1833
+
1834
+
1835
+
1836
+
1837
+
1838
+
1839
+
1840
+
1841
+
1842
+
1843
+
1844
+
1845
+
1846
+
1847
+
1848
+
1849
+
1850
+
1851
+
1852
+
1853
+
1854
+
1855
+
1856
+
1857
+
1858
+
1859
+
1860
+
1861
+
1862
+
1863
+
1864
+
1865
+
1866
+
1867
+
1868
+
1869
+
1870
+
1871
+
1872
+
1873
+
1874
+
1875
+
1876
+
1877
+
1878
+
1879
+
1880
+
1881
+
1882
+
1883
+
1884
+
1885
+
1886
+
1887
+
1888
+
1889
+
1890
+
1891
+
1892
+
1893
+
1894
+
1895
+
1896
+
1897
+
1898
+
1899
+
1900
+
1901
+
1902
+
1903
+
1904
+
1905
+
1906
+
1907
+
1908
+
1909
+
1910
+
1911
+
1912
+
1913
+
1914
+
1915
+
1916
+
1917
+
1918
+
1919
+
1920
+
1921
+
1922
+
1923
+
1924
+
1925
+
1926
+
1927
+
1928
+
1929
+
1930
+
1931
+
1932
+
1933
+
1934
+
1935
+
1936
+
1937
+
1938
+
1939
+
1940
+
1941
+
1942
+
1943
+
1944
+
1945
+
1946
+
1947
+
1948
+
1949
+
1950
+
1951
+
1952
+
1953
+
1954
+
1955
+
1956
+
1957
+
1958
+
1959
+
1960
+
1961
+
1962
+
1963
+
1964
+
1965
+
1966
+
1967
+
1968
+
1969
+
1970
+
1971
+
1972
+
1973
+
1974
+
1975
+
1976
+
1977
+
1978
+
1979
+
1980
+
1981
+
1982
+
1983
+
1984
+
1985
+
1986
+
1987
+
1988
+
1989
+
1990
+
1991
+
1992
+
1993
+
1994
+
1995
+
1996
+
1997
+
1998
+
1999
+
2000
+
2001
+
2002
+
2003
+
2004
+
2005
+
2006
+
2007
+
2008
+
2009
+
2010
+
2011
+
2012
+
2013
+
2014
+
2015
+
2016
+
2017
+
2018
+
2019
+
2020
+
2021
+
2022
+
2023
+
2024
+
2025
+
2026
+
2027
+
2028
+
2029
+
2030
+
2031
+
2032
+
2033
+
2034
+
2035
+
2036
+
2037
+
2038
+
2039
+
2040
+
2041
+
2042
+
2043
+
2044
+
2045
+
2046
+
2047
+
2048
+
2049
+
2050
+
2051
+
2052
+
2053
+
2054
+
2055
+
2056
+
2057
+
2058
+
2059
+
2060
+
2061
+
2062
+
2063
+
2064
+
2065
+
2066
+
2067
+
2068
+
2069
+
2070
+
2071
+
2072
+
2073
+
2074
+
2075
+
2076
+
2077
+
2078
+
2079
+
2080
+
2081
+
2082
+
2083
+
2084
+
2085
+
2086
+
2087
+
2088
+
2089
+
2090
+
2091
+
2092
+
2093
+
2094
+
2095
+
2096
+
2097
+
2098
+
2099
+
2100
+
2101
+
2102
+
2103
+
2104
+
2105
+
2106
+
2107
+
2108
+
2109
+
2110
+
2111
+
2112
+
2113
+
2114
+
2115
+
2116
+
2117
+
2118
+
2119
+
2120
+
2121
+
2122
+
2123
+
2124
+
2125
+
2126
+
2127
+
2128
+
2129
+
2130
+
2131
+
2132
+
2133
+
2134
+
2135
+
2136
+
2137
+
2138
+
2139
+
2140
+
2141
+
2142
+
2143
+
2144
+
2145
+
2146
+
2147
+
2148
+
2149
+
2150
+
2151
+
2152
+
2153
+
2154
+
2155
+
2156
+
2157
+
2158
+
2159
+
2160
+
2161
+
2162
+
2163
+
2164
+
2165
+
2166
+
2167
+
2168
+
2169
+
2170
+
2171
+
2172
+
2173
+
2174
+
2175
+
2176
+
2177
+
2178
+
2179
+
2180
+
2181
+
2182
+
2183
+
2184
+
2185
+
2186
+
2187
+
2188
+
2189
+
2190
+
2191
+
2192
+
2193
+
2194
+
2195
+
2196
+
2197
+
2198
+
2199
+
2200
+
2201
+
2202
+
2203
+
2204
+
2205
+
2206
+
2207
+
2208
+
2209
+
2210
+
2211
+
2212
+
2213
+
2214
+
2215
+
2216
+
2217
+
2218
+
2219
+
2220
+
2221
+
2222
+
2223
+
2224
+
2225
+
2226
+
2227
+
2228
+
2229
+
2230
+
2231
+
2232
+
2233
+
2234
+
2235
+
2236
+
2237
+
2238
+
2239
+
2240
+
2241
+
2242
+
2243
+
2244
+
2245
+
2246
+
2247
+
2248
+
2249
+
2250
+
2251
+
2252
+
2253
+
2254
+
2255
+
2256
+
2257
+
2258
+
2259
+
2260
+
2261
+
2262
+
2263
+
2264
+
2265
+
2266
+
2267
+
2268
+
2269
+
2270
+
2271
+
2272
+
2273
+
2274
+
2275
+
2276
+
2277
+
2278
+
2279
+
2280
+
2281
+
2282
+
2283
+
2284
+
2285
+
2286
+
2287
+
2288
+
2289
+
2290
+
2291
+
2292
+
2293
+
2294
+
2295
+
2296
+
2297
+
2298
+
2299
+
2300
+
2301
+
2302
+
2303
+
2304
+
2305
+
2306
+
2307
+
2308
+
2309
+
2310
+
2311
+
2312
+
2313
+
2314
+
2315
+
2316
+
2317
+
2318
+
2319
+
2320
+
2321
+
2322
+
2323
+
2324
+
2325
+
2326
+
2327
+
2328
+
2329
+
2330
+
2331
+
2332
+
2333
+
2334
+
2335
+
2336
+
2337
+
2338
+
2339
+
2340
+
2341
+
2342
+
2343
+
2344
+
2345
+
2346
+
2347
+
2348
+
2349
+
2350
+
2351
+
2352
+
2353
+
2354
+
2355
+
2356
+
2357
+
2358
+
2359
+
2360
+
2361
+
2362
+
2363
+
2364
+
2365
+
2366
+
2367
+
2368
+
2369
+
2370
+
2371
+
2372
+
2373
+
2374
+
2375
+
2376
+
2377
+
2378
+
2379
+
2380
+
2381
+
2382
+
2383
+
2384
+
2385
+
2386
+
2387
+
2388
+
2389
+
2390
+
2391
+
2392
+
2393
+
2394
+
2395
+
2396
+
2397
+
2398
+
2399
+
2400
+
2401
+
2402
+
2403
+
2404
+
2405
+
2406
+
2407
+
2408
+
2409
+
2410
+
2411
+
2412
+
2413
+
2414
+
2415
+
2416
+
2417
+
2418
+
2419
+
2420
+
2421
+
2422
+
2423
+
2424
+
2425
+
2426
+
2427
+
2428
+
2429
+
2430
+
2431
+
2432
+
2433
+
2434
+
2435
+
2436
+
2437
+
2438
+
2439
+
2440
+
2441
+
2442
+
2443
+
2444
+
2445
+
2446
+
2447
+
2448
+
2449
+
2450
+
2451
+
2452
+
2453
+
2454
+
2455
+
2456
+
2457
+
2458
+
2459
+
2460
+
2461
+
2462
+
2463
+
2464
+
2465
+
2466
+
2467
+
2468
+
2469
+
2470
+
2471
+
2472
+
2473
+
2474
+
2475
+
2476
+
2477
+
2478
+
2479
+
2480
+
2481
+
2482
+
2483
+
2484
+
2485
+
2486
+
2487
+
2488
+
2489
+
2490
+
2491
+
2492
+
2493
+
2494
+
2495
+
2496
+
2497
+
2498
+
2499
+
2500
+
2501
+
2502
+
2503
+
2504
+
2505
+
2506
+
2507
+
2508
+
2509
+
2510
+
2511
+
2512
+
2513
+
2514
+
2515
+
2516
+
2517
+
2518
+
2519
+
2520
+
2521
+
2522
+
2523
+
2524
+
2525
+
2526
+
2527
+
2528
+
2529
+
2530
+
2531
+
2532
+
2533
+
2534
+
2535
+
2536
+
2537
+
2538
+
2539
+
2540
+
2541
+
2542
+
2543
+
2544
+
2545
+ 𠮶