diff --git a/ru/ru_RU/denis/medium/MODEL_CARD b/ru/ru_RU/denis/medium/MODEL_CARD new file mode 100644 index 0000000000000000000000000000000000000000..930f8b52307a2a3a24700fe15d52cd6a58e7e092 --- /dev/null +++ b/ru/ru_RU/denis/medium/MODEL_CARD @@ -0,0 +1,15 @@ +# Model card for denis (medium) + +* Language: ru_RU (Russian, Russia) +* Speakers: 1 +* Quality: medium +* Samplerate: 22,050Hz + +## Dataset + +* URL: https://github.com/NabuCasa/voice-datasets +* License: CC0 + +## Training + +Finetuned from U.S. English lessac voice (medium quality). diff --git a/ru/ru_RU/denis/medium/ru_RU-denis-medium.onnx b/ru/ru_RU/denis/medium/ru_RU-denis-medium.onnx new file mode 100644 index 0000000000000000000000000000000000000000..033b78cd23a48ab42159be1019edba1ceca0499b --- /dev/null +++ b/ru/ru_RU/denis/medium/ru_RU-denis-medium.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15fab56e11a097858ee115545d0f697fc2a316c41a291a5362349fb870411b0a +size 63201294 diff --git a/ru/ru_RU/denis/medium/ru_RU-denis-medium.onnx.json b/ru/ru_RU/denis/medium/ru_RU-denis-medium.onnx.json new file mode 100644 index 0000000000000000000000000000000000000000..298fb1cdb5deb29defaeb53df285c6d269839a3d --- /dev/null +++ b/ru/ru_RU/denis/medium/ru_RU-denis-medium.onnx.json @@ -0,0 +1,487 @@ +{ + "audio": { + "sample_rate": 22050, + "quality": "medium" + }, + "espeak": { + "voice": "ru" + }, + "inference": { + "noise_scale": 0.667, + "length_scale": 1, + "noise_w": 0.8 + }, + "phoneme_type": "espeak", + "phoneme_map": {}, + "phoneme_id_map": { + "_": [ + 0 + ], + "^": [ + 1 + ], + "$": [ + 2 + ], + " ": [ + 3 + ], + "!": [ + 4 + ], + "'": [ + 5 + ], + "(": [ + 6 + ], + ")": [ + 7 + ], + ",": [ + 8 + ], + "-": [ + 9 + ], + ".": [ + 10 + ], + ":": [ + 11 + ], + ";": [ + 12 + ], + "?": [ + 13 + ], + "a": [ + 14 + ], + "b": [ + 15 + ], + "c": [ + 16 + ], + "d": [ + 17 + ], + "e": [ + 18 + ], + "f": [ + 19 + ], + "h": [ + 20 + ], + "i": [ + 21 + ], + "j": [ + 22 + ], + "k": [ + 23 + ], + "l": [ + 24 + ], + "m": [ + 25 + ], + "n": [ + 26 + ], + "o": [ + 27 + ], + "p": [ + 28 + ], + "q": [ + 29 + ], + "r": [ + 30 + ], + "s": [ + 31 + ], + "t": [ + 32 + ], + "u": [ + 33 + ], + "v": [ + 34 + ], + "w": [ + 35 + ], + "x": [ + 36 + ], + "y": [ + 37 + ], + "z": [ + 38 + ], + "æ": [ + 39 + ], + "ç": [ + 40 + ], + "ð": [ + 41 + ], + "ø": [ + 42 + ], + "ħ": [ + 43 + ], + "ŋ": [ + 44 + ], + "œ": [ + 45 + ], + "ǀ": [ + 46 + ], + "ǁ": [ + 47 + ], + "ǂ": [ + 48 + ], + "ǃ": [ + 49 + ], + "ɐ": [ + 50 + ], + "ɑ": [ + 51 + ], + "ɒ": [ + 52 + ], + "ɓ": [ + 53 + ], + "ɔ": [ + 54 + ], + "ɕ": [ + 55 + ], + "ɖ": [ + 56 + ], + "ɗ": [ + 57 + ], + "ɘ": [ + 58 + ], + "ə": [ + 59 + ], + "ɚ": [ + 60 + ], + "ɛ": [ + 61 + ], + "ɜ": [ + 62 + ], + "ɞ": [ + 63 + ], + "ɟ": [ + 64 + ], + "ɠ": [ + 65 + ], + "ɡ": [ + 66 + ], + "ɢ": [ + 67 + ], + "ɣ": [ + 68 + ], + "ɤ": [ + 69 + ], + "ɥ": [ + 70 + ], + "ɦ": [ + 71 + ], + "ɧ": [ + 72 + ], + "ɨ": [ + 73 + ], + "ɪ": [ + 74 + ], + "ɫ": [ + 75 + ], + "ɬ": [ + 76 + ], + "ɭ": [ + 77 + ], + "ɮ": [ + 78 + ], + "ɯ": [ + 79 + ], + "ɰ": [ + 80 + ], + "ɱ": [ + 81 + ], + "ɲ": [ + 82 + ], + "ɳ": [ + 83 + ], + "ɴ": [ + 84 + ], + "ɵ": [ + 85 + ], + "ɶ": [ + 86 + ], + "ɸ": [ + 87 + ], + "ɹ": [ + 88 + ], + "ɺ": [ + 89 + ], + "ɻ": [ + 90 + ], + "ɽ": [ + 91 + ], + "ɾ": [ + 92 + ], + "ʀ": [ + 93 + ], + "ʁ": [ + 94 + ], + "ʂ": [ + 95 + ], + "ʃ": [ + 96 + ], + "ʄ": [ + 97 + ], + "ʈ": [ + 98 + ], + "ʉ": [ + 99 + ], + "ʊ": [ + 100 + ], + "ʋ": [ + 101 + ], + "ʌ": [ + 102 + ], + "ʍ": [ + 103 + ], + "ʎ": [ + 104 + ], + "ʏ": [ + 105 + ], + "ʐ": [ + 106 + ], + "ʑ": [ + 107 + ], + "ʒ": [ + 108 + ], + "ʔ": [ + 109 + ], + "ʕ": [ + 110 + ], + "ʘ": [ + 111 + ], + "ʙ": [ + 112 + ], + "ʛ": [ + 113 + ], + "ʜ": [ + 114 + ], + "ʝ": [ + 115 + ], + "ʟ": [ + 116 + ], + "ʡ": [ + 117 + ], + "ʢ": [ + 118 + ], + "ʲ": [ + 119 + ], + "ˈ": [ + 120 + ], + "ˌ": [ + 121 + ], + "ː": [ + 122 + ], + "ˑ": [ + 123 + ], + "˞": [ + 124 + ], + "β": [ + 125 + ], + "θ": [ + 126 + ], + "χ": [ + 127 + ], + "ᵻ": [ + 128 + ], + "ⱱ": [ + 129 + ], + "0": [ + 130 + ], + "1": [ + 131 + ], + "2": [ + 132 + ], + "3": [ + 133 + ], + "4": [ + 134 + ], + "5": [ + 135 + ], + "6": [ + 136 + ], + "7": [ + 137 + ], + "8": [ + 138 + ], + "9": [ + 139 + ], + "̧": [ + 140 + ], + "̃": [ + 141 + ], + "̪": [ + 142 + ], + "̯": [ + 143 + ], + "̩": [ + 144 + ], + "ʰ": [ + 145 + ], + "ˤ": [ + 146 + ], + "ε": [ + 147 + ], + "↓": [ + 148 + ], + "#": [ + 149 + ], + "\"": [ + 150 + ], + "↑": [ + 151 + ] + }, + "num_symbols": 256, + "num_speakers": 1, + "speaker_id_map": {}, + "piper_version": "1.0.0", + "language": { + "code": "ru_RU", + "family": "ru", + "region": "RU", + "name_native": "Русский", + "name_english": "Russian", + "country_english": "Russia" + }, + "dataset": "denis" +} \ No newline at end of file diff --git a/ru/ru_RU/denis/medium/samples/speaker_0.mp3 b/ru/ru_RU/denis/medium/samples/speaker_0.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..e6bbc30b8f7e1f774abb29f197da3e157578e212 Binary files /dev/null and b/ru/ru_RU/denis/medium/samples/speaker_0.mp3 differ diff --git a/ru/ru_RU/dmitri/medium/MODEL_CARD b/ru/ru_RU/dmitri/medium/MODEL_CARD new file mode 100644 index 0000000000000000000000000000000000000000..fad059bbfed2e3448cc4de4ab7428dda1be7bd68 --- /dev/null +++ b/ru/ru_RU/dmitri/medium/MODEL_CARD @@ -0,0 +1,15 @@ +# Model card for dmitri (medium) + +* Language: ru_RU (Russian, Russia) +* Speakers: 1 +* Quality: medium +* Samplerate: 22,050Hz + +## Dataset + +* URL: https://github.com/NabuCasa/voice-datasets +* License: CC0 + +## Training + +Finetuned from U.S. English lessac voice (medium quality). diff --git a/ru/ru_RU/dmitri/medium/ru_RU-dmitri-medium.onnx b/ru/ru_RU/dmitri/medium/ru_RU-dmitri-medium.onnx new file mode 100644 index 0000000000000000000000000000000000000000..9f92b87ee286884cfe34f4f352d037a32d5995c2 --- /dev/null +++ b/ru/ru_RU/dmitri/medium/ru_RU-dmitri-medium.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f073356ebc4bd0f80c5af58df2953a5988bd5bdab1eb38635ce960b071fbefcb +size 63201294 diff --git a/ru/ru_RU/dmitri/medium/ru_RU-dmitri-medium.onnx.json b/ru/ru_RU/dmitri/medium/ru_RU-dmitri-medium.onnx.json new file mode 100644 index 0000000000000000000000000000000000000000..89ff93dea9003ae8a364d48460c660c4f2a1ef01 --- /dev/null +++ b/ru/ru_RU/dmitri/medium/ru_RU-dmitri-medium.onnx.json @@ -0,0 +1,487 @@ +{ + "audio": { + "sample_rate": 22050, + "quality": "medium" + }, + "espeak": { + "voice": "ru" + }, + "inference": { + "noise_scale": 0.667, + "length_scale": 1, + "noise_w": 0.8 + }, + "phoneme_type": "espeak", + "phoneme_map": {}, + "phoneme_id_map": { + "_": [ + 0 + ], + "^": [ + 1 + ], + "$": [ + 2 + ], + " ": [ + 3 + ], + "!": [ + 4 + ], + "'": [ + 5 + ], + "(": [ + 6 + ], + ")": [ + 7 + ], + ",": [ + 8 + ], + "-": [ + 9 + ], + ".": [ + 10 + ], + ":": [ + 11 + ], + ";": [ + 12 + ], + "?": [ + 13 + ], + "a": [ + 14 + ], + "b": [ + 15 + ], + "c": [ + 16 + ], + "d": [ + 17 + ], + "e": [ + 18 + ], + "f": [ + 19 + ], + "h": [ + 20 + ], + "i": [ + 21 + ], + "j": [ + 22 + ], + "k": [ + 23 + ], + "l": [ + 24 + ], + "m": [ + 25 + ], + "n": [ + 26 + ], + "o": [ + 27 + ], + "p": [ + 28 + ], + "q": [ + 29 + ], + "r": [ + 30 + ], + "s": [ + 31 + ], + "t": [ + 32 + ], + "u": [ + 33 + ], + "v": [ + 34 + ], + "w": [ + 35 + ], + "x": [ + 36 + ], + "y": [ + 37 + ], + "z": [ + 38 + ], + "æ": [ + 39 + ], + "ç": [ + 40 + ], + "ð": [ + 41 + ], + "ø": [ + 42 + ], + "ħ": [ + 43 + ], + "ŋ": [ + 44 + ], + "œ": [ + 45 + ], + "ǀ": [ + 46 + ], + "ǁ": [ + 47 + ], + "ǂ": [ + 48 + ], + "ǃ": [ + 49 + ], + "ɐ": [ + 50 + ], + "ɑ": [ + 51 + ], + "ɒ": [ + 52 + ], + "ɓ": [ + 53 + ], + "ɔ": [ + 54 + ], + "ɕ": [ + 55 + ], + "ɖ": [ + 56 + ], + "ɗ": [ + 57 + ], + "ɘ": [ + 58 + ], + "ə": [ + 59 + ], + "ɚ": [ + 60 + ], + "ɛ": [ + 61 + ], + "ɜ": [ + 62 + ], + "ɞ": [ + 63 + ], + "ɟ": [ + 64 + ], + "ɠ": [ + 65 + ], + "ɡ": [ + 66 + ], + "ɢ": [ + 67 + ], + "ɣ": [ + 68 + ], + "ɤ": [ + 69 + ], + "ɥ": [ + 70 + ], + "ɦ": [ + 71 + ], + "ɧ": [ + 72 + ], + "ɨ": [ + 73 + ], + "ɪ": [ + 74 + ], + "ɫ": [ + 75 + ], + "ɬ": [ + 76 + ], + "ɭ": [ + 77 + ], + "ɮ": [ + 78 + ], + "ɯ": [ + 79 + ], + "ɰ": [ + 80 + ], + "ɱ": [ + 81 + ], + "ɲ": [ + 82 + ], + "ɳ": [ + 83 + ], + "ɴ": [ + 84 + ], + "ɵ": [ + 85 + ], + "ɶ": [ + 86 + ], + "ɸ": [ + 87 + ], + "ɹ": [ + 88 + ], + "ɺ": [ + 89 + ], + "ɻ": [ + 90 + ], + "ɽ": [ + 91 + ], + "ɾ": [ + 92 + ], + "ʀ": [ + 93 + ], + "ʁ": [ + 94 + ], + "ʂ": [ + 95 + ], + "ʃ": [ + 96 + ], + "ʄ": [ + 97 + ], + "ʈ": [ + 98 + ], + "ʉ": [ + 99 + ], + "ʊ": [ + 100 + ], + "ʋ": [ + 101 + ], + "ʌ": [ + 102 + ], + "ʍ": [ + 103 + ], + "ʎ": [ + 104 + ], + "ʏ": [ + 105 + ], + "ʐ": [ + 106 + ], + "ʑ": [ + 107 + ], + "ʒ": [ + 108 + ], + "ʔ": [ + 109 + ], + "ʕ": [ + 110 + ], + "ʘ": [ + 111 + ], + "ʙ": [ + 112 + ], + "ʛ": [ + 113 + ], + "ʜ": [ + 114 + ], + "ʝ": [ + 115 + ], + "ʟ": [ + 116 + ], + "ʡ": [ + 117 + ], + "ʢ": [ + 118 + ], + "ʲ": [ + 119 + ], + "ˈ": [ + 120 + ], + "ˌ": [ + 121 + ], + "ː": [ + 122 + ], + "ˑ": [ + 123 + ], + "˞": [ + 124 + ], + "β": [ + 125 + ], + "θ": [ + 126 + ], + "χ": [ + 127 + ], + "ᵻ": [ + 128 + ], + "ⱱ": [ + 129 + ], + "0": [ + 130 + ], + "1": [ + 131 + ], + "2": [ + 132 + ], + "3": [ + 133 + ], + "4": [ + 134 + ], + "5": [ + 135 + ], + "6": [ + 136 + ], + "7": [ + 137 + ], + "8": [ + 138 + ], + "9": [ + 139 + ], + "̧": [ + 140 + ], + "̃": [ + 141 + ], + "̪": [ + 142 + ], + "̯": [ + 143 + ], + "̩": [ + 144 + ], + "ʰ": [ + 145 + ], + "ˤ": [ + 146 + ], + "ε": [ + 147 + ], + "↓": [ + 148 + ], + "#": [ + 149 + ], + "\"": [ + 150 + ], + "↑": [ + 151 + ] + }, + "num_symbols": 256, + "num_speakers": 1, + "speaker_id_map": {}, + "piper_version": "1.0.0", + "language": { + "code": "ru_RU", + "family": "ru", + "region": "RU", + "name_native": "Русский", + "name_english": "Russian", + "country_english": "Russia" + }, + "dataset": "dmitri" +} \ No newline at end of file diff --git a/ru/ru_RU/dmitri/medium/samples/speaker_0.mp3 b/ru/ru_RU/dmitri/medium/samples/speaker_0.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..3626d0cce94a6e5c31996d608e70e4c85cc9699c Binary files /dev/null and b/ru/ru_RU/dmitri/medium/samples/speaker_0.mp3 differ diff --git a/ru/ru_RU/irina/medium/ALIASES b/ru/ru_RU/irina/medium/ALIASES new file mode 100644 index 0000000000000000000000000000000000000000..b358c7ca12e8e230e923ed17001413e606159aca --- /dev/null +++ b/ru/ru_RU/irina/medium/ALIASES @@ -0,0 +1 @@ +ru-irinia-medium diff --git a/ru/ru_RU/irina/medium/MODEL_CARD b/ru/ru_RU/irina/medium/MODEL_CARD new file mode 100644 index 0000000000000000000000000000000000000000..20d6abef2a7f737b324ede38bf8422b5a5208bdc --- /dev/null +++ b/ru/ru_RU/irina/medium/MODEL_CARD @@ -0,0 +1,15 @@ +# Model card for irina (medium) + +* Language: ru_RU (Russian, Russia) +* Speakers: 1 +* Quality: medium +* Samplerate: 22,050Hz + +## Dataset + +* URL: https://github.com/RHVoice/RHVoice +* License: Unknown + +## Training + +Finetuned from U.S. English lessac voice (medium quality). diff --git a/ru/ru_RU/irina/medium/ru_RU-irina-medium.onnx b/ru/ru_RU/irina/medium/ru_RU-irina-medium.onnx new file mode 100644 index 0000000000000000000000000000000000000000..0b0af08563ddb90c30573fd988d2f80cb8b85dad --- /dev/null +++ b/ru/ru_RU/irina/medium/ru_RU-irina-medium.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ff38212d23da300bbe3705c645e6e5b9475f0bfde01558eb17813e22acaaaaa +size 63201294 diff --git a/ru/ru_RU/irina/medium/ru_RU-irina-medium.onnx.json b/ru/ru_RU/irina/medium/ru_RU-irina-medium.onnx.json new file mode 100644 index 0000000000000000000000000000000000000000..7b61766dc87a3b419855f7980e6ab14d61166412 --- /dev/null +++ b/ru/ru_RU/irina/medium/ru_RU-irina-medium.onnx.json @@ -0,0 +1,483 @@ +{ + "audio": { + "sample_rate": 22050, + "quality": "medium" + }, + "espeak": { + "voice": "ru" + }, + "inference": { + "noise_scale": 0.667, + "length_scale": 1, + "noise_w": 0.8 + }, + "phoneme_map": {}, + "phoneme_id_map": { + "_": [ + 0 + ], + "^": [ + 1 + ], + "$": [ + 2 + ], + " ": [ + 3 + ], + "!": [ + 4 + ], + "'": [ + 5 + ], + "(": [ + 6 + ], + ")": [ + 7 + ], + ",": [ + 8 + ], + "-": [ + 9 + ], + ".": [ + 10 + ], + ":": [ + 11 + ], + ";": [ + 12 + ], + "?": [ + 13 + ], + "a": [ + 14 + ], + "b": [ + 15 + ], + "c": [ + 16 + ], + "d": [ + 17 + ], + "e": [ + 18 + ], + "f": [ + 19 + ], + "h": [ + 20 + ], + "i": [ + 21 + ], + "j": [ + 22 + ], + "k": [ + 23 + ], + "l": [ + 24 + ], + "m": [ + 25 + ], + "n": [ + 26 + ], + "o": [ + 27 + ], + "p": [ + 28 + ], + "q": [ + 29 + ], + "r": [ + 30 + ], + "s": [ + 31 + ], + "t": [ + 32 + ], + "u": [ + 33 + ], + "v": [ + 34 + ], + "w": [ + 35 + ], + "x": [ + 36 + ], + "y": [ + 37 + ], + "z": [ + 38 + ], + "æ": [ + 39 + ], + "ç": [ + 40 + ], + "ð": [ + 41 + ], + "ø": [ + 42 + ], + "ħ": [ + 43 + ], + "ŋ": [ + 44 + ], + "œ": [ + 45 + ], + "ǀ": [ + 46 + ], + "ǁ": [ + 47 + ], + "ǂ": [ + 48 + ], + "ǃ": [ + 49 + ], + "ɐ": [ + 50 + ], + "ɑ": [ + 51 + ], + "ɒ": [ + 52 + ], + "ɓ": [ + 53 + ], + "ɔ": [ + 54 + ], + "ɕ": [ + 55 + ], + "ɖ": [ + 56 + ], + "ɗ": [ + 57 + ], + "ɘ": [ + 58 + ], + "ə": [ + 59 + ], + "ɚ": [ + 60 + ], + "ɛ": [ + 61 + ], + "ɜ": [ + 62 + ], + "ɞ": [ + 63 + ], + "ɟ": [ + 64 + ], + "ɠ": [ + 65 + ], + "ɡ": [ + 66 + ], + "ɢ": [ + 67 + ], + "ɣ": [ + 68 + ], + "ɤ": [ + 69 + ], + "ɥ": [ + 70 + ], + "ɦ": [ + 71 + ], + "ɧ": [ + 72 + ], + "ɨ": [ + 73 + ], + "ɪ": [ + 74 + ], + "ɫ": [ + 75 + ], + "ɬ": [ + 76 + ], + "ɭ": [ + 77 + ], + "ɮ": [ + 78 + ], + "ɯ": [ + 79 + ], + "ɰ": [ + 80 + ], + "ɱ": [ + 81 + ], + "ɲ": [ + 82 + ], + "ɳ": [ + 83 + ], + "ɴ": [ + 84 + ], + "ɵ": [ + 85 + ], + "ɶ": [ + 86 + ], + "ɸ": [ + 87 + ], + "ɹ": [ + 88 + ], + "ɺ": [ + 89 + ], + "ɻ": [ + 90 + ], + "ɽ": [ + 91 + ], + "ɾ": [ + 92 + ], + "ʀ": [ + 93 + ], + "ʁ": [ + 94 + ], + "ʂ": [ + 95 + ], + "ʃ": [ + 96 + ], + "ʄ": [ + 97 + ], + "ʈ": [ + 98 + ], + "ʉ": [ + 99 + ], + "ʊ": [ + 100 + ], + "ʋ": [ + 101 + ], + "ʌ": [ + 102 + ], + "ʍ": [ + 103 + ], + "ʎ": [ + 104 + ], + "ʏ": [ + 105 + ], + "ʐ": [ + 106 + ], + "ʑ": [ + 107 + ], + "ʒ": [ + 108 + ], + "ʔ": [ + 109 + ], + "ʕ": [ + 110 + ], + "ʘ": [ + 111 + ], + "ʙ": [ + 112 + ], + "ʛ": [ + 113 + ], + "ʜ": [ + 114 + ], + "ʝ": [ + 115 + ], + "ʟ": [ + 116 + ], + "ʡ": [ + 117 + ], + "ʢ": [ + 118 + ], + "ʲ": [ + 119 + ], + "ˈ": [ + 120 + ], + "ˌ": [ + 121 + ], + "ː": [ + 122 + ], + "ˑ": [ + 123 + ], + "˞": [ + 124 + ], + "β": [ + 125 + ], + "θ": [ + 126 + ], + "χ": [ + 127 + ], + "ᵻ": [ + 128 + ], + "ⱱ": [ + 129 + ], + "0": [ + 130 + ], + "1": [ + 131 + ], + "2": [ + 132 + ], + "3": [ + 133 + ], + "4": [ + 134 + ], + "5": [ + 135 + ], + "6": [ + 136 + ], + "7": [ + 137 + ], + "8": [ + 138 + ], + "9": [ + 139 + ], + "̧": [ + 140 + ], + "̃": [ + 141 + ], + "̪": [ + 142 + ], + "̯": [ + 143 + ], + "̩": [ + 144 + ], + "ʰ": [ + 145 + ], + "ˤ": [ + 146 + ], + "ε": [ + 147 + ], + "↓": [ + 148 + ], + "#": [ + 149 + ], + "\"": [ + 150 + ] + }, + "num_symbols": 256, + "num_speakers": 1, + "speaker_id_map": {}, + "piper_version": "1.0.0", + "language": { + "code": "ru_RU", + "family": "ru", + "region": "RU", + "name_native": "Русский", + "name_english": "Russian", + "country_english": "Russia" + }, + "dataset": "irina" +} \ No newline at end of file diff --git a/ru/ru_RU/irina/medium/samples/speaker_0.mp3 b/ru/ru_RU/irina/medium/samples/speaker_0.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..ee3389cf344b8725244b82a2e8f023edfbd06043 Binary files /dev/null and b/ru/ru_RU/irina/medium/samples/speaker_0.mp3 differ diff --git a/ru/ru_RU/ruslan/medium/MODEL_CARD b/ru/ru_RU/ruslan/medium/MODEL_CARD new file mode 100644 index 0000000000000000000000000000000000000000..81aad6ac008f24b986f97431cc1da8a50531c3aa --- /dev/null +++ b/ru/ru_RU/ruslan/medium/MODEL_CARD @@ -0,0 +1,15 @@ +# Model card for ruslan (medium) + +* Language: ru_RU (Russian, Russia) +* Speakers: 1 +* Quality: medium +* Samplerate: 22,050Hz + +## Dataset + +* URL: https://ruslan-corpus.github.io/ +* License: https://creativecommons.org/licenses/by-nc-sa/4.0/ + +## Training + +Finetuned from U.S. English lessac voice (medium quality). diff --git a/ru/ru_RU/ruslan/medium/ru_RU-ruslan-medium.onnx b/ru/ru_RU/ruslan/medium/ru_RU-ruslan-medium.onnx new file mode 100644 index 0000000000000000000000000000000000000000..b5b94ba353364fca86f4a54af269a991f5a7eede --- /dev/null +++ b/ru/ru_RU/ruslan/medium/ru_RU-ruslan-medium.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:72a5f88e0b20928064eb45d88e1daa21f8af62d18613580d32cbb4aed48dcf7f +size 63201294 diff --git a/ru/ru_RU/ruslan/medium/ru_RU-ruslan-medium.onnx.json b/ru/ru_RU/ruslan/medium/ru_RU-ruslan-medium.onnx.json new file mode 100644 index 0000000000000000000000000000000000000000..c302bba5313f5054c5fbed0eeea60aac72a29553 --- /dev/null +++ b/ru/ru_RU/ruslan/medium/ru_RU-ruslan-medium.onnx.json @@ -0,0 +1,493 @@ +{ + "audio": { + "sample_rate": 22050, + "quality": "medium" + }, + "espeak": { + "voice": "ru" + }, + "inference": { + "noise_scale": 0.667, + "length_scale": 1, + "noise_w": 0.8 + }, + "phoneme_type": "espeak", + "phoneme_map": {}, + "phoneme_id_map": { + "_": [ + 0 + ], + "^": [ + 1 + ], + "$": [ + 2 + ], + " ": [ + 3 + ], + "!": [ + 4 + ], + "'": [ + 5 + ], + "(": [ + 6 + ], + ")": [ + 7 + ], + ",": [ + 8 + ], + "-": [ + 9 + ], + ".": [ + 10 + ], + ":": [ + 11 + ], + ";": [ + 12 + ], + "?": [ + 13 + ], + "a": [ + 14 + ], + "b": [ + 15 + ], + "c": [ + 16 + ], + "d": [ + 17 + ], + "e": [ + 18 + ], + "f": [ + 19 + ], + "h": [ + 20 + ], + "i": [ + 21 + ], + "j": [ + 22 + ], + "k": [ + 23 + ], + "l": [ + 24 + ], + "m": [ + 25 + ], + "n": [ + 26 + ], + "o": [ + 27 + ], + "p": [ + 28 + ], + "q": [ + 29 + ], + "r": [ + 30 + ], + "s": [ + 31 + ], + "t": [ + 32 + ], + "u": [ + 33 + ], + "v": [ + 34 + ], + "w": [ + 35 + ], + "x": [ + 36 + ], + "y": [ + 37 + ], + "z": [ + 38 + ], + "æ": [ + 39 + ], + "ç": [ + 40 + ], + "ð": [ + 41 + ], + "ø": [ + 42 + ], + "ħ": [ + 43 + ], + "ŋ": [ + 44 + ], + "œ": [ + 45 + ], + "ǀ": [ + 46 + ], + "ǁ": [ + 47 + ], + "ǂ": [ + 48 + ], + "ǃ": [ + 49 + ], + "ɐ": [ + 50 + ], + "ɑ": [ + 51 + ], + "ɒ": [ + 52 + ], + "ɓ": [ + 53 + ], + "ɔ": [ + 54 + ], + "ɕ": [ + 55 + ], + "ɖ": [ + 56 + ], + "ɗ": [ + 57 + ], + "ɘ": [ + 58 + ], + "ə": [ + 59 + ], + "ɚ": [ + 60 + ], + "ɛ": [ + 61 + ], + "ɜ": [ + 62 + ], + "ɞ": [ + 63 + ], + "ɟ": [ + 64 + ], + "ɠ": [ + 65 + ], + "ɡ": [ + 66 + ], + "ɢ": [ + 67 + ], + "ɣ": [ + 68 + ], + "ɤ": [ + 69 + ], + "ɥ": [ + 70 + ], + "ɦ": [ + 71 + ], + "ɧ": [ + 72 + ], + "ɨ": [ + 73 + ], + "ɪ": [ + 74 + ], + "ɫ": [ + 75 + ], + "ɬ": [ + 76 + ], + "ɭ": [ + 77 + ], + "ɮ": [ + 78 + ], + "ɯ": [ + 79 + ], + "ɰ": [ + 80 + ], + "ɱ": [ + 81 + ], + "ɲ": [ + 82 + ], + "ɳ": [ + 83 + ], + "ɴ": [ + 84 + ], + "ɵ": [ + 85 + ], + "ɶ": [ + 86 + ], + "ɸ": [ + 87 + ], + "ɹ": [ + 88 + ], + "ɺ": [ + 89 + ], + "ɻ": [ + 90 + ], + "ɽ": [ + 91 + ], + "ɾ": [ + 92 + ], + "ʀ": [ + 93 + ], + "ʁ": [ + 94 + ], + "ʂ": [ + 95 + ], + "ʃ": [ + 96 + ], + "ʄ": [ + 97 + ], + "ʈ": [ + 98 + ], + "ʉ": [ + 99 + ], + "ʊ": [ + 100 + ], + "ʋ": [ + 101 + ], + "ʌ": [ + 102 + ], + "ʍ": [ + 103 + ], + "ʎ": [ + 104 + ], + "ʏ": [ + 105 + ], + "ʐ": [ + 106 + ], + "ʑ": [ + 107 + ], + "ʒ": [ + 108 + ], + "ʔ": [ + 109 + ], + "ʕ": [ + 110 + ], + "ʘ": [ + 111 + ], + "ʙ": [ + 112 + ], + "ʛ": [ + 113 + ], + "ʜ": [ + 114 + ], + "ʝ": [ + 115 + ], + "ʟ": [ + 116 + ], + "ʡ": [ + 117 + ], + "ʢ": [ + 118 + ], + "ʲ": [ + 119 + ], + "ˈ": [ + 120 + ], + "ˌ": [ + 121 + ], + "ː": [ + 122 + ], + "ˑ": [ + 123 + ], + "˞": [ + 124 + ], + "β": [ + 125 + ], + "θ": [ + 126 + ], + "χ": [ + 127 + ], + "ᵻ": [ + 128 + ], + "ⱱ": [ + 129 + ], + "0": [ + 130 + ], + "1": [ + 131 + ], + "2": [ + 132 + ], + "3": [ + 133 + ], + "4": [ + 134 + ], + "5": [ + 135 + ], + "6": [ + 136 + ], + "7": [ + 137 + ], + "8": [ + 138 + ], + "9": [ + 139 + ], + "̧": [ + 140 + ], + "̃": [ + 141 + ], + "̪": [ + 142 + ], + "̯": [ + 143 + ], + "̩": [ + 144 + ], + "ʰ": [ + 145 + ], + "ˤ": [ + 146 + ], + "ε": [ + 147 + ], + "↓": [ + 148 + ], + "#": [ + 149 + ], + "\"": [ + 150 + ], + "↑": [ + 151 + ], + "̺": [ + 152 + ], + "̻": [ + 153 + ] + }, + "num_symbols": 256, + "num_speakers": 1, + "speaker_id_map": {}, + "piper_version": "1.0.0", + "language": { + "code": "ru_RU", + "family": "ru", + "region": "RU", + "name_native": "Русский", + "name_english": "Russian", + "country_english": "Russia" + }, + "dataset": "ruslan" +} \ No newline at end of file diff --git a/ru/ru_RU/ruslan/medium/samples/speaker_0.mp3 b/ru/ru_RU/ruslan/medium/samples/speaker_0.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..14204472a0b2ccdc8227dd88f9373edd64e21e97 Binary files /dev/null and b/ru/ru_RU/ruslan/medium/samples/speaker_0.mp3 differ diff --git a/sk/sk_SK/lili/medium/MODEL_CARD b/sk/sk_SK/lili/medium/MODEL_CARD new file mode 100644 index 0000000000000000000000000000000000000000..9b10f15b1f7a2a904adeadc64ab68572395394c8 --- /dev/null +++ b/sk/sk_SK/lili/medium/MODEL_CARD @@ -0,0 +1,15 @@ +# Model card for lili (medium) + +* Language: sk_SK (Slovak, Slovakia) +* Speakers: 1 +* Quality: medium +* Samplerate: 22,050Hz + +## Dataset + +* URL: https://github.com/NabuCasa/voice-datasets +* License: CC0 + +## Training + +Finetuned from U.S. English lessac voice (medium quality). diff --git a/sk/sk_SK/lili/medium/samples/speaker_0.mp3 b/sk/sk_SK/lili/medium/samples/speaker_0.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..10568ffba3a54322d4c0f4ea89dc9c5579d3962d Binary files /dev/null and b/sk/sk_SK/lili/medium/samples/speaker_0.mp3 differ diff --git a/sk/sk_SK/lili/medium/sk_SK-lili-medium.onnx b/sk/sk_SK/lili/medium/sk_SK-lili-medium.onnx new file mode 100644 index 0000000000000000000000000000000000000000..7fec245bb65573c249dbb0be383292d120067f57 --- /dev/null +++ b/sk/sk_SK/lili/medium/sk_SK-lili-medium.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8e21603e0165252849efe0bcb3fbffd1b3193c36bd1f556e1106911e8015526 +size 63201294 diff --git a/sk/sk_SK/lili/medium/sk_SK-lili-medium.onnx.json b/sk/sk_SK/lili/medium/sk_SK-lili-medium.onnx.json new file mode 100644 index 0000000000000000000000000000000000000000..b39144b92a9ec58ad80f83f752b143275c178354 --- /dev/null +++ b/sk/sk_SK/lili/medium/sk_SK-lili-medium.onnx.json @@ -0,0 +1,502 @@ +{ + "audio": { + "sample_rate": 22050, + "quality": "medium" + }, + "espeak": { + "voice": "sk" + }, + "inference": { + "noise_scale": 0.667, + "length_scale": 1, + "noise_w": 0.8 + }, + "phoneme_type": "espeak", + "phoneme_map": {}, + "phoneme_id_map": { + " ": [ + 3 + ], + "!": [ + 4 + ], + "\"": [ + 150 + ], + "#": [ + 149 + ], + "$": [ + 2 + ], + "'": [ + 5 + ], + "(": [ + 6 + ], + ")": [ + 7 + ], + ",": [ + 8 + ], + "-": [ + 9 + ], + ".": [ + 10 + ], + "0": [ + 130 + ], + "1": [ + 131 + ], + "2": [ + 132 + ], + "3": [ + 133 + ], + "4": [ + 134 + ], + "5": [ + 135 + ], + "6": [ + 136 + ], + "7": [ + 137 + ], + "8": [ + 138 + ], + "9": [ + 139 + ], + ":": [ + 11 + ], + ";": [ + 12 + ], + "?": [ + 13 + ], + "X": [ + 156 + ], + "^": [ + 1 + ], + "_": [ + 0 + ], + "a": [ + 14 + ], + "b": [ + 15 + ], + "c": [ + 16 + ], + "d": [ + 17 + ], + "e": [ + 18 + ], + "f": [ + 19 + ], + "g": [ + 154 + ], + "h": [ + 20 + ], + "i": [ + 21 + ], + "j": [ + 22 + ], + "k": [ + 23 + ], + "l": [ + 24 + ], + "m": [ + 25 + ], + "n": [ + 26 + ], + "o": [ + 27 + ], + "p": [ + 28 + ], + "q": [ + 29 + ], + "r": [ + 30 + ], + "s": [ + 31 + ], + "t": [ + 32 + ], + "u": [ + 33 + ], + "v": [ + 34 + ], + "w": [ + 35 + ], + "x": [ + 36 + ], + "y": [ + 37 + ], + "z": [ + 38 + ], + "æ": [ + 39 + ], + "ç": [ + 40 + ], + "ð": [ + 41 + ], + "ø": [ + 42 + ], + "ħ": [ + 43 + ], + "ŋ": [ + 44 + ], + "œ": [ + 45 + ], + "ǀ": [ + 46 + ], + "ǁ": [ + 47 + ], + "ǂ": [ + 48 + ], + "ǃ": [ + 49 + ], + "ɐ": [ + 50 + ], + "ɑ": [ + 51 + ], + "ɒ": [ + 52 + ], + "ɓ": [ + 53 + ], + "ɔ": [ + 54 + ], + "ɕ": [ + 55 + ], + "ɖ": [ + 56 + ], + "ɗ": [ + 57 + ], + "ɘ": [ + 58 + ], + "ə": [ + 59 + ], + "ɚ": [ + 60 + ], + "ɛ": [ + 61 + ], + "ɜ": [ + 62 + ], + "ɞ": [ + 63 + ], + "ɟ": [ + 64 + ], + "ɠ": [ + 65 + ], + "ɡ": [ + 66 + ], + "ɢ": [ + 67 + ], + "ɣ": [ + 68 + ], + "ɤ": [ + 69 + ], + "ɥ": [ + 70 + ], + "ɦ": [ + 71 + ], + "ɧ": [ + 72 + ], + "ɨ": [ + 73 + ], + "ɪ": [ + 74 + ], + "ɫ": [ + 75 + ], + "ɬ": [ + 76 + ], + "ɭ": [ + 77 + ], + "ɮ": [ + 78 + ], + "ɯ": [ + 79 + ], + "ɰ": [ + 80 + ], + "ɱ": [ + 81 + ], + "ɲ": [ + 82 + ], + "ɳ": [ + 83 + ], + "ɴ": [ + 84 + ], + "ɵ": [ + 85 + ], + "ɶ": [ + 86 + ], + "ɸ": [ + 87 + ], + "ɹ": [ + 88 + ], + "ɺ": [ + 89 + ], + "ɻ": [ + 90 + ], + "ɽ": [ + 91 + ], + "ɾ": [ + 92 + ], + "ʀ": [ + 93 + ], + "ʁ": [ + 94 + ], + "ʂ": [ + 95 + ], + "ʃ": [ + 96 + ], + "ʄ": [ + 97 + ], + "ʈ": [ + 98 + ], + "ʉ": [ + 99 + ], + "ʊ": [ + 100 + ], + "ʋ": [ + 101 + ], + "ʌ": [ + 102 + ], + "ʍ": [ + 103 + ], + "ʎ": [ + 104 + ], + "ʏ": [ + 105 + ], + "ʐ": [ + 106 + ], + "ʑ": [ + 107 + ], + "ʒ": [ + 108 + ], + "ʔ": [ + 109 + ], + "ʕ": [ + 110 + ], + "ʘ": [ + 111 + ], + "ʙ": [ + 112 + ], + "ʛ": [ + 113 + ], + "ʜ": [ + 114 + ], + "ʝ": [ + 115 + ], + "ʟ": [ + 116 + ], + "ʡ": [ + 117 + ], + "ʢ": [ + 118 + ], + "ʦ": [ + 155 + ], + "ʰ": [ + 145 + ], + "ʲ": [ + 119 + ], + "ˈ": [ + 120 + ], + "ˌ": [ + 121 + ], + "ː": [ + 122 + ], + "ˑ": [ + 123 + ], + "˞": [ + 124 + ], + "ˤ": [ + 146 + ], + "̃": [ + 141 + ], + "̧": [ + 140 + ], + "̩": [ + 144 + ], + "̪": [ + 142 + ], + "̯": [ + 143 + ], + "̺": [ + 152 + ], + "̻": [ + 153 + ], + "β": [ + 125 + ], + "ε": [ + 147 + ], + "θ": [ + 126 + ], + "χ": [ + 127 + ], + "ᵻ": [ + 128 + ], + "↑": [ + 151 + ], + "↓": [ + 148 + ], + "ⱱ": [ + 129 + ] + }, + "num_symbols": 256, + "num_speakers": 1, + "speaker_id_map": {}, + "piper_version": "1.0.0", + "language": { + "code": "sk_SK", + "family": "sk", + "region": "SK", + "name_native": "Slovenčina", + "name_english": "Slovak", + "country_english": "Slovakia" + }, + "dataset": "lili" +} \ No newline at end of file diff --git a/sr/sr_RS/serbski_institut/medium/MODEL_CARD b/sr/sr_RS/serbski_institut/medium/MODEL_CARD new file mode 100644 index 0000000000000000000000000000000000000000..7ef78a3c06753d2a3a51acd5b7508ec0b3fae43f --- /dev/null +++ b/sr/sr_RS/serbski_institut/medium/MODEL_CARD @@ -0,0 +1,15 @@ +# Model card for serbski_institut (medium) + +* Language: sr_RS (Serbian, Serbia) +* Speakers: 2 +* Quality: medium +* Samplerate: 22,050Hz + +## Dataset + +* URL: https://github.com/marytts/serbski-institut-dsb-data +* License: https://creativecommons.org/licenses/by-nc-sa/4.0/ + +## Training + +Finetuned from U.S. English lessac voice (medium quality). diff --git a/sr/sr_RS/serbski_institut/medium/samples/speaker_0.mp3 b/sr/sr_RS/serbski_institut/medium/samples/speaker_0.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..284eeb4630a4a31719629a16e8d4bdff79774a4f Binary files /dev/null and b/sr/sr_RS/serbski_institut/medium/samples/speaker_0.mp3 differ diff --git a/sr/sr_RS/serbski_institut/medium/samples/speaker_1.mp3 b/sr/sr_RS/serbski_institut/medium/samples/speaker_1.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..cad3dc1c627fcce0a25f560fca09a1c475e737db Binary files /dev/null and b/sr/sr_RS/serbski_institut/medium/samples/speaker_1.mp3 differ diff --git a/sr/sr_RS/serbski_institut/medium/sr_RS-serbski_institut-medium.onnx b/sr/sr_RS/serbski_institut/medium/sr_RS-serbski_institut-medium.onnx new file mode 100644 index 0000000000000000000000000000000000000000..18507eb02664ed0c4e4717695fbeaa2be3a29859 --- /dev/null +++ b/sr/sr_RS/serbski_institut/medium/sr_RS-serbski_institut-medium.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d7003890cf596e653f660a4fd97fd17f57f1eceb6d9727abad9cd76d2fda0d80 +size 76733615 diff --git a/sr/sr_RS/serbski_institut/medium/sr_RS-serbski_institut-medium.onnx.json b/sr/sr_RS/serbski_institut/medium/sr_RS-serbski_institut-medium.onnx.json new file mode 100644 index 0000000000000000000000000000000000000000..f12b695d094d0c294cfac34eef62ee74c57838e5 --- /dev/null +++ b/sr/sr_RS/serbski_institut/medium/sr_RS-serbski_institut-medium.onnx.json @@ -0,0 +1,505 @@ +{ + "piper_version": "1.0.0", + "audio": { + "sample_rate": 22050, + "quality": "medium" + }, + "espeak": { + "voice": "sr" + }, + "inference": { + "noise_scale": 0.667, + "length_scale": 1, + "noise_w": 0.8 + }, + "phoneme_type": "espeak", + "phoneme_map": {}, + "phoneme_id_map": { + " ": [ + 3 + ], + "!": [ + 4 + ], + "\"": [ + 150 + ], + "#": [ + 149 + ], + "$": [ + 2 + ], + "'": [ + 5 + ], + "(": [ + 6 + ], + ")": [ + 7 + ], + ",": [ + 8 + ], + "-": [ + 9 + ], + ".": [ + 10 + ], + "0": [ + 130 + ], + "1": [ + 131 + ], + "2": [ + 132 + ], + "3": [ + 133 + ], + "4": [ + 134 + ], + "5": [ + 135 + ], + "6": [ + 136 + ], + "7": [ + 137 + ], + "8": [ + 138 + ], + "9": [ + 139 + ], + ":": [ + 11 + ], + ";": [ + 12 + ], + "?": [ + 13 + ], + "X": [ + 156 + ], + "^": [ + 1 + ], + "_": [ + 0 + ], + "a": [ + 14 + ], + "b": [ + 15 + ], + "c": [ + 16 + ], + "d": [ + 17 + ], + "e": [ + 18 + ], + "f": [ + 19 + ], + "g": [ + 154 + ], + "h": [ + 20 + ], + "i": [ + 21 + ], + "j": [ + 22 + ], + "k": [ + 23 + ], + "l": [ + 24 + ], + "m": [ + 25 + ], + "n": [ + 26 + ], + "o": [ + 27 + ], + "p": [ + 28 + ], + "q": [ + 29 + ], + "r": [ + 30 + ], + "s": [ + 31 + ], + "t": [ + 32 + ], + "u": [ + 33 + ], + "v": [ + 34 + ], + "w": [ + 35 + ], + "x": [ + 36 + ], + "y": [ + 37 + ], + "z": [ + 38 + ], + "æ": [ + 39 + ], + "ç": [ + 40 + ], + "ð": [ + 41 + ], + "ø": [ + 42 + ], + "ħ": [ + 43 + ], + "ŋ": [ + 44 + ], + "œ": [ + 45 + ], + "ǀ": [ + 46 + ], + "ǁ": [ + 47 + ], + "ǂ": [ + 48 + ], + "ǃ": [ + 49 + ], + "ɐ": [ + 50 + ], + "ɑ": [ + 51 + ], + "ɒ": [ + 52 + ], + "ɓ": [ + 53 + ], + "ɔ": [ + 54 + ], + "ɕ": [ + 55 + ], + "ɖ": [ + 56 + ], + "ɗ": [ + 57 + ], + "ɘ": [ + 58 + ], + "ə": [ + 59 + ], + "ɚ": [ + 60 + ], + "ɛ": [ + 61 + ], + "ɜ": [ + 62 + ], + "ɞ": [ + 63 + ], + "ɟ": [ + 64 + ], + "ɠ": [ + 65 + ], + "ɡ": [ + 66 + ], + "ɢ": [ + 67 + ], + "ɣ": [ + 68 + ], + "ɤ": [ + 69 + ], + "ɥ": [ + 70 + ], + "ɦ": [ + 71 + ], + "ɧ": [ + 72 + ], + "ɨ": [ + 73 + ], + "ɪ": [ + 74 + ], + "ɫ": [ + 75 + ], + "ɬ": [ + 76 + ], + "ɭ": [ + 77 + ], + "ɮ": [ + 78 + ], + "ɯ": [ + 79 + ], + "ɰ": [ + 80 + ], + "ɱ": [ + 81 + ], + "ɲ": [ + 82 + ], + "ɳ": [ + 83 + ], + "ɴ": [ + 84 + ], + "ɵ": [ + 85 + ], + "ɶ": [ + 86 + ], + "ɸ": [ + 87 + ], + "ɹ": [ + 88 + ], + "ɺ": [ + 89 + ], + "ɻ": [ + 90 + ], + "ɽ": [ + 91 + ], + "ɾ": [ + 92 + ], + "ʀ": [ + 93 + ], + "ʁ": [ + 94 + ], + "ʂ": [ + 95 + ], + "ʃ": [ + 96 + ], + "ʄ": [ + 97 + ], + "ʈ": [ + 98 + ], + "ʉ": [ + 99 + ], + "ʊ": [ + 100 + ], + "ʋ": [ + 101 + ], + "ʌ": [ + 102 + ], + "ʍ": [ + 103 + ], + "ʎ": [ + 104 + ], + "ʏ": [ + 105 + ], + "ʐ": [ + 106 + ], + "ʑ": [ + 107 + ], + "ʒ": [ + 108 + ], + "ʔ": [ + 109 + ], + "ʕ": [ + 110 + ], + "ʘ": [ + 111 + ], + "ʙ": [ + 112 + ], + "ʛ": [ + 113 + ], + "ʜ": [ + 114 + ], + "ʝ": [ + 115 + ], + "ʟ": [ + 116 + ], + "ʡ": [ + 117 + ], + "ʢ": [ + 118 + ], + "ʦ": [ + 155 + ], + "ʰ": [ + 145 + ], + "ʲ": [ + 119 + ], + "ˈ": [ + 120 + ], + "ˌ": [ + 121 + ], + "ː": [ + 122 + ], + "ˑ": [ + 123 + ], + "˞": [ + 124 + ], + "ˤ": [ + 146 + ], + "̃": [ + 141 + ], + "̧": [ + 140 + ], + "̩": [ + 144 + ], + "̪": [ + 142 + ], + "̯": [ + 143 + ], + "̺": [ + 152 + ], + "̻": [ + 153 + ], + "β": [ + 125 + ], + "ε": [ + 147 + ], + "θ": [ + 126 + ], + "χ": [ + 127 + ], + "ᵻ": [ + 128 + ], + "↑": [ + 151 + ], + "↓": [ + 148 + ], + "ⱱ": [ + 129 + ] + }, + "num_symbols": 256, + "num_speakers": 2, + "speaker_id_map": { + "dsb": 0, + "hsb": 1 + }, + "language": { + "code": "sr_RS", + "family": "sr", + "region": "RS", + "name_native": "srpski", + "name_english": "Serbian", + "country_english": "Serbia" + }, + "dataset": "serbski_institut" +} \ No newline at end of file diff --git a/sv/sv_SE/nst/medium/MODEL_CARD b/sv/sv_SE/nst/medium/MODEL_CARD new file mode 100644 index 0000000000000000000000000000000000000000..2ccd0d637630294f1b09c96cb0df727ad785169d --- /dev/null +++ b/sv/sv_SE/nst/medium/MODEL_CARD @@ -0,0 +1,15 @@ +# Model card for nst (medium) + +* Language: sv_SE (Swedish, Sweden) +* Speakers: 1 +* Quality: medium +* Samplerate: 22,050Hz + +## Dataset + +* URL: https://www.nb.no/sprakbanken/en/resource-catalogue/oai-nb-no-sbr-17/ +* License: CC0 + +## Training + +Trained from scratch by KBLab at The National Library of Sweden. diff --git a/sv/sv_SE/nst/medium/samples/speaker_0.mp3 b/sv/sv_SE/nst/medium/samples/speaker_0.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..bf9fa26b54e0998ee08532d636e475640f10a5de Binary files /dev/null and b/sv/sv_SE/nst/medium/samples/speaker_0.mp3 differ diff --git a/sv/sv_SE/nst/medium/sv_SE-nst-medium.onnx b/sv/sv_SE/nst/medium/sv_SE-nst-medium.onnx new file mode 100644 index 0000000000000000000000000000000000000000..52debfb4c66b1aa2d051ad5ccbbb95435f53794a --- /dev/null +++ b/sv/sv_SE/nst/medium/sv_SE-nst-medium.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df011f56825a59dd1efc080c38a65a1ef70407e60f63050e9246f43a3d7e471e +size 63104526 diff --git a/sv/sv_SE/nst/medium/sv_SE-nst-medium.onnx.json b/sv/sv_SE/nst/medium/sv_SE-nst-medium.onnx.json new file mode 100644 index 0000000000000000000000000000000000000000..9766b9769436046a610a505fda925032e12a66b3 --- /dev/null +++ b/sv/sv_SE/nst/medium/sv_SE-nst-medium.onnx.json @@ -0,0 +1,420 @@ +{ + "audio": { + "sample_rate": 22050, + "quality": "medium" + }, + "espeak": { + "voice": "sv" + }, + "inference": { + "noise_scale": 0.667, + "length_scale": 1, + "noise_w": 0.8 + }, + "phoneme_map": {}, + "phoneme_id_map": { + "_": [ + 0 + ], + "^": [ + 1 + ], + "$": [ + 2 + ], + " ": [ + 3 + ], + "!": [ + 4 + ], + "'": [ + 5 + ], + "(": [ + 6 + ], + ")": [ + 7 + ], + ",": [ + 8 + ], + "-": [ + 9 + ], + ".": [ + 10 + ], + ":": [ + 11 + ], + ";": [ + 12 + ], + "?": [ + 13 + ], + "a": [ + 14 + ], + "b": [ + 15 + ], + "c": [ + 16 + ], + "d": [ + 17 + ], + "e": [ + 18 + ], + "f": [ + 19 + ], + "h": [ + 20 + ], + "i": [ + 21 + ], + "j": [ + 22 + ], + "k": [ + 23 + ], + "l": [ + 24 + ], + "m": [ + 25 + ], + "n": [ + 26 + ], + "o": [ + 27 + ], + "p": [ + 28 + ], + "q": [ + 29 + ], + "r": [ + 30 + ], + "s": [ + 31 + ], + "t": [ + 32 + ], + "u": [ + 33 + ], + "v": [ + 34 + ], + "w": [ + 35 + ], + "x": [ + 36 + ], + "y": [ + 37 + ], + "z": [ + 38 + ], + "æ": [ + 39 + ], + "ç": [ + 40 + ], + "ð": [ + 41 + ], + "ø": [ + 42 + ], + "ħ": [ + 43 + ], + "ŋ": [ + 44 + ], + "œ": [ + 45 + ], + "ǀ": [ + 46 + ], + "ǁ": [ + 47 + ], + "ǂ": [ + 48 + ], + "ǃ": [ + 49 + ], + "ɐ": [ + 50 + ], + "ɑ": [ + 51 + ], + "ɒ": [ + 52 + ], + "ɓ": [ + 53 + ], + "ɔ": [ + 54 + ], + "ɕ": [ + 55 + ], + "ɖ": [ + 56 + ], + "ɗ": [ + 57 + ], + "ɘ": [ + 58 + ], + "ə": [ + 59 + ], + "ɚ": [ + 60 + ], + "ɛ": [ + 61 + ], + "ɜ": [ + 62 + ], + "ɞ": [ + 63 + ], + "ɟ": [ + 64 + ], + "ɠ": [ + 65 + ], + "ɡ": [ + 66 + ], + "ɢ": [ + 67 + ], + "ɣ": [ + 68 + ], + "ɤ": [ + 69 + ], + "ɥ": [ + 70 + ], + "ɦ": [ + 71 + ], + "ɧ": [ + 72 + ], + "ɨ": [ + 73 + ], + "ɪ": [ + 74 + ], + "ɫ": [ + 75 + ], + "ɬ": [ + 76 + ], + "ɭ": [ + 77 + ], + "ɮ": [ + 78 + ], + "ɯ": [ + 79 + ], + "ɰ": [ + 80 + ], + "ɱ": [ + 81 + ], + "ɲ": [ + 82 + ], + "ɳ": [ + 83 + ], + "ɴ": [ + 84 + ], + "ɵ": [ + 85 + ], + "ɶ": [ + 86 + ], + "ɸ": [ + 87 + ], + "ɹ": [ + 88 + ], + "ɺ": [ + 89 + ], + "ɻ": [ + 90 + ], + "ɽ": [ + 91 + ], + "ɾ": [ + 92 + ], + "ʀ": [ + 93 + ], + "ʁ": [ + 94 + ], + "ʂ": [ + 95 + ], + "ʃ": [ + 96 + ], + "ʄ": [ + 97 + ], + "ʈ": [ + 98 + ], + "ʉ": [ + 99 + ], + "ʊ": [ + 100 + ], + "ʋ": [ + 101 + ], + "ʌ": [ + 102 + ], + "ʍ": [ + 103 + ], + "ʎ": [ + 104 + ], + "ʏ": [ + 105 + ], + "ʐ": [ + 106 + ], + "ʑ": [ + 107 + ], + "ʒ": [ + 108 + ], + "ʔ": [ + 109 + ], + "ʕ": [ + 110 + ], + "ʘ": [ + 111 + ], + "ʙ": [ + 112 + ], + "ʛ": [ + 113 + ], + "ʜ": [ + 114 + ], + "ʝ": [ + 115 + ], + "ʟ": [ + 116 + ], + "ʡ": [ + 117 + ], + "ʢ": [ + 118 + ], + "ʲ": [ + 119 + ], + "ˈ": [ + 120 + ], + "ˌ": [ + 121 + ], + "ː": [ + 122 + ], + "ˑ": [ + 123 + ], + "˞": [ + 124 + ], + "β": [ + 125 + ], + "θ": [ + 126 + ], + "χ": [ + 127 + ], + "ᵻ": [ + 128 + ], + "ⱱ": [ + 129 + ] + }, + "num_symbols": 130, + "num_speakers": 1, + "speaker_id_map": {}, + "piper_version": "0.2.0", + "language": { + "code": "sv_SE", + "family": "sv", + "region": "SE", + "name_native": "Svenska", + "name_english": "Swedish", + "country_english": "Sweden" + }, + "dataset": "nst" +} \ No newline at end of file diff --git a/sw/sw_CD/lanfrica/medium/MODEL_CARD b/sw/sw_CD/lanfrica/medium/MODEL_CARD new file mode 100644 index 0000000000000000000000000000000000000000..13dee66d516c1572d982790c7009be065e8e609c --- /dev/null +++ b/sw/sw_CD/lanfrica/medium/MODEL_CARD @@ -0,0 +1,15 @@ +# Model card for lanfrica (medium) + +* Language: sw_CD (Swahili, Democratic Republic of the Congo) +* Speakers: 1 +* Quality: medium +* Samplerate: 22,050Hz + +## Dataset + +* URL: https://lanfrica.com/record/kiswahili-tts-dataset +* License: See URL + +## Training + +Finetuned from U.S. English lessac voice (medium quality). diff --git a/sw/sw_CD/lanfrica/medium/samples/speaker_0.mp3 b/sw/sw_CD/lanfrica/medium/samples/speaker_0.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..4eb591348e0d82052e093e22c67d99b7122f0ed0 Binary files /dev/null and b/sw/sw_CD/lanfrica/medium/samples/speaker_0.mp3 differ diff --git a/sw/sw_CD/lanfrica/medium/sw_CD-lanfrica-medium.onnx b/sw/sw_CD/lanfrica/medium/sw_CD-lanfrica-medium.onnx new file mode 100644 index 0000000000000000000000000000000000000000..3dc7dd3bfcd8943913035969f5a5bbbaf6e5fb61 --- /dev/null +++ b/sw/sw_CD/lanfrica/medium/sw_CD-lanfrica-medium.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f195ed12ca5e7875114618e5f00207af364602e21ca78c8a6d3d7674f9259fa +size 63201294 diff --git a/sw/sw_CD/lanfrica/medium/sw_CD-lanfrica-medium.onnx.json b/sw/sw_CD/lanfrica/medium/sw_CD-lanfrica-medium.onnx.json new file mode 100644 index 0000000000000000000000000000000000000000..f847f8bd62f70ee5dcee032dc366833b5acb3e29 --- /dev/null +++ b/sw/sw_CD/lanfrica/medium/sw_CD-lanfrica-medium.onnx.json @@ -0,0 +1,493 @@ +{ + "audio": { + "sample_rate": 22050, + "quality": "medium" + }, + "espeak": { + "voice": "sw" + }, + "inference": { + "noise_scale": 0.667, + "length_scale": 1, + "noise_w": 0.8 + }, + "phoneme_type": "espeak", + "phoneme_map": {}, + "phoneme_id_map": { + "_": [ + 0 + ], + "^": [ + 1 + ], + "$": [ + 2 + ], + " ": [ + 3 + ], + "!": [ + 4 + ], + "'": [ + 5 + ], + "(": [ + 6 + ], + ")": [ + 7 + ], + ",": [ + 8 + ], + "-": [ + 9 + ], + ".": [ + 10 + ], + ":": [ + 11 + ], + ";": [ + 12 + ], + "?": [ + 13 + ], + "a": [ + 14 + ], + "b": [ + 15 + ], + "c": [ + 16 + ], + "d": [ + 17 + ], + "e": [ + 18 + ], + "f": [ + 19 + ], + "h": [ + 20 + ], + "i": [ + 21 + ], + "j": [ + 22 + ], + "k": [ + 23 + ], + "l": [ + 24 + ], + "m": [ + 25 + ], + "n": [ + 26 + ], + "o": [ + 27 + ], + "p": [ + 28 + ], + "q": [ + 29 + ], + "r": [ + 30 + ], + "s": [ + 31 + ], + "t": [ + 32 + ], + "u": [ + 33 + ], + "v": [ + 34 + ], + "w": [ + 35 + ], + "x": [ + 36 + ], + "y": [ + 37 + ], + "z": [ + 38 + ], + "æ": [ + 39 + ], + "ç": [ + 40 + ], + "ð": [ + 41 + ], + "ø": [ + 42 + ], + "ħ": [ + 43 + ], + "ŋ": [ + 44 + ], + "œ": [ + 45 + ], + "ǀ": [ + 46 + ], + "ǁ": [ + 47 + ], + "ǂ": [ + 48 + ], + "ǃ": [ + 49 + ], + "ɐ": [ + 50 + ], + "ɑ": [ + 51 + ], + "ɒ": [ + 52 + ], + "ɓ": [ + 53 + ], + "ɔ": [ + 54 + ], + "ɕ": [ + 55 + ], + "ɖ": [ + 56 + ], + "ɗ": [ + 57 + ], + "ɘ": [ + 58 + ], + "ə": [ + 59 + ], + "ɚ": [ + 60 + ], + "ɛ": [ + 61 + ], + "ɜ": [ + 62 + ], + "ɞ": [ + 63 + ], + "ɟ": [ + 64 + ], + "ɠ": [ + 65 + ], + "ɡ": [ + 66 + ], + "ɢ": [ + 67 + ], + "ɣ": [ + 68 + ], + "ɤ": [ + 69 + ], + "ɥ": [ + 70 + ], + "ɦ": [ + 71 + ], + "ɧ": [ + 72 + ], + "ɨ": [ + 73 + ], + "ɪ": [ + 74 + ], + "ɫ": [ + 75 + ], + "ɬ": [ + 76 + ], + "ɭ": [ + 77 + ], + "ɮ": [ + 78 + ], + "ɯ": [ + 79 + ], + "ɰ": [ + 80 + ], + "ɱ": [ + 81 + ], + "ɲ": [ + 82 + ], + "ɳ": [ + 83 + ], + "ɴ": [ + 84 + ], + "ɵ": [ + 85 + ], + "ɶ": [ + 86 + ], + "ɸ": [ + 87 + ], + "ɹ": [ + 88 + ], + "ɺ": [ + 89 + ], + "ɻ": [ + 90 + ], + "ɽ": [ + 91 + ], + "ɾ": [ + 92 + ], + "ʀ": [ + 93 + ], + "ʁ": [ + 94 + ], + "ʂ": [ + 95 + ], + "ʃ": [ + 96 + ], + "ʄ": [ + 97 + ], + "ʈ": [ + 98 + ], + "ʉ": [ + 99 + ], + "ʊ": [ + 100 + ], + "ʋ": [ + 101 + ], + "ʌ": [ + 102 + ], + "ʍ": [ + 103 + ], + "ʎ": [ + 104 + ], + "ʏ": [ + 105 + ], + "ʐ": [ + 106 + ], + "ʑ": [ + 107 + ], + "ʒ": [ + 108 + ], + "ʔ": [ + 109 + ], + "ʕ": [ + 110 + ], + "ʘ": [ + 111 + ], + "ʙ": [ + 112 + ], + "ʛ": [ + 113 + ], + "ʜ": [ + 114 + ], + "ʝ": [ + 115 + ], + "ʟ": [ + 116 + ], + "ʡ": [ + 117 + ], + "ʢ": [ + 118 + ], + "ʲ": [ + 119 + ], + "ˈ": [ + 120 + ], + "ˌ": [ + 121 + ], + "ː": [ + 122 + ], + "ˑ": [ + 123 + ], + "˞": [ + 124 + ], + "β": [ + 125 + ], + "θ": [ + 126 + ], + "χ": [ + 127 + ], + "ᵻ": [ + 128 + ], + "ⱱ": [ + 129 + ], + "0": [ + 130 + ], + "1": [ + 131 + ], + "2": [ + 132 + ], + "3": [ + 133 + ], + "4": [ + 134 + ], + "5": [ + 135 + ], + "6": [ + 136 + ], + "7": [ + 137 + ], + "8": [ + 138 + ], + "9": [ + 139 + ], + "̧": [ + 140 + ], + "̃": [ + 141 + ], + "̪": [ + 142 + ], + "̯": [ + 143 + ], + "̩": [ + 144 + ], + "ʰ": [ + 145 + ], + "ˤ": [ + 146 + ], + "ε": [ + 147 + ], + "↓": [ + 148 + ], + "#": [ + 149 + ], + "\"": [ + 150 + ], + "↑": [ + 151 + ], + "̺": [ + 152 + ], + "̻": [ + 153 + ] + }, + "num_symbols": 256, + "num_speakers": 1, + "speaker_id_map": {}, + "piper_version": "1.0.0", + "language": { + "code": "sw_CD", + "family": "sw", + "region": "CD", + "name_native": "Kiswahili", + "name_english": "Swahili", + "country_english": "Democratic Republic of the Congo" + }, + "dataset": "lanfrica" +} \ No newline at end of file diff --git a/tr/tr_TR/dfki/medium/MODEL_CARD b/tr/tr_TR/dfki/medium/MODEL_CARD new file mode 100644 index 0000000000000000000000000000000000000000..b56c295a8eaabd6fd34e1a7ad6eb335cdb902866 --- /dev/null +++ b/tr/tr_TR/dfki/medium/MODEL_CARD @@ -0,0 +1,15 @@ +# Model card for dfki (medium) + +* Language: tr_TR (Turkish, Turkey) +* Speakers: 1 +* Quality: medium +* Samplerate: 22,050Hz + +## Dataset + +* URL: https://github.com/marytts/dfki-ot-data/ +* License: https://creativecommons.org/licenses/by-nc-sa/4.0/ + +## Training + +Finetuned from U.S. English lessac voice (medium quality). diff --git a/tr/tr_TR/dfki/medium/samples/speaker_0.mp3 b/tr/tr_TR/dfki/medium/samples/speaker_0.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..e035bf678eb01a479ca8e79e71609f14c89af7ff Binary files /dev/null and b/tr/tr_TR/dfki/medium/samples/speaker_0.mp3 differ diff --git a/tr/tr_TR/dfki/medium/tr_TR-dfki-medium.onnx b/tr/tr_TR/dfki/medium/tr_TR-dfki-medium.onnx new file mode 100644 index 0000000000000000000000000000000000000000..4b74e9c8975c0ab1bfd8ebf78643c02b662e94e9 --- /dev/null +++ b/tr/tr_TR/dfki/medium/tr_TR-dfki-medium.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2844717f524ab965d3fe86e60562cbb601d3e456836efcc2196cc3a14112a8fb +size 63201294 diff --git a/tr/tr_TR/dfki/medium/tr_TR-dfki-medium.onnx.json b/tr/tr_TR/dfki/medium/tr_TR-dfki-medium.onnx.json new file mode 100644 index 0000000000000000000000000000000000000000..ea4036df5c9e4e07b8c2f42acce4d77d0fb6afdc --- /dev/null +++ b/tr/tr_TR/dfki/medium/tr_TR-dfki-medium.onnx.json @@ -0,0 +1,498 @@ +{ + "piper_version": "1.2.0", + "audio": { + "sample_rate": 22050, + "quality": "medium" + }, + "espeak": { + "voice": "tr" + }, + "inference": { + "noise_scale": 0.667, + "length_scale": 1, + "noise_w": 0.8, + "phoneme_silence": { + ",": 0.1, + ":": 0.1, + ";": 0.1 + } + }, + "phoneme_type": "espeak", + "phoneme_map": {}, + "phoneme_id_map": { + "_": [ + 0 + ], + "^": [ + 1 + ], + "$": [ + 2 + ], + " ": [ + 3 + ], + "!": [ + 4 + ], + "'": [ + 5 + ], + "(": [ + 6 + ], + ")": [ + 7 + ], + ",": [ + 8 + ], + "-": [ + 9 + ], + ".": [ + 10 + ], + ":": [ + 11 + ], + ";": [ + 12 + ], + "?": [ + 13 + ], + "a": [ + 14 + ], + "b": [ + 15 + ], + "c": [ + 16 + ], + "d": [ + 17 + ], + "e": [ + 18 + ], + "f": [ + 19 + ], + "h": [ + 20 + ], + "i": [ + 21 + ], + "j": [ + 22 + ], + "k": [ + 23 + ], + "l": [ + 24 + ], + "m": [ + 25 + ], + "n": [ + 26 + ], + "o": [ + 27 + ], + "p": [ + 28 + ], + "q": [ + 29 + ], + "r": [ + 30 + ], + "s": [ + 31 + ], + "t": [ + 32 + ], + "u": [ + 33 + ], + "v": [ + 34 + ], + "w": [ + 35 + ], + "x": [ + 36 + ], + "y": [ + 37 + ], + "z": [ + 38 + ], + "æ": [ + 39 + ], + "ç": [ + 40 + ], + "ð": [ + 41 + ], + "ø": [ + 42 + ], + "ħ": [ + 43 + ], + "ŋ": [ + 44 + ], + "œ": [ + 45 + ], + "ǀ": [ + 46 + ], + "ǁ": [ + 47 + ], + "ǂ": [ + 48 + ], + "ǃ": [ + 49 + ], + "ɐ": [ + 50 + ], + "ɑ": [ + 51 + ], + "ɒ": [ + 52 + ], + "ɓ": [ + 53 + ], + "ɔ": [ + 54 + ], + "ɕ": [ + 55 + ], + "ɖ": [ + 56 + ], + "ɗ": [ + 57 + ], + "ɘ": [ + 58 + ], + "ə": [ + 59 + ], + "ɚ": [ + 60 + ], + "ɛ": [ + 61 + ], + "ɜ": [ + 62 + ], + "ɞ": [ + 63 + ], + "ɟ": [ + 64 + ], + "ɠ": [ + 65 + ], + "ɡ": [ + 66 + ], + "ɢ": [ + 67 + ], + "ɣ": [ + 68 + ], + "ɤ": [ + 69 + ], + "ɥ": [ + 70 + ], + "ɦ": [ + 71 + ], + "ɧ": [ + 72 + ], + "ɨ": [ + 73 + ], + "ɪ": [ + 74 + ], + "ɫ": [ + 75 + ], + "ɬ": [ + 76 + ], + "ɭ": [ + 77 + ], + "ɮ": [ + 78 + ], + "ɯ": [ + 79 + ], + "ɰ": [ + 80 + ], + "ɱ": [ + 81 + ], + "ɲ": [ + 82 + ], + "ɳ": [ + 83 + ], + "ɴ": [ + 84 + ], + "ɵ": [ + 85 + ], + "ɶ": [ + 86 + ], + "ɸ": [ + 87 + ], + "ɹ": [ + 88 + ], + "ɺ": [ + 89 + ], + "ɻ": [ + 90 + ], + "ɽ": [ + 91 + ], + "ɾ": [ + 92 + ], + "ʀ": [ + 93 + ], + "ʁ": [ + 94 + ], + "ʂ": [ + 95 + ], + "ʃ": [ + 96 + ], + "ʄ": [ + 97 + ], + "ʈ": [ + 98 + ], + "ʉ": [ + 99 + ], + "ʊ": [ + 100 + ], + "ʋ": [ + 101 + ], + "ʌ": [ + 102 + ], + "ʍ": [ + 103 + ], + "ʎ": [ + 104 + ], + "ʏ": [ + 105 + ], + "ʐ": [ + 106 + ], + "ʑ": [ + 107 + ], + "ʒ": [ + 108 + ], + "ʔ": [ + 109 + ], + "ʕ": [ + 110 + ], + "ʘ": [ + 111 + ], + "ʙ": [ + 112 + ], + "ʛ": [ + 113 + ], + "ʜ": [ + 114 + ], + "ʝ": [ + 115 + ], + "ʟ": [ + 116 + ], + "ʡ": [ + 117 + ], + "ʢ": [ + 118 + ], + "ʲ": [ + 119 + ], + "ˈ": [ + 120 + ], + "ˌ": [ + 121 + ], + "ː": [ + 122 + ], + "ˑ": [ + 123 + ], + "˞": [ + 124 + ], + "β": [ + 125 + ], + "θ": [ + 126 + ], + "χ": [ + 127 + ], + "ᵻ": [ + 128 + ], + "ⱱ": [ + 129 + ], + "0": [ + 130 + ], + "1": [ + 131 + ], + "2": [ + 132 + ], + "3": [ + 133 + ], + "4": [ + 134 + ], + "5": [ + 135 + ], + "6": [ + 136 + ], + "7": [ + 137 + ], + "8": [ + 138 + ], + "9": [ + 139 + ], + "̧": [ + 140 + ], + "̃": [ + 141 + ], + "̪": [ + 142 + ], + "̯": [ + 143 + ], + "̩": [ + 144 + ], + "ʰ": [ + 145 + ], + "ˤ": [ + 146 + ], + "ε": [ + 147 + ], + "↓": [ + 148 + ], + "#": [ + 149 + ], + "\"": [ + 150 + ], + "↑": [ + 151 + ], + "̺": [ + 152 + ], + "̻": [ + 153 + ] + }, + "num_symbols": 256, + "num_speakers": 1, + "speaker_id_map": {}, + "language": { + "code": "tr_TR", + "family": "tr", + "region": "TR", + "name_native": "Türkçe", + "name_english": "Turkish", + "country_english": "Turkey" + }, + "dataset": "dfki" +} diff --git a/tr/tr_TR/fahrettin/medium/MODEL_CARD b/tr/tr_TR/fahrettin/medium/MODEL_CARD new file mode 100644 index 0000000000000000000000000000000000000000..04eccf82cab96e61736440466e2654f8f2af36a9 --- /dev/null +++ b/tr/tr_TR/fahrettin/medium/MODEL_CARD @@ -0,0 +1,15 @@ +# Model card for fahrettin (medium) + +* Language: tr_TR (Turkish, Turkey) +* Speakers: 1 +* Quality: medium +* Samplerate: 22,050Hz + +## Dataset + +* URL: https://github.com/NabuCasa/voice-datasets +* License: CC0 + +## Training + +Finetuned from U.S. English lessac voice (medium quality). diff --git a/tr/tr_TR/fahrettin/medium/samples/speaker_0.mp3 b/tr/tr_TR/fahrettin/medium/samples/speaker_0.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..c48da79b4582adf94384609bf74d6a2f0842e879 Binary files /dev/null and b/tr/tr_TR/fahrettin/medium/samples/speaker_0.mp3 differ diff --git a/tr/tr_TR/fahrettin/medium/tr_TR-fahrettin-medium.onnx b/tr/tr_TR/fahrettin/medium/tr_TR-fahrettin-medium.onnx new file mode 100644 index 0000000000000000000000000000000000000000..288d11c31f3aca58fbadd6264b7659347b2beeb7 --- /dev/null +++ b/tr/tr_TR/fahrettin/medium/tr_TR-fahrettin-medium.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39081c47270180e8a0dfac69b07bf329fb6d039fcc1279dbe26c2daf2848b190 +size 63201294 diff --git a/tr/tr_TR/fahrettin/medium/tr_TR-fahrettin-medium.onnx.json b/tr/tr_TR/fahrettin/medium/tr_TR-fahrettin-medium.onnx.json new file mode 100644 index 0000000000000000000000000000000000000000..b495964d27195aae03726391b35a6ab1a513859f --- /dev/null +++ b/tr/tr_TR/fahrettin/medium/tr_TR-fahrettin-medium.onnx.json @@ -0,0 +1,508 @@ +{ + "audio": { + "sample_rate": 22050, + "quality": "medium" + }, + "espeak": { + "voice": "tr" + }, + "inference": { + "noise_scale": 0.667, + "length_scale": 1, + "noise_w": 0.8 + }, + "phoneme_type": "espeak", + "phoneme_map": {}, + "phoneme_id_map": { + " ": [ + 3 + ], + "!": [ + 4 + ], + "\"": [ + 150 + ], + "#": [ + 149 + ], + "$": [ + 2 + ], + "'": [ + 5 + ], + "(": [ + 6 + ], + ")": [ + 7 + ], + ",": [ + 8 + ], + "-": [ + 9 + ], + ".": [ + 10 + ], + "0": [ + 130 + ], + "1": [ + 131 + ], + "2": [ + 132 + ], + "3": [ + 133 + ], + "4": [ + 134 + ], + "5": [ + 135 + ], + "6": [ + 136 + ], + "7": [ + 137 + ], + "8": [ + 138 + ], + "9": [ + 139 + ], + ":": [ + 11 + ], + ";": [ + 12 + ], + "?": [ + 13 + ], + "X": [ + 156 + ], + "^": [ + 1 + ], + "_": [ + 0 + ], + "a": [ + 14 + ], + "b": [ + 15 + ], + "c": [ + 16 + ], + "d": [ + 17 + ], + "e": [ + 18 + ], + "f": [ + 19 + ], + "g": [ + 154 + ], + "h": [ + 20 + ], + "i": [ + 21 + ], + "j": [ + 22 + ], + "k": [ + 23 + ], + "l": [ + 24 + ], + "m": [ + 25 + ], + "n": [ + 26 + ], + "o": [ + 27 + ], + "p": [ + 28 + ], + "q": [ + 29 + ], + "r": [ + 30 + ], + "s": [ + 31 + ], + "t": [ + 32 + ], + "u": [ + 33 + ], + "v": [ + 34 + ], + "w": [ + 35 + ], + "x": [ + 36 + ], + "y": [ + 37 + ], + "z": [ + 38 + ], + "æ": [ + 39 + ], + "ç": [ + 40 + ], + "ð": [ + 41 + ], + "ø": [ + 42 + ], + "ħ": [ + 43 + ], + "ŋ": [ + 44 + ], + "œ": [ + 45 + ], + "ǀ": [ + 46 + ], + "ǁ": [ + 47 + ], + "ǂ": [ + 48 + ], + "ǃ": [ + 49 + ], + "ɐ": [ + 50 + ], + "ɑ": [ + 51 + ], + "ɒ": [ + 52 + ], + "ɓ": [ + 53 + ], + "ɔ": [ + 54 + ], + "ɕ": [ + 55 + ], + "ɖ": [ + 56 + ], + "ɗ": [ + 57 + ], + "ɘ": [ + 58 + ], + "ə": [ + 59 + ], + "ɚ": [ + 60 + ], + "ɛ": [ + 61 + ], + "ɜ": [ + 62 + ], + "ɞ": [ + 63 + ], + "ɟ": [ + 64 + ], + "ɠ": [ + 65 + ], + "ɡ": [ + 66 + ], + "ɢ": [ + 67 + ], + "ɣ": [ + 68 + ], + "ɤ": [ + 69 + ], + "ɥ": [ + 70 + ], + "ɦ": [ + 71 + ], + "ɧ": [ + 72 + ], + "ɨ": [ + 73 + ], + "ɪ": [ + 74 + ], + "ɫ": [ + 75 + ], + "ɬ": [ + 76 + ], + "ɭ": [ + 77 + ], + "ɮ": [ + 78 + ], + "ɯ": [ + 79 + ], + "ɰ": [ + 80 + ], + "ɱ": [ + 81 + ], + "ɲ": [ + 82 + ], + "ɳ": [ + 83 + ], + "ɴ": [ + 84 + ], + "ɵ": [ + 85 + ], + "ɶ": [ + 86 + ], + "ɸ": [ + 87 + ], + "ɹ": [ + 88 + ], + "ɺ": [ + 89 + ], + "ɻ": [ + 90 + ], + "ɽ": [ + 91 + ], + "ɾ": [ + 92 + ], + "ʀ": [ + 93 + ], + "ʁ": [ + 94 + ], + "ʂ": [ + 95 + ], + "ʃ": [ + 96 + ], + "ʄ": [ + 97 + ], + "ʈ": [ + 98 + ], + "ʉ": [ + 99 + ], + "ʊ": [ + 100 + ], + "ʋ": [ + 101 + ], + "ʌ": [ + 102 + ], + "ʍ": [ + 103 + ], + "ʎ": [ + 104 + ], + "ʏ": [ + 105 + ], + "ʐ": [ + 106 + ], + "ʑ": [ + 107 + ], + "ʒ": [ + 108 + ], + "ʔ": [ + 109 + ], + "ʕ": [ + 110 + ], + "ʘ": [ + 111 + ], + "ʙ": [ + 112 + ], + "ʛ": [ + 113 + ], + "ʜ": [ + 114 + ], + "ʝ": [ + 115 + ], + "ʟ": [ + 116 + ], + "ʡ": [ + 117 + ], + "ʢ": [ + 118 + ], + "ʦ": [ + 155 + ], + "ʰ": [ + 145 + ], + "ʲ": [ + 119 + ], + "ˈ": [ + 120 + ], + "ˌ": [ + 121 + ], + "ː": [ + 122 + ], + "ˑ": [ + 123 + ], + "˞": [ + 124 + ], + "ˤ": [ + 146 + ], + "̃": [ + 141 + ], + "̊": [ + 158 + ], + "̝": [ + 157 + ], + "̧": [ + 140 + ], + "̩": [ + 144 + ], + "̪": [ + 142 + ], + "̯": [ + 143 + ], + "̺": [ + 152 + ], + "̻": [ + 153 + ], + "β": [ + 125 + ], + "ε": [ + 147 + ], + "θ": [ + 126 + ], + "χ": [ + 127 + ], + "ᵻ": [ + 128 + ], + "↑": [ + 151 + ], + "↓": [ + 148 + ], + "ⱱ": [ + 129 + ] + }, + "num_symbols": 256, + "num_speakers": 1, + "speaker_id_map": {}, + "piper_version": "1.0.0", + "language": { + "code": "tr_TR", + "family": "tr", + "region": "TR", + "name_native": "Türkçe", + "name_english": "Turkish", + "country_english": "Turkey" + }, + "dataset": "fahrettin" +} \ No newline at end of file diff --git a/uk/uk_UA/lada/x_low/ALIASES b/uk/uk_UA/lada/x_low/ALIASES new file mode 100644 index 0000000000000000000000000000000000000000..85769dc1e1d5b599c443154d31a61033599c9fb1 --- /dev/null +++ b/uk/uk_UA/lada/x_low/ALIASES @@ -0,0 +1 @@ +uk-lada-x-low diff --git a/uk/uk_UA/lada/x_low/MODEL_CARD b/uk/uk_UA/lada/x_low/MODEL_CARD new file mode 100644 index 0000000000000000000000000000000000000000..675dbdf51dae27313d6b88ae83e7165e2ba94ca1 --- /dev/null +++ b/uk/uk_UA/lada/x_low/MODEL_CARD @@ -0,0 +1,15 @@ +# Model card for lada (x_low) + +* Language: uk_UA (Ukranian, Ukraine) +* Speakers: 1 +* Quality: x_low +* Samplerate: 16,000Hz + +## Dataset + +* URL: https://github.com/egorsmkv/ukrainian-tts-datasets/tree/main/lada +* License: Apache 2.0 + +## Training + +Trained from scratch. diff --git a/uk/uk_UA/lada/x_low/samples/speaker_0.mp3 b/uk/uk_UA/lada/x_low/samples/speaker_0.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..e331aef2d98ef0d1cbbd52727764887ba1409111 Binary files /dev/null and b/uk/uk_UA/lada/x_low/samples/speaker_0.mp3 differ diff --git a/uk/uk_UA/lada/x_low/uk_UA-lada-x_low.onnx b/uk/uk_UA/lada/x_low/uk_UA-lada-x_low.onnx new file mode 100644 index 0000000000000000000000000000000000000000..b71c528cd4fca040052b2f4f51248f1abf0aebe1 --- /dev/null +++ b/uk/uk_UA/lada/x_low/uk_UA-lada-x_low.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f8d015d3fc19ea6bd8ac3ca1fd0c0fbac5054c609599ee73799235fd2cf5c786 +size 20628813 diff --git a/uk/uk_UA/lada/x_low/uk_UA-lada-x_low.onnx.json b/uk/uk_UA/lada/x_low/uk_UA-lada-x_low.onnx.json new file mode 100644 index 0000000000000000000000000000000000000000..8fcddc6df7b1ba857ce592a3ac0035375d57f2eb --- /dev/null +++ b/uk/uk_UA/lada/x_low/uk_UA-lada-x_low.onnx.json @@ -0,0 +1,420 @@ +{ + "audio": { + "sample_rate": 16000, + "quality": "x_low" + }, + "espeak": { + "voice": "uk" + }, + "inference": { + "noise_scale": 0.667, + "length_scale": 1, + "noise_w": 0.8 + }, + "phoneme_map": {}, + "phoneme_id_map": { + "_": [ + 0 + ], + "^": [ + 1 + ], + "$": [ + 2 + ], + " ": [ + 3 + ], + "!": [ + 4 + ], + "'": [ + 5 + ], + "(": [ + 6 + ], + ")": [ + 7 + ], + ",": [ + 8 + ], + "-": [ + 9 + ], + ".": [ + 10 + ], + ":": [ + 11 + ], + ";": [ + 12 + ], + "?": [ + 13 + ], + "a": [ + 14 + ], + "b": [ + 15 + ], + "c": [ + 16 + ], + "d": [ + 17 + ], + "e": [ + 18 + ], + "f": [ + 19 + ], + "h": [ + 20 + ], + "i": [ + 21 + ], + "j": [ + 22 + ], + "k": [ + 23 + ], + "l": [ + 24 + ], + "m": [ + 25 + ], + "n": [ + 26 + ], + "o": [ + 27 + ], + "p": [ + 28 + ], + "q": [ + 29 + ], + "r": [ + 30 + ], + "s": [ + 31 + ], + "t": [ + 32 + ], + "u": [ + 33 + ], + "v": [ + 34 + ], + "w": [ + 35 + ], + "x": [ + 36 + ], + "y": [ + 37 + ], + "z": [ + 38 + ], + "æ": [ + 39 + ], + "ç": [ + 40 + ], + "ð": [ + 41 + ], + "ø": [ + 42 + ], + "ħ": [ + 43 + ], + "ŋ": [ + 44 + ], + "œ": [ + 45 + ], + "ǀ": [ + 46 + ], + "ǁ": [ + 47 + ], + "ǂ": [ + 48 + ], + "ǃ": [ + 49 + ], + "ɐ": [ + 50 + ], + "ɑ": [ + 51 + ], + "ɒ": [ + 52 + ], + "ɓ": [ + 53 + ], + "ɔ": [ + 54 + ], + "ɕ": [ + 55 + ], + "ɖ": [ + 56 + ], + "ɗ": [ + 57 + ], + "ɘ": [ + 58 + ], + "ə": [ + 59 + ], + "ɚ": [ + 60 + ], + "ɛ": [ + 61 + ], + "ɜ": [ + 62 + ], + "ɞ": [ + 63 + ], + "ɟ": [ + 64 + ], + "ɠ": [ + 65 + ], + "ɡ": [ + 66 + ], + "ɢ": [ + 67 + ], + "ɣ": [ + 68 + ], + "ɤ": [ + 69 + ], + "ɥ": [ + 70 + ], + "ɦ": [ + 71 + ], + "ɧ": [ + 72 + ], + "ɨ": [ + 73 + ], + "ɪ": [ + 74 + ], + "ɫ": [ + 75 + ], + "ɬ": [ + 76 + ], + "ɭ": [ + 77 + ], + "ɮ": [ + 78 + ], + "ɯ": [ + 79 + ], + "ɰ": [ + 80 + ], + "ɱ": [ + 81 + ], + "ɲ": [ + 82 + ], + "ɳ": [ + 83 + ], + "ɴ": [ + 84 + ], + "ɵ": [ + 85 + ], + "ɶ": [ + 86 + ], + "ɸ": [ + 87 + ], + "ɹ": [ + 88 + ], + "ɺ": [ + 89 + ], + "ɻ": [ + 90 + ], + "ɽ": [ + 91 + ], + "ɾ": [ + 92 + ], + "ʀ": [ + 93 + ], + "ʁ": [ + 94 + ], + "ʂ": [ + 95 + ], + "ʃ": [ + 96 + ], + "ʄ": [ + 97 + ], + "ʈ": [ + 98 + ], + "ʉ": [ + 99 + ], + "ʊ": [ + 100 + ], + "ʋ": [ + 101 + ], + "ʌ": [ + 102 + ], + "ʍ": [ + 103 + ], + "ʎ": [ + 104 + ], + "ʏ": [ + 105 + ], + "ʐ": [ + 106 + ], + "ʑ": [ + 107 + ], + "ʒ": [ + 108 + ], + "ʔ": [ + 109 + ], + "ʕ": [ + 110 + ], + "ʘ": [ + 111 + ], + "ʙ": [ + 112 + ], + "ʛ": [ + 113 + ], + "ʜ": [ + 114 + ], + "ʝ": [ + 115 + ], + "ʟ": [ + 116 + ], + "ʡ": [ + 117 + ], + "ʢ": [ + 118 + ], + "ʲ": [ + 119 + ], + "ˈ": [ + 120 + ], + "ˌ": [ + 121 + ], + "ː": [ + 122 + ], + "ˑ": [ + 123 + ], + "˞": [ + 124 + ], + "β": [ + 125 + ], + "θ": [ + 126 + ], + "χ": [ + 127 + ], + "ᵻ": [ + 128 + ], + "ⱱ": [ + 129 + ] + }, + "num_symbols": 130, + "num_speakers": 1, + "speaker_id_map": {}, + "piper_version": "0.2.0", + "language": { + "code": "uk_UA", + "family": "uk", + "region": "UA", + "name_native": "украї́нська мо́ва", + "name_english": "Ukrainian", + "country_english": "Ukraine" + }, + "dataset": "lada" +} \ No newline at end of file diff --git a/uk/uk_UA/ukrainian_tts/medium/MODEL_CARD b/uk/uk_UA/ukrainian_tts/medium/MODEL_CARD new file mode 100644 index 0000000000000000000000000000000000000000..1543557c937670df86ce66e29ecce1ecb462e471 --- /dev/null +++ b/uk/uk_UA/ukrainian_tts/medium/MODEL_CARD @@ -0,0 +1,16 @@ +# Model card for ukrainian_tts (medium) + +* Language: uk_UA (Ukrainian, Ukraine) +* Speakers: 3 +* Quality: medium +* Samplerate: 22,050Hz +* Phonemes: text + +## Dataset + +* URL: https://github.com/NabuCasa/voice-datasets +* License: CC0 + +## Training + +Trained from scratch. diff --git a/uk/uk_UA/ukrainian_tts/medium/samples/speaker_0.mp3 b/uk/uk_UA/ukrainian_tts/medium/samples/speaker_0.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..353b7ccdc27b3bae3d1adc6e3daa7568c5c700fe Binary files /dev/null and b/uk/uk_UA/ukrainian_tts/medium/samples/speaker_0.mp3 differ diff --git a/uk/uk_UA/ukrainian_tts/medium/samples/speaker_1.mp3 b/uk/uk_UA/ukrainian_tts/medium/samples/speaker_1.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..577f5680086ffad19620fe4a1c2381ebf5aa26b7 Binary files /dev/null and b/uk/uk_UA/ukrainian_tts/medium/samples/speaker_1.mp3 differ diff --git a/uk/uk_UA/ukrainian_tts/medium/samples/speaker_2.mp3 b/uk/uk_UA/ukrainian_tts/medium/samples/speaker_2.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..c31e9a0c63d4c8939e5558c8b047f9db747350b7 Binary files /dev/null and b/uk/uk_UA/ukrainian_tts/medium/samples/speaker_2.mp3 differ diff --git a/uk/uk_UA/ukrainian_tts/medium/uk_UA-ukrainian_tts-medium.onnx b/uk/uk_UA/ukrainian_tts/medium/uk_UA-ukrainian_tts-medium.onnx new file mode 100644 index 0000000000000000000000000000000000000000..355c89e8df1efc9df0fe0ce63e240196d6fd711e --- /dev/null +++ b/uk/uk_UA/ukrainian_tts/medium/uk_UA-ukrainian_tts-medium.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7920419ac5f6fd8b6450520f24b52ed5a319cb53dd018fbcd71c9e079cbac84f +size 76735663 diff --git a/uk/uk_UA/ukrainian_tts/medium/uk_UA-ukrainian_tts-medium.onnx.json b/uk/uk_UA/ukrainian_tts/medium/uk_UA-ukrainian_tts-medium.onnx.json new file mode 100644 index 0000000000000000000000000000000000000000..c13f1ec4dff625299bc5607f8d4b1a35f8bcb9d0 --- /dev/null +++ b/uk/uk_UA/ukrainian_tts/medium/uk_UA-ukrainian_tts-medium.onnx.json @@ -0,0 +1,182 @@ +{ + "audio": { + "sample_rate": 22050, + "quality": "medium" + }, + "espeak": { + "voice": "uk" + }, + "inference": { + "noise_scale": 0.667, + "length_scale": 1, + "noise_w": 0.8 + }, + "phoneme_type": "text", + "phoneme_map": {}, + "phoneme_id_map": { + "_": [ + 0 + ], + "^": [ + 1 + ], + "$": [ + 2 + ], + " ": [ + 3 + ], + "!": [ + 4 + ], + "'": [ + 5 + ], + ",": [ + 6 + ], + "-": [ + 7 + ], + ".": [ + 8 + ], + ":": [ + 9 + ], + ";": [ + 10 + ], + "?": [ + 11 + ], + "а": [ + 12 + ], + "б": [ + 13 + ], + "в": [ + 14 + ], + "г": [ + 15 + ], + "ґ": [ + 16 + ], + "д": [ + 17 + ], + "е": [ + 18 + ], + "є": [ + 19 + ], + "ж": [ + 20 + ], + "з": [ + 21 + ], + "и": [ + 22 + ], + "і": [ + 23 + ], + "ї": [ + 24 + ], + "й": [ + 25 + ], + "к": [ + 26 + ], + "л": [ + 27 + ], + "м": [ + 28 + ], + "н": [ + 29 + ], + "о": [ + 30 + ], + "п": [ + 31 + ], + "р": [ + 32 + ], + "с": [ + 33 + ], + "т": [ + 34 + ], + "у": [ + 35 + ], + "ф": [ + 36 + ], + "х": [ + 37 + ], + "ц": [ + 38 + ], + "ч": [ + 39 + ], + "ш": [ + 40 + ], + "щ": [ + 41 + ], + "ь": [ + 42 + ], + "ю": [ + 43 + ], + "я": [ + 44 + ], + "́": [ + 45 + ], + "̆": [ + 46 + ], + "̈": [ + 47 + ], + "—": [ + 48 + ] + }, + "num_symbols": 256, + "num_speakers": 3, + "speaker_id_map": { + "lada": 0, + "mykyta": 1, + "tetiana": 2 + }, + "piper_version": "1.0.0", + "language": { + "code": "uk_UA", + "family": "uk", + "region": "UA", + "name_native": "украї́нська мо́ва", + "name_english": "Ukrainian", + "country_english": "Ukraine" + }, + "dataset": "ukrainian_tts" +} \ No newline at end of file diff --git a/vi/vi_VN/25hours_single/low/ALIASES b/vi/vi_VN/25hours_single/low/ALIASES new file mode 100644 index 0000000000000000000000000000000000000000..f29a4c9b3324f2c31f02bcc234fb1d9ae6e017d4 --- /dev/null +++ b/vi/vi_VN/25hours_single/low/ALIASES @@ -0,0 +1 @@ +vi-25hours-single-low diff --git a/vi/vi_VN/25hours_single/low/MODEL_CARD b/vi/vi_VN/25hours_single/low/MODEL_CARD new file mode 100644 index 0000000000000000000000000000000000000000..265071298d4122a889b183ac9a369ef5218b20f3 --- /dev/null +++ b/vi/vi_VN/25hours_single/low/MODEL_CARD @@ -0,0 +1,16 @@ +# Model card for 25hours_single (low) + +* Language: vi_VN (Vietnamese, Vietnam) +* Speakers: 1 +* Quality: low +* Samplerate: 16,000Hz + +## Dataset + +* Name: InfoRe Technology 1 +* URL: https://github.com/TensorSpeech/TensorFlowASR/blob/main/README.md#vietnamese +* License: Unknown + +## Training + +Finetuned from U.S. English Ryan voice (low quality). diff --git a/vi/vi_VN/25hours_single/low/samples/speaker_0.mp3 b/vi/vi_VN/25hours_single/low/samples/speaker_0.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..aeb16c45b6502a5b694ddac31aa34c38d4acbc80 Binary files /dev/null and b/vi/vi_VN/25hours_single/low/samples/speaker_0.mp3 differ diff --git a/vi/vi_VN/25hours_single/low/vi_VN-25hours_single-low.onnx b/vi/vi_VN/25hours_single/low/vi_VN-25hours_single-low.onnx new file mode 100644 index 0000000000000000000000000000000000000000..de6c1b62de5ba6316f8ffb732bd5d2ad574dad40 --- /dev/null +++ b/vi/vi_VN/25hours_single/low/vi_VN-25hours_single-low.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:266945c4a80cb3301450c8ba6fcf6ba3542c56cf952a08a8ff3d2d4ff0ab5609 +size 63104526 diff --git a/vi/vi_VN/25hours_single/low/vi_VN-25hours_single-low.onnx.json b/vi/vi_VN/25hours_single/low/vi_VN-25hours_single-low.onnx.json new file mode 100644 index 0000000000000000000000000000000000000000..4c03d0ed428616af2fe85dd969fa42ee2efd048b --- /dev/null +++ b/vi/vi_VN/25hours_single/low/vi_VN-25hours_single-low.onnx.json @@ -0,0 +1,420 @@ +{ + "audio": { + "sample_rate": 16000, + "quality": "low" + }, + "espeak": { + "voice": "vi" + }, + "inference": { + "noise_scale": 0.667, + "length_scale": 1, + "noise_w": 0.8 + }, + "phoneme_map": {}, + "phoneme_id_map": { + "_": [ + 0 + ], + "^": [ + 1 + ], + "$": [ + 2 + ], + " ": [ + 3 + ], + "!": [ + 4 + ], + "'": [ + 5 + ], + "(": [ + 6 + ], + ")": [ + 7 + ], + ",": [ + 8 + ], + "-": [ + 9 + ], + ".": [ + 10 + ], + ":": [ + 11 + ], + ";": [ + 12 + ], + "?": [ + 13 + ], + "a": [ + 14 + ], + "b": [ + 15 + ], + "c": [ + 16 + ], + "d": [ + 17 + ], + "e": [ + 18 + ], + "f": [ + 19 + ], + "h": [ + 20 + ], + "i": [ + 21 + ], + "j": [ + 22 + ], + "k": [ + 23 + ], + "l": [ + 24 + ], + "m": [ + 25 + ], + "n": [ + 26 + ], + "o": [ + 27 + ], + "p": [ + 28 + ], + "q": [ + 29 + ], + "r": [ + 30 + ], + "s": [ + 31 + ], + "t": [ + 32 + ], + "u": [ + 33 + ], + "v": [ + 34 + ], + "w": [ + 35 + ], + "x": [ + 36 + ], + "y": [ + 37 + ], + "z": [ + 38 + ], + "æ": [ + 39 + ], + "ç": [ + 40 + ], + "ð": [ + 41 + ], + "ø": [ + 42 + ], + "ħ": [ + 43 + ], + "ŋ": [ + 44 + ], + "œ": [ + 45 + ], + "ǀ": [ + 46 + ], + "ǁ": [ + 47 + ], + "ǂ": [ + 48 + ], + "ǃ": [ + 49 + ], + "ɐ": [ + 50 + ], + "ɑ": [ + 51 + ], + "ɒ": [ + 52 + ], + "ɓ": [ + 53 + ], + "ɔ": [ + 54 + ], + "ɕ": [ + 55 + ], + "ɖ": [ + 56 + ], + "ɗ": [ + 57 + ], + "ɘ": [ + 58 + ], + "ə": [ + 59 + ], + "ɚ": [ + 60 + ], + "ɛ": [ + 61 + ], + "ɜ": [ + 62 + ], + "ɞ": [ + 63 + ], + "ɟ": [ + 64 + ], + "ɠ": [ + 65 + ], + "ɡ": [ + 66 + ], + "ɢ": [ + 67 + ], + "ɣ": [ + 68 + ], + "ɤ": [ + 69 + ], + "ɥ": [ + 70 + ], + "ɦ": [ + 71 + ], + "ɧ": [ + 72 + ], + "ɨ": [ + 73 + ], + "ɪ": [ + 74 + ], + "ɫ": [ + 75 + ], + "ɬ": [ + 76 + ], + "ɭ": [ + 77 + ], + "ɮ": [ + 78 + ], + "ɯ": [ + 79 + ], + "ɰ": [ + 80 + ], + "ɱ": [ + 81 + ], + "ɲ": [ + 82 + ], + "ɳ": [ + 83 + ], + "ɴ": [ + 84 + ], + "ɵ": [ + 85 + ], + "ɶ": [ + 86 + ], + "ɸ": [ + 87 + ], + "ɹ": [ + 88 + ], + "ɺ": [ + 89 + ], + "ɻ": [ + 90 + ], + "ɽ": [ + 91 + ], + "ɾ": [ + 92 + ], + "ʀ": [ + 93 + ], + "ʁ": [ + 94 + ], + "ʂ": [ + 95 + ], + "ʃ": [ + 96 + ], + "ʄ": [ + 97 + ], + "ʈ": [ + 98 + ], + "ʉ": [ + 99 + ], + "ʊ": [ + 100 + ], + "ʋ": [ + 101 + ], + "ʌ": [ + 102 + ], + "ʍ": [ + 103 + ], + "ʎ": [ + 104 + ], + "ʏ": [ + 105 + ], + "ʐ": [ + 106 + ], + "ʑ": [ + 107 + ], + "ʒ": [ + 108 + ], + "ʔ": [ + 109 + ], + "ʕ": [ + 110 + ], + "ʘ": [ + 111 + ], + "ʙ": [ + 112 + ], + "ʛ": [ + 113 + ], + "ʜ": [ + 114 + ], + "ʝ": [ + 115 + ], + "ʟ": [ + 116 + ], + "ʡ": [ + 117 + ], + "ʢ": [ + 118 + ], + "ʲ": [ + 119 + ], + "ˈ": [ + 120 + ], + "ˌ": [ + 121 + ], + "ː": [ + 122 + ], + "ˑ": [ + 123 + ], + "˞": [ + 124 + ], + "β": [ + 125 + ], + "θ": [ + 126 + ], + "χ": [ + 127 + ], + "ᵻ": [ + 128 + ], + "ⱱ": [ + 129 + ] + }, + "num_symbols": 130, + "num_speakers": 1, + "speaker_id_map": {}, + "piper_version": "0.2.0", + "language": { + "code": "vi_VN", + "family": "vi", + "region": "VN", + "name_native": "Tiếng Việt", + "name_english": "Vietnamese", + "country_english": "Vietnam" + }, + "dataset": "25hours_single" +} \ No newline at end of file diff --git a/vi/vi_VN/vais1000/medium/MODEL_CARD b/vi/vi_VN/vais1000/medium/MODEL_CARD new file mode 100644 index 0000000000000000000000000000000000000000..d77035517b7e2e9446d43b2c83769890e14b000c --- /dev/null +++ b/vi/vi_VN/vais1000/medium/MODEL_CARD @@ -0,0 +1,15 @@ +# Model card for vais1000 (medium) + +* Language: vi_VN (Vietnamese, Vietnam) +* Speakers: 1 +* Quality: medium +* Samplerate: 22,050Hz + +## Dataset + +* URL: https://ieee-dataport.org/documents/vais-1000-vietnamese-speech-synthesis-corpus +* License: https://creativecommons.org/licenses/by/4.0/ + +## Training + +Finetuned from U.S. English lessac voice (medium quality). diff --git a/vi/vi_VN/vais1000/medium/samples/speaker_0.mp3 b/vi/vi_VN/vais1000/medium/samples/speaker_0.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..2d3891684e4304c2da9a6dedb4d988e9fab5d90d Binary files /dev/null and b/vi/vi_VN/vais1000/medium/samples/speaker_0.mp3 differ diff --git a/vi/vi_VN/vais1000/medium/vi_VN-vais1000-medium.onnx b/vi/vi_VN/vais1000/medium/vi_VN-vais1000-medium.onnx new file mode 100644 index 0000000000000000000000000000000000000000..c9b9df265b718e05ed60899d377f98725a8dbc95 --- /dev/null +++ b/vi/vi_VN/vais1000/medium/vi_VN-vais1000-medium.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec7c89e2c85f4d1edc24b6120c18aaf1bda614f06b511567eb9c7c0de15e2dab +size 63201294 diff --git a/vi/vi_VN/vais1000/medium/vi_VN-vais1000-medium.onnx.json b/vi/vi_VN/vais1000/medium/vi_VN-vais1000-medium.onnx.json new file mode 100644 index 0000000000000000000000000000000000000000..b11c938b28c4425d9be76beeb09d68f0d67157ba --- /dev/null +++ b/vi/vi_VN/vais1000/medium/vi_VN-vais1000-medium.onnx.json @@ -0,0 +1,492 @@ +{ + "audio": { + "sample_rate": 22050, + "quality": "medium" + }, + "espeak": { + "voice": "vi" + }, + "inference": { + "noise_scale": 0.667, + "length_scale": 1, + "noise_w": 0.8 + }, + "phoneme_map": {}, + "phoneme_id_map": { + "_": [ + 0 + ], + "^": [ + 1 + ], + "$": [ + 2 + ], + " ": [ + 3 + ], + "!": [ + 4 + ], + "'": [ + 5 + ], + "(": [ + 6 + ], + ")": [ + 7 + ], + ",": [ + 8 + ], + "-": [ + 9 + ], + ".": [ + 10 + ], + ":": [ + 11 + ], + ";": [ + 12 + ], + "?": [ + 13 + ], + "a": [ + 14 + ], + "b": [ + 15 + ], + "c": [ + 16 + ], + "d": [ + 17 + ], + "e": [ + 18 + ], + "f": [ + 19 + ], + "h": [ + 20 + ], + "i": [ + 21 + ], + "j": [ + 22 + ], + "k": [ + 23 + ], + "l": [ + 24 + ], + "m": [ + 25 + ], + "n": [ + 26 + ], + "o": [ + 27 + ], + "p": [ + 28 + ], + "q": [ + 29 + ], + "r": [ + 30 + ], + "s": [ + 31 + ], + "t": [ + 32 + ], + "u": [ + 33 + ], + "v": [ + 34 + ], + "w": [ + 35 + ], + "x": [ + 36 + ], + "y": [ + 37 + ], + "z": [ + 38 + ], + "æ": [ + 39 + ], + "ç": [ + 40 + ], + "ð": [ + 41 + ], + "ø": [ + 42 + ], + "ħ": [ + 43 + ], + "ŋ": [ + 44 + ], + "œ": [ + 45 + ], + "ǀ": [ + 46 + ], + "ǁ": [ + 47 + ], + "ǂ": [ + 48 + ], + "ǃ": [ + 49 + ], + "ɐ": [ + 50 + ], + "ɑ": [ + 51 + ], + "ɒ": [ + 52 + ], + "ɓ": [ + 53 + ], + "ɔ": [ + 54 + ], + "ɕ": [ + 55 + ], + "ɖ": [ + 56 + ], + "ɗ": [ + 57 + ], + "ɘ": [ + 58 + ], + "ə": [ + 59 + ], + "ɚ": [ + 60 + ], + "ɛ": [ + 61 + ], + "ɜ": [ + 62 + ], + "ɞ": [ + 63 + ], + "ɟ": [ + 64 + ], + "ɠ": [ + 65 + ], + "ɡ": [ + 66 + ], + "ɢ": [ + 67 + ], + "ɣ": [ + 68 + ], + "ɤ": [ + 69 + ], + "ɥ": [ + 70 + ], + "ɦ": [ + 71 + ], + "ɧ": [ + 72 + ], + "ɨ": [ + 73 + ], + "ɪ": [ + 74 + ], + "ɫ": [ + 75 + ], + "ɬ": [ + 76 + ], + "ɭ": [ + 77 + ], + "ɮ": [ + 78 + ], + "ɯ": [ + 79 + ], + "ɰ": [ + 80 + ], + "ɱ": [ + 81 + ], + "ɲ": [ + 82 + ], + "ɳ": [ + 83 + ], + "ɴ": [ + 84 + ], + "ɵ": [ + 85 + ], + "ɶ": [ + 86 + ], + "ɸ": [ + 87 + ], + "ɹ": [ + 88 + ], + "ɺ": [ + 89 + ], + "ɻ": [ + 90 + ], + "ɽ": [ + 91 + ], + "ɾ": [ + 92 + ], + "ʀ": [ + 93 + ], + "ʁ": [ + 94 + ], + "ʂ": [ + 95 + ], + "ʃ": [ + 96 + ], + "ʄ": [ + 97 + ], + "ʈ": [ + 98 + ], + "ʉ": [ + 99 + ], + "ʊ": [ + 100 + ], + "ʋ": [ + 101 + ], + "ʌ": [ + 102 + ], + "ʍ": [ + 103 + ], + "ʎ": [ + 104 + ], + "ʏ": [ + 105 + ], + "ʐ": [ + 106 + ], + "ʑ": [ + 107 + ], + "ʒ": [ + 108 + ], + "ʔ": [ + 109 + ], + "ʕ": [ + 110 + ], + "ʘ": [ + 111 + ], + "ʙ": [ + 112 + ], + "ʛ": [ + 113 + ], + "ʜ": [ + 114 + ], + "ʝ": [ + 115 + ], + "ʟ": [ + 116 + ], + "ʡ": [ + 117 + ], + "ʢ": [ + 118 + ], + "ʲ": [ + 119 + ], + "ˈ": [ + 120 + ], + "ˌ": [ + 121 + ], + "ː": [ + 122 + ], + "ˑ": [ + 123 + ], + "˞": [ + 124 + ], + "β": [ + 125 + ], + "θ": [ + 126 + ], + "χ": [ + 127 + ], + "ᵻ": [ + 128 + ], + "ⱱ": [ + 129 + ], + "0": [ + 130 + ], + "1": [ + 131 + ], + "2": [ + 132 + ], + "3": [ + 133 + ], + "4": [ + 134 + ], + "5": [ + 135 + ], + "6": [ + 136 + ], + "7": [ + 137 + ], + "8": [ + 138 + ], + "9": [ + 139 + ], + "̧": [ + 140 + ], + "̃": [ + 141 + ], + "̪": [ + 142 + ], + "̯": [ + 143 + ], + "̩": [ + 144 + ], + "ʰ": [ + 145 + ], + "ˤ": [ + 146 + ], + "ε": [ + 147 + ], + "↓": [ + 148 + ], + "#": [ + 149 + ], + "\"": [ + 150 + ], + "↑": [ + 151 + ], + "̺": [ + 152 + ], + "̻": [ + 153 + ] + }, + "num_symbols": 256, + "num_speakers": 1, + "speaker_id_map": {}, + "piper_version": "1.0.0", + "language": { + "code": "vi_VN", + "family": "vi", + "region": "VN", + "name_native": "Tiếng Việt", + "name_english": "Vietnamese", + "country_english": "Vietnam" + }, + "dataset": "vais1000" +} \ No newline at end of file diff --git a/vi/vi_VN/vivos/x_low/ALIASES b/vi/vi_VN/vivos/x_low/ALIASES new file mode 100644 index 0000000000000000000000000000000000000000..00cc3bf24692d22c8948969e054bddf6c9ee45e6 --- /dev/null +++ b/vi/vi_VN/vivos/x_low/ALIASES @@ -0,0 +1 @@ +vi-vivos-x-low diff --git a/vi/vi_VN/vivos/x_low/MODEL_CARD b/vi/vi_VN/vivos/x_low/MODEL_CARD new file mode 100644 index 0000000000000000000000000000000000000000..d3166a2c48d549df8060326344199e5a46d753ab --- /dev/null +++ b/vi/vi_VN/vivos/x_low/MODEL_CARD @@ -0,0 +1,16 @@ +# Model card for vivos (x_low) + +* Language: vi_VN (Vietnamese, Vietnam) +* Speakers: 65 +* Quality: x_low +* Samplerate: 16,000Hz + +## Dataset + +* Name: InfoRe Technology 1 +* URL: https://ailab.hcmus.edu.vn/vivos/ +* License: CC BY-NC-SA 4.0 + +## Training + +Trained from scratch. diff --git a/vi/vi_VN/vivos/x_low/samples/speaker_0.mp3 b/vi/vi_VN/vivos/x_low/samples/speaker_0.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..fad3f33e52dbdc76777a7544412a15a1e4de0dd0 Binary files /dev/null and b/vi/vi_VN/vivos/x_low/samples/speaker_0.mp3 differ diff --git a/vi/vi_VN/vivos/x_low/samples/speaker_1.mp3 b/vi/vi_VN/vivos/x_low/samples/speaker_1.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..d2bf9d2bca8e7bd8e0c5c0a0679dd8975882ca83 Binary files /dev/null and b/vi/vi_VN/vivos/x_low/samples/speaker_1.mp3 differ diff --git a/vi/vi_VN/vivos/x_low/samples/speaker_10.mp3 b/vi/vi_VN/vivos/x_low/samples/speaker_10.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..d6228017d74eae7696543c0f6d6cf3464e176ff8 Binary files /dev/null and b/vi/vi_VN/vivos/x_low/samples/speaker_10.mp3 differ diff --git a/vi/vi_VN/vivos/x_low/samples/speaker_11.mp3 b/vi/vi_VN/vivos/x_low/samples/speaker_11.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..c2cf94a3959af74f5c823e723a807198249cd813 Binary files /dev/null and b/vi/vi_VN/vivos/x_low/samples/speaker_11.mp3 differ diff --git a/vi/vi_VN/vivos/x_low/samples/speaker_12.mp3 b/vi/vi_VN/vivos/x_low/samples/speaker_12.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..a646086c7fd24dd0a4fb525ff60da83cc411e32b Binary files /dev/null and b/vi/vi_VN/vivos/x_low/samples/speaker_12.mp3 differ diff --git a/vi/vi_VN/vivos/x_low/samples/speaker_13.mp3 b/vi/vi_VN/vivos/x_low/samples/speaker_13.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..62cde7f08789fc81bc4414ed29a46f3c5e507cb8 Binary files /dev/null and b/vi/vi_VN/vivos/x_low/samples/speaker_13.mp3 differ diff --git a/vi/vi_VN/vivos/x_low/samples/speaker_14.mp3 b/vi/vi_VN/vivos/x_low/samples/speaker_14.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..3cea09d04d6d5321728bd8a9a792069f460e0ead Binary files /dev/null and b/vi/vi_VN/vivos/x_low/samples/speaker_14.mp3 differ diff --git a/vi/vi_VN/vivos/x_low/samples/speaker_15.mp3 b/vi/vi_VN/vivos/x_low/samples/speaker_15.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..e8cd49e3eef6331a0a23bed2d39691590c674b79 Binary files /dev/null and b/vi/vi_VN/vivos/x_low/samples/speaker_15.mp3 differ diff --git a/vi/vi_VN/vivos/x_low/samples/speaker_16.mp3 b/vi/vi_VN/vivos/x_low/samples/speaker_16.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..5ffb6c7546a5b0c9e0d423e034117a29b27eb1f4 Binary files /dev/null and b/vi/vi_VN/vivos/x_low/samples/speaker_16.mp3 differ diff --git a/vi/vi_VN/vivos/x_low/samples/speaker_17.mp3 b/vi/vi_VN/vivos/x_low/samples/speaker_17.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..17b5e23d960d49b4e031ccda04ca281cf0b9b876 Binary files /dev/null and b/vi/vi_VN/vivos/x_low/samples/speaker_17.mp3 differ diff --git a/vi/vi_VN/vivos/x_low/samples/speaker_18.mp3 b/vi/vi_VN/vivos/x_low/samples/speaker_18.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..bbb2b2fb0b0afe96986d9e02ffb617806d80d50e Binary files /dev/null and b/vi/vi_VN/vivos/x_low/samples/speaker_18.mp3 differ diff --git a/vi/vi_VN/vivos/x_low/samples/speaker_19.mp3 b/vi/vi_VN/vivos/x_low/samples/speaker_19.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..7891103767c388327153d5c9b18978a321f039f8 Binary files /dev/null and b/vi/vi_VN/vivos/x_low/samples/speaker_19.mp3 differ diff --git a/vi/vi_VN/vivos/x_low/samples/speaker_2.mp3 b/vi/vi_VN/vivos/x_low/samples/speaker_2.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..c01afdd25487eb886eb1d16be9b499e466e0fc7f Binary files /dev/null and b/vi/vi_VN/vivos/x_low/samples/speaker_2.mp3 differ diff --git a/vi/vi_VN/vivos/x_low/samples/speaker_20.mp3 b/vi/vi_VN/vivos/x_low/samples/speaker_20.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..d35c9a845dcfff294123b5cee6f188e335be46e8 Binary files /dev/null and b/vi/vi_VN/vivos/x_low/samples/speaker_20.mp3 differ diff --git a/vi/vi_VN/vivos/x_low/samples/speaker_21.mp3 b/vi/vi_VN/vivos/x_low/samples/speaker_21.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..19404a82e7b7285816197897e9e3db781126870c Binary files /dev/null and b/vi/vi_VN/vivos/x_low/samples/speaker_21.mp3 differ diff --git a/vi/vi_VN/vivos/x_low/samples/speaker_22.mp3 b/vi/vi_VN/vivos/x_low/samples/speaker_22.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..7e2bb38fc2af4c45c1db45f590564797c9293d71 Binary files /dev/null and b/vi/vi_VN/vivos/x_low/samples/speaker_22.mp3 differ diff --git a/vi/vi_VN/vivos/x_low/samples/speaker_23.mp3 b/vi/vi_VN/vivos/x_low/samples/speaker_23.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..a3dc0fe3afe512b56d9d5ed034c5a200ef0b3187 Binary files /dev/null and b/vi/vi_VN/vivos/x_low/samples/speaker_23.mp3 differ diff --git a/vi/vi_VN/vivos/x_low/samples/speaker_24.mp3 b/vi/vi_VN/vivos/x_low/samples/speaker_24.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..2ca73f022293624c2ec5012fa0b12e7159021d74 Binary files /dev/null and b/vi/vi_VN/vivos/x_low/samples/speaker_24.mp3 differ diff --git a/vi/vi_VN/vivos/x_low/samples/speaker_25.mp3 b/vi/vi_VN/vivos/x_low/samples/speaker_25.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..f5ed8355154da61af67e434baad0f23b317aa905 Binary files /dev/null and b/vi/vi_VN/vivos/x_low/samples/speaker_25.mp3 differ diff --git a/vi/vi_VN/vivos/x_low/samples/speaker_26.mp3 b/vi/vi_VN/vivos/x_low/samples/speaker_26.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..f29d8ab8f3bbfa9b823bbea9b7526e0e08a03147 Binary files /dev/null and b/vi/vi_VN/vivos/x_low/samples/speaker_26.mp3 differ diff --git a/vi/vi_VN/vivos/x_low/samples/speaker_27.mp3 b/vi/vi_VN/vivos/x_low/samples/speaker_27.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..d2bb56c2490b14748af73ad9f78f4e7f970a035f Binary files /dev/null and b/vi/vi_VN/vivos/x_low/samples/speaker_27.mp3 differ diff --git a/vi/vi_VN/vivos/x_low/samples/speaker_28.mp3 b/vi/vi_VN/vivos/x_low/samples/speaker_28.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..41b63596b9818ada79e689421c7c493f23a8f250 Binary files /dev/null and b/vi/vi_VN/vivos/x_low/samples/speaker_28.mp3 differ diff --git a/vi/vi_VN/vivos/x_low/samples/speaker_29.mp3 b/vi/vi_VN/vivos/x_low/samples/speaker_29.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..508a9e3041ef9c4f216a7cbc5ae7f7fc4464bd1c Binary files /dev/null and b/vi/vi_VN/vivos/x_low/samples/speaker_29.mp3 differ diff --git a/vi/vi_VN/vivos/x_low/samples/speaker_3.mp3 b/vi/vi_VN/vivos/x_low/samples/speaker_3.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..01bb03e6db050eb443bab5a687eb8ddf63935eeb Binary files /dev/null and b/vi/vi_VN/vivos/x_low/samples/speaker_3.mp3 differ diff --git a/vi/vi_VN/vivos/x_low/samples/speaker_30.mp3 b/vi/vi_VN/vivos/x_low/samples/speaker_30.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..131a63a2b330bb03d4fadd016a3cc4e1fc816165 Binary files /dev/null and b/vi/vi_VN/vivos/x_low/samples/speaker_30.mp3 differ diff --git a/vi/vi_VN/vivos/x_low/samples/speaker_31.mp3 b/vi/vi_VN/vivos/x_low/samples/speaker_31.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..9f70ee5709c58933cb958fe6a94f8e4fdbf014ff Binary files /dev/null and b/vi/vi_VN/vivos/x_low/samples/speaker_31.mp3 differ diff --git a/vi/vi_VN/vivos/x_low/samples/speaker_32.mp3 b/vi/vi_VN/vivos/x_low/samples/speaker_32.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..ca6c228db1428a47a4916224f488be83e7717525 Binary files /dev/null and b/vi/vi_VN/vivos/x_low/samples/speaker_32.mp3 differ diff --git a/vi/vi_VN/vivos/x_low/samples/speaker_33.mp3 b/vi/vi_VN/vivos/x_low/samples/speaker_33.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..500b5927cb63fc0ac148b97d827b7ec33c82d539 Binary files /dev/null and b/vi/vi_VN/vivos/x_low/samples/speaker_33.mp3 differ diff --git a/vi/vi_VN/vivos/x_low/samples/speaker_34.mp3 b/vi/vi_VN/vivos/x_low/samples/speaker_34.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..241f006c340db07df9b9e256162cc97711dd2e09 Binary files /dev/null and b/vi/vi_VN/vivos/x_low/samples/speaker_34.mp3 differ diff --git a/vi/vi_VN/vivos/x_low/samples/speaker_35.mp3 b/vi/vi_VN/vivos/x_low/samples/speaker_35.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..e954fe7b2eff32063228a15e65c822d6df7fcf15 Binary files /dev/null and b/vi/vi_VN/vivos/x_low/samples/speaker_35.mp3 differ diff --git a/vi/vi_VN/vivos/x_low/samples/speaker_36.mp3 b/vi/vi_VN/vivos/x_low/samples/speaker_36.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..a3c2a251df196fff62608aa0da10c609da2c21d9 Binary files /dev/null and b/vi/vi_VN/vivos/x_low/samples/speaker_36.mp3 differ diff --git a/vi/vi_VN/vivos/x_low/samples/speaker_37.mp3 b/vi/vi_VN/vivos/x_low/samples/speaker_37.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..c4122a090ed946a3ddff00652a6a598264980303 Binary files /dev/null and b/vi/vi_VN/vivos/x_low/samples/speaker_37.mp3 differ diff --git a/vi/vi_VN/vivos/x_low/samples/speaker_38.mp3 b/vi/vi_VN/vivos/x_low/samples/speaker_38.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..260229cd1ab025d554d212af749a9b7bdfaf90c3 Binary files /dev/null and b/vi/vi_VN/vivos/x_low/samples/speaker_38.mp3 differ diff --git a/vi/vi_VN/vivos/x_low/samples/speaker_39.mp3 b/vi/vi_VN/vivos/x_low/samples/speaker_39.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..90a922b2c2056a54047b9671c56dbe81b635b377 Binary files /dev/null and b/vi/vi_VN/vivos/x_low/samples/speaker_39.mp3 differ diff --git a/vi/vi_VN/vivos/x_low/samples/speaker_4.mp3 b/vi/vi_VN/vivos/x_low/samples/speaker_4.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..b245a6c6d6015f88a1ebcbec150e3e54f92775a0 Binary files /dev/null and b/vi/vi_VN/vivos/x_low/samples/speaker_4.mp3 differ diff --git a/vi/vi_VN/vivos/x_low/samples/speaker_40.mp3 b/vi/vi_VN/vivos/x_low/samples/speaker_40.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..fb7e948e0fec8799432f32001ed18cf5f44cc974 Binary files /dev/null and b/vi/vi_VN/vivos/x_low/samples/speaker_40.mp3 differ diff --git a/vi/vi_VN/vivos/x_low/samples/speaker_41.mp3 b/vi/vi_VN/vivos/x_low/samples/speaker_41.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..66876f895f0fef0229d3941f3d0b8d04cda8b5a4 Binary files /dev/null and b/vi/vi_VN/vivos/x_low/samples/speaker_41.mp3 differ diff --git a/vi/vi_VN/vivos/x_low/samples/speaker_42.mp3 b/vi/vi_VN/vivos/x_low/samples/speaker_42.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..9700ae318d089c728e84eea09f2e0f1dae008620 Binary files /dev/null and b/vi/vi_VN/vivos/x_low/samples/speaker_42.mp3 differ diff --git a/vi/vi_VN/vivos/x_low/samples/speaker_43.mp3 b/vi/vi_VN/vivos/x_low/samples/speaker_43.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..26efa137d6c7b1146649447cc700f74451b29133 Binary files /dev/null and b/vi/vi_VN/vivos/x_low/samples/speaker_43.mp3 differ diff --git a/vi/vi_VN/vivos/x_low/samples/speaker_44.mp3 b/vi/vi_VN/vivos/x_low/samples/speaker_44.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..7cb0e42e38642d9678eb8c8994726a022e9b980d Binary files /dev/null and b/vi/vi_VN/vivos/x_low/samples/speaker_44.mp3 differ diff --git a/vi/vi_VN/vivos/x_low/samples/speaker_45.mp3 b/vi/vi_VN/vivos/x_low/samples/speaker_45.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..225faf53b7236bd578d44d37f4049e47ea371e71 Binary files /dev/null and b/vi/vi_VN/vivos/x_low/samples/speaker_45.mp3 differ diff --git a/vi/vi_VN/vivos/x_low/samples/speaker_46.mp3 b/vi/vi_VN/vivos/x_low/samples/speaker_46.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..cbc45ab60fd9b0771bc3582bbab08ba60383ac20 Binary files /dev/null and b/vi/vi_VN/vivos/x_low/samples/speaker_46.mp3 differ diff --git a/vi/vi_VN/vivos/x_low/samples/speaker_47.mp3 b/vi/vi_VN/vivos/x_low/samples/speaker_47.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..ac2aa3301861a67c7e6d1e4f54d035a8ddb1b60a Binary files /dev/null and b/vi/vi_VN/vivos/x_low/samples/speaker_47.mp3 differ diff --git a/vi/vi_VN/vivos/x_low/samples/speaker_48.mp3 b/vi/vi_VN/vivos/x_low/samples/speaker_48.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..f279c647f8ff5cb6eb439c2ece93a3bafafcf7b0 Binary files /dev/null and b/vi/vi_VN/vivos/x_low/samples/speaker_48.mp3 differ diff --git a/vi/vi_VN/vivos/x_low/samples/speaker_49.mp3 b/vi/vi_VN/vivos/x_low/samples/speaker_49.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..8c9665d161cfbab744355a0548d491b04815a0c8 Binary files /dev/null and b/vi/vi_VN/vivos/x_low/samples/speaker_49.mp3 differ diff --git a/vi/vi_VN/vivos/x_low/samples/speaker_5.mp3 b/vi/vi_VN/vivos/x_low/samples/speaker_5.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..5f6352b0564e32f007d9bea716cebeb0306f4dc4 Binary files /dev/null and b/vi/vi_VN/vivos/x_low/samples/speaker_5.mp3 differ diff --git a/vi/vi_VN/vivos/x_low/samples/speaker_50.mp3 b/vi/vi_VN/vivos/x_low/samples/speaker_50.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..a444606f3b5ba392712f486b5bc279a0c7808410 Binary files /dev/null and b/vi/vi_VN/vivos/x_low/samples/speaker_50.mp3 differ diff --git a/vi/vi_VN/vivos/x_low/samples/speaker_51.mp3 b/vi/vi_VN/vivos/x_low/samples/speaker_51.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..207840bb900c73cd4c499173f39549062a4a3326 Binary files /dev/null and b/vi/vi_VN/vivos/x_low/samples/speaker_51.mp3 differ diff --git a/vi/vi_VN/vivos/x_low/samples/speaker_52.mp3 b/vi/vi_VN/vivos/x_low/samples/speaker_52.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..88a1de29062e538a5216bc37964434f5a4bb900d Binary files /dev/null and b/vi/vi_VN/vivos/x_low/samples/speaker_52.mp3 differ diff --git a/vi/vi_VN/vivos/x_low/samples/speaker_53.mp3 b/vi/vi_VN/vivos/x_low/samples/speaker_53.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..c89a5817c5c0b09aa192e5086b8df094f3786456 Binary files /dev/null and b/vi/vi_VN/vivos/x_low/samples/speaker_53.mp3 differ diff --git a/vi/vi_VN/vivos/x_low/samples/speaker_54.mp3 b/vi/vi_VN/vivos/x_low/samples/speaker_54.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..48cacb140f88078a2854bfd53c9e094743143bed Binary files /dev/null and b/vi/vi_VN/vivos/x_low/samples/speaker_54.mp3 differ diff --git a/vi/vi_VN/vivos/x_low/samples/speaker_55.mp3 b/vi/vi_VN/vivos/x_low/samples/speaker_55.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..e5c2a85f4be184618fd55b476ea43ebef4f8825b Binary files /dev/null and b/vi/vi_VN/vivos/x_low/samples/speaker_55.mp3 differ diff --git a/vi/vi_VN/vivos/x_low/samples/speaker_56.mp3 b/vi/vi_VN/vivos/x_low/samples/speaker_56.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..a1912a3b27c18d2317765e849e1782c2c14d6511 Binary files /dev/null and b/vi/vi_VN/vivos/x_low/samples/speaker_56.mp3 differ diff --git a/vi/vi_VN/vivos/x_low/samples/speaker_57.mp3 b/vi/vi_VN/vivos/x_low/samples/speaker_57.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..ec7e7884e39200a26edfe6eb2de74dce3917dac5 Binary files /dev/null and b/vi/vi_VN/vivos/x_low/samples/speaker_57.mp3 differ diff --git a/vi/vi_VN/vivos/x_low/samples/speaker_58.mp3 b/vi/vi_VN/vivos/x_low/samples/speaker_58.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..11f0fd24bb807205e21ddd06e4719a792621bcc3 Binary files /dev/null and b/vi/vi_VN/vivos/x_low/samples/speaker_58.mp3 differ diff --git a/vi/vi_VN/vivos/x_low/samples/speaker_59.mp3 b/vi/vi_VN/vivos/x_low/samples/speaker_59.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..8832e08b91bd175ec152c0736b09144bb1ef95a3 Binary files /dev/null and b/vi/vi_VN/vivos/x_low/samples/speaker_59.mp3 differ diff --git a/vi/vi_VN/vivos/x_low/samples/speaker_6.mp3 b/vi/vi_VN/vivos/x_low/samples/speaker_6.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..adcc1864998bf32c5cbd840a6db93d796156c332 Binary files /dev/null and b/vi/vi_VN/vivos/x_low/samples/speaker_6.mp3 differ diff --git a/vi/vi_VN/vivos/x_low/samples/speaker_60.mp3 b/vi/vi_VN/vivos/x_low/samples/speaker_60.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..8c522087f57423a744a3fda25373890a89e18d75 Binary files /dev/null and b/vi/vi_VN/vivos/x_low/samples/speaker_60.mp3 differ diff --git a/vi/vi_VN/vivos/x_low/samples/speaker_61.mp3 b/vi/vi_VN/vivos/x_low/samples/speaker_61.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..bc262945ad8a1a8a90df9cbbb19a379f11daf39a Binary files /dev/null and b/vi/vi_VN/vivos/x_low/samples/speaker_61.mp3 differ diff --git a/vi/vi_VN/vivos/x_low/samples/speaker_62.mp3 b/vi/vi_VN/vivos/x_low/samples/speaker_62.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..b2f70617fa6d9c5094dbc96f3abb07b97fb11b9a Binary files /dev/null and b/vi/vi_VN/vivos/x_low/samples/speaker_62.mp3 differ diff --git a/vi/vi_VN/vivos/x_low/samples/speaker_63.mp3 b/vi/vi_VN/vivos/x_low/samples/speaker_63.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..c2d02ab5ed493fb6e43578153e03371268973485 Binary files /dev/null and b/vi/vi_VN/vivos/x_low/samples/speaker_63.mp3 differ diff --git a/vi/vi_VN/vivos/x_low/samples/speaker_64.mp3 b/vi/vi_VN/vivos/x_low/samples/speaker_64.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..cff6f7c1a15e8e2e11587ab21bb4aae87e9335c8 Binary files /dev/null and b/vi/vi_VN/vivos/x_low/samples/speaker_64.mp3 differ diff --git a/vi/vi_VN/vivos/x_low/samples/speaker_7.mp3 b/vi/vi_VN/vivos/x_low/samples/speaker_7.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..7d08cdeeb951a00a444ee39da353fcca3549d57c Binary files /dev/null and b/vi/vi_VN/vivos/x_low/samples/speaker_7.mp3 differ diff --git a/vi/vi_VN/vivos/x_low/samples/speaker_8.mp3 b/vi/vi_VN/vivos/x_low/samples/speaker_8.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..4eaa681fff72b3ebdc218cee3ba0f202da83226c Binary files /dev/null and b/vi/vi_VN/vivos/x_low/samples/speaker_8.mp3 differ diff --git a/vi/vi_VN/vivos/x_low/samples/speaker_9.mp3 b/vi/vi_VN/vivos/x_low/samples/speaker_9.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..5b66b9a9d23996b3026918289b31c3256aa6e5e5 Binary files /dev/null and b/vi/vi_VN/vivos/x_low/samples/speaker_9.mp3 differ diff --git a/vi/vi_VN/vivos/x_low/vi_VN-vivos-x_low.onnx b/vi/vi_VN/vivos/x_low/vi_VN-vivos-x_low.onnx new file mode 100644 index 0000000000000000000000000000000000000000..edb732298bbecb0903c96467f1434c609b981cfb --- /dev/null +++ b/vi/vi_VN/vivos/x_low/vi_VN-vivos-x_low.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ab13374eb0862021a545befe7727aef59e16117f1c075aa9e0362237ecc98ae +size 27789413 diff --git a/vi/vi_VN/vivos/x_low/vi_VN-vivos-x_low.onnx.json b/vi/vi_VN/vivos/x_low/vi_VN-vivos-x_low.onnx.json new file mode 100644 index 0000000000000000000000000000000000000000..b8bbc6c471f66e35aaa356053a11646f55c5abb0 --- /dev/null +++ b/vi/vi_VN/vivos/x_low/vi_VN-vivos-x_low.onnx.json @@ -0,0 +1,486 @@ +{ + "audio": { + "sample_rate": 16000, + "quality": "x_low" + }, + "espeak": { + "voice": "vi" + }, + "inference": { + "noise_scale": 0.667, + "length_scale": 1, + "noise_w": 0.8 + }, + "phoneme_map": {}, + "phoneme_id_map": { + "_": [ + 0 + ], + "^": [ + 1 + ], + "$": [ + 2 + ], + " ": [ + 3 + ], + "!": [ + 4 + ], + "'": [ + 5 + ], + "(": [ + 6 + ], + ")": [ + 7 + ], + ",": [ + 8 + ], + "-": [ + 9 + ], + ".": [ + 10 + ], + ":": [ + 11 + ], + ";": [ + 12 + ], + "?": [ + 13 + ], + "a": [ + 14 + ], + "b": [ + 15 + ], + "c": [ + 16 + ], + "d": [ + 17 + ], + "e": [ + 18 + ], + "f": [ + 19 + ], + "h": [ + 20 + ], + "i": [ + 21 + ], + "j": [ + 22 + ], + "k": [ + 23 + ], + "l": [ + 24 + ], + "m": [ + 25 + ], + "n": [ + 26 + ], + "o": [ + 27 + ], + "p": [ + 28 + ], + "q": [ + 29 + ], + "r": [ + 30 + ], + "s": [ + 31 + ], + "t": [ + 32 + ], + "u": [ + 33 + ], + "v": [ + 34 + ], + "w": [ + 35 + ], + "x": [ + 36 + ], + "y": [ + 37 + ], + "z": [ + 38 + ], + "æ": [ + 39 + ], + "ç": [ + 40 + ], + "ð": [ + 41 + ], + "ø": [ + 42 + ], + "ħ": [ + 43 + ], + "ŋ": [ + 44 + ], + "œ": [ + 45 + ], + "ǀ": [ + 46 + ], + "ǁ": [ + 47 + ], + "ǂ": [ + 48 + ], + "ǃ": [ + 49 + ], + "ɐ": [ + 50 + ], + "ɑ": [ + 51 + ], + "ɒ": [ + 52 + ], + "ɓ": [ + 53 + ], + "ɔ": [ + 54 + ], + "ɕ": [ + 55 + ], + "ɖ": [ + 56 + ], + "ɗ": [ + 57 + ], + "ɘ": [ + 58 + ], + "ə": [ + 59 + ], + "ɚ": [ + 60 + ], + "ɛ": [ + 61 + ], + "ɜ": [ + 62 + ], + "ɞ": [ + 63 + ], + "ɟ": [ + 64 + ], + "ɠ": [ + 65 + ], + "ɡ": [ + 66 + ], + "ɢ": [ + 67 + ], + "ɣ": [ + 68 + ], + "ɤ": [ + 69 + ], + "ɥ": [ + 70 + ], + "ɦ": [ + 71 + ], + "ɧ": [ + 72 + ], + "ɨ": [ + 73 + ], + "ɪ": [ + 74 + ], + "ɫ": [ + 75 + ], + "ɬ": [ + 76 + ], + "ɭ": [ + 77 + ], + "ɮ": [ + 78 + ], + "ɯ": [ + 79 + ], + "ɰ": [ + 80 + ], + "ɱ": [ + 81 + ], + "ɲ": [ + 82 + ], + "ɳ": [ + 83 + ], + "ɴ": [ + 84 + ], + "ɵ": [ + 85 + ], + "ɶ": [ + 86 + ], + "ɸ": [ + 87 + ], + "ɹ": [ + 88 + ], + "ɺ": [ + 89 + ], + "ɻ": [ + 90 + ], + "ɽ": [ + 91 + ], + "ɾ": [ + 92 + ], + "ʀ": [ + 93 + ], + "ʁ": [ + 94 + ], + "ʂ": [ + 95 + ], + "ʃ": [ + 96 + ], + "ʄ": [ + 97 + ], + "ʈ": [ + 98 + ], + "ʉ": [ + 99 + ], + "ʊ": [ + 100 + ], + "ʋ": [ + 101 + ], + "ʌ": [ + 102 + ], + "ʍ": [ + 103 + ], + "ʎ": [ + 104 + ], + "ʏ": [ + 105 + ], + "ʐ": [ + 106 + ], + "ʑ": [ + 107 + ], + "ʒ": [ + 108 + ], + "ʔ": [ + 109 + ], + "ʕ": [ + 110 + ], + "ʘ": [ + 111 + ], + "ʙ": [ + 112 + ], + "ʛ": [ + 113 + ], + "ʜ": [ + 114 + ], + "ʝ": [ + 115 + ], + "ʟ": [ + 116 + ], + "ʡ": [ + 117 + ], + "ʢ": [ + 118 + ], + "ʲ": [ + 119 + ], + "ˈ": [ + 120 + ], + "ˌ": [ + 121 + ], + "ː": [ + 122 + ], + "ˑ": [ + 123 + ], + "˞": [ + 124 + ], + "β": [ + 125 + ], + "θ": [ + 126 + ], + "χ": [ + 127 + ], + "ᵻ": [ + 128 + ], + "ⱱ": [ + 129 + ] + }, + "num_symbols": 130, + "num_speakers": 65, + "speaker_id_map": { + "VIVOSSPK13": 0, + "VIVOSSPK14": 1, + "VIVOSSPK15": 2, + "VIVOSSPK16": 3, + "VIVOSSPK17": 4, + "VIVOSSPK18": 5, + "VIVOSSPK19": 6, + "VIVOSSPK20": 7, + "VIVOSSPK21": 8, + "VIVOSSPK22": 9, + "VIVOSSPK26": 10, + "VIVOSSPK34": 11, + "VIVOSSPK40": 12, + "VIVOSSPK41": 13, + "VIVOSSPK42": 14, + "VIVOSSPK43": 15, + "VIVOSSPK44": 16, + "VIVOSSPK45": 17, + "VIVOSSPK46": 18, + "VIVOSSPK38": 19, + "VIVOSSPK31": 20, + "VIVOSSPK35": 21, + "VIVOSSPK01": 22, + "VIVOSSPK02": 23, + "VIVOSSPK03": 24, + "VIVOSSPK04": 25, + "VIVOSSPK05": 26, + "VIVOSSPK06": 27, + "VIVOSSPK07": 28, + "VIVOSSPK08": 29, + "VIVOSSPK09": 30, + "VIVOSSPK10": 31, + "VIVOSSPK11": 32, + "VIVOSSPK12": 33, + "VIVOSSPK27": 34, + "VIVOSSPK36": 35, + "VIVOSSPK33": 36, + "VIVOSSPK32": 37, + "VIVOSSPK29": 38, + "VIVOSSPK39": 39, + "VIVOSSPK25": 40, + "VIVOSSPK28": 41, + "VIVOSSPK30": 42, + "VIVOSSPK37": 43, + "VIVOSSPK23": 44, + "VIVOSSPK24": 45, + "VIVOSDEV02": 46, + "VIVOSDEV03": 47, + "VIVOSDEV01": 48, + "VIVOSDEV04": 49, + "VIVOSDEV05": 50, + "VIVOSDEV06": 51, + "VIVOSDEV07": 52, + "VIVOSDEV08": 53, + "VIVOSDEV09": 54, + "VIVOSDEV10": 55, + "VIVOSDEV11": 56, + "VIVOSDEV12": 57, + "VIVOSDEV13": 58, + "VIVOSDEV14": 59, + "VIVOSDEV15": 60, + "VIVOSDEV16": 61, + "VIVOSDEV17": 62, + "VIVOSDEV18": 63, + "VIVOSDEV19": 64 + }, + "piper_version": "0.2.0", + "language": { + "code": "vi_VN", + "family": "vi", + "region": "VN", + "name_native": "Tiếng Việt", + "name_english": "Vietnamese", + "country_english": "Vietnam" + }, + "dataset": "vivos" +} \ No newline at end of file diff --git a/zh/zh_CN/huayan/medium/MODEL_CARD b/zh/zh_CN/huayan/medium/MODEL_CARD new file mode 100644 index 0000000000000000000000000000000000000000..02b4fba5ae8353637e93af3443ddddacf14c663e --- /dev/null +++ b/zh/zh_CN/huayan/medium/MODEL_CARD @@ -0,0 +1,15 @@ +# Model card for huayan (medium) + +* Language: zh_CN (Chinese, China) +* Speakers: 1 +* Quality: medium +* Samplerate: 22,050Hz + +## Dataset + +* URL: https://github.com/PlayVoice/HuaYan_TTS +* License: Unknown + +## Training + +Finetuned from U.S. English lessac voice (medium quality). diff --git a/zh/zh_CN/huayan/medium/samples/speaker_0.mp3 b/zh/zh_CN/huayan/medium/samples/speaker_0.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..a6132c58e242dc842cb672fef58bb06a9361c7cb Binary files /dev/null and b/zh/zh_CN/huayan/medium/samples/speaker_0.mp3 differ diff --git a/zh/zh_CN/huayan/medium/zh_CN-huayan-medium.onnx b/zh/zh_CN/huayan/medium/zh_CN-huayan-medium.onnx new file mode 100644 index 0000000000000000000000000000000000000000..6bf247ed7ba3cbc066b8a3f13f6392aed956bf74 --- /dev/null +++ b/zh/zh_CN/huayan/medium/zh_CN-huayan-medium.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9929917bf8cabb26fd528ea44d3a6699c11e87317a14765312420be230be0f3d +size 63201294 diff --git a/zh/zh_CN/huayan/medium/zh_CN-huayan-medium.onnx.json b/zh/zh_CN/huayan/medium/zh_CN-huayan-medium.onnx.json new file mode 100644 index 0000000000000000000000000000000000000000..f0e6e6ed1f91fdfa0bac50cb6f0538606fde254b --- /dev/null +++ b/zh/zh_CN/huayan/medium/zh_CN-huayan-medium.onnx.json @@ -0,0 +1,487 @@ +{ + "audio": { + "sample_rate": 22050, + "quality": "medium" + }, + "espeak": { + "voice": "cmn" + }, + "inference": { + "noise_scale": 0.667, + "length_scale": 1, + "noise_w": 0.8 + }, + "phoneme_type": "espeak", + "phoneme_map": {}, + "phoneme_id_map": { + "_": [ + 0 + ], + "^": [ + 1 + ], + "$": [ + 2 + ], + " ": [ + 3 + ], + "!": [ + 4 + ], + "'": [ + 5 + ], + "(": [ + 6 + ], + ")": [ + 7 + ], + ",": [ + 8 + ], + "-": [ + 9 + ], + ".": [ + 10 + ], + ":": [ + 11 + ], + ";": [ + 12 + ], + "?": [ + 13 + ], + "a": [ + 14 + ], + "b": [ + 15 + ], + "c": [ + 16 + ], + "d": [ + 17 + ], + "e": [ + 18 + ], + "f": [ + 19 + ], + "h": [ + 20 + ], + "i": [ + 21 + ], + "j": [ + 22 + ], + "k": [ + 23 + ], + "l": [ + 24 + ], + "m": [ + 25 + ], + "n": [ + 26 + ], + "o": [ + 27 + ], + "p": [ + 28 + ], + "q": [ + 29 + ], + "r": [ + 30 + ], + "s": [ + 31 + ], + "t": [ + 32 + ], + "u": [ + 33 + ], + "v": [ + 34 + ], + "w": [ + 35 + ], + "x": [ + 36 + ], + "y": [ + 37 + ], + "z": [ + 38 + ], + "æ": [ + 39 + ], + "ç": [ + 40 + ], + "ð": [ + 41 + ], + "ø": [ + 42 + ], + "ħ": [ + 43 + ], + "ŋ": [ + 44 + ], + "œ": [ + 45 + ], + "ǀ": [ + 46 + ], + "ǁ": [ + 47 + ], + "ǂ": [ + 48 + ], + "ǃ": [ + 49 + ], + "ɐ": [ + 50 + ], + "ɑ": [ + 51 + ], + "ɒ": [ + 52 + ], + "ɓ": [ + 53 + ], + "ɔ": [ + 54 + ], + "ɕ": [ + 55 + ], + "ɖ": [ + 56 + ], + "ɗ": [ + 57 + ], + "ɘ": [ + 58 + ], + "ə": [ + 59 + ], + "ɚ": [ + 60 + ], + "ɛ": [ + 61 + ], + "ɜ": [ + 62 + ], + "ɞ": [ + 63 + ], + "ɟ": [ + 64 + ], + "ɠ": [ + 65 + ], + "ɡ": [ + 66 + ], + "ɢ": [ + 67 + ], + "ɣ": [ + 68 + ], + "ɤ": [ + 69 + ], + "ɥ": [ + 70 + ], + "ɦ": [ + 71 + ], + "ɧ": [ + 72 + ], + "ɨ": [ + 73 + ], + "ɪ": [ + 74 + ], + "ɫ": [ + 75 + ], + "ɬ": [ + 76 + ], + "ɭ": [ + 77 + ], + "ɮ": [ + 78 + ], + "ɯ": [ + 79 + ], + "ɰ": [ + 80 + ], + "ɱ": [ + 81 + ], + "ɲ": [ + 82 + ], + "ɳ": [ + 83 + ], + "ɴ": [ + 84 + ], + "ɵ": [ + 85 + ], + "ɶ": [ + 86 + ], + "ɸ": [ + 87 + ], + "ɹ": [ + 88 + ], + "ɺ": [ + 89 + ], + "ɻ": [ + 90 + ], + "ɽ": [ + 91 + ], + "ɾ": [ + 92 + ], + "ʀ": [ + 93 + ], + "ʁ": [ + 94 + ], + "ʂ": [ + 95 + ], + "ʃ": [ + 96 + ], + "ʄ": [ + 97 + ], + "ʈ": [ + 98 + ], + "ʉ": [ + 99 + ], + "ʊ": [ + 100 + ], + "ʋ": [ + 101 + ], + "ʌ": [ + 102 + ], + "ʍ": [ + 103 + ], + "ʎ": [ + 104 + ], + "ʏ": [ + 105 + ], + "ʐ": [ + 106 + ], + "ʑ": [ + 107 + ], + "ʒ": [ + 108 + ], + "ʔ": [ + 109 + ], + "ʕ": [ + 110 + ], + "ʘ": [ + 111 + ], + "ʙ": [ + 112 + ], + "ʛ": [ + 113 + ], + "ʜ": [ + 114 + ], + "ʝ": [ + 115 + ], + "ʟ": [ + 116 + ], + "ʡ": [ + 117 + ], + "ʢ": [ + 118 + ], + "ʲ": [ + 119 + ], + "ˈ": [ + 120 + ], + "ˌ": [ + 121 + ], + "ː": [ + 122 + ], + "ˑ": [ + 123 + ], + "˞": [ + 124 + ], + "β": [ + 125 + ], + "θ": [ + 126 + ], + "χ": [ + 127 + ], + "ᵻ": [ + 128 + ], + "ⱱ": [ + 129 + ], + "0": [ + 130 + ], + "1": [ + 131 + ], + "2": [ + 132 + ], + "3": [ + 133 + ], + "4": [ + 134 + ], + "5": [ + 135 + ], + "6": [ + 136 + ], + "7": [ + 137 + ], + "8": [ + 138 + ], + "9": [ + 139 + ], + "̧": [ + 140 + ], + "̃": [ + 141 + ], + "̪": [ + 142 + ], + "̯": [ + 143 + ], + "̩": [ + 144 + ], + "ʰ": [ + 145 + ], + "ˤ": [ + 146 + ], + "ε": [ + 147 + ], + "↓": [ + 148 + ], + "#": [ + 149 + ], + "\"": [ + 150 + ], + "↑": [ + 151 + ] + }, + "num_symbols": 256, + "num_speakers": 1, + "speaker_id_map": {}, + "piper_version": "1.0.0", + "language": { + "code": "zh_CN", + "family": "zh", + "region": "CN", + "name_native": "简体中文", + "name_english": "Chinese", + "country_english": "China" + }, + "dataset": "huayan" +} \ No newline at end of file diff --git a/zh/zh_CN/huayan/x_low/ALIASES b/zh/zh_CN/huayan/x_low/ALIASES new file mode 100644 index 0000000000000000000000000000000000000000..ab22d87f3a8b80fa0581d6eb860e8d2cd162a752 --- /dev/null +++ b/zh/zh_CN/huayan/x_low/ALIASES @@ -0,0 +1 @@ +zh-cn-huayan-x-low diff --git a/zh/zh_CN/huayan/x_low/MODEL_CARD b/zh/zh_CN/huayan/x_low/MODEL_CARD new file mode 100644 index 0000000000000000000000000000000000000000..00e271f38c3ea87822aed8298fa3a16fb1dacef4 --- /dev/null +++ b/zh/zh_CN/huayan/x_low/MODEL_CARD @@ -0,0 +1,15 @@ +# Model card for huayan (x_low) + +* Language: zh_CN (Chinese, China) +* Speakers: 1 +* Quality: x_low +* Samplerate: 16,000Hz + +## Dataset + +* URL: https://github.com/PlayVoice/HuaYan_TTS +* License: Unknown + +## Training + +Trained from scratch. diff --git a/zh/zh_CN/huayan/x_low/samples/speaker_0.mp3 b/zh/zh_CN/huayan/x_low/samples/speaker_0.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..3a5a1bf9be6f45404ab8e0f12a2c109b14e76f90 Binary files /dev/null and b/zh/zh_CN/huayan/x_low/samples/speaker_0.mp3 differ diff --git a/zh/zh_CN/huayan/x_low/zh_CN-huayan-x_low.onnx b/zh/zh_CN/huayan/x_low/zh_CN-huayan-x_low.onnx new file mode 100644 index 0000000000000000000000000000000000000000..dec8441e6ae48ba3eacf311daf72a456a184696a --- /dev/null +++ b/zh/zh_CN/huayan/x_low/zh_CN-huayan-x_low.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d30b143fac66d821a1285aa013295adf5cd129d3cc11d70334e51c7b20662c37 +size 20628813 diff --git a/zh/zh_CN/huayan/x_low/zh_CN-huayan-x_low.onnx.json b/zh/zh_CN/huayan/x_low/zh_CN-huayan-x_low.onnx.json new file mode 100644 index 0000000000000000000000000000000000000000..414a3c6c950e1a49c9e42fb2d95fffbdabbff7f0 --- /dev/null +++ b/zh/zh_CN/huayan/x_low/zh_CN-huayan-x_low.onnx.json @@ -0,0 +1,420 @@ +{ + "audio": { + "sample_rate": 16000, + "quality": "x_low" + }, + "espeak": { + "voice": "cmn" + }, + "inference": { + "noise_scale": 0.667, + "length_scale": 1, + "noise_w": 0.8 + }, + "phoneme_map": {}, + "phoneme_id_map": { + "_": [ + 0 + ], + "^": [ + 1 + ], + "$": [ + 2 + ], + " ": [ + 3 + ], + "!": [ + 4 + ], + "'": [ + 5 + ], + "(": [ + 6 + ], + ")": [ + 7 + ], + ",": [ + 8 + ], + "-": [ + 9 + ], + ".": [ + 10 + ], + ":": [ + 11 + ], + ";": [ + 12 + ], + "?": [ + 13 + ], + "a": [ + 14 + ], + "b": [ + 15 + ], + "c": [ + 16 + ], + "d": [ + 17 + ], + "e": [ + 18 + ], + "f": [ + 19 + ], + "h": [ + 20 + ], + "i": [ + 21 + ], + "j": [ + 22 + ], + "k": [ + 23 + ], + "l": [ + 24 + ], + "m": [ + 25 + ], + "n": [ + 26 + ], + "o": [ + 27 + ], + "p": [ + 28 + ], + "q": [ + 29 + ], + "r": [ + 30 + ], + "s": [ + 31 + ], + "t": [ + 32 + ], + "u": [ + 33 + ], + "v": [ + 34 + ], + "w": [ + 35 + ], + "x": [ + 36 + ], + "y": [ + 37 + ], + "z": [ + 38 + ], + "æ": [ + 39 + ], + "ç": [ + 40 + ], + "ð": [ + 41 + ], + "ø": [ + 42 + ], + "ħ": [ + 43 + ], + "ŋ": [ + 44 + ], + "œ": [ + 45 + ], + "ǀ": [ + 46 + ], + "ǁ": [ + 47 + ], + "ǂ": [ + 48 + ], + "ǃ": [ + 49 + ], + "ɐ": [ + 50 + ], + "ɑ": [ + 51 + ], + "ɒ": [ + 52 + ], + "ɓ": [ + 53 + ], + "ɔ": [ + 54 + ], + "ɕ": [ + 55 + ], + "ɖ": [ + 56 + ], + "ɗ": [ + 57 + ], + "ɘ": [ + 58 + ], + "ə": [ + 59 + ], + "ɚ": [ + 60 + ], + "ɛ": [ + 61 + ], + "ɜ": [ + 62 + ], + "ɞ": [ + 63 + ], + "ɟ": [ + 64 + ], + "ɠ": [ + 65 + ], + "ɡ": [ + 66 + ], + "ɢ": [ + 67 + ], + "ɣ": [ + 68 + ], + "ɤ": [ + 69 + ], + "ɥ": [ + 70 + ], + "ɦ": [ + 71 + ], + "ɧ": [ + 72 + ], + "ɨ": [ + 73 + ], + "ɪ": [ + 74 + ], + "ɫ": [ + 75 + ], + "ɬ": [ + 76 + ], + "ɭ": [ + 77 + ], + "ɮ": [ + 78 + ], + "ɯ": [ + 79 + ], + "ɰ": [ + 80 + ], + "ɱ": [ + 81 + ], + "ɲ": [ + 82 + ], + "ɳ": [ + 83 + ], + "ɴ": [ + 84 + ], + "ɵ": [ + 85 + ], + "ɶ": [ + 86 + ], + "ɸ": [ + 87 + ], + "ɹ": [ + 88 + ], + "ɺ": [ + 89 + ], + "ɻ": [ + 90 + ], + "ɽ": [ + 91 + ], + "ɾ": [ + 92 + ], + "ʀ": [ + 93 + ], + "ʁ": [ + 94 + ], + "ʂ": [ + 95 + ], + "ʃ": [ + 96 + ], + "ʄ": [ + 97 + ], + "ʈ": [ + 98 + ], + "ʉ": [ + 99 + ], + "ʊ": [ + 100 + ], + "ʋ": [ + 101 + ], + "ʌ": [ + 102 + ], + "ʍ": [ + 103 + ], + "ʎ": [ + 104 + ], + "ʏ": [ + 105 + ], + "ʐ": [ + 106 + ], + "ʑ": [ + 107 + ], + "ʒ": [ + 108 + ], + "ʔ": [ + 109 + ], + "ʕ": [ + 110 + ], + "ʘ": [ + 111 + ], + "ʙ": [ + 112 + ], + "ʛ": [ + 113 + ], + "ʜ": [ + 114 + ], + "ʝ": [ + 115 + ], + "ʟ": [ + 116 + ], + "ʡ": [ + 117 + ], + "ʢ": [ + 118 + ], + "ʲ": [ + 119 + ], + "ˈ": [ + 120 + ], + "ˌ": [ + 121 + ], + "ː": [ + 122 + ], + "ˑ": [ + 123 + ], + "˞": [ + 124 + ], + "β": [ + 125 + ], + "θ": [ + 126 + ], + "χ": [ + 127 + ], + "ᵻ": [ + 128 + ], + "ⱱ": [ + 129 + ] + }, + "num_symbols": 130, + "num_speakers": 1, + "speaker_id_map": {}, + "piper_version": "0.2.0", + "language": { + "code": "zh_CN", + "family": "zh", + "region": "CN", + "name_native": "简体中文", + "name_english": "Chinese", + "country_english": "China" + }, + "dataset": "huayan" +} \ No newline at end of file