File size: 6,950 Bytes
0fcbf28
3eeee98
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1d88bdc
3eeee98
 
 
 
5d2461b
 
cd81b30
 
3eeee98
 
 
aefaec7
3eeee98
 
 
 
 
cd81b30
3eeee98
 
 
 
 
 
 
 
 
aefaec7
3eeee98
9dc01a4
3eeee98
9dc01a4
3eeee98
9dc01a4
3eeee98
 
 
 
 
 
 
6be5772
3eeee98
6be5772
5d2461b
 
 
 
 
3eeee98
5d2461b
3eeee98
 
 
 
 
5d2461b
aefaec7
278802e
5d2461b
 
3eeee98
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cb8e9a3
 
3eeee98
 
 
 
cd81b30
 
 
 
bbebd26
 
 
 
 
 
 
 
 
4f21fc5
bbebd26
 
4f21fc5
bbebd26
 
 
5d2461b
 
1d88bdc
3eeee98
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0fcbf28
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
<!doctype html>
<html lang="en">
<head>
    <meta name="viewport" content="width=device-width" />
    <link rel="stylesheet" href="style.css" />
    <meta charset="UTF-8">
    <title>Matcha-TTS Onnx En001-English</title>
</head>
<body>
    <h1>Matcha-TTS Onnx En001-English (Faster GPU Version)</h1>
    <div>This example uses the quantized model (lower quality and slower) because of the GitHub Pages 100 MB file-size limit.</div>
    <p><a href = "https://huggingface.co/Akjava/matcha_tts_common_voice_01_en_001">common_voice_01_en_001</a> - my trained model. You can create one too!</p>
    <br>
    <script src="https://cdn.jsdelivr.net/npm/onnxruntime-web/dist/ort.webgpu.min.js" ></script>
    

    <script type="module">
        import { MatchaTTSRaw } from "./js-esm/matcha_tts_raw.js";
        import { webWavPlay } from "./js-esm/web_wav_play.js";
        import { arpa_to_ipa } from "./js-esm/arpa_to_ipa.js";
        import { loadCmudict } from "./js-esm/cmudict_loader.js";
        import { env,textToArpa} from "./js-esm/text_to_arpa.js";

        // Configure the `env` object exported by text_to_arpa.js.
        // NOTE(review): presumably this is the transformers.js `env` re-exported by that module — confirm.
        env.allowLocalModels = true;
        env.localModelPath = "./models/";
        env.backends.onnx.logLevel = "error";
        
        // MatchaTTSRaw instance; created lazily by main() on its first run.
        let matcha_tts_raw
        // Dictionary object handed to loadCmudict() and textToArpa().
        let cmudict ={}
        // Set to true while main() is running and back to false when it ends.
        let speaking = false
        // Accumulated elapsed time (ms) and number of completed inferences,
        // used by update_infer_bench1() to display the average.
        let total_infer_time=0
        let count_infer=0
        /**
         * Convert the text in #textInput to speech and play it.
         *
         * On the first successful run the ONNX model and the CMU dictionary are
         * loaded; later runs reuse the existing session (a different model_name
         * passed afterwards is not reloaded). Elapsed time from text processing
         * to the end of inference is added to the running benchmark.
         *
         * @param {string} [model_name] - Base name of the .onnx file under
         *     ./models/matcha-tts/. Any non-string value (for example the click
         *     Event received when this function is used directly as a DOM
         *     handler) falls back to the default model.
         * @returns {Promise<void>}
         */
        async function main(model_name="en001_ep6399_univ_simplify_q8") {
            // A default parameter only applies to `undefined`; an Event object
            // passed by `element.onclick = main` would otherwise end up in the path.
            const default_model_name = "en001_ep6399_univ_simplify_q8"
            const resolved_model_name = typeof model_name === "string" ? model_name : default_model_name
            console.log(resolved_model_name)

            // Do not start a second synthesis while one is still running.
            if (speaking){
                console.log("speaking return")
                return
            }

            speaking = true
            try {
                console.log("main called")
                if(!matcha_tts_raw){
                    // Publish the instance only after everything loaded, so a
                    // failed load is retried on the next call.
                    const tts = new MatchaTTSRaw()
                    console.time("load model");
                    try {
                        await tts.load_model('./models/matcha-tts/'+resolved_model_name+'.onnx',{ executionProviders: ['webgpu','wasm'] });
                    } finally {
                        console.timeEnd("load model");
                    }

                    await loadCmudict(cmudict,'./dictionaries/cmudict-0.7b')
                    matcha_tts_raw = tts
                }else{
                    console.log("session exist skip load model")
                }

                const startTime = performance.now();
                const text = document.getElementById('textInput').value
                console.log("### textToArpa call")
                const arpa_text = await textToArpa(cmudict,text)
                console.log("### arpa returned")
                // All whitespace is stripped from the IPA string before inference.
                const ipa_text = arpa_to_ipa(arpa_text).replace(/\s/g, "");

                const spks = 0
                const speed = document.getElementById('speed').value
                const temperature = document.getElementById('temperature').value

                console.time("infer");
                let result
                try {
                    result = await matcha_tts_raw.infer(ipa_text, temperature, speed, spks);
                } finally {
                    // Always stop the timer so the next run can start it again.
                    console.timeEnd("infer");
                }

                if (result!=null){
                    const infer_time = performance.now()-startTime
                    total_infer_time += infer_time
                    count_infer += 1
                    update_infer_bench1()
                    webWavPlay(result)
                }
            } finally {
                // Release the guard even when loading or inference throws.
                speaking = false
            }
        }
      /**
       * Show the number of inferences and their mean elapsed time (ms, two
       * decimals) in the #result element.
       */
      function update_infer_bench1(){
        const mean_ms = (total_infer_time / count_infer).toFixed(2);
        const summary = `Infer Count ${count_infer} avg infer-time ${mean_ms} ms`;
        const target = document.getElementById('result');
        target.innerText = summary;
      }
        /**
         * Mirror the #spks input value into #spks_label, zero-padded to three
         * characters. Does nothing when either element is missing from the page.
         */
        function update_range(){
            const input = document.getElementById('spks')
            const label = document.getElementById('spks_label')
            // Guard: these elements are optional, so avoid a TypeError on null.
            if (!input || !label){
                return
            }
            label.textContent = String(input.value).padStart(3, '0')
        }
        /** Copy the current #temperature input value into #tempature_label. */
        function update_range2(){
            const { value: current } = document.getElementById('temperature')
            const label = document.getElementById('tempature_label')
            label.textContent = current
        }
        /** Copy the current #speed input value into #speed_label. */
        function update_range3(){
            const { value: current } = document.getElementById('speed')
            const label = document.getElementById('speed_label')
            label.textContent = current
        }

        // Wire up the UI once the page has loaded.
        window.onload = function(){
            // Call main() without arguments: assigning main directly would pass
            // the click Event as its model_name parameter.
            document.getElementById('myButton').onclick = () => { main() }

            // `input` fires while the slider is dragged, so the value labels
            // follow the thumb instead of updating only on release.
            document.getElementById('temperature').oninput = update_range2
            document.getElementById('speed').oninput = update_range3
        }
        /**
         * Start main() with the given model name.
         * The promise returned by main() is intentionally not awaited.
         * @param {string} model_name - Model base name forwarded to main().
         */
        function loadModel(model_name){
            void main(model_name)
        }

      /**
       * Create a button that calls loadModel(model_name) when clicked.
       * @param {string} label - Text shown on the button.
       * @param {string} model_name - Model base name passed to loadModel().
       * @returns {HTMLButtonElement} The new button, not yet attached to the DOM.
       */
      function create_button(label, model_name) {
        // Create the button
        const button = document.createElement('button');
        button.textContent = label;

        // Set the click event handler
        button.onclick = function() {
          loadModel(model_name);
        };

        // Hand the element back so the caller can insert it into the page.
        return button;
      }

      // Add the extra model button to the dedicated #buttons container. The
      // temperature value label is not a safe parent: update_range2() replaces
      // its textContent, which would remove the button.
      const bt1=create_button("ljspeech","ljspeech_sim")
      if (bt1){
        // Skip the insert instead of throwing when no element was produced.
        document.getElementById('buttons').appendChild(bt1)
      }
    </script>
 <div id="buttons"></div>
  
    <div id="result"></div>
  <br><br>
    <input type="text" id="textInput"  value ="Hello Huggingface." placeholder="Enter some text here...">
    
    <button id="myButton">Text To Speak</button><br>
    

    <label for ="temperature" style="width: 110px;display: inline-block;">Temperature</label>
    <input type="range" id="temperature"  min="0" max="1.0" value="0.5" step="0.1"/>
    <label for ="temperature" id="tempature_label">0.5</label><br>

    <label for ="speed" style="width: 110px;display: inline-block;">Speed</label>
    <input type="range" id="speed"  min="0.1" max="2.0" value="1.0" step="0.1"/>
    <label for ="speed" id="speed_label">1.0</label>
    <br>
    <br>
    <div id="footer">
    <b>Credits</b><br>
    <a href="https://github.com/akjava/Matcha-TTS-Japanese" style="font-size: 9px" target="link">Matcha-TTS-Japanese</a> | 
    <a href = "http://www.udialogue.org/download/cstr-vctk-corpus.html" style="font-size: 9px"  target="link">CSTR VCTK Corpus</a> |
    <a href = "https://github.com/cmusphinx/cmudict" style="font-size: 9px"  target="link">CMUDict</a> |
    <a href = "https://huggingface.co/docs/transformers.js/index" style="font-size: 9px"  target="link">Transformer.js</a> |
    <a href = "https://huggingface.co/cisco-ai/mini-bart-g2p" style="font-size: 9px"  target="link">mini-bart-g2p</a> |
    <a href = "https://onnxruntime.ai/docs/get-started/with-javascript/web.html" style="font-size: 9px"  target="link">ONNXRuntime-Web</a> |
    <a href = "https://github.com/akjava/English-To-IPA-Collections" style="font-size: 9px"  target="link">English-To-IPA-Collections</a> |
    <a href ="https://huggingface.co/papers/2309.03199" style="font-size: 9px"  target="link">Matcha-TTS Paper</a>
    </div>
    
    
    
</body>
</html>