Spaces:
				
			
			
	
			
			
		Runtime error
		
	
	
	
			
			
	
	
	
	
		
		
		Runtime error
		
	Commit · 826b5e0
	1 Parent(s): d6566ec
								
Update app.py
Browse files
    	
        app.py
    CHANGED
    
    | @@ -56,9 +56,9 @@ device = ( | |
| 56 | 
             
                        else "cpu"
         | 
| 57 | 
             
                    )
         | 
| 58 | 
             
                )
         | 
| 59 | 
            -
             | 
| 60 | 
             
            BandList = {
         | 
| 61 | 
            -
                     | 
| 62 | 
             
                    "Afterglow":["蘭","モカ","ひまり","巴","つぐみ"],
         | 
| 63 | 
             
                    "HelloHappyWorld":["こころ","美咲","薫","花音","はぐみ"],
         | 
| 64 | 
             
                    "PastelPalettes":["彩","日菜","千聖","イヴ","麻弥"],
         | 
| @@ -86,9 +86,10 @@ def get_net_g(model_path: str, version: str, device: str, hps): | |
| 86 | 
             
                return net_g
         | 
| 87 |  | 
| 88 | 
             
            def get_text(text, language_str, hps, device):
         | 
|  | |
| 89 | 
             
                norm_text, phone, tone, word2ph = clean_text(text, language_str)
         | 
| 90 | 
             
                phone, tone, language = cleaned_text_to_sequence(phone, tone, language_str)
         | 
| 91 | 
            -
             | 
| 92 | 
             
                if hps.data.add_blank:
         | 
| 93 | 
             
                    phone = commons.intersperse(phone, 0)
         | 
| 94 | 
             
                    tone = commons.intersperse(tone, 0)
         | 
| @@ -157,10 +158,12 @@ def infer( | |
| 157 | 
             
            ):
         | 
| 158 |  | 
| 159 | 
             
                language= 'JP' if is_japanese(text) else 'ZH'
         | 
|  | |
| 160 | 
             
                bert, ja_bert, en_bert, phones, tones, lang_ids = get_text(
         | 
| 161 | 
             
                    text, language, hps, device
         | 
| 162 | 
             
                )
         | 
| 163 | 
             
                emo = get_emo_(reference_audio, emotion)
         | 
|  | |
| 164 | 
             
                with torch.no_grad():
         | 
| 165 | 
             
                    x_tst = phones.to(device).unsqueeze(0)
         | 
| 166 | 
             
                    tones = tones.to(device).unsqueeze(0)
         | 
| @@ -170,7 +173,6 @@ def infer( | |
| 170 | 
             
                    en_bert = en_bert.to(device).unsqueeze(0)
         | 
| 171 | 
             
                    x_tst_lengths = torch.LongTensor([phones.size(0)]).to(device)
         | 
| 172 | 
             
                    emo = emo.to(device).unsqueeze(0)
         | 
| 173 | 
            -
                    print(emo)
         | 
| 174 | 
             
                    del phones
         | 
| 175 | 
             
                    speakers = torch.LongTensor([hps.data.spk2id[sid]]).to(device)
         | 
| 176 | 
             
                    audio = (
         | 
| @@ -216,7 +218,7 @@ if __name__ == "__main__": | |
| 216 | 
             
                emotional_model = EmotionModel.from_pretrained(emotional_model_name).to(device)
         | 
| 217 | 
             
                languages = [ "Auto", "ZH", "JP"]
         | 
| 218 | 
             
                modelPaths = []
         | 
| 219 | 
            -
                for dirpath, dirnames, filenames in os.walk( | 
| 220 | 
             
                    for filename in filenames:
         | 
| 221 | 
             
                        modelPaths.append(os.path.join(dirpath, filename))
         | 
| 222 | 
             
                hps = utils.get_hparams_from_file('Data/Bushiroad/configs/config.json')
         | 
|  | |
| 56 | 
             
                        else "cpu"
         | 
| 57 | 
             
                    )
         | 
| 58 | 
             
                )
         | 
| 59 | 
            +
            device = "cpu"
         | 
| 60 | 
             
            BandList = {
         | 
| 61 | 
            +
                    "PoppinParty":["香澄","有咲","たえ","りみ","沙綾"],
         | 
| 62 | 
             
                    "Afterglow":["蘭","モカ","ひまり","巴","つぐみ"],
         | 
| 63 | 
             
                    "HelloHappyWorld":["こころ","美咲","薫","花音","はぐみ"],
         | 
| 64 | 
             
                    "PastelPalettes":["彩","日菜","千聖","イヴ","麻弥"],
         | 
|  | |
| 86 | 
             
                return net_g
         | 
| 87 |  | 
| 88 | 
             
            def get_text(text, language_str, hps, device):
         | 
| 89 | 
            +
                # 在此处实现当前版本的get_text
         | 
| 90 | 
             
                norm_text, phone, tone, word2ph = clean_text(text, language_str)
         | 
| 91 | 
             
                phone, tone, language = cleaned_text_to_sequence(phone, tone, language_str)
         | 
| 92 | 
            +
             | 
| 93 | 
             
                if hps.data.add_blank:
         | 
| 94 | 
             
                    phone = commons.intersperse(phone, 0)
         | 
| 95 | 
             
                    tone = commons.intersperse(tone, 0)
         | 
|  | |
| 158 | 
             
            ):
         | 
| 159 |  | 
| 160 | 
             
                language= 'JP' if is_japanese(text) else 'ZH'
         | 
| 161 | 
            +
                print(language)
         | 
| 162 | 
             
                bert, ja_bert, en_bert, phones, tones, lang_ids = get_text(
         | 
| 163 | 
             
                    text, language, hps, device
         | 
| 164 | 
             
                )
         | 
| 165 | 
             
                emo = get_emo_(reference_audio, emotion)
         | 
| 166 | 
            +
                print(emo)
         | 
| 167 | 
             
                with torch.no_grad():
         | 
| 168 | 
             
                    x_tst = phones.to(device).unsqueeze(0)
         | 
| 169 | 
             
                    tones = tones.to(device).unsqueeze(0)
         | 
|  | |
| 173 | 
             
                    en_bert = en_bert.to(device).unsqueeze(0)
         | 
| 174 | 
             
                    x_tst_lengths = torch.LongTensor([phones.size(0)]).to(device)
         | 
| 175 | 
             
                    emo = emo.to(device).unsqueeze(0)
         | 
|  | |
| 176 | 
             
                    del phones
         | 
| 177 | 
             
                    speakers = torch.LongTensor([hps.data.spk2id[sid]]).to(device)
         | 
| 178 | 
             
                    audio = (
         | 
|  | |
| 218 | 
             
                emotional_model = EmotionModel.from_pretrained(emotional_model_name).to(device)
         | 
| 219 | 
             
                languages = [ "Auto", "ZH", "JP"]
         | 
| 220 | 
             
                modelPaths = []
         | 
| 221 | 
            +
                for dirpath, dirnames, filenames in os.walk('Data/Bushiroad/models/'):
         | 
| 222 | 
             
                    for filename in filenames:
         | 
| 223 | 
             
                        modelPaths.append(os.path.join(dirpath, filename))
         | 
| 224 | 
             
                hps = utils.get_hparams_from_file('Data/Bushiroad/configs/config.json')
         | 
