Spaces:
Running
Running
Add random_select.take_lyric_continuation for singing a syllable with multiple successive pitches
Browse files- svs_utils.py +15 -5
svs_utils.py
CHANGED
|
@@ -227,7 +227,7 @@ def singmos_evaluation(predictor, wav_info, fs):
|
|
| 227 |
|
| 228 |
|
| 229 |
def estimate_sentence_length(query, config, song2note_lengths):
|
| 230 |
-
if config.melody_source == "random_select":
|
| 231 |
# random select a song from database, and return its value in the phrase_length column
|
| 232 |
# return phrase_length column and song name
|
| 233 |
song_name = random.choice(list(song2note_lengths.keys()))
|
|
@@ -238,7 +238,7 @@ def estimate_sentence_length(query, config, song2note_lengths):
|
|
| 238 |
raise NotImplementedError(f"melody source {config.melody_source} not supported")
|
| 239 |
|
| 240 |
|
| 241 |
-
def align_score_and_text(segment_iterator, lyric_ls, sybs, labels):
|
| 242 |
text = []
|
| 243 |
lyric_idx = 0
|
| 244 |
notes_info = []
|
|
@@ -261,6 +261,16 @@ def align_score_and_text(segment_iterator, lyric_ls, sybs, labels):
|
|
| 261 |
]
|
| 262 |
)
|
| 263 |
text.append(reference_note_lyric.strip("<>"))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 264 |
else:
|
| 265 |
notes_info.append(
|
| 266 |
[
|
|
@@ -316,7 +326,7 @@ if __name__ == "__main__":
|
|
| 316 |
model_path="espnet/mixdata_svs_visinger2_spkembed_lang_pretrained",
|
| 317 |
cache_dir="cache",
|
| 318 |
device="cpu",
|
| 319 |
-
melody_source="random_select",
|
| 320 |
lang="zh",
|
| 321 |
)
|
| 322 |
|
|
@@ -331,12 +341,12 @@ if __name__ == "__main__":
|
|
| 331 |
|
| 332 |
# then, phrase_length info should be added to llm prompt, and get the answer lyrics from llm
|
| 333 |
# e.g. answer_text = "天气真好\n空气清新"
|
| 334 |
-
answer_text = "天气真好\n空气清新"
|
| 335 |
lyric_ls, sybs, labels = svs_text_preprocessor(
|
| 336 |
config.model_path, answer_text, config.lang
|
| 337 |
)
|
| 338 |
segment_iterator = song_segment_iterator(song_db, metadata)
|
| 339 |
-
batch = align_score_and_text(segment_iterator, lyric_ls, sybs, labels)
|
| 340 |
singer_embedding = np.load(singer_embeddings[config.model_path]["singer2 (female)"])
|
| 341 |
lid = np.array([langs[config.lang]])
|
| 342 |
output_dict = model(batch, lids=lid, spembs=singer_embedding)
|
|
|
|
| 227 |
|
| 228 |
|
| 229 |
def estimate_sentence_length(query, config, song2note_lengths):
|
| 230 |
+
if config.melody_source.startswith("random_select"):
|
| 231 |
# random select a song from database, and return its value in the phrase_length column
|
| 232 |
# return phrase_length column and song name
|
| 233 |
song_name = random.choice(list(song2note_lengths.keys()))
|
|
|
|
| 238 |
raise NotImplementedError(f"melody source {config.melody_source} not supported")
|
| 239 |
|
| 240 |
|
| 241 |
+
def align_score_and_text(segment_iterator, lyric_ls, sybs, labels, config):
|
| 242 |
text = []
|
| 243 |
lyric_idx = 0
|
| 244 |
notes_info = []
|
|
|
|
| 261 |
]
|
| 262 |
)
|
| 263 |
text.append(reference_note_lyric.strip("<>"))
|
| 264 |
+
elif reference_note_lyric in ["-", "——"] and config.melody_source == "random_select.take_lyric_continuation":
|
| 265 |
+
notes_info.append(
|
| 266 |
+
[
|
| 267 |
+
note_start_time,
|
| 268 |
+
note_end_time,
|
| 269 |
+
reference_note_lyric,
|
| 270 |
+
note_midi,
|
| 271 |
+
text[-1],
|
| 272 |
+
]
|
| 273 |
+
)
|
| 274 |
else:
|
| 275 |
notes_info.append(
|
| 276 |
[
|
|
|
|
| 326 |
model_path="espnet/mixdata_svs_visinger2_spkembed_lang_pretrained",
|
| 327 |
cache_dir="cache",
|
| 328 |
device="cpu",
|
| 329 |
+
melody_source="random_select.take_lyric_continuation",
|
| 330 |
lang="zh",
|
| 331 |
)
|
| 332 |
|
|
|
|
| 341 |
|
| 342 |
# then, phrase_length info should be added to llm prompt, and get the answer lyrics from llm
|
| 343 |
# e.g. answer_text = "天气真好\n空气清新"
|
| 344 |
+
answer_text = "天气真好\n空气清新\n气温温和\n风和日丽\n天高气爽\n阳光明媚"
|
| 345 |
lyric_ls, sybs, labels = svs_text_preprocessor(
|
| 346 |
config.model_path, answer_text, config.lang
|
| 347 |
)
|
| 348 |
segment_iterator = song_segment_iterator(song_db, metadata)
|
| 349 |
+
batch = align_score_and_text(segment_iterator, lyric_ls, sybs, labels, config)
|
| 350 |
singer_embedding = np.load(singer_embeddings[config.model_path]["singer2 (female)"])
|
| 351 |
lid = np.array([langs[config.lang]])
|
| 352 |
output_dict = model(batch, lids=lid, spembs=singer_embedding)
|