#================================================================ # https://huggingface.co/spaces/asigalov61/Lyrics-Morpher #================================================================ print('*' * 70) print('Loading Lyrics Morpher modules...') import os os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1" import time import datetime from pytz import timezone import re import gradio as gr import spaces from transformers import AutoModelForCausalLM, AutoTokenizer from typing import List, Tuple print('*' * 70) print('Done!') print('*' * 70) #========================================================================================================== print('*' * 70) print('Loading model and tokenizer...') print('*' * 70) model_name = "asigalov61/Lyrics_Qwen2.5-0.5B-Instruct" model = AutoModelForCausalLM.from_pretrained( model_name, torch_dtype="auto", device_map="auto" ) print('*' * 70) tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True) print('*' * 70) print('Done!') print('*' * 70) #========================================================================================================== def _split_into_blocks(lines: List[str]) -> Tuple[int, List[Tuple[List[str], int]]]: """ Splits `lines` into: - leading_blanks: number of blank lines before the first non-blank - blocks: a list of (block_lines, blank_count_after) where * block_lines is a list of consecutive non-blank lines * blank_count_after is how many blank lines follow that block """ i = 0 n = len(lines) # count leading blank lines leading_blanks = 0 while i < n and lines[i] == "": leading_blanks += 1 i += 1 blocks = [] while i < n: # collect non-blank lines block = [] while i < n and lines[i] != "": block.append(lines[i]) i += 1 # then count blank lines after this block blank_after = 0 while i < n and lines[i] == "": blank_after += 1 i += 1 blocks.append((block, blank_after)) return leading_blanks, blocks def _compare_line(tpl_line: str, txt_line: str) -> Tuple[int,int]: """ Compare two lines token-by-token. Returns (char_mismatches, word_mismatches). '@' in tpl_line → one uppercase letter [A–Z] '_' in tpl_line → one lowercase letter [a–z] all other chars must match exactly. """ char_mis = 0 word_mis = 0 tpl_tokens = tpl_line.split(" ") txt_tokens = txt_line.split(" ") # difference in token count if len(tpl_tokens) != len(txt_tokens): word_mis += abs(len(tpl_tokens) - len(txt_tokens)) # compare each pair for t_tok, x_tok in zip(tpl_tokens, txt_tokens): token_error = False L = min(len(t_tok), len(x_tok)) # char-by-char for j in range(L): p, c = t_tok[j], x_tok[j] if p == "@": if not ("A" <= c <= "Z"): char_mis += 1 token_error = True elif p == "_": if not ("a" <= c <= "z"): char_mis += 1 token_error = True else: if p != c: char_mis += 1 token_error = True # length difference if len(t_tok) != len(x_tok): char_mis += abs(len(t_tok) - len(x_tok)) token_error = True if token_error: word_mis += 1 return char_mis, word_mis def count_mismatches(template: str, text: str) -> Tuple[int,int]: """ Compares `template` vs `text` where: - '@' matches exactly one uppercase letter [A–Z] - '_' matches exactly one lowercase letter [a–z] - all other characters (spaces, punctuation, quotes, case) match exactly - each extra/missing newline counts as 1 char & 1 word mismatch, but does NOT shift subsequent line alignment. Returns (char_mismatches, word_mismatches). """ tpl_lines = template.splitlines() txt_lines = text.splitlines() # Split into blank‐line‐aware blocks tpl_lead, tpl_blocks = _split_into_blocks(tpl_lines) txt_lead, txt_blocks = _split_into_blocks(txt_lines) char_mis = 0 word_mis = 0 # 1) Leading blank line diff diff_lead = abs(tpl_lead - txt_lead) char_mis += diff_lead word_mis += diff_lead # 2) Compare block by block max_blocks = max(len(tpl_blocks), len(txt_blocks)) for i in range(max_blocks): # unpack or empty if i < len(tpl_blocks): tpl_block, tpl_blank_after = tpl_blocks[i] else: tpl_block, tpl_blank_after = [], 0 if i < len(txt_blocks): txt_block, txt_blank_after = txt_blocks[i] else: txt_block, txt_blank_after = [], 0 # a) compare lines in this block max_lines = max(len(tpl_block), len(txt_block)) for ln in range(max_lines): if ln < len(tpl_block) and ln < len(txt_block): c1, w1 = _compare_line(tpl_block[ln], txt_block[ln]) char_mis += c1 word_mis += w1 elif ln < len(tpl_block): # missing line in text word_mis += 1 # count all chars + one '\n' char_mis += len(tpl_block[ln]) + 1 else: # extra line in text word_mis += 1 char_mis += len(txt_block[ln]) + 1 # b) blank‐line diff after block diff_blank = abs(tpl_blank_after - txt_blank_after) char_mis += diff_blank word_mis += diff_blank return char_mis, word_mis #========================================================================================================== def get_lyrics_template(song_title, song_lyrics): if song_title: title = re.sub(r'\s+', ' ', re.sub(r'[^A-Za-z ]+', '', song_title.strip())).strip() else: title = 'Unknown Song' lines = [re.sub(r'\s+', ' ', re.sub(r'[^A-Za-z ]+', '', l.strip())).strip() if l else '\n' for l in song_lyrics.split('\n')] src = '' words = [] for a in title.split(): wor = '' for aa in a: if aa.isupper(): wor += '@' else: wor += '_' words.append(wor) title_str = ' '.join(words) src += 'Song title: "' + title_str + '"\n\n' src += 'Song lyrics:\n\n' for ln in lines: if ln != '\n': words = ln.split() for w in words: src += ''.join(['@' if a.isupper() else '_' for a in w]) + ' ' src = src.strip() src += '\n' return src #========================================================================================================== @spaces.GPU def Morph_Lyrics(input_title, input_lyrics): print('*' * 70) print('Req start time: {:%Y-%m-%d %H:%M:%S}'.format(datetime.datetime.now(PDT))) start_time = time.time() print('=' * 70) print('Requested settings:') print('=' * 70) print('Input title:', input_title) print('Input lyrics:\n\n') print(input_lyrics) print('=' * 70) print('Processing lyrics...Please wait...') lyrics_template = get_lyrics_template(input_title, input_lyrics) print('Done!') print('=' * 70) print('Processing...Please wait...') messages = [ {"role": "system", "content": "Please fill in the words in the following song lyrics template. Thank you."}, {"role": "user", "content": lyrics_template} ] chat_text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True ) model_inputs = tokenizer([chat_text], return_tensors="pt").to(model.device) print('Done!') print('=' * 70) print('Generating...') num_batches = 256 generated_ids = model.generate(**model_inputs, max_new_tokens=4096, do_sample=True, temperature=0.7, top_p=0.8, num_return_sequences=num_batches ) print('Done!') print('=' * 70) print('Post-processing...') output_tokens = [output_ids[len(input_ids):] for input_ids, output_ids in zip([model_inputs.input_ids] * num_batches, generated_ids)] responses = tokenizer.batch_decode(output_tokens, skip_special_tokens=True) final_responses = [] for r in responses: final_responses.append(r.split('\nassistant\n')[-1]) print('Done!') print('=' * 70) print('Selecting best response...') best_chars = 8192 best_words = 8192 best_response = '' for fr in final_responses: chars, words = count_mismatches(lyrics_template, fr) if chars < best_chars: best_chars = chars best_words = words best_response = fr print('Done!') print('=' * 70) print("Character mismatches:", best_chars) print("Word mismatches: ", best_words) output_stats = '' output_stats += 'Character mismatches: ' + str(best_chars) + '\n' output_stats += 'Word mismatches: ' + str(best_words) output_lyrics = best_response #======================================================== print('Req end time: {:%Y-%m-%d %H:%M:%S}'.format(datetime.datetime.now(PDT))) print('-' * 70) print('Req execution time:', (time.time() - start_time), 'sec') print('*' * 70) #======================================================== return output_stats, output_lyrics #========================================================================================================== if __name__ == "__main__": PDT = timezone('US/Pacific') print('=' * 70) print('App start time: {:%Y-%m-%d %H:%M:%S}'.format(datetime.datetime.now(PDT))) print('=' * 70) app = gr.Blocks() with app: gr.Markdown("

Lyrics Morpher

") gr.Markdown("

Morph any lyrics into an exact variations with fine-tuned Qwen2.5 0.5B Instruct model

") input_title = gr.Textbox(label="Enter song title here", value="Nothing Else Matters") input_lyrics = gr.Textbox(label="Enter song lyrics here", value="So close no matter how far\nCould not be much more from the heart\nForever trusting who we are\nAnd nothing else matters") submit = gr.Button("Morph", variant="primary") gr.Markdown("## Morphing results") output_stats = gr.Textbox(label="Morphed lyrics stats") output_lyrics = gr.Textbox(label="Morphed lyrics") run_event = submit.click(Morph_Lyrics, [input_title, input_lyrics ], [output_stats, output_lyrics ]) app.queue().launch()