# Lyrics-Morpher / app.py
# projectlosangeles's picture
# Update app.py
# a756605 verified
#================================================================
# https://huggingface.co/spaces/asigalov61/Lyrics-Morpher
#================================================================
print('*' * 70)
print('Loading Lyrics Morpher modules...')
import os
os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
import time
import datetime
from pytz import timezone
import re
import gradio as gr
import spaces
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List, Tuple
print('*' * 70)
print('Done!')
print('*' * 70)
#==========================================================================================================
# Announce the load step (same three banner lines as before, emitted in one call).
print('*' * 70, 'Loading model and tokenizer...', '*' * 70, sep='\n')

# Hub identifier of the checkpoint; reused below for the tokenizer.
model_name = "asigalov61/Lyrics_Qwen2.5-0.5B-Instruct"

# Dtype and device placement are both left to the library ("auto").
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype="auto",
    device_map="auto"
)

print('*' * 70)

# Tokenizer comes from the same repository as the model.
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)

print('*' * 70, 'Done!', '*' * 70, sep='\n')
#==========================================================================================================
def _split_into_blocks(lines: List[str]) -> Tuple[int, List[Tuple[List[str], int]]]:
"""
Splits `lines` into:
- leading_blanks: number of blank lines before the first non-blank
- blocks: a list of (block_lines, blank_count_after) where
* block_lines is a list of consecutive non-blank lines
* blank_count_after is how many blank lines follow that block
"""
i = 0
n = len(lines)
# count leading blank lines
leading_blanks = 0
while i < n and lines[i] == "":
leading_blanks += 1
i += 1
blocks = []
while i < n:
# collect non-blank lines
block = []
while i < n and lines[i] != "":
block.append(lines[i])
i += 1
# then count blank lines after this block
blank_after = 0
while i < n and lines[i] == "":
blank_after += 1
i += 1
blocks.append((block, blank_after))
return leading_blanks, blocks
def _compare_line(tpl_line: str, txt_line: str) -> Tuple[int,int]:
"""
Compare two lines token-by-token.
Returns (char_mismatches, word_mismatches).
'@' in tpl_line → one uppercase letter [A–Z]
'_' in tpl_line → one lowercase letter [a–z]
all other chars must match exactly.
"""
char_mis = 0
word_mis = 0
tpl_tokens = tpl_line.split(" ")
txt_tokens = txt_line.split(" ")
# difference in token count
if len(tpl_tokens) != len(txt_tokens):
word_mis += abs(len(tpl_tokens) - len(txt_tokens))
# compare each pair
for t_tok, x_tok in zip(tpl_tokens, txt_tokens):
token_error = False
L = min(len(t_tok), len(x_tok))
# char-by-char
for j in range(L):
p, c = t_tok[j], x_tok[j]
if p == "@":
if not ("A" <= c <= "Z"):
char_mis += 1
token_error = True
elif p == "_":
if not ("a" <= c <= "z"):
char_mis += 1
token_error = True
else:
if p != c:
char_mis += 1
token_error = True
# length difference
if len(t_tok) != len(x_tok):
char_mis += abs(len(t_tok) - len(x_tok))
token_error = True
if token_error:
word_mis += 1
return char_mis, word_mis
def count_mismatches(template: str, text: str) -> Tuple[int,int]:
    """
    Score `text` against `template`, line block by line block.

    Both inputs are split into lines and then into blank-line-separated
    blocks, which are paired by position so that an extra or missing blank
    line does not shift the alignment of the lines that follow it.

    Scoring:
      - paired lines are scored by `_compare_line` ('@' = one uppercase
        letter, '_' = one lowercase letter, everything else literal)
      - a line present on only one side costs 1 word mismatch and
        len(line) + 1 character mismatches (the line plus its newline)
      - every extra or missing blank line (leading, or after a block) costs
        1 character mismatch and 1 word mismatch

    Returns (char_mismatches, word_mismatches).
    """
    tpl_lead, tpl_blocks = _split_into_blocks(template.splitlines())
    txt_lead, txt_blocks = _split_into_blocks(text.splitlines())

    # Leading blank-line difference seeds both counters
    lead_gap = abs(tpl_lead - txt_lead)
    char_mis = lead_gap
    word_mis = lead_gap

    no_block = ([], 0)  # stand-in when one side has fewer blocks
    for idx in range(max(len(tpl_blocks), len(txt_blocks))):
        tpl_block, tpl_gap = tpl_blocks[idx] if idx < len(tpl_blocks) else no_block
        txt_block, txt_gap = txt_blocks[idx] if idx < len(txt_blocks) else no_block

        # Lines that exist on both sides are compared pairwise
        for tpl_line, txt_line in zip(tpl_block, txt_block):
            line_chars, line_words = _compare_line(tpl_line, txt_line)
            char_mis += line_chars
            word_mis += line_words

        # Whatever is left over belongs to only one side (at most one slice is non-empty)
        shared = min(len(tpl_block), len(txt_block))
        for orphan in tpl_block[shared:] + txt_block[shared:]:
            word_mis += 1
            char_mis += len(orphan) + 1

        # Blank lines following the block
        gap_diff = abs(tpl_gap - txt_gap)
        char_mis += gap_diff
        word_mis += gap_diff

    return char_mis, word_mis
#==========================================================================================================
def get_lyrics_template(song_title, song_lyrics):
    """
    Build the masked prompt template for a song title and its lyrics.

    Every input line is reduced to ASCII letters and single spaces; each
    remaining uppercase letter becomes '@' and every other letter becomes
    '_'. The result has the shape:

        Song title: "<masked title>"

        Song lyrics:

        <masked line 1>
        <masked line 2>
        ...

    Args:
        song_title: Raw title. When empty, None, or left without letters
            after cleaning, 'Unknown Song' is used.
        song_lyrics: Raw lyrics, one lyric line per '\\n'-separated line.
            None is treated as empty lyrics.

    Returns:
        The template string. Each lyric line yields exactly one output line;
        a line with no letters (empty, whitespace-only, punctuation-only)
        yields a blank line, so the line structure of the input is kept.
    """
    def _clean(raw):
        # Keep only ASCII letters and spaces, then collapse whitespace runs
        return re.sub(r'\s+', ' ', re.sub(r'[^A-Za-z ]+', '', raw.strip())).strip()

    def _mask(cleaned):
        # '@' for uppercase letters, '_' for the rest, words joined by one space
        return ' '.join(
            ''.join('@' if ch.isupper() else '_' for ch in word)
            for word in cleaned.split()
        )

    # Fall back when the title is missing or has no letters left after cleaning
    title = _clean(song_title) if song_title else ''
    if not title:
        title = 'Unknown Song'

    parts = ['Song title: "' + _mask(title) + '"\n\n', 'Song lyrics:\n\n']

    # One output line per input line. A line that cleans to nothing masks to ''
    # and therefore becomes a blank line instead of disturbing earlier output.
    for raw_line in (song_lyrics or '').split('\n'):
        parts.append(_mask(_clean(raw_line)) + '\n')

    return ''.join(parts)
#==========================================================================================================
@spaces.GPU
def Morph_Lyrics(input_title, input_lyrics):
    """
    Generate replacement lyrics that follow the letter pattern of the input.

    The title and lyrics are converted to a masked template with
    `get_lyrics_template`, the model samples `num_batches` completions for
    it, and the completion with the fewest character mismatches against the
    template (per `count_mismatches`) is returned. On ties the earliest
    completion wins.

    Args:
        input_title: Song title entered by the user.
        input_lyrics: Song lyrics entered by the user.

    Returns:
        (output_stats, output_lyrics): a two-line mismatch summary string
        and the selected generated text.
    """
    # Resolve the timezone locally: the module-level PDT is only assigned
    # under the __main__ guard, so relying on it fails when this is imported.
    pacific = timezone('US/Pacific')

    print('*' * 70)
    print('Req start time: {:%Y-%m-%d %H:%M:%S}'.format(datetime.datetime.now(pacific)))
    start_time = time.time()

    print('=' * 70)
    print('Requested settings:')
    print('=' * 70)
    print('Input title:', input_title)
    print('Input lyrics:\n\n')
    print(input_lyrics)
    print('=' * 70)

    print('Processing lyrics...Please wait...')
    lyrics_template = get_lyrics_template(input_title, input_lyrics)
    print('Done!')
    print('=' * 70)

    print('Processing...Please wait...')
    messages = [
        {"role": "system", "content": "Please fill in the words in the following song lyrics template. Thank you."},
        {"role": "user", "content": lyrics_template}
    ]
    chat_text = tokenizer.apply_chat_template(messages,
                                              tokenize=False,
                                              add_generation_prompt=True
                                              )
    model_inputs = tokenizer([chat_text], return_tensors="pt").to(model.device)
    print('Done!')
    print('=' * 70)

    print('Generating...')
    num_batches = 256
    generated_ids = model.generate(**model_inputs,
                                   max_new_tokens=4096,
                                   do_sample=True,
                                   temperature=0.7,
                                   top_p=0.8,
                                   num_return_sequences=num_batches
                                   )
    print('Done!')
    print('=' * 70)

    print('Post-processing...')
    # input_ids has shape (1, prompt_len), so len(input_ids) is the batch size
    # (1), not the prompt length. Slice on the sequence axis instead; every
    # returned sequence starts with the same single prompt.
    prompt_len = model_inputs.input_ids.shape[1]
    output_tokens = generated_ids[:, prompt_len:]
    # With the prompt removed at token level, no text-level split on the
    # assistant marker is needed (and generated text can no longer be cut by it).
    final_responses = tokenizer.batch_decode(output_tokens, skip_special_tokens=True)
    print('Done!')
    print('=' * 70)

    print('Selecting best response...')
    # No magic upper bound: the first candidate always becomes the initial best
    best_chars = None
    best_words = None
    best_response = ''
    for fr in final_responses:
        chars, words = count_mismatches(lyrics_template, fr)
        if best_chars is None or chars < best_chars:
            best_chars = chars
            best_words = words
            best_response = fr
    print('Done!')
    print('=' * 70)

    print("Character mismatches:", best_chars)
    print("Word mismatches: ", best_words)

    output_stats = (
        'Character mismatches: ' + str(best_chars) + '\n'
        + 'Word mismatches: ' + str(best_words)
    )
    output_lyrics = best_response

    #========================================================
    print('Req end time: {:%Y-%m-%d %H:%M:%S}'.format(datetime.datetime.now(pacific)))
    print('-' * 70)
    print('Req execution time:', (time.time() - start_time), 'sec')
    print('*' * 70)
    #========================================================

    return output_stats, output_lyrics
#==========================================================================================================
if __name__ == "__main__":
    # Timezone used for the timestamps this app prints
    PDT = timezone('US/Pacific')

    print('=' * 70)
    print('App start time: {:%Y-%m-%d %H:%M:%S}'.format(datetime.datetime.now(PDT)))
    print('=' * 70)

    # Build the UI: two input boxes, one button, two output boxes
    with gr.Blocks() as app:
        gr.Markdown("<h1 style='text-align: center; margin-bottom: 1rem'>Lyrics Morpher</h1>")
        gr.Markdown("<h1 style='text-align: center; margin-bottom: 1rem'>Morph any lyrics into an exact variations with fine-tuned Qwen2.5 0.5B Instruct model</h1>")

        input_title = gr.Textbox(label="Enter song title here", value="Nothing Else Matters")
        input_lyrics = gr.Textbox(label="Enter song lyrics here", value="So close no matter how far\nCould not be much more from the heart\nForever trusting who we are\nAnd nothing else matters")

        submit = gr.Button("Morph", variant="primary")

        gr.Markdown("## Morphing results")

        output_stats = gr.Textbox(label="Morphed lyrics stats")
        output_lyrics = gr.Textbox(label="Morphed lyrics")

        # Button click runs Morph_Lyrics on the two inputs and fills both outputs
        run_event = submit.click(
            Morph_Lyrics,
            [input_title, input_lyrics],
            [output_stats, output_lyrics],
        )

    app.queue().launch()