Spaces:
Running
on
Zero
Running
on
Zero
#================================================================ | |
# https://huggingface.co/spaces/asigalov61/Lyrics-Morpher | |
#================================================================ | |
print('*' * 70) | |
print('Loading Lyrics Morpher modules...') | |
import os | |
os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1" | |
import time | |
import datetime | |
from pytz import timezone | |
import re | |
import gradio as gr | |
import spaces | |
from transformers import AutoModelForCausalLM, AutoTokenizer | |
from typing import List, Tuple | |
print('*' * 70) | |
print('Done!') | |
print('*' * 70) | |
#========================================================================================================== | |
print('*' * 70, 'Loading model and tokenizer...', '*' * 70, sep='\n')

# Hub id of the checkpoint used for both the model weights and the tokenizer.
model_name = "asigalov61/Lyrics_Qwen2.5-0.5B-Instruct"

# "auto" leaves dtype selection and device placement to the library.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype="auto",
    device_map="auto",
)

print('*' * 70)

tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)

print('*' * 70, 'Done!', '*' * 70, sep='\n')
#========================================================================================================== | |
def _split_into_blocks(lines: List[str]) -> Tuple[int, List[Tuple[List[str], int]]]: | |
""" | |
Splits `lines` into: | |
- leading_blanks: number of blank lines before the first non-blank | |
- blocks: a list of (block_lines, blank_count_after) where | |
* block_lines is a list of consecutive non-blank lines | |
* blank_count_after is how many blank lines follow that block | |
""" | |
i = 0 | |
n = len(lines) | |
# count leading blank lines | |
leading_blanks = 0 | |
while i < n and lines[i] == "": | |
leading_blanks += 1 | |
i += 1 | |
blocks = [] | |
while i < n: | |
# collect non-blank lines | |
block = [] | |
while i < n and lines[i] != "": | |
block.append(lines[i]) | |
i += 1 | |
# then count blank lines after this block | |
blank_after = 0 | |
while i < n and lines[i] == "": | |
blank_after += 1 | |
i += 1 | |
blocks.append((block, blank_after)) | |
return leading_blanks, blocks | |
def _compare_line(tpl_line: str, txt_line: str) -> Tuple[int,int]: | |
""" | |
Compare two lines token-by-token. | |
Returns (char_mismatches, word_mismatches). | |
'@' in tpl_line → one uppercase letter [A–Z] | |
'_' in tpl_line → one lowercase letter [a–z] | |
all other chars must match exactly. | |
""" | |
char_mis = 0 | |
word_mis = 0 | |
tpl_tokens = tpl_line.split(" ") | |
txt_tokens = txt_line.split(" ") | |
# difference in token count | |
if len(tpl_tokens) != len(txt_tokens): | |
word_mis += abs(len(tpl_tokens) - len(txt_tokens)) | |
# compare each pair | |
for t_tok, x_tok in zip(tpl_tokens, txt_tokens): | |
token_error = False | |
L = min(len(t_tok), len(x_tok)) | |
# char-by-char | |
for j in range(L): | |
p, c = t_tok[j], x_tok[j] | |
if p == "@": | |
if not ("A" <= c <= "Z"): | |
char_mis += 1 | |
token_error = True | |
elif p == "_": | |
if not ("a" <= c <= "z"): | |
char_mis += 1 | |
token_error = True | |
else: | |
if p != c: | |
char_mis += 1 | |
token_error = True | |
# length difference | |
if len(t_tok) != len(x_tok): | |
char_mis += abs(len(t_tok) - len(x_tok)) | |
token_error = True | |
if token_error: | |
word_mis += 1 | |
return char_mis, word_mis | |
def count_mismatches(template: str, text: str) -> Tuple[int,int]:
    """
    Score how far `text` deviates from `template`.

    Matching rules:
      - '@' matches exactly one uppercase letter [A-Z]
      - '_' matches exactly one lowercase letter [a-z]
      - every other character (spaces, punctuation, quotes) must match exactly
      - each extra or missing blank line costs 1 char and 1 word mismatch;
        because both inputs are split into blank-line-delimited blocks that
        are paired by position, a different number of blank lines does not
        shift the alignment of the lines that follow
      - a line present on only one side costs 1 word mismatch plus its
        length + 1 (for the newline) in char mismatches

    Returns (char_mismatches, word_mismatches).
    """
    tpl_lead, tpl_blocks = _split_into_blocks(template.splitlines())
    txt_lead, txt_blocks = _split_into_blocks(text.splitlines())

    # 1) Blank lines before the first content line.
    lead_gap = abs(tpl_lead - txt_lead)
    char_mis = lead_gap
    word_mis = lead_gap

    # 2) Walk the blocks pairwise; a side that has run out of blocks
    #    contributes an empty block with no trailing blank lines.
    no_block = ([], 0)
    for idx in range(max(len(tpl_blocks), len(txt_blocks))):
        tpl_block, tpl_gap = tpl_blocks[idx] if idx < len(tpl_blocks) else no_block
        txt_block, txt_gap = txt_blocks[idx] if idx < len(txt_blocks) else no_block

        # a) Lines that exist on both sides are compared in detail.
        for tpl_line, txt_line in zip(tpl_block, txt_block):
            line_chars, line_words = _compare_line(tpl_line, txt_line)
            char_mis += line_chars
            word_mis += line_words

        # Lines beyond the shorter block exist on one side only
        # (at most one of the two slices below is non-empty).
        paired = min(len(tpl_block), len(txt_block))
        for stray in tpl_block[paired:] + txt_block[paired:]:
            word_mis += 1
            char_mis += len(stray) + 1  # all chars + one '\n'

        # b) Blank lines following this block.
        gap = abs(tpl_gap - txt_gap)
        char_mis += gap
        word_mis += gap

    return char_mis, word_mis
#========================================================================================================== | |
def _mask_line(text):
    """
    Convert one line of text into its template form.

    Every character other than an ASCII letter or a space is dropped, runs of
    spaces collapse to a single space, and each remaining letter is replaced
    by '@' (uppercase) or '_' (lowercase). Returns '' when no letters remain.
    """
    letters_only = re.sub(r'[^A-Za-z ]+', '', text)
    return ' '.join(
        ''.join('@' if ch.isupper() else '_' for ch in word)
        for word in letters_only.split()
    )


def get_lyrics_template(song_title, song_lyrics):
    """
    Build the fill-in-the-blanks prompt for a song.

    The result has the form

        Song title: "<masked title>"

        Song lyrics:

        <masked line 1>
        <masked line 2>
        ...

    where each letter is masked as '@' (uppercase) or '_' (lowercase) and all
    non-letter characters are removed.

    Args:
        song_title: Title text. When it is empty, or contains no ASCII
            letters at all, 'Unknown Song' is used instead.
        song_lyrics: Lyrics text with '\n' line separators (a trailing '\r'
            from CRLF input is discarded with the other non-letters).
            None is treated as an empty string.

    Returns:
        The template string. Every input line yields exactly one output
        line; a line without any letters yields an empty line, so stanza
        breaks are preserved.
    """
    title_mask = _mask_line(song_title) if song_title else ''
    if not title_mask:
        # No usable letters in the title: fall back to the placeholder title.
        title_mask = _mask_line('Unknown Song')

    parts = ['Song title: "' + title_mask + '"\n\n', 'Song lyrics:\n\n']

    # Each line is masked on its own and terminated with exactly one newline.
    # Stripping the accumulated output instead would also strip newlines
    # emitted for earlier lines whenever the current line masks to nothing.
    for line in (song_lyrics or '').split('\n'):
        parts.append(_mask_line(line) + '\n')

    return ''.join(parts)
#========================================================================================================== | |
def Morph_Lyrics(input_title, input_lyrics):
    """
    Gradio handler: generate new lyrics that fit the letter pattern of the input.

    The title and lyrics are converted to a '@'/'_' template, the model samples
    a batch of candidate completions for that template, and the candidate with
    the fewest mismatches against the template is returned.

    Args:
        input_title: Song title entered by the user.
        input_lyrics: Song lyrics entered by the user.

    Returns:
        (output_stats, output_lyrics): a two-line string with the character and
        word mismatch counts of the chosen candidate, and the candidate text.
    """
    # NOTE(review): `spaces` is imported by this file but this handler carries
    # no `@spaces.GPU` decorator - confirm that is intended for this Space.

    # Resolved locally so the handler does not rely on the `PDT` global, which
    # is only created when the file is executed as a script.
    pacific = timezone('US/Pacific')

    print('*' * 70)
    print('Req start time: {:%Y-%m-%d %H:%M:%S}'.format(datetime.datetime.now(pacific)))
    start_time = time.time()

    print('=' * 70)
    print('Requested settings:')
    print('=' * 70)
    print('Input title:', input_title)
    print('Input lyrics:\n\n')
    print(input_lyrics)

    print('=' * 70)
    print('Processing lyrics...Please wait...')

    lyrics_template = get_lyrics_template(input_title, input_lyrics)

    print('Done!')
    print('=' * 70)
    print('Processing...Please wait...')

    messages = [
        {"role": "system", "content": "Please fill in the words in the following song lyrics template. Thank you."},
        {"role": "user", "content": lyrics_template}
    ]

    chat_text = tokenizer.apply_chat_template(messages,
                                              tokenize=False,
                                              add_generation_prompt=True
                                              )

    model_inputs = tokenizer([chat_text], return_tensors="pt").to(model.device)

    print('Done!')
    print('=' * 70)
    print('Generating...')

    # Number of candidates sampled for the single prompt.
    num_batches = 256

    generated_ids = model.generate(**model_inputs,
                                   max_new_tokens=4096,
                                   do_sample=True,
                                   temperature=0.7,
                                   top_p=0.8,
                                   num_return_sequences=num_batches
                                   )

    print('Done!')
    print('=' * 70)
    print('Post-processing...')

    # Every returned sequence starts with the prompt tokens. `input_ids` is a
    # (1, prompt_len) tensor, so the prompt length is its second dimension
    # (`len()` of it would be the batch size, 1). Slicing the prompt off here
    # leaves only the generated answer, so no text-level splitting on the
    # assistant marker is needed afterwards.
    prompt_len = model_inputs.input_ids.shape[1]
    output_tokens = generated_ids[:, prompt_len:]

    final_responses = tokenizer.batch_decode(output_tokens, skip_special_tokens=True)

    print('Done!')
    print('=' * 70)
    print('Selecting best response...')

    # Defaults are only reported if no candidate was produced at all.
    best_chars = 8192
    best_words = 8192
    best_response = ''

    scored = [(count_mismatches(lyrics_template, fr), fr) for fr in final_responses]

    if scored:
        # Fewest character mismatches wins; word mismatches break ties and the
        # earliest candidate wins a full tie. A candidate is always chosen,
        # however large its mismatch counts are.
        (best_chars, best_words), best_response = min(scored, key=lambda item: item[0])

    print('Done!')
    print('=' * 70)

    print("Character mismatches:", best_chars)
    print("Word mismatches: ", best_words)

    output_stats = ''
    output_stats += 'Character mismatches: ' + str(best_chars) + '\n'
    output_stats += 'Word mismatches: ' + str(best_words)

    output_lyrics = best_response

    #========================================================

    print('Req end time: {:%Y-%m-%d %H:%M:%S}'.format(datetime.datetime.now(pacific)))
    print('-' * 70)
    print('Req execution time:', (time.time() - start_time), 'sec')
    print('*' * 70)

    #========================================================

    return output_stats, output_lyrics
#========================================================================================================== | |
if __name__ == "__main__":

    # Timezone used for the timestamps written to the log; the request
    # handler reads this module-level name as well.
    PDT = timezone('US/Pacific')

    print('=' * 70)
    print('App start time: {:%Y-%m-%d %H:%M:%S}'.format(datetime.datetime.now(PDT)))
    print('=' * 70)

    # Components appear on the page in the order they are created below.
    with gr.Blocks() as app:

        gr.Markdown("<h1 style='text-align: center; margin-bottom: 1rem'>Lyrics Morpher</h1>")
        gr.Markdown("<h1 style='text-align: center; margin-bottom: 1rem'>Morph any lyrics into an exact variations with fine-tuned Qwen2.5 0.5B Instruct model</h1>")

        # Inputs, pre-filled with a sample song.
        input_title = gr.Textbox(
            label="Enter song title here",
            value="Nothing Else Matters",
        )
        input_lyrics = gr.Textbox(
            label="Enter song lyrics here",
            value="So close no matter how far\nCould not be much more from the heart\nForever trusting who we are\nAnd nothing else matters",
        )

        submit = gr.Button("Morph", variant="primary")

        # Outputs.
        gr.Markdown("## Morphing results")

        output_stats = gr.Textbox(label="Morphed lyrics stats")
        output_lyrics = gr.Textbox(label="Morphed lyrics")

        # Clicking the button runs the handler on the two inputs and writes
        # its two return values to the two output boxes.
        run_event = submit.click(
            Morph_Lyrics,
            [input_title, input_lyrics],
            [output_stats, output_lyrics],
        )

    app.queue().launch()