Spaces:

projectlosangeles
/

Lyrics-Morpher

Running on Zero

App Files Files Community

Lyrics-Morpher / app.py

projectlosangeles

Update app.py

a756605 verified 4 days ago

raw

history blame contribute delete

11.5 kB

	#================================================================
	# https://huggingface.co/spaces/asigalov61/Lyrics-Morpher
	#================================================================

	# Startup banner: a 70-character rule followed by the loading message.
	print('=' * 70)
	print('Loading Lyrics Morpher modules...')

	import os

	os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"

	import time
	import datetime
	from pytz import timezone

	import re

	import gradio as gr
	import spaces

	from transformers import AutoModelForCausalLM, AutoTokenizer

	from typing import List, Tuple

	# Report that the module imports above have completed.
	print('=' * 70)
	print('Done!')
	print('=' * 70)

	#==========================================================================================================

	print('=' * 70)
	print('Loading model and tokenizer...')
	print('=' * 70)

	# Hugging Face Hub id of the checkpoint used for both the model and the tokenizer.
	model_name = "asigalov61/Lyrics_Qwen2.5-0.5B-Instruct"

	# Dtype and device placement are left to the library ("auto").
	model = AutoModelForCausalLM.from_pretrained(
	    model_name,
	    torch_dtype="auto",
	    device_map="auto"
	)

	print('=' * 70)

	# NOTE(review): trust_remote_code=True allows code shipped with the
	# checkpoint repository to run; confirm it is actually required here.
	tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)

	print('=' * 70)
	print('Done!')
	print('=' * 70)

	#==========================================================================================================

	def _split_into_blocks(lines: List[str]) -> Tuple[int, List[Tuple[List[str], int]]]:
	"""
	Splits `lines` into:
	- leading_blanks: number of blank lines before the first non-blank
	- blocks: a list of (block_lines, blank_count_after) where
	* block_lines is a list of consecutive non-blank lines
	* blank_count_after is how many blank lines follow that block
	"""
	i = 0
	n = len(lines)
	# count leading blank lines
	leading_blanks = 0
	while i < n and lines[i] == "":
	leading_blanks += 1
	i += 1

	blocks = []
	while i < n:
	# collect non-blank lines
	block = []
	while i < n and lines[i] != "":
	block.append(lines[i])
	i += 1
	# then count blank lines after this block
	blank_after = 0
	while i < n and lines[i] == "":
	blank_after += 1
	i += 1
	blocks.append((block, blank_after))

	return leading_blanks, blocks

	def _compare_line(tpl_line: str, txt_line: str) -> Tuple[int,int]:
	"""
	Compare two lines token-by-token.
	Returns (char_mismatches, word_mismatches).
	'@' in tpl_line → one uppercase letter [A–Z]
	'_' in tpl_line → one lowercase letter [a–z]
	all other chars must match exactly.
	"""
	char_mis = 0
	word_mis = 0

	tpl_tokens = tpl_line.split(" ")
	txt_tokens = txt_line.split(" ")

	# difference in token count
	if len(tpl_tokens) != len(txt_tokens):
	word_mis += abs(len(tpl_tokens) - len(txt_tokens))
	# compare each pair
	for t_tok, x_tok in zip(tpl_tokens, txt_tokens):
	token_error = False
	L = min(len(t_tok), len(x_tok))
	# char-by-char
	for j in range(L):
	p, c = t_tok[j], x_tok[j]
	if p == "@":
	if not ("A" <= c <= "Z"):
	char_mis += 1
	token_error = True
	elif p == "_":
	if not ("a" <= c <= "z"):
	char_mis += 1
	token_error = True
	else:
	if p != c:
	char_mis += 1
	token_error = True
	# length difference
	if len(t_tok) != len(x_tok):
	char_mis += abs(len(t_tok) - len(x_tok))
	token_error = True
	if token_error:
	word_mis += 1

	return char_mis, word_mis

	def count_mismatches(template: str, text: str) -> Tuple[int,int]:
	    """
	    Score how far `text` deviates from `template`.

	    Template semantics (applied per line by `_compare_line`):
	      - '@' matches exactly one uppercase letter [A-Z]
	      - '_' matches exactly one lowercase letter [a-z]
	      - all other characters (spaces, punctuation, quotes, case) match exactly

	    Both inputs are split into lines and grouped into blocks of consecutive
	    non-blank lines (see `_split_into_blocks`). Blocks are paired by position,
	    so an extra or missing blank line costs 1 character and 1 word mismatch
	    but does not shift the alignment of the lines that follow.

	    A line present on only one side of a paired block costs 1 word mismatch
	    and len(line) + 1 character mismatches (the +1 stands for its newline).

	    Returns (char_mismatches, word_mismatches).
	    """
	    tpl_lead, tpl_blocks = _split_into_blocks(template.splitlines())
	    txt_lead, txt_blocks = _split_into_blocks(text.splitlines())

	    # 1) Difference in the number of leading blank lines.
	    diff_lead = abs(tpl_lead - txt_lead)
	    char_mis = diff_lead
	    word_mis = diff_lead

	    # 2) Compare block by block; a side that runs out contributes empty blocks.
	    for i in range(max(len(tpl_blocks), len(txt_blocks))):
	        tpl_block, tpl_blank_after = tpl_blocks[i] if i < len(tpl_blocks) else ([], 0)
	        txt_block, txt_blank_after = txt_blocks[i] if i < len(txt_blocks) else ([], 0)

	        # a) lines present on both sides are compared token by token
	        for tpl_ln, txt_ln in zip(tpl_block, txt_block):
	            c1, w1 = _compare_line(tpl_ln, txt_ln)
	            char_mis += c1
	            word_mis += w1

	        # b) lines present on one side only (missing from or extra in text);
	        #    at most one of the two slices is non-empty
	        shared = min(len(tpl_block), len(txt_block))
	        for leftover in tpl_block[shared:] + txt_block[shared:]:
	            word_mis += 1
	            char_mis += len(leftover) + 1

	        # c) difference in the number of blank lines after the block
	        diff_blank = abs(tpl_blank_after - txt_blank_after)
	        char_mis += diff_blank
	        word_mis += diff_blank

	    return char_mis, word_mis

	#==========================================================================================================

	def get_lyrics_template(song_title, song_lyrics):
	    """
	    Build the masked template that is sent to the model as the user message.

	    The title and every lyrics line are reduced to ASCII letters and single
	    spaces; then each uppercase letter becomes '@' and each lowercase letter
	    becomes '_'. The result has the form:

	        Song title: "<masked title>"
	        <blank line>
	        Song lyrics:
	        <blank line>
	        <one masked line per input line>

	    Parameters:
	      song_title  -- title text; when empty, or when nothing is left after
	                     cleaning, 'Unknown Song' is used instead
	      song_lyrics -- lyrics text with lines separated by '\n'

	    Every input line yields exactly one output line. A line with no letters
	    (empty, whitespace, digits or punctuation only) yields an empty line.
	    Each line is assembled separately, so such a line can no longer strip
	    the newlines emitted before it, as the earlier whole-string strip did.

	    Returns the template string, terminated by a newline.
	    """

	    def _clean(text):
	        # Keep only ASCII letters and spaces, then collapse runs of spaces.
	        return re.sub(r'\s+', ' ', re.sub(r'[^A-Za-z ]+', '', text.strip())).strip()

	    def _mask(line):
	        # 'Hello World' -> '@____ @____'
	        return ' '.join(''.join('@' if ch.isupper() else '_' for ch in word)
	                        for word in line.split())

	    title = _clean(song_title) if song_title else ''
	    if not title:
	        title = 'Unknown Song'

	    parts = ['Song title: "' + _mask(title) + '"\n\n', 'Song lyrics:\n\n']
	    parts.extend(_mask(_clean(line)) + '\n' for line in song_lyrics.split('\n'))

	    return ''.join(parts)

	#==========================================================================================================

	@spaces.GPU
	def Morph_Lyrics(input_title, input_lyrics):
	    """
	    Generate new lyrics that follow the letter-case/length pattern of the input.

	    Steps:
	      1. Mask the title and lyrics into a template (`get_lyrics_template`).
	      2. Sample `num_batches` completions of that template from the model.
	      3. Score each completion against the template (`count_mismatches`) and
	         keep the one with the fewest character mismatches, ties broken by
	         fewer word mismatches.

	    Parameters:
	      input_title  -- song title text from the UI
	      input_lyrics -- song lyrics text from the UI

	    Returns (output_stats, output_lyrics): a two-line mismatch summary and
	    the selected completion text.
	    """

	    # The module-level PDT is bound only under the __main__ guard, so the
	    # zone is resolved here to keep the function usable when imported.
	    pacific = timezone('US/Pacific')

	    print('=' * 70)
	    print('Req start time: {:%Y-%m-%d %H:%M:%S}'.format(datetime.datetime.now(pacific)))
	    start_time = time.time()

	    print('=' * 70)
	    print('Requested settings:')
	    print('=' * 70)
	    print('Input title:', input_title)
	    print('Input lyrics:\n\n')
	    print(input_lyrics)
	    print('=' * 70)
	    print('Processing lyrics...Please wait...')

	    lyrics_template = get_lyrics_template(input_title, input_lyrics)

	    print('Done!')
	    print('=' * 70)
	    print('Processing...Please wait...')

	    messages = [
	        {"role": "system", "content": "Please fill in the words in the following song lyrics template. Thank you."},
	        {"role": "user", "content": lyrics_template}
	    ]

	    chat_text = tokenizer.apply_chat_template(messages,
	                                              tokenize=False,
	                                              add_generation_prompt=True
	                                              )

	    model_inputs = tokenizer([chat_text], return_tensors="pt").to(model.device)

	    print('Done!')
	    print('=' * 70)
	    print('Generating...')

	    # Number of candidate completions sampled for the single prompt.
	    num_batches = 256

	    generated_ids = model.generate(**model_inputs,
	                                   max_new_tokens=4096,
	                                   do_sample=True,
	                                   temperature=0.7,
	                                   top_p=0.8,
	                                   num_return_sequences=num_batches
	                                   )

	    print('Done!')
	    print('=' * 70)

	    print('Post-processing...')

	    # input_ids has shape (1, prompt_len), so len(input_ids) is 1 and would
	    # strip a single token only. Slice by the real prompt length so that
	    # just the newly generated tokens are decoded.
	    prompt_len = model_inputs.input_ids.shape[1]

	    output_tokens = generated_ids[:, prompt_len:]

	    final_responses = tokenizer.batch_decode(output_tokens, skip_special_tokens=True)

	    print('Done!')
	    print('=' * 70)

	    print('Selecting best response...')

	    # min() always selects one of the candidates, however poor its score;
	    # on equal scores the earliest candidate wins.
	    scored = [(count_mismatches(lyrics_template, fr), fr) for fr in final_responses]

	    (best_chars, best_words), best_response = min(scored, key=lambda item: item[0])

	    print('Done!')
	    print('=' * 70)

	    print("Character mismatches:", best_chars)
	    print("Word mismatches: ", best_words)

	    output_stats = ''

	    output_stats += 'Character mismatches: ' + str(best_chars) + '\n'
	    output_stats += 'Word mismatches: ' + str(best_words)

	    output_lyrics = best_response

	    #========================================================

	    print('Req end time: {:%Y-%m-%d %H:%M:%S}'.format(datetime.datetime.now(pacific)))
	    print('-' * 70)
	    print('Req execution time:', (time.time() - start_time), 'sec')
	    print('=' * 70)

	    #========================================================

	    return output_stats, output_lyrics

	#==========================================================================================================

	if __name__ == "__main__":

	    # Time zone used for the timestamps printed to the log.
	    PDT = timezone('US/Pacific')

	    print('=' * 70)
	    print('App start time: {:%Y-%m-%d %H:%M:%S}'.format(datetime.datetime.now(PDT)))
	    print('=' * 70)

	    app = gr.Blocks()

	    # Components created inside the context are attached to `app`.
	    with app:

	        gr.Markdown("<h1 style='text-align: center; margin-bottom: 1rem'>Lyrics Morpher</h1>")
	        gr.Markdown("<h1 style='text-align: center; margin-bottom: 1rem'>Morph any lyrics into an exact variations with fine-tuned Qwen2.5 0.5B Instruct model</h1>")

	        input_title = gr.Textbox(label="Enter song title here", value="Nothing Else Matters")
	        input_lyrics = gr.Textbox(label="Enter song lyrics here", value="So close no matter how far\nCould not be much more from the heart\nForever trusting who we are\nAnd nothing else matters")

	        submit = gr.Button("Morph", variant="primary")

	        gr.Markdown("## Morphing results")

	        output_stats = gr.Textbox(label="Morphed lyrics stats")
	        output_lyrics = gr.Textbox(label="Morphed lyrics")

	        # Clicking the button calls Morph_Lyrics with the two inputs and
	        # writes its two return values to the two output boxes.
	        run_event = submit.click(Morph_Lyrics,
	                                 [input_title,
	                                  input_lyrics
	                                  ],
	                                 [output_stats,
	                                  output_lyrics
	                                  ])

	    # Launch once the layout is fully defined.
	    app.queue().launch()