music-to-outfit

Running on Zero

App Files Files Community

music-to-outfit / app.py

fffiloni

Update app.py

88cbac0 verified 14 days ago

raw

history blame

4.3 kB

	import gradio as gr
	import spaces
	import json
	import re
	import random
	import numpy as np
	from gradio_client import Client, handle_file

	MAX_SEED = np.iinfo(np.int32).max

	import re
	import torch
	from transformers import pipeline

	zephyr_model = "HuggingFaceH4/zephyr-7b-beta"
	mixtral_model = "mistralai/Mixtral-8x7B-Instruct-v0.1"

	pipe = pipeline("text-generation", model=zephyr_model, torch_dtype=torch.bfloat16, device_map="auto")

	standard_sys = f"""
	You are an Art Director AI whose job is to help users create their own outfit which will reflect the musical mood or audio described by users.
	In particular, you need to respond succintly with an outfit idea, in a friendly tone, write a prompt for an image including your outfit idea.

	For example, if a user says, "This song features a female vocalist singing a beautiful and emotional melody. The melody is accompanied by the sound of a piano playing a slow and melancholic tune. The song has a dreamy and ethereal feel to it. The lyrics of the song are about the beauty of love and the joy it brings to one's life. Overall, this song is a perfect example of the power of music to evoke strong emotions and create a sense of wonder and awe in the listener.", provide immediately a prompt corresponding to the audio description.
	Immediately STOP after that. It should be EXACTLY in this format:
	"A lady dressed with a flowy, pastel-colored dress paired with strappy sandals and a wide-brimmed hat, accessorized with delicate jewelry, such as dainty earrings and a necklace."
	"""

	@spaces.GPU(enable_queue=True)
	def get_outfit_prompt(user_prompt):

	agent_maker_sys = standard_sys

	instruction = f"""
	<\|system\|>
	{agent_maker_sys}</s>
	<\|user\|>
	"""

	prompt = f"{instruction.strip()}\n{user_prompt}</s>"
	outputs = pipe(prompt, max_new_tokens=256, do_sample=True, temperature=0.7, top_k=50, top_p=0.95)
	pattern = r'\<\\|system\\|\>(.*?)\<\\|assistant\\|\>'
	cleaned_text = re.sub(pattern, '', outputs[0]["generated_text"], flags=re.DOTALL)

	print(f"SUGGESTED Musical prompt: {cleaned_text}")
	return cleaned_text.lstrip("\n")

	def get_salmonn(audio_in, prompt):
	client = Client("fffiloni/SALMONN-7B-gradio")
	result = client.predict(
	speech=handle_file(audio_in),
	text_input=prompt,
	num_beams=4,
	temperature=1,
	top_p=0.9,
	api_name="/gradio_answer_1"
	)
	print(result)

	return result

	def infer(audio_in):
	salmonn_prompt = "Please describe the audio in detail."
	salmonn_res = get_salmonn(audio_in, salmonn_prompt)

	outfit_prompt = get_outfit_prompt(salmonn_res)

	return gr.update(value=outfit_prompt, interactive=True)

	demo_title = "Music to Outfit"
	description = "Get an outfit ideau from audio"

	css = """
	#col-container {
	margin: 0 auto;
	max-width: 980px;
	text-align: left;
	}
	#inspi-prompt textarea {
	font-size: 20px;
	line-height: 24px;
	font-weight: 600;
	}
	/* fix examples gallery width on mobile */
	div#component-11 > .gallery > .gallery-item > .container > img {
	width: auto!important;
	}
	"""

	with gr.Blocks(css=css) as demo:

	with gr.Column(elem_id="col-container"):

	gr.HTML(f"""
	<h2 style="text-align: center;">{demo_title}</h2>
	<p style="text-align: center;">{description}</p>
	""")

	with gr.Row():

	with gr.Column():
	audio_in = gr.Audio(
	label = "Audio reference",
	type = "filepath",
	elem_id = "audio-in"
	)

	submit_btn = gr.Button("Make music from my pic !")

	with gr.Column():

	caption = gr.Textbox(
	label = "Inspirational outfit prompt",
	interactive = False,
	elem_id = "inspi-prompt"
	)

	result = gr.Image(
	label = "Outfit"
	)




	submit_btn.click(
	fn = infer,
	inputs = [
	audio_in
	],
	outputs =[
	caption,
	#result
	],
	concurrency_limit = 4
	)

	demo.queue(max_size=16).launch(show_api=False, show_error=True)