video-to-music

Running on Zero

fffiloni committed on Feb 1, 2024

Commit

cbbd024

verified ·

1 Parent(s): 3127104

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,4 +1,5 @@
 import gradio as gr
 import json
 import re
 from gradio_client import Client
@@ -98,7 +99,7 @@ pipe = pipeline("text-generation", model=zephyr_model, torch_dtype=torch.bfloat1
 agent_maker_sys = f"""
 You are an AI whose job is to help users create their own music which its genre will reflect the character or scene from an image described by users.
-In particular, you need to respond succintly in a friendly tone, write a musical prompt for an music generation model.
 For example, if a user says, "a picture of a man in a black suit and tie riding a black dragon", provide immediately a musical prompt corresponding to the image description.
 Immediately STOP after that. It should be EXACTLY in this format:
@@ -111,6 +112,7 @@ instruction = f"""
 <|user|>
 """
 def infer(image_in):
     gr.Info("Getting image caption with Kosmos2...")
     user_prompt = get_caption(image_in)
@@ -160,7 +162,8 @@ with gr.Blocks(css=css) as demo:
                 submit_btn = gr.Button("Make music from my pic !")
             with gr.Column():
                 caption = gr.Textbox(
-                    label = "Musical prompt"
                 )
                 result = gr.Audio(
                     label = "Music"

 import gradio as gr
+import spaces
 import json
 import re
 from gradio_client import Client
 agent_maker_sys = f"""
 You are an AI whose job is to help users create their own music which its genre will reflect the character or scene from an image described by users.
+In particular, you need to respond succintly with few musical words, in a friendly tone, write a musical prompt for a music generation model.
 For example, if a user says, "a picture of a man in a black suit and tie riding a black dragon", provide immediately a musical prompt corresponding to the image description.
 Immediately STOP after that. It should be EXACTLY in this format:
 <|user|>
 """
+@spaces.GPU(enable_queue=True, duration=60)
 def infer(image_in):
     gr.Info("Getting image caption with Kosmos2...")
     user_prompt = get_caption(image_in)
                 submit_btn = gr.Button("Make music from my pic !")
             with gr.Column():
                 caption = gr.Textbox(
+                    label = "Musical prompt",
+                    max_lines = 3
                 )
                 result = gr.Audio(
                     label = "Music"