SmolVLM

Running on Zero

App Files Community

merve HF Staff committed on Aug 6, 2024

Commit

b926faa

verified ·

1 Parent(s): 6492835

Add assistant prefix and examples

Browse files

Files changed (1) hide show

app.py +14 -7

app.py CHANGED Viewed

@@ -21,7 +21,7 @@ EOS_WORDS_IDS = [processor.tokenizer.eos_token_id]
 @spaces.GPU
 def model_inference(
-    images, text, decoding_strategy, temperature, max_new_tokens,
     repetition_penalty, top_p
 ):
     if text == "" and not images:
@@ -43,6 +43,9 @@ def model_inference(
                 }
             ]
     prompt = processor.apply_chat_template(resulting_messages, add_generation_prompt=True)
     inputs = processor(text=prompt, images=[images], return_tensors="pt")
@@ -81,14 +84,18 @@ with gr.Blocks(fill_height=True) as demo:
     with gr.Column():
         image_input = gr.Image(label="Upload your Image", type="pil")
         query_input = gr.Textbox(label="Prompt")
         submit_btn = gr.Button("Submit")
         output = gr.Textbox(label="Output")
     with gr.Accordion(label="Example Inputs and Advanced Generation Parameters"):
-        examples=[["example_images/travel_tips.jpg", "I want to go somewhere similar to the one in the photo. Give me destinations and travel tips.", "Greedy", 0.4, 512, 1.2, 0.8],
-                    ["example_images/dummy_pdf.png", "How much percent is the order status?", "Greedy", 0.4, 512, 1.2, 0.8],
-                    ["example_images/art_critic.png", "As an art critic AI assistant, could you describe this painting in details and make a thorough critic?.", "Greedy", 0.4, 512, 1.2, 0.8],
-                    ["example_images/s2w_example.png", "What is this UI about?", "Greedy", 0.4, 512, 1.2, 0.8]]
         # Hyper-parameters for generation
         max_new_tokens = gr.Slider(
@@ -162,13 +169,13 @@ with gr.Blocks(fill_height=True) as demo:
           )
         gr.Examples(
                         examples = examples,
-                        inputs=[image_input, query_input, decoding_strategy, temperature,
                                                               max_new_tokens, repetition_penalty, top_p],
                         outputs=output,
                         fn=model_inference
                     )
-        submit_btn.click(model_inference, inputs = [image_input, query_input, decoding_strategy, temperature,
                                                       max_new_tokens, repetition_penalty, top_p], outputs=output)

 @spaces.GPU
 def model_inference(
+    images, text, assistant_prefix, decoding_strategy, temperature, max_new_tokens,
     repetition_penalty, top_p
 ):
     if text == "" and not images:
                 }
             ]
+    if assistant_prefix:
+      text = f"{assistant_prefix} {text}"
     prompt = processor.apply_chat_template(resulting_messages, add_generation_prompt=True)
     inputs = processor(text=prompt, images=[images], return_tensors="pt")
     with gr.Column():
         image_input = gr.Image(label="Upload your Image", type="pil")
         query_input = gr.Textbox(label="Prompt")
+        assistant_prefix = gr.Textbox(label="Assistant Prefix", placeholder="Let's think step by step.")
         submit_btn = gr.Button("Submit")
         output = gr.Textbox(label="Output")
     with gr.Accordion(label="Example Inputs and Advanced Generation Parameters"):
+        examples=[
+                    ["example_images/mmmu_example.jpeg", "Let's think step by step.", "Chase wants to buy 4 kilograms of oval beads and 5 kilograms of star-shaped beads. How much will he spend?", "Greedy", 0.4, 512, 1.2, 0.8],
+                    ["example_images/travel_tips.jpg", None, "I want to go somewhere similar to the one in the photo. Give me destinations and travel tips.", "Greedy", 0.4, 512, 1.2, 0.8],
+                    ["example_images/dummy_pdf.png", None, "How much percent is the order status?", "Greedy", 0.4, 512, 1.2, 0.8],
+                    ["example_images/art_critic.png", None, "As an art critic AI assistant, could you describe this painting in details and make a thorough critic?.", "Greedy", 0.4, 512, 1.2, 0.8],
+                    ["example_images/s2w_example.png", None, "What is this UI about?", "Greedy", 0.4, 512, 1.2, 0.8]]
         # Hyper-parameters for generation
         max_new_tokens = gr.Slider(
           )
         gr.Examples(
                         examples = examples,
+                        inputs=[image_input, query_input, assistant_prefix, decoding_strategy, temperature,
                                                               max_new_tokens, repetition_penalty, top_p],
                         outputs=output,
                         fn=model_inference
                     )
+        submit_btn.click(model_inference, inputs = [image_input, query_input, assistant_prefix, decoding_strategy, temperature,
                                                       max_new_tokens, repetition_penalty, top_p], outputs=output)