Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| from gradio_client import Client | |
# Remote Gradio Space used for image captioning. The FuseCap endpoint below
# is an alternative captioner that is currently disabled.
#fusecap_client = Client("https://noamrot-fusecap-image-captioning.hf.space/")
FUYU_SPACE_URL = "https://adept-fuyu-8b-demo.hf.space/"
fuyu_client = Client(FUYU_SPACE_URL)
def get_caption(image_in):
    """Request a detailed caption for an image from the remote Fuyu Space.

    Args:
        image_in: Value forwarded as the endpoint's 'raw_image' Image input
            (a string, per the endpoint's parameter description).

    Returns:
        The result returned by the remote ``predict`` call, unchanged.
    """
    # Positional arguments follow the remote endpoint's component order.
    endpoint_args = (
        image_in,  # str representing input in 'raw_image' Image component
        True,  # bool in 'Enable detailed captioning' Checkbox component
    )
    caption = fuyu_client.predict(*endpoint_args, fn_index=2)

    # Log the caption so it shows up in the Space's console output.
    print(f"IMAGE CAPTION: {caption}")
    return caption
| import re | |
| import torch | |
| from transformers import pipeline | |
# Text-generation pipeline that turns an image caption into a chatbot spec.
# Loaded once at import time and reused by every request.
pipe = pipeline(
    "text-generation",
    model="HuggingFaceH4/zephyr-7b-beta",
    torch_dtype=torch.bfloat16,
    device_map="auto",
)
# System prompt for the "agent maker" model. It contains two few-shot examples
# that fix the exact reply layout (friendly sentence, Title, System prompt,
# Example input). This is a plain string on purpose: it has no placeholders,
# so literal braces can be added later without breaking formatting.
agent_maker_sys = """
You are an AI whose job is to help users create their own chatbot whose personality will reflect the character or scene from an image described by users.
In particular, you need to respond succinctly in a friendly tone, write a system prompt for an LLM, a catchy title for the chatbot, and a very short example user input. Make sure each part is included.
The system prompt will not mention any image provided.
For example, if a user says, "a picture of a man in a black suit and tie riding a black dragon", first do a friendly response, then add the title, system prompt, and example user input.
Immediately STOP after the example input. It should be EXACTLY in this format:
"Sure, I'd be happy to help you build a bot! I'm generating a title, system prompt, and an example input. How do they sound?
Title: Dragon Trainer
System prompt: You are a Dragon trainer and your job is to provide guidance and tips on mastering dragons. Use a friendly and informative tone.
Example input: How can I train a dragon to breathe fire?"
Here's another example. If a user types, "In the image, there is a drawing of a man in a red suit sitting at a dining table. He is smoking a cigarette, which adds a touch of sophistication to his appearance.", respond:
"Sure, I'd be happy to help you build a bot! I'm generating a title, system prompt, and an example input. How do they sound?
Title: Gentleman's Companion
System prompt: You are a sophisticated old man, also known as the Gentleman's Companion. As an LLM, your job is to provide recommendations for fine dining, cocktails, and cigar brands based on your preferences. Use a sophisticated and refined tone.
Example input: Can you suggest a good cigar brand for a man who enjoys smoking while dining in style?"
"""

# Prompt prefix in the chat-tag layout used by the model: the system turn
# (closed with </s>) followed by an opened user turn that the caller fills in
# with the image caption.
instruction = f"""
<|system|>
{agent_maker_sys}</s>
<|user|>
"""
def infer(image_in):
    """Suggest a chatbot title, system prompt and example input for an image.

    The image is first captioned by ``get_caption``; the caption is then sent
    as the user turn to the text-generation pipeline, whose system turn is the
    module-level ``instruction`` prompt.

    Args:
        image_in: Image reference passed through to ``get_caption``.

    Returns:
        The model's reply with leading newlines removed.
    """
    gr.Info("Getting image caption with Fuyu...")
    user_prompt = get_caption(image_in)

    # Close the user turn and open the assistant turn explicitly. Without the
    # trailing assistant tag the model is free to continue the user turn or
    # skip the tag, and the reply could not be separated from the prompt.
    prompt = f"{instruction.strip()}\n{user_prompt}</s>\n<|assistant|>\n"

    gr.Info("Building a system according to the image caption ...")
    outputs = pipe(
        prompt,
        max_new_tokens=256,
        do_sample=True,
        temperature=0.7,
        top_k=50,
        top_p=0.95,
        # Only return the newly generated text, so the system prompt can
        # never leak into the result regardless of what the model emits.
        return_full_text=False,
    )

    suggestion = outputs[0]["generated_text"]
    print(f"SUGGESTED LLM: {suggestion}")
    return suggestion.lstrip("\n")
# Page title. Must be a plain string: a trailing comma here would silently
# turn it into a 1-tuple.
title = "LLM Agent from a Picture"

# Short HTML blurb rendered under the page heading.
description = "Get a LLM system prompt from a picture so you can use it in <a href='https://huggingface.co/spaces/abidlabs/GPT-Baker'>GPT-Baker</a>."

# Custom CSS passed to gr.Blocks: centers the main column and caps its width.
css = """
#col-container{
margin: 0 auto;
max-width: 780px;
text-align: left;
}
"""
# Page layout: a centered column with a heading, an image input next to the
# result textbox, a row of example images, and the submit wiring.
with gr.Blocks(css=css) as demo:
    with gr.Column(elem_id="col-container"):
        # Static heading plus the module-level description blurb.
        header_markup = f"""
<h2 style="text-align: center;">LLM Agent from a Picture</h2>
<p style="text-align: center;">{description}</p>
"""
        gr.HTML(header_markup)

        with gr.Row():
            # Left side: image picker and the trigger button.
            with gr.Column():
                image_in = gr.Image(
                    label="Image reference",
                    type="filepath",
                    elem_id="image-in",
                )
                submit_btn = gr.Button("Make LLM system from my pic !")

            # Right side: generated suggestion.
            with gr.Column():
                result = gr.Textbox(
                    label="Suggested System",
                    lines=10,
                    max_lines=30,
                    elem_id="suggested-system-prompt",
                )

        with gr.Row():
            # Bundled sample images, run through the same handler as the button.
            gr.Examples(
                examples=[
                    ["examples/ocean_poet.jpeg"],
                    ["examples/winter_hiking.png"],
                ],
                fn=infer,
                inputs=[image_in],
                outputs=[result],
                cache_examples=True,
            )

    # Clicking the button sends the selected image through infer() and shows
    # the returned text in the result box.
    submit_btn.click(fn=infer, inputs=[image_in], outputs=[result])

# Enable request queuing, then start the app.
demo.queue()
demo.launch()