Spaces: Running on Zero
| import gradio as gr | |
| import numpy as np | |
| import random | |
| from PIL import Image | |
| import os | |
| import spaces | |
| from diffusers import StableDiffusion3Pipeline | |
| import torch | |
| from peft import PeftModel | |
# Device and dtype selection: fp16 on GPU, fp32 on CPU (fp16 is unreliable on CPU).
device = "cuda" if torch.cuda.is_available() else "cpu"
model_repo_id = "frankjoshua/stable-diffusion-3.5-medium"
if torch.cuda.is_available():
    torch_dtype = torch.float16
else:
    torch_dtype = torch.float32

pipe = StableDiffusion3Pipeline.from_pretrained(model_repo_id, torch_dtype=torch_dtype)

MAX_SEED = np.iinfo(np.int32).max
MAX_IMAGE_SIZE = 1024

# Display name -> Hub repo id of each Flow-GRPO LoRA adapter ("None" = base model).
lora_models = {
    "None": None,
    "GenEval": "jieliu/SD3.5M-FlowGRPO-GenEval",
    "Text Rendering": "jieliu/SD3.5M-FlowGRPO-Text",
    "Human Prefer": "jieliu/SD3.5M-FlowGRPO-PickScore",
}

# Display name -> local text file holding one example prompt per line.
lora_prompts = {
    "GenEval": os.path.join(os.getcwd(), "prompts/geneval.txt"),
    "Text Rendering": os.path.join(os.getcwd(), "prompts/ocr.txt"),
    "Human Prefer": os.path.join(os.getcwd(), "prompts/pickscore.txt"),
}

# Load all three adapters once at startup; infer() switches between them with
# set_adapter() per request, avoiding repeated downloads.
pipe.transformer = PeftModel.from_pretrained(pipe.transformer, lora_models["GenEval"], adapter_name="GenEval")
pipe.transformer.load_adapter(lora_models["Text Rendering"], adapter_name="Text Rendering")
pipe.transformer.load_adapter(lora_models["Human Prefer"], adapter_name="Human Prefer")
pipe = pipe.to(device)

# Persistent call counter. /data is the Space's persistent-storage volume;
# use the commented line instead for local runs without /data.
# COUNTER_FILE = os.path.join(os.getcwd(), "model_call_counter.txt")
# Fix: os.path.join() with a single argument was a no-op wrapper.
COUNTER_FILE = "/data/model_call_counter.txt"
def get_call_count(path=None):
    """Return the persisted model-call count, or 0 if it is missing or corrupt.

    Args:
        path: Counter file to read; defaults to the module-level COUNTER_FILE.

    Returns:
        int: The stored count, or 0 when the file is absent/unreadable/non-numeric.
    """
    counter_path = COUNTER_FILE if path is None else path
    if not os.path.exists(counter_path):
        return 0
    try:
        with open(counter_path, 'r') as f:
            return int(f.read().strip())
    except (OSError, ValueError):
        # Fix: the original bare `except:` swallowed everything, including
        # KeyboardInterrupt/SystemExit. A partially written or hand-edited
        # counter file should degrade to 0, nothing more.
        return 0
def update_call_count():
    """Increment the persisted model-call counter and return the new total."""
    new_total = get_call_count() + 1
    # Overwrite the whole file with the new value (single-writer assumption).
    with open(COUNTER_FILE, 'w') as counter_file:
        counter_file.write(str(new_total))
    return new_total
def sample_prompt(lora_model, prompt_file=None):
    """Return one randomly sampled example prompt for the given LoRA task.

    Args:
        lora_model: Key of `lora_prompts` ("GenEval", "Text Rendering",
            "Human Prefer"). "None" or an unknown key yields "".
        prompt_file: Optional override for the prompt file path; defaults to
            the task's entry in `lora_prompts`.

    Returns:
        A stripped prompt line, or a human-readable error string when the
        prompt file is missing or empty.
    """
    if prompt_file is None:
        # "None" and unknown models have no prompt file.
        if lora_model not in lora_prompts:
            return ""
        prompt_file = lora_prompts[lora_model]
    try:
        with open(prompt_file, 'r') as file:
            prompts = file.readlines()
    except FileNotFoundError:
        return "Prompt file not found."
    if not prompts:
        # Fix: an empty file used to raise IndexError for non-GenEval tasks
        # (random.choice on an empty list); GenEval already returned this message.
        return "No prompts found in file."
    if lora_model == 'GenEval':
        # Favor earlier lines: line i is sampled with weight 1/(i+1).
        # random.choices accepts unnormalized weights, so the previous manual
        # normalization was redundant (identical distribution).
        weights = [1 / (i + 1) for i in range(len(prompts))]
        return random.choices(prompts, weights=weights, k=1)[0].strip()
    return random.choice(prompts).strip()
def create_grid_image(images):
    """Assemble four equally sized PIL images into one 2x2 grid image.

    The first image's size defines the tile size; images are pasted
    row-major: top-left, top-right, bottom-left, bottom-right.
    """
    tile_w, tile_h = images[0].size
    grid = Image.new('RGB', (tile_w * 2, tile_h * 2))
    # Paste positions in row-major order for the 2x2 layout.
    offsets = ((0, 0), (tile_w, 0), (0, tile_h), (tile_w, tile_h))
    for index, offset in enumerate(offsets):
        grid.paste(images[index], offset)
    return grid
def infer(
    prompt,
    seed,
    randomize_seed,
    width,
    height,
    guidance_scale,
    num_inference_steps,
    lora_model,
    progress=gr.Progress(track_tqdm=True),
):
    """Generate four images and return them as a 2x2 grid.

    Args:
        prompt: User prompt; when empty, a prompt is sampled from the task's
            prompt file (see review note below — re-sampled per image).
        seed: Starting seed; images use seed, seed+1, ... when not randomizing.
        randomize_seed: If True, draw a fresh random seed for every image.
        width, height: Output resolution in pixels.
        guidance_scale, num_inference_steps: Diffusion sampling parameters.
        lora_model: Key of `lora_models`; "None" disables all LoRA adapters.
        progress: Gradio progress tracker (tracks the pipeline's tqdm bars).

    Returns:
        (grid PIL.Image, comma-joined seed string, call-count message string).
    """
    # NOTE(review): `spaces` is imported at module level but this function has no
    # @spaces.GPU decorator — confirm whether one is required on a ZeroGPU Space.
    call_count = update_call_count()  # persist and report the total generation count
    images = []
    seeds = []
    # Generate 4 images
    for i in range(4):
        if randomize_seed:
            current_seed = random.randint(0, MAX_SEED)
        else:
            current_seed = seed + i  # Use sequential seeds if not randomizing
        seeds.append(current_seed)
        generator = torch.Generator().manual_seed(current_seed)
        # NOTE(review): a fresh prompt is sampled on every iteration, so with an
        # empty user prompt the four grid images can use four different prompts —
        # confirm this is intended rather than sampling once before the loop.
        sampled_prompt = sample_prompt(lora_model)
        final_prompt = prompt if prompt else sampled_prompt
        if lora_model == "None":
            # Run the base model with every LoRA adapter temporarily disabled.
            with pipe.transformer.disable_adapter():
                image = pipe(
                    prompt=final_prompt,
                    negative_prompt="",
                    guidance_scale=guidance_scale,
                    num_inference_steps=num_inference_steps,
                    width=width,
                    height=height,
                    generator=generator,
                ).images[0]
        else:
            # Activate the requested adapter (all three are loaded at startup).
            pipe.transformer.set_adapter(lora_model)
            image = pipe(
                prompt=final_prompt,
                negative_prompt="",
                guidance_scale=guidance_scale,
                num_inference_steps=num_inference_steps,
                width=width,
                height=height,
                generator=generator,
            ).images[0]
        images.append(image)
    # Create a 2x2 grid from the 4 images
    grid_image = create_grid_image(images)
    return grid_image, ", ".join(map(str, seeds)), f"Model has been called {call_count} times"
# CSS injected into the Blocks page: center the main column and cap its width.
css = """
#col-container {
    margin: 0 auto;
    max-width: 640px;
}
"""

# ---------------------------------------------------------------------------
# Gradio UI.
# NOTE(review): the source's indentation was lost, so the exact widget nesting
# below is a reconstruction — verify the layout renders as intended.
# ---------------------------------------------------------------------------
with gr.Blocks(css=css) as demo:
    with gr.Column(elem_id="col-container"):
        gr.Markdown("""
# SD3.5 Medium + Flow-GRPO
Our model is trained separately for different tasks, so it’s best to use the corresponding prompt format for each task.
**User Guide:**
1. Select a LoRA model (choose “None” to use the base model)
2. Click “Sample Prompt” to randomly select from ~1000 task-specific prompts, or write your own
3. Click “Run” to generate images (a 2×2 grid of 4 images will be produced)
**Note:**
- For the *Text Rendering* task, please enclose the text to be displayed in **double quotes (`"`)**, not single quotes (`'`)
""")
        with gr.Row():
            # Single-line prompt box; left empty, infer() uses sampled prompts.
            prompt = gr.Textbox(
                label="Prompt",
                show_label=False,
                max_lines=1,
                placeholder="Enter your prompt",
                container=False,
            )
        with gr.Row():
            lora_model = gr.Dropdown(
                label="LoRA Model",
                choices=list(lora_models.keys()),
                value="GenEval"
            )
            sample_prompt_button = gr.Button("Sample Prompt", scale=0, variant="secondary")

            # Thin wrapper so every click re-samples a prompt for the selected task.
            def update_sampled_prompt(lora_model):
                return sample_prompt(lora_model)

            sample_prompt_button.click(
                fn=update_sampled_prompt,
                inputs=[lora_model],
                outputs=[prompt]
            )
            run_button = gr.Button("Run", scale=0, variant="primary")
        result = gr.Image(label="Results (2x2 Grid)", show_label=True)
        seed_display = gr.Textbox(label="Seeds Used", show_label=True)
        with gr.Accordion("Advanced Settings", open=False):
            seed = gr.Slider(
                label="Starting Seed",
                minimum=0,
                maximum=MAX_SEED,
                step=1,
                value=0,
            )
            randomize_seed = gr.Checkbox(label="Randomize seeds", value=True)
            with gr.Row():
                width = gr.Slider(
                    label="Width",
                    minimum=256,
                    maximum=MAX_IMAGE_SIZE,
                    step=32,
                    value=512,  # Replace with defaults that work for your model
                )
                height = gr.Slider(
                    label="Height",
                    minimum=256,
                    maximum=MAX_IMAGE_SIZE,
                    step=32,
                    value=512,  # Replace with defaults that work for your model
                )
            with gr.Row():
                guidance_scale = gr.Slider(
                    label="Guidance scale",
                    minimum=0.0,
                    maximum=10.0,
                    step=0.1,
                    value=4.5,  # Replace with defaults that work for your model
                )
                num_inference_steps = gr.Slider(
                    label="Number of inference steps",
                    minimum=1,
                    maximum=50,
                    step=1,
                    value=40,  # Replace with defaults that work for your model
                )
        # Read-only counter seeded from disk at startup; infer() updates it.
        call_count_display = gr.Textbox(
            label="Model Call Count",
            value=f"Model has been called {get_call_count()} times",
            interactive=False
        )
    # Run generation on button click or pressing Enter in the prompt box.
    gr.on(
        triggers=[run_button.click, prompt.submit],
        fn=infer,
        inputs=[
            prompt,
            seed,
            randomize_seed,
            width,
            height,
            guidance_scale,
            num_inference_steps,
            lora_model,
        ],
        outputs=[result, seed_display, call_count_display],
    )

if __name__ == "__main__":
    demo.launch()