Spaces:
Runtime error
Runtime error
Add stable diffusion for compositional generation.
Browse files
app.py
CHANGED
@@ -25,6 +25,10 @@ from composable_diffusion.model_creation import model_and_diffusion_defaults as
|
|
25 |
|
26 |
|
27 |
from PIL import Image
|
|
|
|
|
|
|
|
|
28 |
# This notebook supports both CPU and GPU.
|
29 |
# On CPU, generating one sample may take on the order of 20 minutes.
|
30 |
# On a GPU, it should be under a minute.
|
@@ -33,6 +37,12 @@ has_cuda = th.cuda.is_available()
|
|
33 |
device = th.device('cpu' if not has_cuda else 'cuda')
|
34 |
print(device)
|
35 |
|
|
|
|
|
|
|
|
|
|
|
|
|
36 |
# Create base model.
|
37 |
timestep_respacing = 100 # @param{type: 'number'}
|
38 |
options = model_and_diffusion_defaults()
|
@@ -276,9 +286,17 @@ def compose_clevr_objects(prompt, guidance_scale):
|
|
276 |
return out_img
|
277 |
|
278 |
|
|
|
|
|
|
|
|
|
|
|
|
|
279 |
def compose(prompt, version, guidance_scale):
|
280 |
if version == 'GLIDE':
|
281 |
return compose_language_descriptions(prompt, guidance_scale)
|
|
|
|
|
282 |
else:
|
283 |
return compose_clevr_objects(prompt, guidance_scale)
|
284 |
|
@@ -286,14 +304,15 @@ def compose(prompt, version, guidance_scale):
|
|
286 |
examples_1 = 'a camel | a forest'
|
287 |
examples_2 = 'A cloudy blue sky | A mountain in the horizon | Cherry Blossoms in front of the mountain'
|
288 |
examples_3 = '0.1, 0.5 | 0.3, 0.5 | 0.5, 0.5 | 0.7, 0.5 | 0.9, 0.5'
|
289 |
-
|
|
|
290 |
|
291 |
import gradio as gr
|
292 |
|
293 |
title = 'Compositional Visual Generation with Composable Diffusion Models'
|
294 |
-
description = '<p>Demo for Composable Diffusion<ul><li>~30s per GLIDE example</li><li>~10s per CLEVR Object example</li>(<b>Note</b>: time is
|
295 |
|
296 |
-
iface = gr.Interface(compose, inputs=["text", gr.Radio(['GLIDE', 'CLEVR Objects'], type="value", label='version'), gr.Slider(
|
297 |
title=title, description=description, examples=examples)
|
298 |
|
299 |
-
iface.launch()
|
|
|
25 |
|
26 |
|
27 |
from PIL import Image
|
28 |
+
|
29 |
+
from torch import autocast
|
30 |
+
from diffusers import StableDiffusionPipeline
|
31 |
+
|
32 |
# This notebook supports both CPU and GPU.
|
33 |
# On CPU, generating one sample may take on the order of 20 minutes.
|
34 |
# On a GPU, it should be under a minute.
|
|
|
37 |
device = th.device('cpu' if not has_cuda else 'cuda')
|
38 |
print(device)
|
39 |
|
40 |
+
# initialize stable diffusion model
|
41 |
+
pipe = StableDiffusionPipeline.from_pretrained(
|
42 |
+
"CompVis/stable-diffusion-v1-4",
|
43 |
+
use_auth_token=True
|
44 |
+
).to(device)
|
45 |
+
|
46 |
# Create base model.
|
47 |
timestep_respacing = 100 # @param{type: 'number'}
|
48 |
options = model_and_diffusion_defaults()
|
|
|
286 |
return out_img
|
287 |
|
288 |
|
289 |
+
def stable_diffusion_compose(prompt, scale):
|
290 |
+
with autocast('cpu' if not has_cuda else 'cuda'):
|
291 |
+
image = pipe(prompt, guidance_scale=scale)["sample"][0]
|
292 |
+
return image
|
293 |
+
|
294 |
+
|
295 |
def compose(prompt, version, guidance_scale):
|
296 |
if version == 'GLIDE':
|
297 |
return compose_language_descriptions(prompt, guidance_scale)
|
298 |
+
elif version == 'Stable_Diffusion_1v_4':
|
299 |
+
return stable_diffusion_compose(prompt, guidance_scale)
|
300 |
else:
|
301 |
return compose_clevr_objects(prompt, guidance_scale)
|
302 |
|
|
|
304 |
examples_1 = 'a camel | a forest'
|
305 |
examples_2 = 'A cloudy blue sky | A mountain in the horizon | Cherry Blossoms in front of the mountain'
|
306 |
examples_3 = '0.1, 0.5 | 0.3, 0.5 | 0.5, 0.5 | 0.7, 0.5 | 0.9, 0.5'
|
307 |
+
examples_4 = 'a river leading into a mountain | red trees on the side'
|
308 |
+
examples = [[examples_1, 'GLIDE', 10], [examples_4, 'Stable_Diffusion_1v_4', 10], [examples_2, 'GLIDE', 10], [examples_3, 'CLEVR Objects', 10]]
|
309 |
|
310 |
import gradio as gr
|
311 |
|
312 |
title = 'Compositional Visual Generation with Composable Diffusion Models'
|
313 |
+
description = '<p>Demo for Composable Diffusion<ul><li>~30s per GLIDE/Stable-Diffusion example</li><li>~10s per CLEVR Object example</li>(<b>Note</b>: time is varied depending on what gpu is used.)</ul></p><p>See more information from our <a href="https://energy-based-model.github.io/Compositional-Visual-Generation-with-Composable-Diffusion-Models/">Project Page</a>.</p><ul><li>One version is based on the released <a href="https://github.com/openai/glide-text2im">GLIDE</a> and <a href="https://github.com/CompVis/stable-diffusion/">Stable Diffusion</a> for composing natural language description.</li><li>Another is based on our pre-trained CLEVR Object Model for composing objects. <br>(<b>Note</b>: We recommend using <b><i>x</i></b> in range <b><i>[0.1, 0.9]</i></b> and <b><i>y</i></b> in range <b><i>[0.25, 0.7]</i></b>, since the training dataset labels are in given ranges.)</li></ul><p>When composing multiple sentences, use `|` as the delimiter, see given examples below.</p>'
|
314 |
|
315 |
+
iface = gr.Interface(compose, inputs=["text", gr.Radio(['Stable_Diffusion_1v_4', 'GLIDE', 'CLEVR Objects'], type="value", label='version'), gr.Slider(2, 20)], outputs='image',
|
316 |
title=title, description=description, examples=examples)
|
317 |
|
318 |
+
iface.launch()
|