bring back inits
app.py CHANGED
@@ -118,26 +118,25 @@ def inference(text, init_image, skip_timesteps, clip_guidance_scale, tv_scale, r
                                      std=[0.26862954, 0.26130258, 0.27577711])


-    #def inference(text, init_image, skip_timesteps, clip_guidance_scale, tv_scale, range_scale, init_scale, seed, image_prompt):
     all_frames = []
     prompts = [text]
-
-
-
-
+    if image_prompts:
+        image_prompts = [image_prompts.name]
+    else:
+        image_prompts = []
     batch_size = 1
     clip_guidance_scale = clip_guidance_scale # Controls how much the image should look like the prompt.
     tv_scale = tv_scale # Controls the smoothness of the final output.
     range_scale = range_scale # Controls how far out of range RGB values are allowed to be.
     cutn = cutn
     n_batches = 1
-
-
-
-
+    if init_image:
+        init_image = init_image.name
+    else:
+        init_image = None # This can be an URL or Colab local path and must be in quotes.
     skip_timesteps = skip_timesteps # This needs to be between approx. 200 and 500 when using an init image.
     # Higher values make the output look more like the init.
-
+    init_scale = init_scale # This enhances the effect of the init image, a good value is 1000.
     seed = seed

     if seed is not None:
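The first hunk restores the unwrapping of the uploaded inputs into plain file paths before the rest of the pipeline uses them. A minimal sketch of that normalization, assuming the inputs are file-like upload objects that expose a .name path (which is what the .name accesses above imply); the as_path helper is illustrative and not part of app.py:

def as_path(upload):
    """Return a local filesystem path for an upload, or None when nothing was provided."""
    if not upload:
        return None
    # Gradio-style upload objects are tempfile wrappers; .name holds the path on disk.
    return getattr(upload, 'name', upload)

# Illustrative use inside inference():
#   init_image = as_path(init_image)            # None when no init image was uploaded
#   image_prompts = [as_path(image_prompts)] if image_prompts else []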
@@ -149,25 +148,25 @@ def inference(text, init_image, skip_timesteps, clip_guidance_scale, tv_scale, r
         txt, weight = parse_prompt(prompt)
         target_embeds.append(clip_model.encode_text(clip.tokenize(txt).to(device)).float())
         weights.append(weight)
-
-
-
-
-
-
-
-
+    for prompt in image_prompts:
+        path, weight = parse_prompt(prompt)
+        img = Image.open(fetch(path)).convert('RGB')
+        img = TF.resize(img, min(side_x, side_y, *img.size), transforms.InterpolationMode.LANCZOS)
+        batch = make_cutouts(TF.to_tensor(img).unsqueeze(0).to(device))
+        embed = clip_model.encode_image(normalize(batch)).float()
+        target_embeds.append(embed)
+        weights.extend([weight / cutn] * cutn)
     target_embeds = torch.cat(target_embeds)
     weights = torch.tensor(weights, device=device)
     if weights.sum().abs() < 1e-3:
         raise RuntimeError('The weights must not sum to 0.')
     weights /= weights.sum().abs()
     init = None
-
-
-
-
-
+    if init_image is not None:
+        lpips_model = lpips.LPIPS(net='vgg').to(device)
+        init = Image.open(fetch(init_image)).convert('RGB')
+        init = init.resize((side_x, side_y), Image.LANCZOS)
+        init = TF.to_tensor(init).to(device).unsqueeze(0).mul(2).sub(1)
     cur_t = None
     def cond_fn(x, t, y=None):
         with torch.enable_grad():
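The restored for prompt in image_prompts loop turns each image prompt into CLIP target embeddings: the image is resized, cut into cutn random crops, encoded with clip_model.encode_image, and each cutout contributes weight / cutn to the prompt weights. A self-contained sketch of the same idea, assuming OpenAI's clip package and torchvision are installed; random_cutouts and the 'prompt.jpg' path are illustrative stand-ins for the app's MakeCutouts module and the uploaded file:

import torch
import clip
from PIL import Image
from torchvision import transforms
import torchvision.transforms.functional as TF

device = 'cuda' if torch.cuda.is_available() else 'cpu'
clip_model, _ = clip.load('ViT-B/32', device=device)
normalize = transforms.Normalize(mean=[0.48145466, 0.4578275, 0.40821073],
                                 std=[0.26862954, 0.26130258, 0.27577711])

def random_cutouts(img_batch, cut_size=224, cutn=16):
    # cutn random crops, each resized to CLIP's input resolution; a simplified
    # stand-in for the MakeCutouts used in app.py.
    crop = transforms.RandomResizedCrop(cut_size, scale=(0.5, 1.0))
    return torch.cat([crop(img_batch) for _ in range(cutn)], dim=0)

img = Image.open('prompt.jpg').convert('RGB')                   # hypothetical image prompt
img = TF.resize(img, 256, transforms.InterpolationMode.LANCZOS)
batch = random_cutouts(TF.to_tensor(img).unsqueeze(0).to(device))
with torch.no_grad():
    embed = clip_model.encode_image(normalize(batch)).float()   # (cutn, 512) for ViT-B/32

In the diff, the whole (cutn, 512) tensor is appended to target_embeds while weights.extend([weight / cutn] * cutn) splits the prompt's weight evenly across its cutouts.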
@@ -185,10 +184,10 @@ def inference(text, init_image, skip_timesteps, clip_guidance_scale, tv_scale, r
             tv_losses = tv_loss(x_in)
             range_losses = range_loss(out['pred_xstart'])
             loss = losses.sum() * clip_guidance_scale + tv_losses.sum() * tv_scale + range_losses.sum() * range_scale
-
+            if init is not None and init_scale:

-
-
+                init_losses = lpips_model(x_in, init)
+                loss = loss + init_losses.sum() * init_scale
             return -torch.autograd.grad(loss, x)[0]
     if model_config['timestep_respacing'].startswith('ddim'):
         sample_fn = diffusion.ddim_sample_loop_progressive
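The last hunk brings the init-image term back into cond_fn: when an init image is set, the LPIPS perceptual distance between the current denoised estimate and the init is scaled by init_scale and added to the guidance loss, and the guard (if init is not None and init_scale) keeps the term off when no init or a zero scale is given. A minimal sketch of that term, assuming the lpips package is installed and both tensors are already scaled to [-1, 1], as the .mul(2).sub(1) above does; the tensors here are random stand-ins:

import torch
import lpips

device = 'cuda' if torch.cuda.is_available() else 'cpu'
lpips_model = lpips.LPIPS(net='vgg').to(device)

# Stand-in images in [-1, 1], matching the scaling applied to the real init image.
init = torch.rand(1, 3, 256, 256, device=device) * 2 - 1
x_in = (torch.rand(1, 3, 256, 256, device=device) * 2 - 1).requires_grad_()

init_scale = 1000                              # "a good value is 1000" per the restored comment
init_losses = lpips_model(x_in, init)          # perceptual distance, shape (1, 1, 1, 1)
loss = init_losses.sum() * init_scale          # added on top of the CLIP / TV / range terms
grad = -torch.autograd.grad(loss, x_in)[0]     # guidance direction, as cond_fn returns it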