Spaces: Running on Zero
Update custom_pipeline.py
custom_pipeline.py  CHANGED  (+4 / -21)
@@ -42,7 +42,7 @@ def prepare_timesteps(
     return timesteps, num_inference_steps
 
 # FLUX pipeline function
-class FLUXPipelineWithIntermediateOutputs(FluxPipeline):
+class HighSpeedFluxPipeline(FluxPipeline):
     """
     Extends the FluxPipeline to yield intermediate images during the denoising process
     with progressively increasing resolution for faster generation.
@@ -56,7 +56,6 @@ class FLUXPipelineWithIntermediateOutputs(FluxPipeline):
         width: Optional[int] = None,
         num_inference_steps: int = 4,
         timesteps: List[int] = None,
-        guidance_scale: float = 3.5,
         num_images_per_prompt: Optional[int] = 1,
         generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
         latents: Optional[torch.FloatTensor] = None,
@@ -64,8 +63,7 @@ class FLUXPipelineWithIntermediateOutputs(FluxPipeline):
         pooled_prompt_embeds: Optional[torch.FloatTensor] = None,
         output_type: Optional[str] = "pil",
         return_dict: bool = True,
-        joint_attention_kwargs: Optional[Dict[str, Any]] = None,
-        max_sequence_length: int = 300,
+        max_sequence_length: int = 128,
     ):
         """Generates images and yields intermediate results during the denoising process."""
         height = height or self.default_sample_size * self.vae_scale_factor
@@ -82,16 +80,10 @@ class FLUXPipelineWithIntermediateOutputs(FluxPipeline):
             max_sequence_length=max_sequence_length,
         )
 
-        self._guidance_scale = guidance_scale
-        self._joint_attention_kwargs = joint_attention_kwargs
-        self._interrupt = False
-
         # 2. Define call parameters
         batch_size = 1 if isinstance(prompt, str) else len(prompt)
         device = self._execution_device
 
-        # 3. Encode prompt
-        lora_scale = joint_attention_kwargs.get("scale", None) if joint_attention_kwargs is not None else None
         prompt_embeds, pooled_prompt_embeds, text_ids = self.encode_prompt(
             prompt=prompt,
             prompt_2=prompt_2,
@@ -100,7 +92,6 @@ class FLUXPipelineWithIntermediateOutputs(FluxPipeline):
             device=device,
             num_images_per_prompt=num_images_per_prompt,
             max_sequence_length=max_sequence_length,
-            lora_scale=lora_scale,
         )
         # 4. Prepare latent variables
         num_channels_latents = self.transformer.config.in_channels // 4
@@ -128,29 +119,21 @@ class FLUXPipelineWithIntermediateOutputs(FluxPipeline):
         )
         self._num_timesteps = len(timesteps)
 
-        # Handle guidance
-        guidance = torch.full([1], guidance_scale, device=device, dtype=torch.float16).expand(latents.shape[0]) if self.transformer.config.guidance_embeds else None
-
         # 6. Denoising loop
         for i, t in enumerate(timesteps):
-            if self.interrupt:
-                continue
-
+
             timestep = t.expand(latents.shape[0]).to(latents.dtype)
 
             noise_pred = self.transformer(
                 hidden_states=latents,
                 timestep=timestep / 1000,
-                guidance=guidance,
                 pooled_projections=pooled_prompt_embeds,
                 encoder_hidden_states=prompt_embeds,
                 txt_ids=text_ids,
                 img_ids=latent_image_ids,
-                joint_attention_kwargs=self.joint_attention_kwargs,
                 return_dict=False,
             )[0]
 
-            # Yield intermediate result
             latents = self.scheduler.step(noise_pred, t, latents, return_dict=False)[0]
             torch.cuda.empty_cache()
 
@@ -165,4 +148,4 @@ class FLUXPipelineWithIntermediateOutputs(FluxPipeline):
         latents = self._unpack_latents(latents, height, width, self.vae_scale_factor)
         latents = (latents / vae.config.scaling_factor) + vae.config.shift_factor
         image = vae.decode(latents, return_dict=False)[0]
-        return self.image_processor.postprocess(image, output_type=output_type)[0]
+        return self.image_processor.postprocess(image, output_type=output_type)[0]
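For context, a minimal usage sketch of the renamed pipeline. It assumes that custom_pipeline.py is importable from this Space, that a FLUX checkpoint such as black-forest-labs/FLUX.1-schnell is loaded (the checkpoint id is an assumption, not taken from this diff), and that the overridden call is a generator yielding one PIL image per denoising step:

# Minimal sketch (assumptions noted in comments); not the Space's own app code.
import torch
from custom_pipeline import HighSpeedFluxPipeline

# Assumed checkpoint id; swap in whichever FLUX weights the Space actually uses.
pipe = HighSpeedFluxPipeline.from_pretrained(
    "black-forest-labs/FLUX.1-schnell",
    torch_dtype=torch.bfloat16,
).to("cuda")

prompt = "a watercolor painting of a lighthouse at dawn"

# Assumption: the overridden __call__ yields an intermediate image at each
# denoising step, so previews can be shown before generation finishes.
for step, image in enumerate(pipe(prompt, num_inference_steps=4, max_sequence_length=128)):
    image.save(f"step_{step}.png")

Note that after this change the call no longer accepts guidance_scale or joint_attention_kwargs, matching the parameters removed in the diff above.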