frankleeeee committed • Commit 3065798 • Parent(s): f4ad4f1

Upload STDiT

Files changed:
- config.json (+2 -1)
- modeling_stdit.py (+3 -2)
config.json
CHANGED
@@ -11,6 +11,7 @@
   "depth": 28,
   "drop_path": 0.0,
   "enable_flash_attn": false,
+  "enable_flashattn": false,
   "enable_layernorm_kernel": false,
   "enable_sequence_parallelism": false,
   "freeze": null,
@@ -32,7 +33,7 @@
     2
   ],
   "pred_sigma": true,
-  "space_scale": 0
+  "space_scale": 1.0,
   "time_scale": 1.0,
   "torch_dtype": "float32",
   "transformers_version": "4.38.2"
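Net effect of the config change: config.json now carries both spellings of the flash-attention flag (the existing `enable_flash_attn` plus the newly added `enable_flashattn`), and `space_scale` becomes `1.0`. As a minimal sketch of why duplicating the key is harmless for consumers (the file path and the `get()` fallback chain below are illustrative assumptions, not code from this repository):

```python
# Sketch: read the config and accept either spelling of the
# flash-attention flag. Path and fallback logic are assumptions
# for illustration only.
import json

with open("config.json") as f:
    cfg = json.load(f)

# Prefer the new key, fall back to the old one, default to False.
use_flash_attn = cfg.get("enable_flashattn", cfg.get("enable_flash_attn", False))
space_scale = cfg.get("space_scale", 1.0)
print(use_flash_attn, space_scale)  # -> False 1.0 with this commit's values
```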
modeling_stdit.py
CHANGED
@@ -112,7 +112,7 @@ class STDiT(PreTrainedModel):
         x = x.to(self.final_layer.linear.weight.dtype)
         timestep = timestep.to(self.final_layer.linear.weight.dtype)
         y = y.to(self.final_layer.linear.weight.dtype)
-
+
         # embedding
         x = self.x_embedder(x)  # [B, N, C]
         x = rearrange(x, "B (T S) C -> B T S C", T=self.num_temporal, S=self.num_spatial)
@@ -148,7 +148,8 @@ class STDiT(PreTrainedModel):
                     tpe = self.pos_embed_temporal
             else:
                 tpe = None
-            x = auto_grad_checkpoint(block, x, y, t0, y_lens, tpe)
+            x = block(x, y, t0, y_lens, tpe)
+            # x = auto_grad_checkpoint(block, x, y, t0, y_lens, tpe)
 
         if self.enable_sequence_parallelism:
             x = gather_forward_split_backward(x, get_sequence_parallel_group(), dim=1, grad_scale="up")
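The substantive change is in the second hunk: the checkpointed call `auto_grad_checkpoint(block, ...)` is replaced by a direct `block(...)` call, with the old call kept as a comment. Activation checkpointing only pays off during training, where it trades recomputation for activation memory; for an inference-oriented upload the direct call skips that machinery. A rough sketch of the behavior a helper like this typically provides (an assumption about its behavior, not the repository's actual implementation):

```python
# Sketch of what a helper like auto_grad_checkpoint commonly does
# (assumed behavior, not this repo's code): checkpoint the block
# while training to save activation memory, otherwise call it directly.
import torch
from torch.utils.checkpoint import checkpoint

def auto_grad_checkpoint_sketch(module, *args):
    if module.training and torch.is_grad_enabled():
        # Recompute activations in backward instead of storing them.
        return checkpoint(module, *args, use_reentrant=False)
    return module(*args)  # the direct call this commit switches to
```

At inference time `module.training` is False, so both paths reduce to the same direct call; the commit simply hard-codes that path.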