Update modeling_prismatic.py to account for the case where `input_ids` is `None`

`input_ids` and `inputs_embeds` are both marked `Optional[torch.LongTensor] = None`; however, failing to pass `input_ids` to the `forward()` method results in an error in the first block (an `AttributeError`, because `None` has no `shape` attribute), since the code unconditionally checks `input_ids.shape[1] == 1` without first checking that `input_ids is not None`.

This pull request adds `is not None` guards to the conditions of the Generation with Cache and Multimodal Forward branches so that this case is handled.

Files changed (1) hide show

modeling_prismatic.py +5 -2

modeling_prismatic.py CHANGED Viewed

@@ -322,7 +322,7 @@ class PrismaticForConditionalGeneration(PrismaticPreTrainedModel):
         #   => Multimodal Forward :: (pixel_values is not None) and (input_ids/embeds.shape[0] == pixel_values.shape[0])
         # === Handle Generation with Cache (`input_ids.shape[1] == 1`) =>> requires `past_keys_values` ===
-        if input_ids.shape[1] == 1:
             assert input_ids.shape[0] == 1, "Generation is only currently supported for batch size of 1!"
             assert past_key_values is not None, "You must provide `past_key_values` during cached generation!"
             assert labels is None, "Unexpected key `labels` provided during cached generation!"
@@ -359,7 +359,10 @@ class PrismaticForConditionalGeneration(PrismaticPreTrainedModel):
             )
         # === Handle Multimodal Forward ===
-        elif (input_ids.shape[0] == pixel_values.shape[0]) or (inputs_embeds.shape[0] == pixel_values.shape[0]):
             assert past_key_values is None, "Unexpected key `past_key_values` provided during language-only forward!"
             # Visual Feature Extraction

         #   => Multimodal Forward :: (pixel_values is not None) and (input_ids/embeds.shape[0] == pixel_values.shape[0])
         # === Handle Generation with Cache (`input_ids.shape[1] == 1`) =>> requires `past_keys_values` ===
+        if input_ids is not None and input_ids.shape[1] == 1:
             assert input_ids.shape[0] == 1, "Generation is only currently supported for batch size of 1!"
             assert past_key_values is not None, "You must provide `past_key_values` during cached generation!"
             assert labels is None, "Unexpected key `labels` provided during cached generation!"
             )
         # === Handle Multimodal Forward ===
+        elif (
+                (input_ids is not None and input_ids.shape[0] == pixel_values.shape[0]) or
+                (inputs_embeds is not None and inputs_embeds.shape[0] == pixel_values.shape[0])
+            ):
             assert past_key_values is None, "Unexpected key `past_key_values` provided during language-only forward!"
             # Visual Feature Extraction