Update modeling_internvl_chat.py: call get_input_embeddings() via self.language_model.module
Browse files
modeling_internvl_chat.py
CHANGED
|
@@ -124,7 +124,7 @@ class InternVLChatModel(PreTrainedModel):
|
|
| 124 |
return_dict = return_dict if return_dict is not None else self.config.use_return_dict
|
| 125 |
|
| 126 |
image_flags = image_flags.squeeze(-1)
|
| 127 |
-
input_embeds = self.language_model.get_input_embeddings()(input_ids)
|
| 128 |
|
| 129 |
vit_embeds = self.extract_feature(pixel_values)
|
| 130 |
vit_embeds = vit_embeds[image_flags == 1]
|
|
@@ -334,7 +334,7 @@ class InternVLChatModel(PreTrainedModel):
|
|
| 334 |
vit_embeds = visual_features
|
| 335 |
else:
|
| 336 |
vit_embeds = self.extract_feature(pixel_values)
|
| 337 |
-
input_embeds = self.language_model.get_input_embeddings()(input_ids)
|
| 338 |
B, N, C = input_embeds.shape
|
| 339 |
input_embeds = input_embeds.reshape(B * N, C)
|
| 340 |
|
|
@@ -345,7 +345,7 @@ class InternVLChatModel(PreTrainedModel):
|
|
| 345 |
|
| 346 |
input_embeds = input_embeds.reshape(B, N, C)
|
| 347 |
else:
|
| 348 |
-
input_embeds = self.language_model.get_input_embeddings()(input_ids)
|
| 349 |
|
| 350 |
outputs = self.language_model.generate(
|
| 351 |
inputs_embeds=input_embeds,
|
|
|
|
| 124 |
return_dict = return_dict if return_dict is not None else self.config.use_return_dict
|
| 125 |
|
| 126 |
image_flags = image_flags.squeeze(-1)
|
| 127 |
+
input_embeds = self.language_model.module.get_input_embeddings()(input_ids)
|
| 128 |
|
| 129 |
vit_embeds = self.extract_feature(pixel_values)
|
| 130 |
vit_embeds = vit_embeds[image_flags == 1]
|
|
|
|
| 334 |
vit_embeds = visual_features
|
| 335 |
else:
|
| 336 |
vit_embeds = self.extract_feature(pixel_values)
|
| 337 |
+
input_embeds = self.language_model.module.get_input_embeddings()(input_ids)
|
| 338 |
B, N, C = input_embeds.shape
|
| 339 |
input_embeds = input_embeds.reshape(B * N, C)
|
| 340 |
|
|
|
|
| 345 |
|
| 346 |
input_embeds = input_embeds.reshape(B, N, C)
|
| 347 |
else:
|
| 348 |
+
input_embeds = self.language_model.module.get_input_embeddings()(input_ids)
|
| 349 |
|
| 350 |
outputs = self.language_model.generate(
|
| 351 |
inputs_embeds=input_embeds,
|