fine tune error?
Thank you very much for the model.
I liked it and I would like to be able to do fine tune but I can't do it, could you please tell me what I'm doing or give me instructions to correct the error.
Thank you very much in advance if you could help me how I could do 'fine tune'.
code fine tune:
import torch
from import tqdm # Importa tqdm para la barra de progreso
optimizer = torch.optim.AdamW(model.parameters(), lr=5e-5)
device = "cuda:0" if torch.cuda.is_available() else "cpu"
for epoch in range(5):
print(f"Epoch: {epoch}")
# Inicializa la barra de progreso
progress_bar = tqdm(enumerate(train_dataloader), total=len(train_dataloader))
for idx, batch in progress_bar:
input_ids = batch['input_ids'].to(device) #batch.pop("input_ids").to(device)
image = batch['images'].to(device) #batch.pop("images").to(device)
attention_mask= batch['attention_mask'].to(device) #batch.pop("attention_mask").to(device)
# input_ids, attention_mask,images
print(f"Input IDs shape: {input_ids.shape}, Images shape: {image.shape}, Attention Mask shape: {attention_mask.shape}")
# outputs = model(input_ids=input_ids, images=image, labels=input_ids, attention_mask=attention_mask)
outputs = model(input_ids=input_ids, images=image, labels=input_ids, attention_mask=attention_mask)
loss = outputs.loss
# Actualización de la barra de progreso con la información de la pérdida
progress_bar.set_description(f"Loss: {loss.item():.4f}")
optimizer.zero_grad(set_to_none=True) # Optimiza el reseteo de gradientes
error fine tune:
Hello. Thank you for your interest in our model.
This model has a special prompt formatting rule. You should put " <image>"
text (space is intended) in front of prompt text.
Thank you very much for the quick answer.
following the instructions of a " <image>"
I attached an image of the prompt:
txt = f"""<im_start>system\nYou are a helpful assistant.<im_end>\n<im_start>user\n <image> describe image<im_end>\n<im_start>assistant\n{item["text"]}{self.processor.tokenizer.eos_token}"""
I attach the error when training:
code fine tune:
Hello. Processor tokenizes texts using chat_template, so there is no need to pass formatted text.
Generally speaking, current Processor doesn't suit well for training. You can tokenize dialog using processor.tokenizer and image using processor.feature_extractor. I think code of the gradio demo can be very useful for you
Following your example of 'space', I organized the code this way but I see that it doesn't work either, what am I doing wrong?
from transformers import AutoModel, AutoProcessor
import torch
from PIL import Image
model = AutoModel.from_pretrained("unum-cloud/uform-gen2-qwen-500m",
# torch_dtype=torch.bfloat16, #error
processor = AutoProcessor.from_pretrained("unum-cloud/uform-gen2-qwen-500m",
from datasets import load_dataset
dataset = load_dataset("ybelkada/football-dataset", split="train")
from datasets import load_dataset
from transformers import AutoProcessor
import torch
from PIL import Image
from import Dataset, DataLoader
# from transformers import AutoModel, AutoProcessor
device = "cuda:0" if torch.cuda.is_available() else "cpu"
class ImageCaptioningDataset(Dataset):
def __init__(self, dataset, processor):
self.dataset = dataset
self.processor = processor
def __len__(self):
return len(self.dataset)
def __getitem__(self, idx):
item = self.dataset[idx]
messages = [{"role": "system", "content": "You are a helpful assistant."}]
# item["text"]' msg assistant describe imagen
assistant_msg = item["text"]
user_msg= " <image>describe image" # example message user
messages.append({"role": "user", "content": user_msg})
messages.append({"role": "assistant", "content": assistant_msg})
# Asumiendo que 'item["image"]' es un objeto PIL.Image o una ruta a la imagen
if isinstance(item["image"], str):
image =["image"]) # Cargar la imagen desde una ruta de archivo
image = item["image"] # Si ya es un objeto PIL.Image
# image = image.convert("RGB") # Convert la image to RGB
# Procesamiento de la imagen
image = (self.processor.feature_extractor(image).unsqueeze(0))
# print("image:",image)
model_inputs = self.processor.tokenizer.apply_chat_template(messages, add_generation_prompt=True, return_tensors="pt")
input_ids = model_inputs
# print("input_ids:",input_ids)
total_length = input_ids.shape[1] + self.processor.num_image_latents - 1
# print("input_ids.shape[1]",input_ids.shape[1],"self.processor.num_image_latents",self.processor.num_image_latents,"total_length:",total_length)
# attention_mask = torch.ones(1, model_inputs.shape[1] + processor.num_image_latents - 1)
attention_mask = torch.ones(1, total_length)
print("--->>attention_mask",attention_mask.shape) # torch.Size([1, 294])
# Preparar las entradas del modelo
model_inputs = {
"input_ids": input_ids,
"images": image,
"attention_mask": attention_mask
# model_inputs = {k: for k, v in model_inputs.items()}
return model_inputs
train_dataset = ImageCaptioningDataset(dataset, processor)
train_dataloader = DataLoader(train_dataset, shuffle=True, batch_size=1)
import torch
from import tqdm # Importa tqdm para la barra de progreso
optimizer = torch.optim.AdamW(model.parameters(), lr=5e-5)
device = "cuda:0" if torch.cuda.is_available() else "cpu"
for epoch in range(5):
print(f"Epoch: {epoch}")
# Inicializa la barra de progreso
progress_bar = tqdm(enumerate(train_dataloader), total=len(train_dataloader))
for idx, batch in progress_bar:
input_ids = batch['input_ids'].to(device) # batch.pop("input_ids")
images = batch['images'].to(device) # batch.pop("images")
attention_mask= batch['attention_mask'].to(device) # batch.pop("attention_mask")
print(f"Input IDs shape: {input_ids.shape}, Images shape: {images.shape}, Attention Mask shape: {attention_mask.shape}")
outputs = model(input_ids=input_ids, images=images, labels=input_ids, attention_mask=attention_mask)
loss = outputs.loss
# Actualización de la barra de progreso con la información de la pérdida
progress_bar.set_description(f"Loss: {loss.item():.4f}")
optimizer.zero_grad(set_to_none=True) # Optimiza el reseteo de gradientes
ValueError Traceback (most recent call last)
<ipython-input-18-1cd4f0a661d2> in <cell line: 10>()
21 print(f"Input IDs shape: {input_ids.shape}, Images shape: {images.shape}, Attention Mask shape: {attention_mask.shape}")
---> 22 outputs = model(input_ids=input_ids, images=images, labels=input_ids, attention_mask=attention_mask)
24 loss = outputs.loss
6 frames
~/.cache/huggingface/modules/transformers_modules/unum-cloud/uform-gen2-qwen-500m/3912572ad204f82a2b0f875d3a1700faaebab719/ in forward(self, x)
176 def forward(self, x: Tensor) -> Tensor:
--> 177 h, w = x.shape[2:]
178 x = self.patch_embed(x).flatten(start_dim=2).transpose(2, 1)
179 x = x + self.interpolate_pos_encoding(x, h, w)
ValueError: too many values to unpack (expected 2)
url space
Are there any updates?
hello. I am going to prepare a colab notebook with fine-tuning example, stay tuned
good news, thanks. compatible with peft , sft ?
Any news on this colab notebook?