Unable to fine-tune BioGPT for NER task
Hi, I was using the run_ner.py script. It showed an error that it doesn't support models with a slow tokenizer and suggested running the legacy run_ner.py script instead (https://github.com/huggingface/transformers/tree/main/examples/legacy/token-classification).
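(For reference, I believe the first error comes from BioGPT only shipping a slow Python tokenizer, so the current run_ner.py refuses it. A quick check confirms this; the microsoft/biogpt checkpoint name here is my assumption about which model is meant:)

```python
from transformers import AutoTokenizer

# BioGPT has no fast (Rust-backed) tokenizer implementation, which is
# why the current run_ner.py script rejects it.
tok = AutoTokenizer.from_pretrained("microsoft/biogpt")
print(tok.is_fast)  # -> False
```

Now with the legacy script I am getting the error below: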
```
/content/transformers/examples/legacy/token-classification/run_ner.py:327 in <module>

    324
    325
    326 if __name__ == "__main__":
❱   327     main()
    328

/content/transformers/examples/legacy/token-classification/run_ner.py:262 in main

    259
    260     # Training
    261     if training_args.do_train:
❱   262         trainer.train(
    263             model_path=model_args.model_name_or_path if os.path.isdir(
    264         )
    265         trainer.save_model()

/usr/local/lib/python3.10/dist-packages/transformers/trainer.py:1536 in train

   1533         inner_training_loop = find_executable_batch_size(
   1534             self._inner_training_loop, self._train_batch_size, args.a
   1535         )
❱  1536         return inner_training_loop(
   1537             args=args,
   1538             resume_from_checkpoint=resume_from_checkpoint,
   1539             trial=trial,

/usr/local/lib/python3.10/dist-packages/transformers/trainer.py:1779 in _inner_training_loop

   1776                 rng_to_sync = True
   1777
   1778             step = -1
❱  1779             for step, inputs in enumerate(epoch_iterator):
   1780                 total_batched_samples += 1
   1781                 if rng_to_sync:
   1782                     self._load_rng_state(resume_from_checkpoint)

/usr/local/lib/python3.10/dist-packages/accelerate/data_loader.py:377 in __iter__

    374         dataloader_iter = super().__iter__()
    375         # We iterate one batch ahead to check when we are at the end
    376         try:
❱   377             current_batch = next(dataloader_iter)
    378         except StopIteration:
    379             yield
    380

/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py:633 in __next__

    630             if self._sampler_iter is None:
    631                 # TODO(https://github.com/pytorch/pytorch/issues/7675
    632                 self._reset()  # type: ignore[call-arg]
❱   633             data = self._next_data()
    634             self._num_yielded += 1
    635             if self._dataset_kind == _DatasetKind.Iterable and \
    636                     self._IterableDataset_len_called is not None and

/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py:677 in _next_data

    674
    675     def _next_data(self):
    676         index = self._next_index()  # may raise StopIteration
❱   677         data = self._dataset_fetcher.fetch(index)  # may raise StopIt
    678         if self._pin_memory:
    679             data = _utils.pin_memory.pin_memory(data, self._pin_memor
    680         return data

/usr/local/lib/python3.10/dist-packages/torch/utils/data/_utils/fetch.py:54 in fetch

     51                 data = [self.dataset[idx] for idx in possibly_batched_i
     52         else:
     53             data = self.dataset[possibly_batched_index]
❱    54         return self.collate_fn(data)
     55

/usr/local/lib/python3.10/dist-packages/transformers/trainer_utils.py:698 in __call__

    695
    696     def __call__(self, features: List[dict]):
    697         features = [self._remove_columns(feature) for feature in featu
❱   698         return self.data_collator(features)
    699

/usr/local/lib/python3.10/dist-packages/transformers/data/data_collator.py:70 in default_data_collator

     67     # on the whole batch.
     68
     69     if return_tensors == "pt":
❱    70         return torch_default_data_collator(features)
     71     elif return_tensors == "tf":
     72         return tf_default_data_collator(features)
     73     elif return_tensors == "np":

/usr/local/lib/python3.10/dist-packages/transformers/data/data_collator.py:136 in torch_default_data_collator

    133             elif isinstance(v, np.ndarray):
    134                 batch[k] = torch.tensor(np.stack([f[k] for f in featu
    135             else:
❱   136                 batch[k] = torch.tensor([f[k] for f in features])
    137
    138     return batch
    139

RuntimeError: Could not infer dtype of NoneType
```
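From the last frame, default_data_collator is calling torch.tensor on the raw feature lists, and one of them apparently contains None. A minimal sketch that reproduces the same failure (the feature dicts below are made up for illustration, not my actual data):

```python
import torch

# Hypothetical encoded examples where some tokens got no label (None),
# e.g. special tokens or subword pieces never mapped to a tag id.
features = [{"labels": [None, 3, 5]}, {"labels": [1, 1, 0]}]

# torch_default_data_collator effectively does this per key:
batch = torch.tensor([f["labels"] for f in features])
# -> RuntimeError: Could not infer dtype of NoneType
```

So I suspect my preprocessing is leaving None somewhere in the encoded features (most likely the labels column). As far as I understand, label positions that should be ignored need to be filled with -100 (the ignore index) rather than None before they reach the collator. Any idea what I am doing wrong?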