transformers load error

#2
by sh2orc - opened

In vLLM, the model runs normally as an OpenAI-compatible server. However, when I load it through the transformers==4.52.3 library, an error occurs. Do you know how to solve this problem?

vllm==0.8.5
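For reference, the server was started with a command along these lines (a sketch, not the exact invocation; the model ID is the one loaded in the snippet further down and the port comes from the log below):

vllm serve kakaocorp/kanana-1.5-8b-instruct-2505 --port 3000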

INFO 05-23 12:48:34 [api_server.py:1090] Starting vLLM API server on http://0.0.0.0:3000
INFO 05-23 12:48:34 [launcher.py:28] Available routes are:
INFO 05-23 12:48:34 [launcher.py:36] Route: /openapi.json, Methods: HEAD, GET
INFO 05-23 12:48:34 [launcher.py:36] Route: /docs, Methods: HEAD, GET
INFO 05-23 12:48:34 [launcher.py:36] Route: /docs/oauth2-redirect, Methods: HEAD, GET
INFO 05-23 12:48:34 [launcher.py:36] Route: /redoc, Methods: HEAD, GET
INFO 05-23 12:48:34 [launcher.py:36] Route: /health, Methods: GET
INFO 05-23 12:48:34 [launcher.py:36] Route: /load, Methods: GET
INFO 05-23 12:48:34 [launcher.py:36] Route: /ping, Methods: POST, GET
INFO 05-23 12:48:34 [launcher.py:36] Route: /tokenize, Methods: POST
INFO 05-23 12:48:34 [launcher.py:36] Route: /detokenize, Methods: POST
INFO 05-23 12:48:34 [launcher.py:36] Route: /v1/models, Methods: GET
INFO 05-23 12:48:34 [launcher.py:36] Route: /version, Methods: GET
INFO 05-23 12:48:34 [launcher.py:36] Route: /v1/chat/completions, Methods: POST
INFO 05-23 12:48:34 [launcher.py:36] Route: /v1/completions, Methods: POST
INFO 05-23 12:48:34 [launcher.py:36] Route: /v1/embeddings, Methods: POST
INFO 05-23 12:48:34 [launcher.py:36] Route: /pooling, Methods: POST
INFO 05-23 12:48:34 [launcher.py:36] Route: /score, Methods: POST
INFO 05-23 12:48:34 [launcher.py:36] Route: /v1/score, Methods: POST
INFO 05-23 12:48:34 [launcher.py:36] Route: /v1/audio/transcriptions, Methods: POST
INFO 05-23 12:48:34 [launcher.py:36] Route: /rerank, Methods: POST
INFO 05-23 12:48:34 [launcher.py:36] Route: /v1/rerank, Methods: POST
INFO 05-23 12:48:34 [launcher.py:36] Route: /v2/rerank, Methods: POST
INFO 05-23 12:48:34 [launcher.py:36] Route: /invocations, Methods: POST
INFO 05-23 12:48:34 [launcher.py:36] Route: /metrics, Methods: GET
INFO:     Started server process [1332]
INFO:     Waiting for application startup.
INFO:     Application startup complete.

With transformers==4.52.3:

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL_ID = "kakaocorp/kanana-1.5-8b-instruct-2505"

# bf16, single-GPU load; this from_pretrained call is what raises the error below
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
).to("cuda")
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

The call fails with the traceback below. Do you know how to load the model successfully with transformers?

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
Cell In[14], line 6
      2 from transformers import AutoModelForCausalLM, AutoTokenizer
      4 MODEL_ID = "kakaocorp/kanana-1.5-8b-instruct-2505"
----> 6 model = AutoModelForCausalLM.from_pretrained(
      7     MODEL_ID,
      8     torch_dtype=torch.bfloat16,
      9     trust_remote_code=True,
     10 ).to("cuda")
     11 tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
     15 from llmcompressor import oneshot

File /usr/local/lib/python3.11/dist-packages/transformers/models/auto/auto_factory.py:571, in _BaseAutoModelClass.from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs)
    569     if model_class.config_class == config.sub_configs.get("text_config", None):
    570         config = config.get_text_config()
--> 571     return model_class.from_pretrained(
    572         pretrained_model_name_or_path, *model_args, config=config, **hub_kwargs, **kwargs
    573     )
    574 raise ValueError(
    575     f"Unrecognized configuration class {config.__class__} for this kind of AutoModel: {cls.__name__}.\n"
    576     f"Model type should be one of {', '.join(c.__name__ for c in cls._model_mapping.keys())}."
    577 )

File /usr/local/lib/python3.11/dist-packages/transformers/modeling_utils.py:309, in restore_default_torch_dtype.<locals>._wrapper(*args, **kwargs)
    307 old_dtype = torch.get_default_dtype()
    308 try:
--> 309     return func(*args, **kwargs)
    310 finally:
    311     torch.set_default_dtype(old_dtype)

File /usr/local/lib/python3.11/dist-packages/transformers/modeling_utils.py:4507, in PreTrainedModel.from_pretrained(cls, pretrained_model_name_or_path, config, cache_dir, ignore_mismatched_sizes, force_download, local_files_only, token, revision, use_safetensors, weights_only, *model_args, **kwargs)
   4498     config = cls._autoset_attn_implementation(
   4499         config,
   4500         use_flash_attention_2=use_flash_attention_2,
   4501         torch_dtype=torch_dtype,
   4502         device_map=device_map,
   4503     )
   4505 with ContextManagers(model_init_context):
   4506     # Let's make sure we don't run the init function of buffer modules
-> 4507     model = cls(config, *model_args, **model_kwargs)
   4509 # Make sure to tie the weights correctly
   4510 model.tie_weights()

File /usr/local/lib/python3.11/dist-packages/transformers/models/llama/modeling_llama.py:618, in LlamaForCausalLM.__init__(self, config)
    616 def __init__(self, config):
    617     super().__init__(config)
--> 618     self.model = LlamaModel(config)
    619     self.vocab_size = config.vocab_size
    620     self.lm_head = nn.Linear(config.hidden_size, config.vocab_size, bias=False)

File /usr/local/lib/python3.11/dist-packages/transformers/models/llama/modeling_llama.py:379, in LlamaModel.__init__(self, config)
    376 self.gradient_checkpointing = False
    378 # Initialize weights and apply final processing
--> 379 self.post_init()

File /usr/local/lib/python3.11/dist-packages/transformers/modeling_utils.py:1968, in PreTrainedModel.post_init(self)
   1966 if self._tp_plan is not None and is_torch_greater_or_equal("2.3"):
   1967     for _, v in self._tp_plan.items():
-> 1968         if v not in ALL_PARALLEL_STYLES:
   1969             raise ValueError(
   1970                 f"Unsupported tensor parallel style {v}. Supported styles are {ALL_PARALLEL_STYLES}"
   1971             )

TypeError: argument of type 'NoneType' is not iterable
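Reading the traceback: the failing check is "if v not in ALL_PARALLEL_STYLES" inside PreTrainedModel.post_init, so ALL_PARALLEL_STYLES must be None while the model config still carries a tensor-parallel plan (self._tp_plan). A minimal diagnostic sketch, assuming the 4.52.x source layout (the import path is an assumption based on that layout, not a documented API):

import torch
import torch.distributed as dist
import transformers

try:
    # Import path assumed from the 4.52.x source layout; adjust if it moved.
    from transformers.integrations.tensor_parallel import ALL_PARALLEL_STYLES
except ImportError:
    ALL_PARALLEL_STYLES = "not importable in this version"

print(transformers.__version__)   # 4.52.3 is the release the error occurs on
print(torch.__version__)          # the styles table appears to be gated on the torch version
print(dist.is_available())        # ... and on distributed support in the torch build
print(ALL_PARALLEL_STYLES)        # None here matches the TypeError above

If the last line prints None, any checkpoint whose config ships a tensor-parallel plan (Llama-family models like this one do) will hit the same TypeError, regardless of the model weights themselves.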
sh2orc changed discussion status to closed

I don't know why, but after upgrading transformers and vllm, it works. :-)

pip install transformers -Uq
pip install vllm -Uq
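Presumably the upgrade pulled in a transformers patch release in which this post_init check no longer trips over a missing styles table, so pinning transformers to anything newer than 4.52.3 should also be enough on its own (an assumption from the traceback above, not a confirmed changelog entry):

pip install -q "transformers>4.52.3"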
