Error occurs when I try to run this model with vLLM

#5
by Skyeaee - opened

When I run this model with vLLM using `vllm serve Llama-4-Scout-17B-16E-Instruct-unsloth-bnb-4bit/`,
the log is:

ERROR 04-09 19:22:27 [core.py:386] EngineCore hit an exception: Traceback (most recent call last):
ERROR 04-09 19:22:27 [core.py:386]   File "/data1/yinjian/python-scripts/vllm/vllm/v1/engine/core.py", line 377, in run_engine_core
ERROR 04-09 19:22:27 [core.py:386]     engine_core = EngineCoreProc(*args, **kwargs)
ERROR 04-09 19:22:27 [core.py:386]                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 04-09 19:22:27 [core.py:386]   File "/data1/yinjian/python-scripts/vllm/vllm/v1/engine/core.py", line 319, in __init__
ERROR 04-09 19:22:27 [core.py:386]     super().__init__(vllm_config, executor_class, log_stats)
ERROR 04-09 19:22:27 [core.py:386]   File "/data1/yinjian/python-scripts/vllm/vllm/v1/engine/core.py", line 67, in __init__
ERROR 04-09 19:22:27 [core.py:386]     self.model_executor = executor_class(vllm_config)
ERROR 04-09 19:22:27 [core.py:386]                           ^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 04-09 19:22:27 [core.py:386]   File "/data1/yinjian/python-scripts/vllm/vllm/executor/executor_base.py", line 52, in __init__
ERROR 04-09 19:22:27 [core.py:386]     self._init_executor()
ERROR 04-09 19:22:27 [core.py:386]   File "/data1/yinjian/python-scripts/vllm/vllm/executor/uniproc_executor.py", line 47, in _init_executor
ERROR 04-09 19:22:27 [core.py:386]     self.collective_rpc("load_model")
ERROR 04-09 19:22:27 [core.py:386]   File "/data1/yinjian/python-scripts/vllm/vllm/executor/uniproc_executor.py", line 56, in collective_rpc
ERROR 04-09 19:22:27 [core.py:386]     answer = run_method(self.driver_worker, method, args, kwargs)
ERROR 04-09 19:22:27 [core.py:386]              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 04-09 19:22:27 [core.py:386]   File "/data1/yinjian/python-scripts/vllm/vllm/utils.py", line 2363, in run_method
ERROR 04-09 19:22:27 [core.py:386]     return func(*args, **kwargs)
ERROR 04-09 19:22:27 [core.py:386]            ^^^^^^^^^^^^^^^^^^^^^
ERROR 04-09 19:22:27 [core.py:386]   File "/data1/yinjian/python-scripts/vllm/vllm/v1/worker/gpu_worker.py", line 136, in load_model
ERROR 04-09 19:22:27 [core.py:386]     self.model_runner.load_model()
ERROR 04-09 19:22:27 [core.py:386]   File "/data1/yinjian/python-scripts/vllm/vllm/v1/worker/gpu_model_runner.py", line 1280, in load_model
ERROR 04-09 19:22:27 [core.py:386]     self.model = get_model(vllm_config=self.vllm_config)
ERROR 04-09 19:22:27 [core.py:386]                  ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 04-09 19:22:27 [core.py:386]   File "/data1/yinjian/python-scripts/vllm/vllm/model_executor/model_loader/__init__.py", line 14, in get_model
ERROR 04-09 19:22:27 [core.py:386]     return loader.load_model(vllm_config=vllm_config)
ERROR 04-09 19:22:27 [core.py:386]            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 04-09 19:22:27 [core.py:386]   File "/data1/yinjian/python-scripts/vllm/vllm/model_executor/model_loader/loader.py", line 1289, in load_model
ERROR 04-09 19:22:27 [core.py:386]     model = _initialize_model(vllm_config=vllm_config)
ERROR 04-09 19:22:27 [core.py:386]             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 04-09 19:22:27 [core.py:386]   File "/data1/yinjian/python-scripts/vllm/vllm/model_executor/model_loader/loader.py", line 133, in _initialize_model
ERROR 04-09 19:22:27 [core.py:386]     return model_class(vllm_config=vllm_config, prefix=prefix)
ERROR 04-09 19:22:27 [core.py:386]            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 04-09 19:22:27 [core.py:386]   File "/data1/yinjian/python-scripts/vllm/vllm/model_executor/models/mllama4.py", line 691, in __init__
ERROR 04-09 19:22:27 [core.py:386]     self.language_model = _initialize_model(
ERROR 04-09 19:22:27 [core.py:386]                           ^^^^^^^^^^^^^^^^^^
ERROR 04-09 19:22:27 [core.py:386]   File "/data1/yinjian/python-scripts/vllm/vllm/model_executor/model_loader/loader.py", line 133, in _initialize_model
ERROR 04-09 19:22:27 [core.py:386]     return model_class(vllm_config=vllm_config, prefix=prefix)
ERROR 04-09 19:22:27 [core.py:386]            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 04-09 19:22:27 [core.py:386]   File "/data1/yinjian/python-scripts/vllm/vllm/model_executor/models/llama4.py", line 481, in __init__
ERROR 04-09 19:22:27 [core.py:386]     super().__init__(vllm_config=vllm_config,
ERROR 04-09 19:22:27 [core.py:386]   File "/data1/yinjian/python-scripts/vllm/vllm/model_executor/models/llama.py", line 486, in __init__
ERROR 04-09 19:22:27 [core.py:386]     self.model = self._init_model(vllm_config=vllm_config,
ERROR 04-09 19:22:27 [core.py:386]                  ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 04-09 19:22:27 [core.py:386]   File "/data1/yinjian/python-scripts/vllm/vllm/model_executor/models/llama4.py", line 489, in _init_model
ERROR 04-09 19:22:27 [core.py:386]     return Llama4Model(vllm_config=vllm_config,
ERROR 04-09 19:22:27 [core.py:386]            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 04-09 19:22:27 [core.py:386]   File "/data1/yinjian/python-scripts/vllm/vllm/compilation/decorators.py", line 151, in __init__
ERROR 04-09 19:22:27 [core.py:386]     old_init(self, vllm_config=vllm_config, prefix=prefix, **kwargs)
ERROR 04-09 19:22:27 [core.py:386]   File "/data1/yinjian/python-scripts/vllm/vllm/model_executor/models/llama4.py", line 335, in __init__
ERROR 04-09 19:22:27 [core.py:386]     super().__init__(vllm_config=vllm_config,
ERROR 04-09 19:22:27 [core.py:386]   File "/data1/yinjian/python-scripts/vllm/vllm/compilation/decorators.py", line 151, in __init__
ERROR 04-09 19:22:27 [core.py:386]     old_init(self, vllm_config=vllm_config, prefix=prefix, **kwargs)
ERROR 04-09 19:22:27 [core.py:386]   File "/data1/yinjian/python-scripts/vllm/vllm/model_executor/models/llama.py", line 321, in __init__
ERROR 04-09 19:22:27 [core.py:386]     self.start_layer, self.end_layer, self.layers = make_layers(
ERROR 04-09 19:22:27 [core.py:386]                                                     ^^^^^^^^^^^^
ERROR 04-09 19:22:27 [core.py:386]   File "/data1/yinjian/python-scripts/vllm/vllm/model_executor/models/utils.py", line 610, in make_layers
ERROR 04-09 19:22:27 [core.py:386]     maybe_offload_to_cpu(layer_fn(prefix=f"{prefix}.{idx}"))
ERROR 04-09 19:22:27 [core.py:386]                          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 04-09 19:22:27 [core.py:386]   File "/data1/yinjian/python-scripts/vllm/vllm/model_executor/models/llama.py", line 323, in <lambda>
ERROR 04-09 19:22:27 [core.py:386]     lambda prefix: layer_type(config=config,
ERROR 04-09 19:22:27 [core.py:386]                    ^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 04-09 19:22:27 [core.py:386]   File "/data1/yinjian/python-scripts/vllm/vllm/model_executor/models/llama4.py", line 284, in __init__
ERROR 04-09 19:22:27 [core.py:386]     self.feed_forward = Llama4MoE(
ERROR 04-09 19:22:27 [core.py:386]                         ^^^^^^^^^^
ERROR 04-09 19:22:27 [core.py:386]   File "/data1/yinjian/python-scripts/vllm/vllm/model_executor/models/llama4.py", line 73, in __init__
ERROR 04-09 19:22:27 [core.py:386]     self.experts = FusedMoE(
ERROR 04-09 19:22:27 [core.py:386]                    ^^^^^^^^^
ERROR 04-09 19:22:27 [core.py:386]   File "/data1/yinjian/python-scripts/vllm/vllm/model_executor/layers/fused_moe/layer.py", line 502, in __init__
ERROR 04-09 19:22:27 [core.py:386]     assert self.quant_method is not None
ERROR 04-09 19:22:27 [core.py:386]            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 04-09 19:22:27 [core.py:386] AssertionError
ERROR 04-09 19:22:27 [core.py:386]
CRITICAL 04-09 19:22:27 [core_client.py:359] Got fatal signal from worker processes, shutting down. See stack trace above for root cause issue.
Killed

Did you find a solution for this one?
I have a similar issue as well.

Sign up or log in to comment