Error occurs when I try to run this model with vLLM
#5
by
Skyeaee
- opened
When I run this model with vLLM using `vllm serve Llama-4-Scout-17B-16E-Instruct-unsloth-bnb-4bit/`,
the log is:
ERROR 04-09 19:22:27 [core.py:386] EngineCore hit an exception: Traceback (most recent call last):
ERROR 04-09 19:22:27 [core.py:386] File "/data1/yinjian/python-scripts/vllm/vllm/v1/engine/core.py", line 377, in run_engine_core
ERROR 04-09 19:22:27 [core.py:386] engine_core = EngineCoreProc(*args, **kwargs)
ERROR 04-09 19:22:27 [core.py:386] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 04-09 19:22:27 [core.py:386] File "/data1/yinjian/python-scripts/vllm/vllm/v1/engine/core.py", line 319, in __init__
ERROR 04-09 19:22:27 [core.py:386] super().__init__(vllm_config, executor_class, log_stats)
ERROR 04-09 19:22:27 [core.py:386] File "/data1/yinjian/python-scripts/vllm/vllm/v1/engine/core.py", line 67, in __init__
ERROR 04-09 19:22:27 [core.py:386] self.model_executor = executor_class(vllm_config)
ERROR 04-09 19:22:27 [core.py:386] ^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 04-09 19:22:27 [core.py:386] File "/data1/yinjian/python-scripts/vllm/vllm/executor/executor_base.py", line 52, in __init__
ERROR 04-09 19:22:27 [core.py:386] self._init_executor()
ERROR 04-09 19:22:27 [core.py:386] File "/data1/yinjian/python-scripts/vllm/vllm/executor/uniproc_executor.py", line 47, in _init_executor
ERROR 04-09 19:22:27 [core.py:386] self.collective_rpc("load_model")
ERROR 04-09 19:22:27 [core.py:386] File "/data1/yinjian/python-scripts/vllm/vllm/executor/uniproc_executor.py", line 56, in collective_rpc
ERROR 04-09 19:22:27 [core.py:386] answer = run_method(self.driver_worker, method, args, kwargs)
ERROR 04-09 19:22:27 [core.py:386] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 04-09 19:22:27 [core.py:386] File "/data1/yinjian/python-scripts/vllm/vllm/utils.py", line 2363, in run_method
ERROR 04-09 19:22:27 [core.py:386] return func(*args, **kwargs)
ERROR 04-09 19:22:27 [core.py:386] ^^^^^^^^^^^^^^^^^^^^^
ERROR 04-09 19:22:27 [core.py:386] File "/data1/yinjian/python-scripts/vllm/vllm/v1/worker/gpu_worker.py", line 136, in load_model
ERROR 04-09 19:22:27 [core.py:386] self.model_runner.load_model()
ERROR 04-09 19:22:27 [core.py:386] File "/data1/yinjian/python-scripts/vllm/vllm/v1/worker/gpu_model_runner.py", line 1280, in load_model
ERROR 04-09 19:22:27 [core.py:386] self.model = get_model(vllm_config=self.vllm_config)
ERROR 04-09 19:22:27 [core.py:386] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 04-09 19:22:27 [core.py:386] File "/data1/yinjian/python-scripts/vllm/vllm/model_executor/model_loader/__init__.py", line 14, in get_model
ERROR 04-09 19:22:27 [core.py:386] return loader.load_model(vllm_config=vllm_config)
ERROR 04-09 19:22:27 [core.py:386] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 04-09 19:22:27 [core.py:386] File "/data1/yinjian/python-scripts/vllm/vllm/model_executor/model_loader/loader.py", line 1289, in load_model
ERROR 04-09 19:22:27 [core.py:386] model = _initialize_model(vllm_config=vllm_config)
ERROR 04-09 19:22:27 [core.py:386] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 04-09 19:22:27 [core.py:386] File "/data1/yinjian/python-scripts/vllm/vllm/model_executor/model_loader/loader.py", line 133, in _initialize_model
ERROR 04-09 19:22:27 [core.py:386] return model_class(vllm_config=vllm_config, prefix=prefix)
ERROR 04-09 19:22:27 [core.py:386] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 04-09 19:22:27 [core.py:386] File "/data1/yinjian/python-scripts/vllm/vllm/model_executor/models/mllama4.py", line 691, in __init__
ERROR 04-09 19:22:27 [core.py:386] self.language_model = _initialize_model(
ERROR 04-09 19:22:27 [core.py:386] ^^^^^^^^^^^^^^^^^^
ERROR 04-09 19:22:27 [core.py:386] File "/data1/yinjian/python-scripts/vllm/vllm/model_executor/model_loader/loader.py", line 133, in _initialize_model
ERROR 04-09 19:22:27 [core.py:386] return model_class(vllm_config=vllm_config, prefix=prefix)
ERROR 04-09 19:22:27 [core.py:386] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 04-09 19:22:27 [core.py:386] File "/data1/yinjian/python-scripts/vllm/vllm/model_executor/models/llama4.py", line 481, in __init__
ERROR 04-09 19:22:27 [core.py:386] super().__init__(vllm_config=vllm_config,
ERROR 04-09 19:22:27 [core.py:386] File "/data1/yinjian/python-scripts/vllm/vllm/model_executor/models/llama.py", line 486, in __init__
ERROR 04-09 19:22:27 [core.py:386] self.model = self._init_model(vllm_config=vllm_config,
ERROR 04-09 19:22:27 [core.py:386] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 04-09 19:22:27 [core.py:386] File "/data1/yinjian/python-scripts/vllm/vllm/model_executor/models/llama4.py", line 489, in _init_model
ERROR 04-09 19:22:27 [core.py:386] return Llama4Model(vllm_config=vllm_config,
ERROR 04-09 19:22:27 [core.py:386] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 04-09 19:22:27 [core.py:386] File "/data1/yinjian/python-scripts/vllm/vllm/compilation/decorators.py", line 151, in __init__
ERROR 04-09 19:22:27 [core.py:386] old_init(self, vllm_config=vllm_config, prefix=prefix, **kwargs)
ERROR 04-09 19:22:27 [core.py:386] File "/data1/yinjian/python-scripts/vllm/vllm/model_executor/models/llama4.py", line 335, in __init__
ERROR 04-09 19:22:27 [core.py:386] super().__init__(vllm_config=vllm_config,
ERROR 04-09 19:22:27 [core.py:386] File "/data1/yinjian/python-scripts/vllm/vllm/compilation/decorators.py", line 151, in __init__
ERROR 04-09 19:22:27 [core.py:386] old_init(self, vllm_config=vllm_config, prefix=prefix, **kwargs)
ERROR 04-09 19:22:27 [core.py:386] File "/data1/yinjian/python-scripts/vllm/vllm/model_executor/models/llama.py", line 321, in __init__
ERROR 04-09 19:22:27 [core.py:386] self.start_layer, self.end_layer, self.layers = make_layers(
ERROR 04-09 19:22:27 [core.py:386] ^^^^^^^^^^^^
ERROR 04-09 19:22:27 [core.py:386] File "/data1/yinjian/python-scripts/vllm/vllm/model_executor/models/utils.py", line 610, in make_layers
ERROR 04-09 19:22:27 [core.py:386] maybe_offload_to_cpu(layer_fn(prefix=f"{prefix}.{idx}"))
ERROR 04-09 19:22:27 [core.py:386] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 04-09 19:22:27 [core.py:386] File "/data1/yinjian/python-scripts/vllm/vllm/model_executor/models/llama.py", line 323, in <lambda>
ERROR 04-09 19:22:27 [core.py:386] lambda prefix: layer_type(config=config,
ERROR 04-09 19:22:27 [core.py:386] ^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 04-09 19:22:27 [core.py:386] File "/data1/yinjian/python-scripts/vllm/vllm/model_executor/models/llama4.py", line 284, in __init__
ERROR 04-09 19:22:27 [core.py:386] self.feed_forward = Llama4MoE(
ERROR 04-09 19:22:27 [core.py:386] ^^^^^^^^^^
ERROR 04-09 19:22:27 [core.py:386] File "/data1/yinjian/python-scripts/vllm/vllm/model_executor/models/llama4.py", line 73, in __init__
ERROR 04-09 19:22:27 [core.py:386] self.experts = FusedMoE(
ERROR 04-09 19:22:27 [core.py:386] ^^^^^^^^^
ERROR 04-09 19:22:27 [core.py:386] File "/data1/yinjian/python-scripts/vllm/vllm/model_executor/layers/fused_moe/layer.py", line 502, in __init__
ERROR 04-09 19:22:27 [core.py:386] assert self.quant_method is not None
ERROR 04-09 19:22:27 [core.py:386] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 04-09 19:22:27 [core.py:386] AssertionError
ERROR 04-09 19:22:27 [core.py:386]
CRITICAL 04-09 19:22:27 [core_client.py:359] Got fatal signal from worker processes, shutting down. See stack trace above for root cause issue.
Killed
Did you find a solution for this one?
I also have a similar issue.