Which inference server is compatible with this model?

#4
by alexbarbosa - opened

While trying to serve this model using vLLM, I get the error messages below:

ERROR 05-14 08:27:13 [core.py:396] EngineCore failed to start.
ERROR 05-14 08:27:13 [core.py:396] Traceback (most recent call last):
ERROR 05-14 08:27:13 [core.py:396] File "/home/user/python-envs/py12/venv/lib64/python3.12/site-packages/vllm/v1/engine/core.py", line 387, in run_engine_core
ERROR 05-14 08:27:13 [core.py:396] engine_core = EngineCoreProc(*args, **kwargs)
ERROR 05-14 08:27:13 [core.py:396] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 05-14 08:27:13 [core.py:396] File "/home/user/python-envs/py12/venv/lib64/python3.12/site-packages/vllm/v1/engine/core.py", line 329, in init
ERROR 05-14 08:27:13 [core.py:396] super().init(vllm_config, executor_class, log_stats,
ERROR 05-14 08:27:13 [core.py:396] File "/home/user/python-envs/py12/venv/lib64/python3.12/site-packages/vllm/v1/engine/core.py", line 64, in init
ERROR 05-14 08:27:13 [core.py:396] self.model_executor = executor_class(vllm_config)
ERROR 05-14 08:27:13 [core.py:396] ^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 05-14 08:27:13 [core.py:396] File "/home/user/python-envs/py12/venv/lib64/python3.12/site-packages/vllm/executor/executor_base.py", line 52, in init
ERROR 05-14 08:27:13 [core.py:396] self._init_executor()
ERROR 05-14 08:27:13 [core.py:396] File "/home/user/python-envs/py12/venv/lib64/python3.12/site-packages/vllm/executor/uniproc_executor.py", line 46, in _init_executor
ERROR 05-14 08:27:13 [core.py:396] self.collective_rpc("init_device")
ERROR 05-14 08:27:13 [core.py:396] File "/home/user/python-envs/py12/venv/lib64/python3.12/site-packages/vllm/executor/uniproc_executor.py", line 56, in collective_rpc
ERROR 05-14 08:27:13 [core.py:396] answer = run_method(self.driver_worker, method, args, kwargs)
ERROR 05-14 08:27:13 [core.py:396] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 05-14 08:27:13 [core.py:396] File "/home/user/python-envs/py12/venv/lib64/python3.12/site-packages/vllm/utils.py", line 2456, in run_method
ERROR 05-14 08:27:13 [core.py:396] return func(*args, **kwargs)
ERROR 05-14 08:27:13 [core.py:396] ^^^^^^^^^^^^^^^^^^^^^
ERROR 05-14 08:27:13 [core.py:396] File "/home/user/python-envs/py12/venv/lib64/python3.12/site-packages/vllm/worker/worker_base.py", line 604, in init_device
ERROR 05-14 08:27:13 [core.py:396] self.worker.init_device() # type: ignore
ERROR 05-14 08:27:13 [core.py:396] ^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 05-14 08:27:13 [core.py:396] File "/home/user/python-envs/py12/venv/lib64/python3.12/site-packages/vllm/v1/worker/gpu_worker.py", line 142, in init_device
ERROR 05-14 08:27:13 [core.py:396] self.model_runner: GPUModelRunner = GPUModelRunner(
ERROR 05-14 08:27:13 [core.py:396] ^^^^^^^^^^^^^^^
ERROR 05-14 08:27:13 [core.py:396] File "/home/user/python-envs/py12/venv/lib64/python3.12/site-packages/vllm/v1/worker/gpu_model_runner.py", line 149, in init
ERROR 05-14 08:27:13 [core.py:396] encoder_compute_budget, encoder_cache_size = compute_encoder_budget(
ERROR 05-14 08:27:13 [core.py:396] ^^^^^^^^^^^^^^^^^^^^^^^
ERROR 05-14 08:27:13 [core.py:396] File "/home/user/python-envs/py12/venv/lib64/python3.12/site-packages/vllm/v1/core/encoder_cache_manager.py", line 94, in compute_encoder_budget
ERROR 05-14 08:27:13 [core.py:396] ) = _compute_encoder_budget_multimodal(
ERROR 05-14 08:27:13 [core.py:396] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 05-14 08:27:13 [core.py:396] File "/home/user/python-envs/py12/venv/lib64/python3.12/site-packages/vllm/v1/core/encoder_cache_manager.py", line 124, in _compute_encoder_budget_multimodal
ERROR 05-14 08:27:13 [core.py:396] .get_max_tokens_per_item_by_nonzero_modality(model_config)
ERROR 05-14 08:27:13 [core.py:396] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 05-14 08:27:13 [core.py:396] File "/home/user/python-envs/py12/venv/lib64/python3.12/site-packages/vllm/multimodal/registry.py", line 141, in get_max_tokens_per_item_by_nonzero_modality
ERROR 05-14 08:27:13 [core.py:396] self.get_max_tokens_per_item_by_modality(model_config).items()
ERROR 05-14 08:27:13 [core.py:396] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 05-14 08:27:13 [core.py:396] File "/home/user/python-envs/py12/venv/lib64/python3.12/site-packages/vllm/multimodal/registry.py", line 115, in get_max_tokens_per_item_by_modality
ERROR 05-14 08:27:13 [core.py:396] return profiler.get_mm_max_tokens(
ERROR 05-14 08:27:13 [core.py:396] ^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 05-14 08:27:13 [core.py:396] File "/home/user/python-envs/py12/venv/lib64/python3.12/site-packages/vllm/multimodal/profiling.py", line 272, in get_mm_max_tokens
ERROR 05-14 08:27:13 [core.py:396] mm_inputs = self._get_dummy_mm_inputs(seq_len, mm_counts)
ERROR 05-14 08:27:13 [core.py:396] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 05-14 08:27:13 [core.py:396] File "/home/user/python-envs/py12/venv/lib64/python3.12/site-packages/vllm/multimodal/profiling.py", line 179, in _get_dummy_mm_inputs
ERROR 05-14 08:27:13 [core.py:396] return self.processor.apply(
ERROR 05-14 08:27:13 [core.py:396] ^^^^^^^^^^^^^^^^^^^^^
ERROR 05-14 08:27:13 [core.py:396] File "/home/user/python-envs/py12/venv/lib64/python3.12/site-packages/vllm/multimodal/processing.py", line 1665, in apply
ERROR 05-14 08:27:13 [core.py:396] ) = self._cached_apply_hf_processor(
ERROR 05-14 08:27:13 [core.py:396] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 05-14 08:27:13 [core.py:396] File "/home/user/python-envs/py12/venv/lib64/python3.12/site-packages/vllm/multimodal/processing.py", line 1358, in _cached_apply_hf_processor
ERROR 05-14 08:27:13 [core.py:396] return self._apply_hf_processor_main(
ERROR 05-14 08:27:13 [core.py:396] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 05-14 08:27:13 [core.py:396] File "/home/user/python-envs/py12/venv/lib64/python3.12/site-packages/vllm/multimodal/processing.py", line 1326, in _apply_hf_processor_main
ERROR 05-14 08:27:13 [core.py:396] return self._apply_hf_processor_text_mm(
ERROR 05-14 08:27:13 [core.py:396] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 05-14 08:27:13 [core.py:396] File "/home/user/python-envs/py12/venv/lib64/python3.12/site-packages/vllm/multimodal/processing.py", line 1230, in _apply_hf_processor_text_mm
ERROR 05-14 08:27:13 [core.py:396] processed_data = self._call_hf_processor(
ERROR 05-14 08:27:13 [core.py:396] ^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 05-14 08:27:13 [core.py:396] File "/home/user/python-envs/py12/venv/lib64/python3.12/site-packages/vllm/model_executor/models/internvl.py", line 542, in _call_hf_processor
ERROR 05-14 08:27:13 [core.py:396] image_token_id = hf_processor.image_token_id
ERROR 05-14 08:27:13 [core.py:396] ^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 05-14 08:27:13 [core.py:396] File "/home/user/python-envs/py12/venv/lib64/python3.12/site-packages/vllm/model_executor/models/internvl.py", line 430, in image_token_id
ERROR 05-14 08:27:13 [core.py:396] return self.tokenizer.get_vocab()[IMG_CONTEXT]
ERROR 05-14 08:27:13 [core.py:396] ~~~~~~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^
ERROR 05-14 08:27:13 [core.py:396] KeyError: ''
Process EngineCore_0:
Traceback (most recent call last):
File "/usr/lib64/python3.12/multiprocessing/process.py", line 314, in _bootstrap
self.run()
File "/usr/lib64/python3.12/multiprocessing/process.py", line 108, in run
self._target(*self._args, **self._kwargs)
File "/home/user/python-envs/py12/venv/lib64/python3.12/site-packages/vllm/v1/engine/core.py", line 400, in run_engine_core
raise e
File "/home/user/python-envs/py12/venv/lib64/python3.12/site-packages/vllm/v1/engine/core.py", line 387, in run_engine_core
engine_core = EngineCoreProc(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/user/python-envs/py12/venv/lib64/python3.12/site-packages/vllm/v1/engine/core.py", line 329, in init
super().init(vllm_config, executor_class, log_stats,
File "/home/user/python-envs/py12/venv/lib64/python3.12/site-packages/vllm/v1/engine/core.py", line 64, in init
self.model_executor = executor_class(vllm_config)
^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/user/python-envs/py12/venv/lib64/python3.12/site-packages/vllm/executor/executor_base.py", line 52, in init
self._init_executor()
File "/home/user/python-envs/py12/venv/lib64/python3.12/site-packages/vllm/executor/uniproc_executor.py", line 46, in _init_executor
self.collective_rpc("init_device")
File "/home/user/python-envs/py12/venv/lib64/python3.12/site-packages/vllm/executor/uniproc_executor.py", line 56, in collective_rpc
answer = run_method(self.driver_worker, method, args, kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/user/python-envs/py12/venv/lib64/python3.12/site-packages/vllm/utils.py", line 2456, in run_method
return func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "/home/user/python-envs/py12/venv/lib64/python3.12/site-packages/vllm/worker/worker_base.py", line 604, in init_device
self.worker.init_device() # type: ignore
^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/user/python-envs/py12/venv/lib64/python3.12/site-packages/vllm/v1/worker/gpu_worker.py", line 142, in init_device
self.model_runner: GPUModelRunner = GPUModelRunner(
^^^^^^^^^^^^^^^
File "/home/user/python-envs/py12/venv/lib64/python3.12/site-packages/vllm/v1/worker/gpu_model_runner.py", line 149, in init
encoder_compute_budget, encoder_cache_size = compute_encoder_budget(
^^^^^^^^^^^^^^^^^^^^^^^
File "/home/user/python-envs/py12/venv/lib64/python3.12/site-packages/vllm/v1/core/encoder_cache_manager.py", line 94, in compute_encoder_budget
) = _compute_encoder_budget_multimodal(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/user/python-envs/py12/venv/lib64/python3.12/site-packages/vllm/v1/core/encoder_cache_manager.py", line 124, in _compute_encoder_budget_multimodal
.get_max_tokens_per_item_by_nonzero_modality(model_config)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/user/python-envs/py12/venv/lib64/python3.12/site-packages/vllm/multimodal/registry.py", line 141, in get_max_tokens_per_item_by_nonzero_modality
self.get_max_tokens_per_item_by_modality(model_config).items()
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/user/python-envs/py12/venv/lib64/python3.12/site-packages/vllm/multimodal/registry.py", line 115, in get_max_tokens_per_item_by_modality
return profiler.get_mm_max_tokens(
^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/user/python-envs/py12/venv/lib64/python3.12/site-packages/vllm/multimodal/profiling.py", line 272, in get_mm_max_tokens
mm_inputs = self._get_dummy_mm_inputs(seq_len, mm_counts)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/user/python-envs/py12/venv/lib64/python3.12/site-packages/vllm/multimodal/profiling.py", line 179, in _get_dummy_mm_inputs
return self.processor.apply(
^^^^^^^^^^^^^^^^^^^^^
File "/home/user/python-envs/py12/venv/lib64/python3.12/site-packages/vllm/multimodal/processing.py", line 1665, in apply
) = self._cached_apply_hf_processor(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/user/python-envs/py12/venv/lib64/python3.12/site-packages/vllm/multimodal/processing.py", line 1358, in _cached_apply_hf_processor
return self._apply_hf_processor_main(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/user/python-envs/py12/venv/lib64/python3.12/site-packages/vllm/multimodal/processing.py", line 1326, in _apply_hf_processor_main
return self._apply_hf_processor_text_mm(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/user/python-envs/py12/venv/lib64/python3.12/site-packages/vllm/multimodal/processing.py", line 1230, in _apply_hf_processor_text_mm
processed_data = self._call_hf_processor(
^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/user/python-envs/py12/venv/lib64/python3.12/site-packages/vllm/model_executor/models/internvl.py", line 542, in _call_hf_processor
image_token_id = hf_processor.image_token_id
^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/user/python-envs/py12/venv/lib64/python3.12/site-packages/vllm/model_executor/models/internvl.py", line 430, in image_token_id
return self.tokenizer.get_vocab()[IMG_CONTEXT]
~~~~~~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^
KeyError: ''
[rank0]:[W514 08:27:14.835993471 ProcessGroupNCCL.cpp:1496] Warning: WARNING: destroy_process_group() was not called before program exit, which can leak resources. For more info, please see https://pytorch.org/docs/stable/distributed.html#shutdown (function operator())

Sign up or log in to comment