VLLM support
#8
by
potanin-marat
- opened
Why not adapt the model for vLLM? GGUF is not well supported there, but AWQ works.
When I tried vLLM with the command below, I ran into this issue:
vllm serve ./gemma-3-27b-it-q4_0.gguf --tokenizer google/gemma-3-27b-it
File "/usr/local/bin/vllm", line 8, in <module>
sys.exit(main())
File "/usr/local/lib/python3.10/dist-packages/vllm/entrypoints/cli/main.py", line 51, in main
args.dispatch_function(args)
File "/usr/local/lib/python3.10/dist-packages/vllm/entrypoints/cli/serve.py", line 27, in cmd
uvloop.run(run_server(args))
File "/usr/local/lib/python3.10/dist-packages/uvloop/__init__.py", line 82, in run
return loop.run_until_complete(wrapper())
File "uvloop/loop.pyx", line 1518, in uvloop.loop.Loop.run_until_complete
File "/usr/local/lib/python3.10/dist-packages/uvloop/__init__.py", line 61, in wrapper
return await main
File "/usr/local/lib/python3.10/dist-packages/vllm/entrypoints/openai/api_server.py", line 1069, in run_server
async with build_async_engine_client(args) as engine_client:
File "/usr/lib/python3.10/contextlib.py", line 199, in __aenter__
return await anext(self.gen)
File "/usr/local/lib/python3.10/dist-packages/vllm/entrypoints/openai/api_server.py", line 146, in build_async_engine_client
async with build_async_engine_client_from_engine_args(
File "/usr/lib/python3.10/contextlib.py", line 199, in __aenter__
return await anext(self.gen)
File "/usr/local/lib/python3.10/dist-packages/vllm/entrypoints/openai/api_server.py", line 166, in build_async_engine_client_from_engine_args
vllm_config = engine_args.create_engine_config(usage_context=usage_context)
File "/usr/local/lib/python3.10/dist-packages/vllm/engine/arg_utils.py", line 1154, in create_engine_config
model_config = self.create_model_config()
File "/usr/local/lib/python3.10/dist-packages/vllm/engine/arg_utils.py", line 1042, in create_model_config
return ModelConfig(
File "/usr/local/lib/python3.10/dist-packages/vllm/config.py", line 423, in __init__
hf_config = get_config(self.hf_config_path or self.model,
File "/usr/local/lib/python3.10/dist-packages/vllm/transformers_utils/config.py", line 286, in get_config
config_dict, _ = PretrainedConfig.get_config_dict(
File "/usr/local/lib/python3.10/dist-packages/transformers/configuration_utils.py", line 590, in get_config_dict
config_dict, kwargs = cls._get_config_dict(pretrained_model_name_or_path, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/transformers/configuration_utils.py", line 681, in _get_config_dict
config_dict = load_gguf_checkpoint(resolved_config_file, return_tensors=False)["config"]
File "/usr/local/lib/python3.10/dist-packages/transformers/modeling_gguf_pytorch_utils.py", line 369, in load_gguf_checkpoint
model_name = read_field(reader, "general.name")
File "/usr/local/lib/python3.10/dist-packages/transformers/modeling_gguf_pytorch_utils.py", line 260, in read_field
value = reader.fields[field]
KeyError: 'general.name'