params.json missing?
#2 opened by docgerbil
I'm trying to use this model for tool calling, but vLLM fails to start:
vllm-server-mistral-31 | 2025-05-13T18:34:19.348751933Z INFO 05-13 11:34:19 [api_server.py:1044] args: Namespace(host='0.0.0.0', port=8901, uvicorn_log_level='info', disable_uvicorn_access_log=False, allow_credentials=False, allowed_origins=['*'], allowed_methods=['*'], allowed_headers=['*'], api_key=None, lora_modules=None, prompt_adapters=None, chat_template=None, chat_template_content_format='auto', response_role='assistant', ssl_keyfile=None, ssl_certfile=None, ssl_ca_certs=None, enable_ssl_refresh=False, ssl_cert_reqs=0, root_path=None, middleware=[], return_tokens_as_token_ids=False, disable_frontend_multiprocessing=False, enable_request_id_headers=False, enable_auto_tool_choice=True, tool_call_parser='mistral', tool_parser_plugin='', model='OPEA/Mistral-Small-3.1-24B-Instruct-2503-int4-AutoRound-awq-sym', task='auto', tokenizer=None, hf_config_path=None, skip_tokenizer_init=False, revision=None, code_revision=None, tokenizer_revision=None, tokenizer_mode='mistral', trust_remote_code=False, allowed_local_media_path=None, load_format='mistral', download_dir=None, model_loader_extra_config={}, use_tqdm_on_load=True, config_format='mistral', dtype='auto', max_model_len=8192, guided_decoding_backend='xgrammar', reasoning_parser=None, logits_processor_pattern=None, model_impl='auto', distributed_executor_backend=None, pipeline_parallel_size=1, tensor_parallel_size=2, data_parallel_size=1, enable_expert_parallel=False, max_parallel_loading_workers=None, ray_workers_use_nsight=False, disable_custom_all_reduce=False, block_size=None, gpu_memory_utilization=0.9, swap_space=16.0, kv_cache_dtype='auto', num_gpu_blocks_override=None, enable_prefix_caching=None, prefix_caching_hash_algo='builtin', cpu_offload_gb=0, calculate_kv_scales=False, disable_sliding_window=False, use_v2_block_manager=True, seed=None, max_logprobs=20, disable_log_stats=False, quantization=None, rope_scaling=None, rope_theta=None, hf_token=None, hf_overrides=None, enforce_eager=False, max_seq_len_to_capture=8192, tokenizer_pool_size=0, tokenizer_pool_type='ray', tokenizer_pool_extra_config={}, limit_mm_per_prompt={}, mm_processor_kwargs=None, disable_mm_preprocessor_cache=False, enable_lora=None, enable_lora_bias=False, max_loras=1, max_lora_rank=16, lora_extra_vocab_size=256, lora_dtype='auto', long_lora_scaling_factors=None, max_cpu_loras=None, fully_sharded_loras=False, enable_prompt_adapter=None, max_prompt_adapters=1, max_prompt_adapter_token=0, device='auto', speculative_config=None, ignore_patterns=[], served_model_name=None, qlora_adapter_name_or_path=None, show_hidden_metrics_for_version=None, otlp_traces_endpoint=None, collect_detailed_traces=None, disable_async_output_proc=False, max_num_batched_tokens=None, max_num_seqs=1, max_num_partial_prefills=1, max_long_partial_prefills=1, long_prefill_token_threshold=0, num_lookahead_slots=0, scheduler_delay_factor=0.0, preemption_mode=None, num_scheduler_steps=1, multi_step_stream_outputs=True, scheduling_policy='fcfs', enable_chunked_prefill=None, disable_chunked_mm_input=False, scheduler_cls='vllm.core.scheduler.Scheduler', override_neuron_config=None, override_pooler_config=None, compilation_config=None, kv_transfer_config=KVTransferConfig(kv_connector='LMCacheConnector', kv_buffer_device='cuda', kv_buffer_size=1000000000.0, kv_role='kv_both', kv_rank=None, kv_parallel_size=1, kv_ip='127.0.0.1', kv_port=14579, kv_connector_extra_config={}), worker_cls='auto', worker_extension_cls='', generation_config='auto', override_generation_config={'temperature': 0.15}, 
enable_sleep_mode=False, additional_config=None, enable_reasoning=False, disable_cascade_attn=False, disable_log_requests=False, max_log_len=None, disable_fastapi_docs=False, enable_prompt_tokens_details=False, enable_server_load_tracking=False)
vllm-server-mistral-31 | 2025-05-13T18:34:19.427952091Z Traceback (most recent call last):
vllm-server-mistral-31 | 2025-05-13T18:34:19.427984708Z File "<frozen runpy>", line 198, in _run_module_as_main
vllm-server-mistral-31 | 2025-05-13T18:34:19.427993161Z File "<frozen runpy>", line 88, in _run_code
vllm-server-mistral-31 | 2025-05-13T18:34:19.427998414Z File "/usr/local/lib/python3.12/dist-packages/vllm/entrypoints/openai/api_server.py", line 1130, in <module>
vllm-server-mistral-31 | 2025-05-13T18:34:19.428236806Z uvloop.run(run_server(args))
vllm-server-mistral-31 | 2025-05-13T18:34:19.428301588Z File "/usr/local/lib/python3.12/dist-packages/uvloop/__init__.py", line 109, in run
vllm-server-mistral-31 | 2025-05-13T18:34:19.428383563Z return __asyncio.run(
vllm-server-mistral-31 | 2025-05-13T18:34:19.428400056Z ^^^^^^^^^^^^^^
vllm-server-mistral-31 | 2025-05-13T18:34:19.428404091Z File "/usr/lib/python3.12/asyncio/runners.py", line 195, in run
vllm-server-mistral-31 | 2025-05-13T18:34:19.428525418Z return runner.run(main)
vllm-server-mistral-31 | 2025-05-13T18:34:19.428555912Z ^^^^^^^^^^^^^^^^
vllm-server-mistral-31 | 2025-05-13T18:34:19.428564216Z File "/usr/lib/python3.12/asyncio/runners.py", line 118, in run
vllm-server-mistral-31 | 2025-05-13T18:34:19.428651938Z return self._loop.run_until_complete(task)
vllm-server-mistral-31 | 2025-05-13T18:34:19.428673567Z ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
vllm-server-mistral-31 | 2025-05-13T18:34:19.428681990Z File "uvloop/loop.pyx", line 1518, in uvloop.loop.Loop.run_until_complete
vllm-server-mistral-31 | 2025-05-13T18:34:19.428850653Z File "/usr/local/lib/python3.12/dist-packages/uvloop/__init__.py", line 61, in wrapper
vllm-server-mistral-31 | 2025-05-13T18:34:19.428912460Z return await main
vllm-server-mistral-31 | 2025-05-13T18:34:19.428935147Z ^^^^^^^^^^
vllm-server-mistral-31 | 2025-05-13T18:34:19.428946993Z File "/usr/local/lib/python3.12/dist-packages/vllm/entrypoints/openai/api_server.py", line 1078, in run_server
vllm-server-mistral-31 | 2025-05-13T18:34:19.429147008Z async with build_async_engine_client(args) as engine_client:
vllm-server-mistral-31 | 2025-05-13T18:34:19.429180720Z ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
vllm-server-mistral-31 | 2025-05-13T18:34:19.429188442Z File "/usr/lib/python3.12/contextlib.py", line 210, in __aenter__
vllm-server-mistral-31 | 2025-05-13T18:34:19.429271101Z return await anext(self.gen)
vllm-server-mistral-31 | 2025-05-13T18:34:19.429299002Z ^^^^^^^^^^^^^^^^^^^^^
vllm-server-mistral-31 | 2025-05-13T18:34:19.429308019Z File "/usr/local/lib/python3.12/dist-packages/vllm/entrypoints/openai/api_server.py", line 146, in build_async_engine_client
vllm-server-mistral-31 | 2025-05-13T18:34:19.429378026Z async with build_async_engine_client_from_engine_args(
vllm-server-mistral-31 | 2025-05-13T18:34:19.429396912Z ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
vllm-server-mistral-31 | 2025-05-13T18:34:19.429400493Z File "/usr/lib/python3.12/contextlib.py", line 210, in __aenter__
vllm-server-mistral-31 | 2025-05-13T18:34:19.429476041Z return await anext(self.gen)
vllm-server-mistral-31 | 2025-05-13T18:34:19.429508481Z ^^^^^^^^^^^^^^^^^^^^^
vllm-server-mistral-31 | 2025-05-13T18:34:19.429519000Z File "/usr/local/lib/python3.12/dist-packages/vllm/entrypoints/openai/api_server.py", line 166, in build_async_engine_client_from_engine_args
vllm-server-mistral-31 | 2025-05-13T18:34:19.429588733Z vllm_config = engine_args.create_engine_config(usage_context=usage_context)
vllm-server-mistral-31 | 2025-05-13T18:34:19.429630437Z ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
vllm-server-mistral-31 | 2025-05-13T18:34:19.429640062Z File "/usr/local/lib/python3.12/dist-packages/vllm/engine/arg_utils.py", line 1099, in create_engine_config
vllm-server-mistral-31 | 2025-05-13T18:34:19.429885961Z model_config = self.create_model_config()
vllm-server-mistral-31 | 2025-05-13T18:34:19.429923383Z ^^^^^^^^^^^^^^^^^^^^^^^^^^
vllm-server-mistral-31 | 2025-05-13T18:34:19.429932504Z File "/usr/local/lib/python3.12/dist-packages/vllm/engine/arg_utils.py", line 987, in create_model_config
vllm-server-mistral-31 | 2025-05-13T18:34:19.430129521Z return ModelConfig(
vllm-server-mistral-31 | 2025-05-13T18:34:19.430139732Z ^^^^^^^^^^^^
vllm-server-mistral-31 | 2025-05-13T18:34:19.430157084Z File "/usr/local/lib/python3.12/dist-packages/vllm/config.py", line 451, in __init__
vllm-server-mistral-31 | 2025-05-13T18:34:19.430269986Z hf_config = get_config(self.hf_config_path or self.model,
vllm-server-mistral-31 | 2025-05-13T18:34:19.430292845Z ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
vllm-server-mistral-31 | 2025-05-13T18:34:19.430298657Z File "/usr/local/lib/python3.12/dist-packages/vllm/transformers_utils/config.py", line 347, in get_config
vllm-server-mistral-31 | 2025-05-13T18:34:19.430406913Z config = load_params_config(model, revision, token=HF_TOKEN, **kwargs)
vllm-server-mistral-31 | 2025-05-13T18:34:19.430450512Z ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
vllm-server-mistral-31 | 2025-05-13T18:34:19.430460838Z File "/usr/local/lib/python3.12/dist-packages/vllm/transformers_utils/config.py", line 654, in load_params_config
vllm-server-mistral-31 | 2025-05-13T18:34:19.430591408Z raise ValueError(
vllm-server-mistral-31 | 2025-05-13T18:34:19.430604416Z ValueError: Failed to load mistral 'params.json' config for model OPEA/Mistral-Small-3.1-24B-Instruct-2503-int4-AutoRound-awq-sym. Please check if the model is a mistral-format model and if the config file exists.
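Checking which config files the repo actually ships confirms the mismatch. Here is a minimal sketch with huggingface_hub, using the repo id from the command above:

```python
from huggingface_hub import list_repo_files

repo_id = "OPEA/Mistral-Small-3.1-24B-Instruct-2503-int4-AutoRound-awq-sym"

# List every file in the repo and check for the two config formats vLLM knows about.
files = list_repo_files(repo_id)
print("config.json present:", "config.json" in files)  # HF/Transformers-format config
print("params.json present:", "params.json" in files)  # Mistral-format config, which --config-format mistral expects
```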
Fixed. params.json is not needed for Transformers inference, so the save API does not include it. If any other files are missing, you can copy them from the original model repository.
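For anyone who downloaded the repo before the fix, here is a minimal sketch of copying the file over from the original model. It assumes the base repo is mistralai/Mistral-Small-3.1-24B-Instruct-2503 and that it ships the Mistral-format config (the base repo may be gated, so a Hugging Face token could be required):

```python
import shutil
from huggingface_hub import hf_hub_download, snapshot_download

# Assumed repo ids; adjust BASE_REPO if your original model differs.
BASE_REPO = "mistralai/Mistral-Small-3.1-24B-Instruct-2503"
QUANT_REPO = "OPEA/Mistral-Small-3.1-24B-Instruct-2503-int4-AutoRound-awq-sym"

# Pull the quantized model into a local directory (downloads the full weights).
local_dir = snapshot_download(QUANT_REPO, local_dir="./mistral-small-3.1-awq")

# Fetch params.json from the original model and place it next to the quantized weights.
params_path = hf_hub_download(BASE_REPO, filename="params.json")
shutil.copy(params_path, f"{local_dir}/params.json")
print("Copied params.json into", local_dir)
```

Then point --model at the local directory so vLLM picks up the copied file.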