Doesn't work with SGLang:
[2025-08-01 12:12:02 TP2] Scheduler hit an exception: Traceback (most recent call last):
File "/home/rmehta/sglangenv/lib/python3.12/site-packages/sglang/srt/managers/scheduler.py", line 2403, in run_scheduler_process
scheduler = Scheduler(
^^^^^^^^^^
File "/home/rmehta/sglangenv/lib/python3.12/site-packages/sglang/srt/managers/scheduler.py", line 312, in __init__
self.tp_worker = TpWorkerClass(
^^^^^^^^^^^^^^
File "/home/rmehta/sglangenv/lib/python3.12/site-packages/sglang/srt/managers/tp_worker_overlap_thread.py", line 67, in __init__
self.worker = TpModelWorker(
^^^^^^^^^^^^^^
File "/home/rmehta/sglangenv/lib/python3.12/site-packages/sglang/srt/managers/tp_worker.py", line 84, in __init__
self.model_runner = ModelRunner(
^^^^^^^^^^^^
File "/home/rmehta/sglangenv/lib/python3.12/site-packages/sglang/srt/model_executor/model_runner.py", line 237, in __init__
self.initialize(min_per_gpu_memory)
File "/home/rmehta/sglangenv/lib/python3.12/site-packages/sglang/srt/model_executor/model_runner.py", line 280, in initialize
self.load_model()
File "/home/rmehta/sglangenv/lib/python3.12/site-packages/sglang/srt/model_executor/model_runner.py", line 638, in load_model
self.model = get_model(
^^^^^^^^^^
File "/home/rmehta/sglangenv/lib/python3.12/site-packages/sglang/srt/model_loader/__init__.py", line 22, in get_model
return loader.load_model(
^^^^^^^^^^^^^^^^^^
File "/home/rmehta/sglangenv/lib/python3.12/site-packages/sglang/srt/model_loader/loader.py", line 432, in load_model
model = _initialize_model(
^^^^^^^^^^^^^^^^^^
File "/home/rmehta/sglangenv/lib/python3.12/site-packages/sglang/srt/model_loader/loader.py", line 174, in _initialize_model
return model_class(
^^^^^^^^^^^^
File "/home/rmehta/sglangenv/lib/python3.12/site-packages/sglang/srt/models/qwen3_moe.py", line 640, in __init__
self.model = Qwen3MoeModel(
^^^^^^^^^^^^^^
File "/home/rmehta/sglangenv/lib/python3.12/site-packages/sglang/srt/models/qwen3_moe.py", line 618, in __init__
super().__init__(
File "/home/rmehta/sglangenv/lib/python3.12/site-packages/sglang/srt/models/qwen2_moe.py", line 426, in init
self.layers, self.start_layer, self.end_layer = make_layers(
^^^^^^^^^^^^
File "/home/rmehta/sglangenv/lib/python3.12/site-packages/sglang/srt/utils.py", line 529, in make_layers
maybe_offload_to_cpu(layer_fn(idx=idx, prefix=add_prefix(idx, prefix)))
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/rmehta/sglangenv/lib/python3.12/site-packages/sglang/srt/models/qwen2_moe.py", line 428, in <lambda>
lambda idx, prefix: decoder_layer_type(
^^^^^^^^^^^^^^^^^^^
File "/home/rmehta/sglangenv/lib/python3.12/site-packages/sglang/srt/models/qwen3_moe.py", line 496, in __init__
self.mlp = Qwen3MoeSparseMoeBlock(
^^^^^^^^^^^^^^^^^^^^^^^
File "/home/rmehta/sglangenv/lib/python3.12/site-packages/sglang/srt/models/qwen3_moe.py", line 106, in __init__
self.experts = get_moe_impl_class()(
^^^^^^^^^^^^^^^^^^^^^
File "/home/rmehta/sglangenv/lib/python3.12/site-packages/sglang/srt/layers/moe/fused_moe_triton/layer.py", line 164, in __init__
self.quant_method.create_weights(
File "/home/rmehta/sglangenv/lib/python3.12/site-packages/sglang/srt/layers/quantization/fp8.py", line 544, in create_weights
raise ValueError(
ValueError: The output_size of gate's and up's weight = 192 is not divisible by weight quantization block_n = 128.
[2025-08-01 12:12:02] Received sigquit from a child process. It usually means the child failed.
Killed