Not usable for inference; this is only a temporary model for testing training scripts.
- It was created with the following script (a quick smoke-test sketch follows the code):
```python
# 1. Load the original model
import os

os.environ["CUDA_VISIBLE_DEVICES"] = "0"

from transformers import Qwen3OmniMoeForConditionalGeneration, Qwen3OmniMoeProcessor
from transformers.models.qwen3_omni_moe import Qwen3OmniMoeThinkerTextModel
from transformers.models.qwen3_omni_moe.configuration_qwen3_omni_moe import Qwen3OmniMoeTextConfig

MODEL_PATH = "/scratch/vladimir_albrekht/qwen3omni-fine-tuning/models/Qwen3-Omni-30B-A3B-Instruct"
# MODEL_PATH = "Qwen/Qwen3-Omni-30B-A3B-Thinking"

model = Qwen3OmniMoeForConditionalGeneration.from_pretrained(
    MODEL_PATH,
    dtype="auto",
    device_map="auto",
    attn_implementation="flash_attention_2",
)
processor = Qwen3OmniMoeProcessor.from_pretrained(MODEL_PATH)
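
# (Optional, not part of the original script) Print the original Thinker text
# config so the "Reduced from ..." comments below can be checked against it.
print(model.config.thinker_config.text_config)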

# 2. Get the original config and create a new, smaller version
# `model` here is the original 30B model (Qwen/Qwen3-Omni-30B-A3B-Instruct)
original_text_config = model.config.thinker_config.text_config

# Build the new, smaller config
small_text_config = Qwen3OmniMoeTextConfig(
    hidden_size=original_text_config.hidden_size,  # Keep this the same
    vocab_size=original_text_config.vocab_size,    # Keep this the same
    num_hidden_layers=4,        # Reduced from 48
    num_attention_heads=16,     # Reduced from 32
    num_key_value_heads=2,      # Reduced from 4
    intermediate_size=1024,     # Reduced from the original value
    num_experts=8,              # Reduced from 128
    num_experts_per_tok=2,      # Reduced from 8
    moe_intermediate_size=256,  # Reduced from 768
    # You can keep other parameters the same or modify them as needed
    **{k: v for k, v in original_text_config.to_dict().items() if k not in
       ['hidden_size', 'vocab_size', 'num_hidden_layers', 'num_attention_heads',
        'num_key_value_heads', 'intermediate_size', 'num_experts',
        'num_experts_per_tok', 'moe_intermediate_size']}
)
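
# (Optional, not part of the original script) Sanity check: the reduced values
# are applied, while hidden_size and vocab_size still match the original model.
assert small_text_config.num_hidden_layers == 4
assert small_text_config.num_experts == 8
assert small_text_config.hidden_size == original_text_config.hidden_size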
print("--- Initializing new, smaller Thinker LLM part from scratch ---")
# 3. Initialize the smaller Thinker LLM part from scratch using the new config
# This creates a new model with randomly initialized weights.
small_thinker_llm = Qwen3OmniMoeThinkerTextModel(small_text_config)
# 4. replace
model.thinker.model = small_thinker_llm
model.config.thinker_config.text_config = small_thinker_llm.config
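
# (Optional, not part of the original script) Rough size check after the swap;
# the count still includes the original audio/vision encoders and the Talker.
n_params = sum(p.numel() for p in model.parameters())
print(f"Total parameters after swap: {n_params / 1e9:.2f}B")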

# 5. Save the shrunken model and the processor
PATH = "./qwen3omni_5B_random_thinker_weights"
model.save_pretrained(PATH)
processor.save_pretrained(PATH)
```
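
A minimal smoke test for the saved checkpoint could look like the sketch below. It is an illustrative example, not part of the original workflow: it reuses the `PATH` written by the script above, loads the full model (the audio/vision encoders and the Talker are still the original components, so this still needs a fair amount of memory), and pushes random token ids through the randomly initialized Thinker text model just to confirm shapes before wiring the checkpoint into a training script.

```python
import torch
from transformers import Qwen3OmniMoeForConditionalGeneration

PATH = "./qwen3omni_5B_random_thinker_weights"  # directory written by the script above

model = Qwen3OmniMoeForConditionalGeneration.from_pretrained(PATH, dtype="auto")
model.eval()

# Random token ids: the Thinker weights are random anyway, so only shapes matter.
vocab_size = model.config.thinker_config.text_config.vocab_size
dummy_ids = torch.randint(0, vocab_size, (1, 16))

with torch.no_grad():
    out = model.thinker.model(input_ids=dummy_ids.to(model.device))

print(out.last_hidden_state.shape)  # expect (1, 16, hidden_size)
```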