|
{ |
|
"card": 2048, |
|
"n_q": 32, |
|
"dep_q": 32, |
|
"delays": [ |
|
0, |
|
0, |
|
2, |
|
2, |
|
2, |
|
2, |
|
2, |
|
2, |
|
2, |
|
2, |
|
2, |
|
2, |
|
2, |
|
2, |
|
2, |
|
2, |
|
2, |
|
2, |
|
2, |
|
2, |
|
2, |
|
2, |
|
2, |
|
2, |
|
2, |
|
2, |
|
2, |
|
2, |
|
2, |
|
2, |
|
2, |
|
2, |
|
2 |
|
], |
|
"dim": 2048, |
|
"text_card": 8000, |
|
"existing_text_padding_id": 3, |
|
"num_heads": 16, |
|
"num_layers": 16, |
|
"hidden_scale": 4.125, |
|
"causal": true, |
|
"layer_scale": null, |
|
"context": 500, |
|
"max_period": 10000, |
|
"gating": "silu", |
|
"norm": "rms_norm_f32", |
|
"positional_embedding": "rope", |
|
"depformer_dim": 1024, |
|
"depformer_num_heads": 16, |
|
"depformer_num_layers": 4, |
|
"depformer_dim_feedforward": 3072, |
|
"depformer_multi_linear": true, |
|
"depformer_pos_emb": "none", |
|
"depformer_weights_per_step": true, |
|
"depformer_low_rank_embeddings": 128, |
|
"demux_second_stream": true, |
|
"text_card_out": null, |
|
"conditioners": { |
|
"speaker_wavs": { |
|
"type": "tensor", |
|
"tensor": { |
|
"dim": 512 |
|
} |
|
}, |
|
"cfg": { |
|
"type": "lut", |
|
"lut": { |
|
"n_bins": 7, |
|
"dim": 16, |
|
"tokenizer": "noop", |
|
"possible_values": [ |
|
"1.0", |
|
"1.5", |
|
"2.0", |
|
"2.5", |
|
"3.0", |
|
"3.5", |
|
"4.0" |
|
] |
|
} |
|
}, |
|
"control": { |
|
"type": "lut", |
|
"lut": { |
|
"dim": 2048, |
|
"n_bins": 1, |
|
"tokenizer": "noop", |
|
"possible_values": [ |
|
"ok" |
|
] |
|
} |
|
} |
|
}, |
|
"fuser": { |
|
"cross_attention_pos_emb": true, |
|
"cross_attention_pos_emb_scale": 1, |
|
"sum": [ |
|
"control", |
|
"cfg" |
|
], |
|
"prepend": [], |
|
"cross": [ |
|
"speaker_wavs" |
|
] |
|
}, |
|
"cross_attention": true, |
|
"tts_config": { |
|
"audio_delay": 1.28, |
|
"second_stream_ahead": 2 |
|
}, |
|
"model_id": { |
|
"sig": "1e68beda", |
|
"epoch": 240 |
|
}, |
|
"depformer_weights_per_step_schedule": [ |
|
0, |
|
1, |
|
2, |
|
3, |
|
4, |
|
5, |
|
6, |
|
7, |
|
8, |
|
8, |
|
8, |
|
8, |
|
8, |
|
8, |
|
8, |
|
8, |
|
9, |
|
9, |
|
9, |
|
9, |
|
9, |
|
9, |
|
9, |
|
9, |
|
10, |
|
10, |
|
10, |
|
10, |
|
10, |
|
10, |
|
10, |
|
10 |
|
], |
|
"model_type": "tts", |
|
"lm_gen_config": { |
|
"temp": 0.6, |
|
"text_temp": 0.6 |
|
}, |
|
"tokenizer_name": "tokenizer_spm_8k_en_fr_audio.model", |
|
"mimi_name": "tokenizer-e351c8d8-checkpoint125.safetensors", |
|
"moshi_name": "dsm_tts_1e68beda@240.safetensors" |
|
} |