{
  "audio_llama_proj_model": "",
  "beats_cfg": {
    "activation_dropout": 0.0,
    "activation_fn": "gelu",
    "attention_dropout": 0.0,
    "conv_bias": false,
    "conv_pos": 128,
    "conv_pos_groups": 16,
    "deep_norm": true,
    "dropout": 0.0,
    "dropout_input": 0.0,
    "embed_dim": 512,
    "encoder_attention_heads": 12,
    "encoder_embed_dim": 768,
    "encoder_ffn_embed_dim": 3072,
    "encoder_layerdrop": 0.05,
    "encoder_layers": 12,
    "finetuned_model": true,
    "gru_rel_pos": true,
    "input_patch_size": 16,
    "layer_norm_first": false,
    "layer_wise_gradient_decay_ratio": 0.6,
    "max_distance": 800,
    "num_buckets": 320,
    "predictor_class": 527,
    "predictor_dropout": 0.0,
    "relative_position_embedding": true
  },
  "downsample_factor": 8,
  "end_sym": "<|end_of_text|>",
  "freeze_audio_QFormer": false,
  "freeze_audio_llama_proj": false,
  "freeze_beats": true,
  "llama_path": "meta-llama/Meta-Llama-3.1-8B-Instruct",
  "lora": true,
  "lora_alpha": 32,
  "lora_dropout": 0.1,
  "lora_rank": 32,
  "max_pooling": false,
  "max_txt_len": 160,
  "num_audio_query_token": 1,
  "prompt_template": "<|start_header_id|>user<|end_header_id|>\n\n{}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n",
  "second_per_window": 0.333333,
  "second_stride": 0.333333,
  "use_audio_Qformer": true,
  "window_level_Qformer": true
}