model:
  arch: video_llama
  model_type: pretrain_vicuna
  freeze_vit: True
  freeze_qformer: True
  max_txt_len: 512
  end_sym: "###"
  low_resource: False

  frozen_llama_proj: False

  # If you want to use LLaMA-2-chat, the checkpoints can be downloaded
  # from our provided Hugging Face repo, i.e.
  # https://huggingface.co/DAMO-NLP-SG/Video-LLaMA-2-13B-Finetuned
  llama_model: "ckpt/vicuna-13b/"  # or "ckpt/vicuna-7b/", "ckpt/llama-2-7b-chat-hf", "ckpt/llama-2-13b-chat-hf"

  # Pretrained checkpoint of the visual branch; you can use our pretrained ckpt from
  # https://huggingface.co/DAMO-NLP-SG/Video-LLaMA-2-13B-Pretrained/
  ckpt: 'path/pretrained_visual_branch_ckpt'

  equip_audio_branch: False
  fusion_head_layers: 2
  max_frame_pos: 32
  fusion_header_type: "seqTransf"

datasets:
  webvid:
    vis_processor:
      train:
        name: "alpro_video_eval"
        n_frms: 8
        image_size: 224
    text_processor:
      train:
        name: "blip_caption"

run:
  task: video_text_pretrain
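
# Usage sketch (comment only, not part of the config itself): since
# equip_audio_branch is False, this config would normally be passed to the
# repo's vision-only demo entry point. The script name, flags, and config
# path below are assumptions based on the Video-LLaMA README, not guaranteed
# by this file:
#
#   python demo_video.py --cfg-path path/to/this_config.yaml --model_type vicuna --gpu-id 0
#
# Swap --model_type to llama_v2 if llama_model above points to a LLaMA-2-chat checkpoint.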