Upload training_config.yml with huggingface_hub
training_config.yml  +19 -17
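For context: a commit with this message is what the huggingface_hub client produces when a file is pushed programmatically. A minimal sketch of such an upload, assuming a hypothetical repo id "user/a2a-finetune" (the actual target repo is not shown on this page):

    from huggingface_hub import HfApi

    api = HfApi()  # picks up the token from `huggingface-cli login` by default
    api.upload_file(
        path_or_fileobj="training_config.yml",  # local file to push
        path_in_repo="training_config.yml",     # destination path in the repo
        repo_id="user/a2a-finetune",            # hypothetical; real repo id not shown here
        commit_message="Upload training_config.yml with huggingface_hub",
    )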
 
    	
training_config.yml    CHANGED

@@ -5,30 +5,31 @@ model:
   - v_proj
   apply_lora_to_mlp: false
   apply_lora_to_output: false
-  lora_rank:
-  lora_alpha:
+  lora_rank: 32
+  lora_alpha: 64
   perception_tokens: 2
   use_clip: false
 tokenizer:
   _component_: models.a2a_tokenizer
-  path:
+  path: models/tokenizer.model
 checkpointer:
   _component_: torchtune.utils.FullModelMetaCheckpointer
-  checkpoint_dir:
+  checkpoint_dir: crazyfrog2
   checkpoint_files:
-  -
+  - meta_model_0.pt
   adapter_checkpoint: null
   recipe_checkpoint: null
   output_dir: output_checkpoints/experiment_1
   model_type: LLAMA3
 resume_from_checkpoint: false
-interim_checkpoint_steps:
+interim_checkpoint_steps: 15000
 interim_gen_steps: null
-max_new_tokens:
+max_new_tokens: 77
 temperature: 0.6
-top_k:
+top_k: 231
 dataset:
   _component_: ds.EvenBatcher
+  buffer_size: 72
   dataset:
     _component_: ds.RoundRobinDataset
     datasets:

@@ -45,19 +46,19 @@ dataset:
       train_on_input: false
 seed: null
 shuffle: true
-batch_size:
+batch_size: 6
 optimizer:
   _component_: torch.optim.AdamW
-  weight_decay: 0.
-  lr:
+  weight_decay: 0.99
+  lr: 0.0002
 lr_scheduler:
   _component_: torchtune.modules.get_cosine_schedule_with_warmup
-  num_warmup_steps:
+  num_warmup_steps: 4
 loss:
   _component_: torch.nn.CrossEntropyLoss
-epochs:
+epochs: 60
 max_steps_per_epoch: null
-gradient_accumulation_steps:
+gradient_accumulation_steps: 260
 compile: false
 output_dir: /tmp/lora_finetune_output
 metric_logger:

@@ -76,7 +77,8 @@ inference:
       {video}

       Caption the previous video.'
-  max_new_tokens:
-  temperature: 0.
-  top_k:
+  max_new_tokens: 231
+  temperature: 0.8
+  top_k: 231
   quantizer: null
+gradient-accumulation-steps: 32
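The resulting file is a torchtune-style YAML config, so the committed values can be inspected with OmegaConf. A minimal sketch, assuming the keys sit where the hunk headers place them (lora_rank and lora_alpha under model:, batch_size and gradient_accumulation_steps at top level):

    from omegaconf import OmegaConf

    cfg = OmegaConf.load("training_config.yml")

    # LoRA scaling factor is alpha / rank: 64 / 32 = 2.0 with the new values.
    print(cfg.model.lora_rank, cfg.model.lora_alpha)

    # Effective batch size per optimizer step: 6 * 260 = 1560 samples.
    print(cfg.batch_size * cfg.gradient_accumulation_steps)

Note that the final added line, gradient-accumulation-steps: 32, spells the key with hyphens, so YAML treats it as a key distinct from the existing gradient_accumulation_steps: 260 rather than overriding it.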