Training in progress, step 200
- axolotl_config.yaml +7 -7
- model.safetensors +1 -1
- training_args.bin +1 -1
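Because the config below sets hub_model_id: SystemAdmin123/SmolLM-360M with hub_strategy: checkpoint, this commit is the Trainer pushing its step-200 checkpoint to the Hub, including the root-level model.safetensors updated here. A minimal sketch of loading the pushed weights, assuming the repo is public and the root-level files are the current checkpoint:

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

# Assumption: the step-200 checkpoint pushed by this commit is loadable
# directly from the hub_model_id set in axolotl_config.yaml below.
repo = "SystemAdmin123/SmolLM-360M"
tokenizer = AutoTokenizer.from_pretrained(repo)
model = AutoModelForCausalLM.from_pretrained(repo)
```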
    	
axolotl_config.yaml CHANGED

@@ -1,5 +1,5 @@
 base_model: unsloth/SmolLM-360M
-batch_size:
+batch_size: 92
 bf16: true
 chat_template: tokenizer_default_fallback_alpaca
 datasets:
@@ -17,23 +17,23 @@ device_map: auto
 eval_sample_packing: false
 eval_steps: 200
 flash_attention: true
-
+gradient_checkpointing: true
 group_by_length: true
 hub_model_id: SystemAdmin123/SmolLM-360M
 hub_strategy: checkpoint
 learning_rate: 0.0002
 logging_steps: 10
 lr_scheduler: cosine
-max_steps:
-micro_batch_size:
+max_steps: 10000
+micro_batch_size: 23
 model_type: AutoModelForCausalLM
 num_epochs: 100
 optimizer: adamw_bnb_8bit
-output_dir: /root/.sn56/axolotl/
+output_dir: /root/.sn56/axolotl/tmp/SmolLM-360M
 pad_to_sequence_len: true
 resize_token_embeddings_to_32x: false
-sample_packing:
-save_steps:
+sample_packing: true
+save_steps: 200
 save_total_limit: 1
 sequence_len: 2048
 tokenizer_type: GPT2TokenizerFast
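In axolotl, batch_size is the effective (global) batch and micro_batch_size is the per-device batch, with gradient accumulation derived from their ratio. A sketch of that arithmetic for the values pinned in this commit, assuming a single GPU (world size is not recorded in the config):

```python
# Sketch of axolotl-style batch decomposition; assumptions marked below.
batch_size = 92        # effective (global) batch, from this commit
micro_batch_size = 23  # per-device batch, from this commit
world_size = 1         # assumption: single GPU

grad_accum_steps = batch_size // (micro_batch_size * world_size)
assert grad_accum_steps * micro_batch_size * world_size == batch_size
print(grad_accum_steps)  # 4 accumulation steps per optimizer step, under these assumptions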
    	
model.safetensors CHANGED

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:6ca635966f2128b90695cdcf1b450ff9388c9812f95f690192973e5b7eefd3c9
 size 723674912
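Both binary files are stored as Git LFS pointers, so the diff only swaps the content hash; the size is unchanged. After downloading the real object, it can be checked against the pointer with a hypothetical helper like this:

```python
import hashlib

def verify_lfs_pointer(path: str, expected_oid: str, expected_size: int) -> bool:
    """Check a downloaded LFS object against its pointer's sha256 oid and size."""
    h = hashlib.sha256()
    size = 0
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):  # stream in 1 MiB chunks
            h.update(chunk)
            size += len(chunk)
    return h.hexdigest() == expected_oid and size == expected_size

# Values from the new pointer in this commit:
ok = verify_lfs_pointer(
    "model.safetensors",
    "6ca635966f2128b90695cdcf1b450ff9388c9812f95f690192973e5b7eefd3c9",
    723674912,
)
```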
    	
training_args.bin CHANGED

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:3156bde561d7a483929e0f1d8c097a973dfeb26f4690b823508131f70e6df615
 size 6840
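training_args.bin is the torch-serialized TrainingArguments object that the transformers Trainer writes alongside each checkpoint, which is why it changes on every push. A sketch of inspecting it, assuming the file has been downloaded locally:

```python
import torch

# training_args.bin is a pickled transformers.TrainingArguments;
# weights_only=False is needed on newer torch versions to unpickle it.
args = torch.load("training_args.bin", weights_only=False)
print(args.per_device_train_batch_size, args.gradient_accumulation_steps)
```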