Model save
Browse files
- adapter_config.json +5 -2
- adapter_model.safetensors +2 -2
- trainer_state.json +22 -22
- training_args.bin +1 -1
    	
        adapter_config.json
    CHANGED
    
    | @@ -20,10 +20,13 @@ | |
| 20 | 
             
              "rank_pattern": {},
         | 
| 21 | 
             
              "revision": null,
         | 
| 22 | 
             
              "target_modules": [
         | 
| 23 | 
            -
                "v_proj",
         | 
| 24 | 
             
                "q_proj",
         | 
|  | |
| 25 | 
             
                "o_proj",
         | 
| 26 | 
            -
                " | 
|  | |
|  | |
|  | |
| 27 | 
             
              ],
         | 
| 28 | 
             
              "task_type": "CAUSAL_LM",
         | 
| 29 | 
             
              "use_dora": false,
         | 
|  | |
| 20 | 
             
              "rank_pattern": {},
         | 
| 21 | 
             
              "revision": null,
         | 
| 22 | 
             
              "target_modules": [
         | 
|  | |
| 23 | 
             
                "q_proj",
         | 
| 24 | 
            +
                "k_proj",
         | 
| 25 | 
             
                "o_proj",
         | 
| 26 | 
            +
                "up_proj",
         | 
| 27 | 
            +
                "gate_proj",
         | 
| 28 | 
            +
                "v_proj",
         | 
| 29 | 
            +
                "down_proj"
         | 
| 30 | 
             
              ],
         | 
| 31 | 
             
              "task_type": "CAUSAL_LM",
         | 
| 32 | 
             
              "use_dora": false,
         | 
    	
        adapter_model.safetensors
    CHANGED
    
    | @@ -1,3 +1,3 @@ | |
| 1 | 
             
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            -
            oid sha256: | 
| 3 | 
            -
            size  | 
|  | |
| 1 | 
             
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:b7e1351a67db68dd417cbfbeca9e67f9a39f8701d1f2c67035233497cc66b6e0
         | 
| 3 | 
            +
            size 1148400880
         | 
    	
        trainer_state.json
    CHANGED
    
    | @@ -10,68 +10,68 @@ | |
| 10 | 
             
              "log_history": [
         | 
| 11 | 
             
                {
         | 
| 12 | 
             
                  "epoch": 0.4854368932038835,
         | 
| 13 | 
            -
                  "grad_norm": 0. | 
| 14 | 
             
                  "learning_rate": 8.794946550048592e-05,
         | 
| 15 | 
            -
                  "loss":  | 
| 16 | 
             
                  "step": 250
         | 
| 17 | 
             
                },
         | 
| 18 | 
             
                {
         | 
| 19 | 
             
                  "epoch": 0.970873786407767,
         | 
| 20 | 
            -
                  "grad_norm": 0. | 
| 21 | 
             
                  "learning_rate": 7.580174927113704e-05,
         | 
| 22 | 
            -
                  "loss": 0. | 
| 23 | 
             
                  "step": 500
         | 
| 24 | 
             
                },
         | 
| 25 | 
             
                {
         | 
| 26 | 
             
                  "epoch": 1.4563106796116505,
         | 
| 27 | 
            -
                  "grad_norm": 0. | 
| 28 | 
             
                  "learning_rate": 6.365403304178815e-05,
         | 
| 29 | 
            -
                  "loss": 0. | 
| 30 | 
             
                  "step": 750
         | 
| 31 | 
             
                },
         | 
| 32 | 
             
                {
         | 
| 33 | 
             
                  "epoch": 1.941747572815534,
         | 
| 34 | 
            -
                  "grad_norm": 0. | 
| 35 | 
             
                  "learning_rate": 5.150631681243926e-05,
         | 
| 36 | 
            -
                  "loss": 0. | 
| 37 | 
             
                  "step": 1000
         | 
| 38 | 
             
                },
         | 
| 39 | 
             
                {
         | 
| 40 | 
             
                  "epoch": 2.4271844660194173,
         | 
| 41 | 
            -
                  "grad_norm": 0. | 
| 42 | 
             
                  "learning_rate": 3.9358600583090386e-05,
         | 
| 43 | 
            -
                  "loss": 0. | 
| 44 | 
             
                  "step": 1250
         | 
| 45 | 
             
                },
         | 
| 46 | 
             
                {
         | 
| 47 | 
             
                  "epoch": 2.912621359223301,
         | 
| 48 | 
            -
                  "grad_norm": 0. | 
| 49 | 
             
                  "learning_rate": 2.72108843537415e-05,
         | 
| 50 | 
            -
                  "loss": 0. | 
| 51 | 
             
                  "step": 1500
         | 
| 52 | 
             
                },
         | 
| 53 | 
             
                {
         | 
| 54 | 
             
                  "epoch": 3.3980582524271843,
         | 
| 55 | 
            -
                  "grad_norm": 0. | 
| 56 | 
             
                  "learning_rate": 1.5063168124392615e-05,
         | 
| 57 | 
            -
                  "loss": 0. | 
| 58 | 
             
                  "step": 1750
         | 
| 59 | 
             
                },
         | 
| 60 | 
             
                {
         | 
| 61 | 
             
                  "epoch": 3.883495145631068,
         | 
| 62 | 
            -
                  "grad_norm": 0. | 
| 63 | 
             
                  "learning_rate": 2.915451895043732e-06,
         | 
| 64 | 
            -
                  "loss": 0. | 
| 65 | 
             
                  "step": 2000
         | 
| 66 | 
             
                },
         | 
| 67 | 
             
                {
         | 
| 68 | 
             
                  "epoch": 4.0,
         | 
| 69 | 
             
                  "step": 2060,
         | 
| 70 | 
            -
                  "total_flos": 5. | 
| 71 | 
            -
                  "train_loss": 0. | 
| 72 | 
            -
                  "train_runtime":  | 
| 73 | 
            -
                  "train_samples_per_second":  | 
| 74 | 
            -
                  "train_steps_per_second": 0. | 
| 75 | 
             
                }
         | 
| 76 | 
             
              ],
         | 
| 77 | 
             
              "logging_steps": 250,
         | 
| @@ -91,7 +91,7 @@ | |
| 91 | 
             
                  "attributes": {}
         | 
| 92 | 
             
                }
         | 
| 93 | 
             
              },
         | 
| 94 | 
            -
              "total_flos": 5. | 
| 95 | 
             
              "train_batch_size": 16,
         | 
| 96 | 
             
              "trial_name": null,
         | 
| 97 | 
             
              "trial_params": null
         | 
|  | |
| 10 | 
             
              "log_history": [
         | 
| 11 | 
             
                {
         | 
| 12 | 
             
                  "epoch": 0.4854368932038835,
         | 
| 13 | 
            +
                  "grad_norm": 0.4750896096229553,
         | 
| 14 | 
             
                  "learning_rate": 8.794946550048592e-05,
         | 
| 15 | 
            +
                  "loss": 0.854,
         | 
| 16 | 
             
                  "step": 250
         | 
| 17 | 
             
                },
         | 
| 18 | 
             
                {
         | 
| 19 | 
             
                  "epoch": 0.970873786407767,
         | 
| 20 | 
            +
                  "grad_norm": 0.3962755501270294,
         | 
| 21 | 
             
                  "learning_rate": 7.580174927113704e-05,
         | 
| 22 | 
            +
                  "loss": 0.4107,
         | 
| 23 | 
             
                  "step": 500
         | 
| 24 | 
             
                },
         | 
| 25 | 
             
                {
         | 
| 26 | 
             
                  "epoch": 1.4563106796116505,
         | 
| 27 | 
            +
                  "grad_norm": 0.4067479968070984,
         | 
| 28 | 
             
                  "learning_rate": 6.365403304178815e-05,
         | 
| 29 | 
            +
                  "loss": 0.3693,
         | 
| 30 | 
             
                  "step": 750
         | 
| 31 | 
             
                },
         | 
| 32 | 
             
                {
         | 
| 33 | 
             
                  "epoch": 1.941747572815534,
         | 
| 34 | 
            +
                  "grad_norm": 0.38044601678848267,
         | 
| 35 | 
             
                  "learning_rate": 5.150631681243926e-05,
         | 
| 36 | 
            +
                  "loss": 0.3561,
         | 
| 37 | 
             
                  "step": 1000
         | 
| 38 | 
             
                },
         | 
| 39 | 
             
                {
         | 
| 40 | 
             
                  "epoch": 2.4271844660194173,
         | 
| 41 | 
            +
                  "grad_norm": 0.3728051483631134,
         | 
| 42 | 
             
                  "learning_rate": 3.9358600583090386e-05,
         | 
| 43 | 
            +
                  "loss": 0.3473,
         | 
| 44 | 
             
                  "step": 1250
         | 
| 45 | 
             
                },
         | 
| 46 | 
             
                {
         | 
| 47 | 
             
                  "epoch": 2.912621359223301,
         | 
| 48 | 
            +
                  "grad_norm": 0.40094345808029175,
         | 
| 49 | 
             
                  "learning_rate": 2.72108843537415e-05,
         | 
| 50 | 
            +
                  "loss": 0.3414,
         | 
| 51 | 
             
                  "step": 1500
         | 
| 52 | 
             
                },
         | 
| 53 | 
             
                {
         | 
| 54 | 
             
                  "epoch": 3.3980582524271843,
         | 
| 55 | 
            +
                  "grad_norm": 0.3950284719467163,
         | 
| 56 | 
             
                  "learning_rate": 1.5063168124392615e-05,
         | 
| 57 | 
            +
                  "loss": 0.3321,
         | 
| 58 | 
             
                  "step": 1750
         | 
| 59 | 
             
                },
         | 
| 60 | 
             
                {
         | 
| 61 | 
             
                  "epoch": 3.883495145631068,
         | 
| 62 | 
            +
                  "grad_norm": 0.3837919235229492,
         | 
| 63 | 
             
                  "learning_rate": 2.915451895043732e-06,
         | 
| 64 | 
            +
                  "loss": 0.3271,
         | 
| 65 | 
             
                  "step": 2000
         | 
| 66 | 
             
                },
         | 
| 67 | 
             
                {
         | 
| 68 | 
             
                  "epoch": 4.0,
         | 
| 69 | 
             
                  "step": 2060,
         | 
| 70 | 
            +
                  "total_flos": 5.083160308163666e+17,
         | 
| 71 | 
            +
                  "train_loss": 0.4145523006476245,
         | 
| 72 | 
            +
                  "train_runtime": 7540.7405,
         | 
| 73 | 
            +
                  "train_samples_per_second": 69.933,
         | 
| 74 | 
            +
                  "train_steps_per_second": 0.273
         | 
| 75 | 
             
                }
         | 
| 76 | 
             
              ],
         | 
| 77 | 
             
              "logging_steps": 250,
         | 
|  | |
| 91 | 
             
                  "attributes": {}
         | 
| 92 | 
             
                }
         | 
| 93 | 
             
              },
         | 
| 94 | 
            +
              "total_flos": 5.083160308163666e+17,
         | 
| 95 | 
             
              "train_batch_size": 16,
         | 
| 96 | 
             
              "trial_name": null,
         | 
| 97 | 
             
              "trial_params": null
         | 
    	
        training_args.bin
    CHANGED
    
    | @@ -1,3 +1,3 @@ | |
| 1 | 
             
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            -
            oid sha256: | 
| 3 | 
             
            size 5240
         | 
|  | |
| 1 | 
             
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:37060c46874a7281744ce6e4a73c75ab26f52036c0376a488d3598e1446fba39
         | 
| 3 | 
             
            size 5240
         |