Commit 
							
							·
						
						598c023
	
1
								Parent(s):
							
							e8120ef
								
Training in progress, step 750000
Browse files- last-checkpoint/optimizer.pt +1 -1
 - last-checkpoint/pytorch_model.bin +1 -1
 - last-checkpoint/rng_state_0.pth +1 -1
 - last-checkpoint/rng_state_1.pth +1 -1
 - last-checkpoint/rng_state_2.pth +1 -1
 - last-checkpoint/rng_state_3.pth +1 -1
 - last-checkpoint/rng_state_4.pth +1 -1
 - last-checkpoint/rng_state_5.pth +1 -1
 - last-checkpoint/rng_state_6.pth +1 -1
 - last-checkpoint/rng_state_7.pth +1 -1
 - last-checkpoint/scheduler.pt +1 -1
 - last-checkpoint/trainer_state.json +311 -3
 - pytorch_model.bin +1 -1
 - runs/Mar22_03-02-10_t1v-n-ae339136-w-0/events.out.tfevents.1679454966.t1v-n-ae339136-w-0.10622.0 +2 -2
 
    	
        last-checkpoint/optimizer.pt
    CHANGED
    
    | 
         @@ -1,3 +1,3 @@ 
     | 
|
| 1 | 
         
             
            version https://git-lfs.github.com/spec/v1
         
     | 
| 2 | 
         
            -
            oid sha256: 
     | 
| 3 | 
         
             
            size 3480942553
         
     | 
| 
         | 
|
| 1 | 
         
             
            version https://git-lfs.github.com/spec/v1
         
     | 
| 2 | 
         
            +
            oid sha256:017878d7c611ba78f49d36fac2d29744f60586a2f3748e728a8daa9a71cdc581
         
     | 
| 3 | 
         
             
            size 3480942553
         
     | 
    	
        last-checkpoint/pytorch_model.bin
    CHANGED
    
    | 
         @@ -1,3 +1,3 @@ 
     | 
|
| 1 | 
         
             
            version https://git-lfs.github.com/spec/v1
         
     | 
| 2 | 
         
            -
            oid sha256: 
     | 
| 3 | 
         
             
            size 1740493675
         
     | 
| 
         | 
|
| 1 | 
         
             
            version https://git-lfs.github.com/spec/v1
         
     | 
| 2 | 
         
            +
            oid sha256:39eb0b770ba80ef91093b86ebb60b6ce4aa8879c4b6c52b6fa67d411db4e4956
         
     | 
| 3 | 
         
             
            size 1740493675
         
     | 
    	
        last-checkpoint/rng_state_0.pth
    CHANGED
    
    | 
         @@ -1,3 +1,3 @@ 
     | 
|
| 1 | 
         
             
            version https://git-lfs.github.com/spec/v1
         
     | 
| 2 | 
         
            -
            oid sha256: 
     | 
| 3 | 
         
             
            size 13611
         
     | 
| 
         | 
|
| 1 | 
         
             
            version https://git-lfs.github.com/spec/v1
         
     | 
| 2 | 
         
            +
            oid sha256:9e432fc8945094b64cfb44d4ef20ccce8569657d41d36365d65d61ece0bc81dc
         
     | 
| 3 | 
         
             
            size 13611
         
     | 
    	
        last-checkpoint/rng_state_1.pth
    CHANGED
    
    | 
         @@ -1,3 +1,3 @@ 
     | 
|
| 1 | 
         
             
            version https://git-lfs.github.com/spec/v1
         
     | 
| 2 | 
         
            -
            oid sha256: 
     | 
| 3 | 
         
             
            size 13611
         
     | 
| 
         | 
|
| 1 | 
         
             
            version https://git-lfs.github.com/spec/v1
         
     | 
| 2 | 
         
            +
            oid sha256:9e432fc8945094b64cfb44d4ef20ccce8569657d41d36365d65d61ece0bc81dc
         
     | 
| 3 | 
         
             
            size 13611
         
     | 
    	
        last-checkpoint/rng_state_2.pth
    CHANGED
    
    | 
         @@ -1,3 +1,3 @@ 
     | 
|
| 1 | 
         
             
            version https://git-lfs.github.com/spec/v1
         
     | 
| 2 | 
         
            -
            oid sha256: 
     | 
| 3 | 
         
             
            size 13611
         
     | 
| 
         | 
|
| 1 | 
         
             
            version https://git-lfs.github.com/spec/v1
         
     | 
| 2 | 
         
            +
            oid sha256:9e432fc8945094b64cfb44d4ef20ccce8569657d41d36365d65d61ece0bc81dc
         
     | 
| 3 | 
         
             
            size 13611
         
     | 
    	
        last-checkpoint/rng_state_3.pth
    CHANGED
    
    | 
         @@ -1,3 +1,3 @@ 
     | 
|
| 1 | 
         
             
            version https://git-lfs.github.com/spec/v1
         
     | 
| 2 | 
         
            -
            oid sha256: 
     | 
| 3 | 
         
             
            size 13611
         
     | 
| 
         | 
|
| 1 | 
         
             
            version https://git-lfs.github.com/spec/v1
         
     | 
| 2 | 
         
            +
            oid sha256:9e432fc8945094b64cfb44d4ef20ccce8569657d41d36365d65d61ece0bc81dc
         
     | 
| 3 | 
         
             
            size 13611
         
     | 
    	
        last-checkpoint/rng_state_4.pth
    CHANGED
    
    | 
         @@ -1,3 +1,3 @@ 
     | 
|
| 1 | 
         
             
            version https://git-lfs.github.com/spec/v1
         
     | 
| 2 | 
         
            -
            oid sha256: 
     | 
| 3 | 
         
             
            size 13611
         
     | 
| 
         | 
|
| 1 | 
         
             
            version https://git-lfs.github.com/spec/v1
         
     | 
| 2 | 
         
            +
            oid sha256:9e432fc8945094b64cfb44d4ef20ccce8569657d41d36365d65d61ece0bc81dc
         
     | 
| 3 | 
         
             
            size 13611
         
     | 
    	
        last-checkpoint/rng_state_5.pth
    CHANGED
    
    | 
         @@ -1,3 +1,3 @@ 
     | 
|
| 1 | 
         
             
            version https://git-lfs.github.com/spec/v1
         
     | 
| 2 | 
         
            -
            oid sha256: 
     | 
| 3 | 
         
             
            size 13611
         
     | 
| 
         | 
|
| 1 | 
         
             
            version https://git-lfs.github.com/spec/v1
         
     | 
| 2 | 
         
            +
            oid sha256:9e432fc8945094b64cfb44d4ef20ccce8569657d41d36365d65d61ece0bc81dc
         
     | 
| 3 | 
         
             
            size 13611
         
     | 
    	
        last-checkpoint/rng_state_6.pth
    CHANGED
    
    | 
         @@ -1,3 +1,3 @@ 
     | 
|
| 1 | 
         
             
            version https://git-lfs.github.com/spec/v1
         
     | 
| 2 | 
         
            -
            oid sha256: 
     | 
| 3 | 
         
             
            size 13611
         
     | 
| 
         | 
|
| 1 | 
         
             
            version https://git-lfs.github.com/spec/v1
         
     | 
| 2 | 
         
            +
            oid sha256:9e432fc8945094b64cfb44d4ef20ccce8569657d41d36365d65d61ece0bc81dc
         
     | 
| 3 | 
         
             
            size 13611
         
     | 
    	
        last-checkpoint/rng_state_7.pth
    CHANGED
    
    | 
         @@ -1,3 +1,3 @@ 
     | 
|
| 1 | 
         
             
            version https://git-lfs.github.com/spec/v1
         
     | 
| 2 | 
         
            -
            oid sha256: 
     | 
| 3 | 
         
             
            size 13611
         
     | 
| 
         | 
|
| 1 | 
         
             
            version https://git-lfs.github.com/spec/v1
         
     | 
| 2 | 
         
            +
            oid sha256:9e432fc8945094b64cfb44d4ef20ccce8569657d41d36365d65d61ece0bc81dc
         
     | 
| 3 | 
         
             
            size 13611
         
     | 
    	
        last-checkpoint/scheduler.pt
    CHANGED
    
    | 
         @@ -1,3 +1,3 @@ 
     | 
|
| 1 | 
         
             
            version https://git-lfs.github.com/spec/v1
         
     | 
| 2 | 
         
            -
            oid sha256: 
     | 
| 3 | 
         
             
            size 623
         
     | 
| 
         | 
|
| 1 | 
         
             
            version https://git-lfs.github.com/spec/v1
         
     | 
| 2 | 
         
            +
            oid sha256:411052d7656a2fbf4baa154bd61bcb86c1d4e17113e6919b82f37e9aff99019f
         
     | 
| 3 | 
         
             
            size 623
         
     | 
    	
        last-checkpoint/trainer_state.json
    CHANGED
    
    | 
         @@ -1,8 +1,8 @@ 
     | 
|
| 1 | 
         
             
            {
         
     | 
| 2 | 
         
             
              "best_metric": null,
         
     | 
| 3 | 
         
             
              "best_model_checkpoint": null,
         
     | 
| 4 | 
         
            -
              "epoch": 0. 
     | 
| 5 | 
         
            -
              "global_step":  
     | 
| 6 | 
         
             
              "is_hyper_param_search": false,
         
     | 
| 7 | 
         
             
              "is_local_process_zero": true,
         
     | 
| 8 | 
         
             
              "is_world_process_zero": true,
         
     | 
| 
         @@ -4318,11 +4318,319 @@ 
     | 
|
| 4318 | 
         
             
                  "eval_samples_per_second": 27.086,
         
     | 
| 4319 | 
         
             
                  "eval_steps_per_second": 0.428,
         
     | 
| 4320 | 
         
             
                  "step": 700000
         
     | 
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 4321 | 
         
             
                }
         
     | 
| 4322 | 
         
             
              ],
         
     | 
| 4323 | 
         
             
              "max_steps": 1000000,
         
     | 
| 4324 | 
         
             
              "num_train_epochs": 9223372036854775807,
         
     | 
| 4325 | 
         
            -
              "total_flos": 4. 
     | 
| 4326 | 
         
             
              "trial_name": null,
         
     | 
| 4327 | 
         
             
              "trial_params": null
         
     | 
| 4328 | 
         
             
            }
         
     | 
| 
         | 
|
| 1 | 
         
             
            {
         
     | 
| 2 | 
         
             
              "best_metric": null,
         
     | 
| 3 | 
         
             
              "best_model_checkpoint": null,
         
     | 
| 4 | 
         
            +
              "epoch": 0.75,
         
     | 
| 5 | 
         
            +
              "global_step": 750000,
         
     | 
| 6 | 
         
             
              "is_hyper_param_search": false,
         
     | 
| 7 | 
         
             
              "is_local_process_zero": true,
         
     | 
| 8 | 
         
             
              "is_world_process_zero": true,
         
     | 
| 
         | 
|
| 4318 | 
         
             
                  "eval_samples_per_second": 27.086,
         
     | 
| 4319 | 
         
             
                  "eval_steps_per_second": 0.428,
         
     | 
| 4320 | 
         
             
                  "step": 700000
         
     | 
| 4321 | 
         
            +
                },
         
     | 
| 4322 | 
         
            +
                {
         
     | 
| 4323 | 
         
            +
                  "epoch": 0.7,
         
     | 
| 4324 | 
         
            +
                  "learning_rate": 2.251431892919171e-05,
         
     | 
| 4325 | 
         
            +
                  "loss": 0.7484,
         
     | 
| 4326 | 
         
            +
                  "step": 701000
         
     | 
| 4327 | 
         
            +
                },
         
     | 
| 4328 | 
         
            +
                {
         
     | 
| 4329 | 
         
            +
                  "epoch": 0.7,
         
     | 
| 4330 | 
         
            +
                  "learning_rate": 2.237634634350934e-05,
         
     | 
| 4331 | 
         
            +
                  "loss": 0.7388,
         
     | 
| 4332 | 
         
            +
                  "step": 702000
         
     | 
| 4333 | 
         
            +
                },
         
     | 
| 4334 | 
         
            +
                {
         
     | 
| 4335 | 
         
            +
                  "epoch": 0.7,
         
     | 
| 4336 | 
         
            +
                  "learning_rate": 2.2238675845677663e-05,
         
     | 
| 4337 | 
         
            +
                  "loss": 0.7153,
         
     | 
| 4338 | 
         
            +
                  "step": 703000
         
     | 
| 4339 | 
         
            +
                },
         
     | 
| 4340 | 
         
            +
                {
         
     | 
| 4341 | 
         
            +
                  "epoch": 0.7,
         
     | 
| 4342 | 
         
            +
                  "learning_rate": 2.2101308941239203e-05,
         
     | 
| 4343 | 
         
            +
                  "loss": 0.7454,
         
     | 
| 4344 | 
         
            +
                  "step": 704000
         
     | 
| 4345 | 
         
            +
                },
         
     | 
| 4346 | 
         
            +
                {
         
     | 
| 4347 | 
         
            +
                  "epoch": 0.7,
         
     | 
| 4348 | 
         
            +
                  "learning_rate": 2.196424713241637e-05,
         
     | 
| 4349 | 
         
            +
                  "loss": 0.7605,
         
     | 
| 4350 | 
         
            +
                  "step": 705000
         
     | 
| 4351 | 
         
            +
                },
         
     | 
| 4352 | 
         
            +
                {
         
     | 
| 4353 | 
         
            +
                  "epoch": 0.71,
         
     | 
| 4354 | 
         
            +
                  "learning_rate": 2.182749191809518e-05,
         
     | 
| 4355 | 
         
            +
                  "loss": 0.7548,
         
     | 
| 4356 | 
         
            +
                  "step": 706000
         
     | 
| 4357 | 
         
            +
                },
         
     | 
| 4358 | 
         
            +
                {
         
     | 
| 4359 | 
         
            +
                  "epoch": 0.71,
         
     | 
| 4360 | 
         
            +
                  "learning_rate": 2.1691044793808734e-05,
         
     | 
| 4361 | 
         
            +
                  "loss": 0.7524,
         
     | 
| 4362 | 
         
            +
                  "step": 707000
         
     | 
| 4363 | 
         
            +
                },
         
     | 
| 4364 | 
         
            +
                {
         
     | 
| 4365 | 
         
            +
                  "epoch": 0.71,
         
     | 
| 4366 | 
         
            +
                  "learning_rate": 2.1554907251720945e-05,
         
     | 
| 4367 | 
         
            +
                  "loss": 0.7297,
         
     | 
| 4368 | 
         
            +
                  "step": 708000
         
     | 
| 4369 | 
         
            +
                },
         
     | 
| 4370 | 
         
            +
                {
         
     | 
| 4371 | 
         
            +
                  "epoch": 0.71,
         
     | 
| 4372 | 
         
            +
                  "learning_rate": 2.1419080780610123e-05,
         
     | 
| 4373 | 
         
            +
                  "loss": 0.7613,
         
     | 
| 4374 | 
         
            +
                  "step": 709000
         
     | 
| 4375 | 
         
            +
                },
         
     | 
| 4376 | 
         
            +
                {
         
     | 
| 4377 | 
         
            +
                  "epoch": 0.71,
         
     | 
| 4378 | 
         
            +
                  "learning_rate": 2.128356686585282e-05,
         
     | 
| 4379 | 
         
            +
                  "loss": 0.7674,
         
     | 
| 4380 | 
         
            +
                  "step": 710000
         
     | 
| 4381 | 
         
            +
                },
         
     | 
| 4382 | 
         
            +
                {
         
     | 
| 4383 | 
         
            +
                  "epoch": 0.71,
         
     | 
| 4384 | 
         
            +
                  "learning_rate": 2.1148366989407496e-05,
         
     | 
| 4385 | 
         
            +
                  "loss": 0.7806,
         
     | 
| 4386 | 
         
            +
                  "step": 711000
         
     | 
| 4387 | 
         
            +
                },
         
     | 
| 4388 | 
         
            +
                {
         
     | 
| 4389 | 
         
            +
                  "epoch": 0.71,
         
     | 
| 4390 | 
         
            +
                  "learning_rate": 2.1013482629798333e-05,
         
     | 
| 4391 | 
         
            +
                  "loss": 0.7822,
         
     | 
| 4392 | 
         
            +
                  "step": 712000
         
     | 
| 4393 | 
         
            +
                },
         
     | 
| 4394 | 
         
            +
                {
         
     | 
| 4395 | 
         
            +
                  "epoch": 0.71,
         
     | 
| 4396 | 
         
            +
                  "learning_rate": 2.0878915262099098e-05,
         
     | 
| 4397 | 
         
            +
                  "loss": 0.7801,
         
     | 
| 4398 | 
         
            +
                  "step": 713000
         
     | 
| 4399 | 
         
            +
                },
         
     | 
| 4400 | 
         
            +
                {
         
     | 
| 4401 | 
         
            +
                  "epoch": 0.71,
         
     | 
| 4402 | 
         
            +
                  "learning_rate": 2.0744666357916925e-05,
         
     | 
| 4403 | 
         
            +
                  "loss": 0.7657,
         
     | 
| 4404 | 
         
            +
                  "step": 714000
         
     | 
| 4405 | 
         
            +
                },
         
     | 
| 4406 | 
         
            +
                {
         
     | 
| 4407 | 
         
            +
                  "epoch": 0.71,
         
     | 
| 4408 | 
         
            +
                  "learning_rate": 2.061073738537635e-05,
         
     | 
| 4409 | 
         
            +
                  "loss": 0.7521,
         
     | 
| 4410 | 
         
            +
                  "step": 715000
         
     | 
| 4411 | 
         
            +
                },
         
     | 
| 4412 | 
         
            +
                {
         
     | 
| 4413 | 
         
            +
                  "epoch": 0.72,
         
     | 
| 4414 | 
         
            +
                  "learning_rate": 2.0477129809103147e-05,
         
     | 
| 4415 | 
         
            +
                  "loss": 0.728,
         
     | 
| 4416 | 
         
            +
                  "step": 716000
         
     | 
| 4417 | 
         
            +
                },
         
     | 
| 4418 | 
         
            +
                {
         
     | 
| 4419 | 
         
            +
                  "epoch": 0.72,
         
     | 
| 4420 | 
         
            +
                  "learning_rate": 2.0343845090208368e-05,
         
     | 
| 4421 | 
         
            +
                  "loss": 0.7287,
         
     | 
| 4422 | 
         
            +
                  "step": 717000
         
     | 
| 4423 | 
         
            +
                },
         
     | 
| 4424 | 
         
            +
                {
         
     | 
| 4425 | 
         
            +
                  "epoch": 0.72,
         
     | 
| 4426 | 
         
            +
                  "learning_rate": 2.0210884686272368e-05,
         
     | 
| 4427 | 
         
            +
                  "loss": 0.7304,
         
     | 
| 4428 | 
         
            +
                  "step": 718000
         
     | 
| 4429 | 
         
            +
                },
         
     | 
| 4430 | 
         
            +
                {
         
     | 
| 4431 | 
         
            +
                  "epoch": 0.72,
         
     | 
| 4432 | 
         
            +
                  "learning_rate": 2.0078250051328784e-05,
         
     | 
| 4433 | 
         
            +
                  "loss": 0.747,
         
     | 
| 4434 | 
         
            +
                  "step": 719000
         
     | 
| 4435 | 
         
            +
                },
         
     | 
| 4436 | 
         
            +
                {
         
     | 
| 4437 | 
         
            +
                  "epoch": 0.72,
         
     | 
| 4438 | 
         
            +
                  "learning_rate": 1.9945942635848748e-05,
         
     | 
| 4439 | 
         
            +
                  "loss": 0.7278,
         
     | 
| 4440 | 
         
            +
                  "step": 720000
         
     | 
| 4441 | 
         
            +
                },
         
     | 
| 4442 | 
         
            +
                {
         
     | 
| 4443 | 
         
            +
                  "epoch": 0.72,
         
     | 
| 4444 | 
         
            +
                  "learning_rate": 1.981396388672496e-05,
         
     | 
| 4445 | 
         
            +
                  "loss": 0.7561,
         
     | 
| 4446 | 
         
            +
                  "step": 721000
         
     | 
| 4447 | 
         
            +
                },
         
     | 
| 4448 | 
         
            +
                {
         
     | 
| 4449 | 
         
            +
                  "epoch": 0.72,
         
     | 
| 4450 | 
         
            +
                  "learning_rate": 1.9682315247255894e-05,
         
     | 
| 4451 | 
         
            +
                  "loss": 0.7602,
         
     | 
| 4452 | 
         
            +
                  "step": 722000
         
     | 
| 4453 | 
         
            +
                },
         
     | 
| 4454 | 
         
            +
                {
         
     | 
| 4455 | 
         
            +
                  "epoch": 0.72,
         
     | 
| 4456 | 
         
            +
                  "learning_rate": 1.9550998157129946e-05,
         
     | 
| 4457 | 
         
            +
                  "loss": 0.7575,
         
     | 
| 4458 | 
         
            +
                  "step": 723000
         
     | 
| 4459 | 
         
            +
                },
         
     | 
| 4460 | 
         
            +
                {
         
     | 
| 4461 | 
         
            +
                  "epoch": 0.72,
         
     | 
| 4462 | 
         
            +
                  "learning_rate": 1.942001405240979e-05,
         
     | 
| 4463 | 
         
            +
                  "loss": 0.755,
         
     | 
| 4464 | 
         
            +
                  "step": 724000
         
     | 
| 4465 | 
         
            +
                },
         
     | 
| 4466 | 
         
            +
                {
         
     | 
| 4467 | 
         
            +
                  "epoch": 0.72,
         
     | 
| 4468 | 
         
            +
                  "learning_rate": 1.928936436551661e-05,
         
     | 
| 4469 | 
         
            +
                  "loss": 0.7168,
         
     | 
| 4470 | 
         
            +
                  "step": 725000
         
     | 
| 4471 | 
         
            +
                },
         
     | 
| 4472 | 
         
            +
                {
         
     | 
| 4473 | 
         
            +
                  "epoch": 0.73,
         
     | 
| 4474 | 
         
            +
                  "learning_rate": 1.9159050525214452e-05,
         
     | 
| 4475 | 
         
            +
                  "loss": 0.723,
         
     | 
| 4476 | 
         
            +
                  "step": 726000
         
     | 
| 4477 | 
         
            +
                },
         
     | 
| 4478 | 
         
            +
                {
         
     | 
| 4479 | 
         
            +
                  "epoch": 0.73,
         
     | 
| 4480 | 
         
            +
                  "learning_rate": 1.9029073956594606e-05,
         
     | 
| 4481 | 
         
            +
                  "loss": 0.7411,
         
     | 
| 4482 | 
         
            +
                  "step": 727000
         
     | 
| 4483 | 
         
            +
                },
         
     | 
| 4484 | 
         
            +
                {
         
     | 
| 4485 | 
         
            +
                  "epoch": 0.73,
         
     | 
| 4486 | 
         
            +
                  "learning_rate": 1.8899436081059975e-05,
         
     | 
| 4487 | 
         
            +
                  "loss": 0.7462,
         
     | 
| 4488 | 
         
            +
                  "step": 728000
         
     | 
| 4489 | 
         
            +
                },
         
     | 
| 4490 | 
         
            +
                {
         
     | 
| 4491 | 
         
            +
                  "epoch": 0.73,
         
     | 
| 4492 | 
         
            +
                  "learning_rate": 1.877013831630961e-05,
         
     | 
| 4493 | 
         
            +
                  "loss": 0.7513,
         
     | 
| 4494 | 
         
            +
                  "step": 729000
         
     | 
| 4495 | 
         
            +
                },
         
     | 
| 4496 | 
         
            +
                {
         
     | 
| 4497 | 
         
            +
                  "epoch": 0.73,
         
     | 
| 4498 | 
         
            +
                  "learning_rate": 1.8641182076323148e-05,
         
     | 
| 4499 | 
         
            +
                  "loss": 0.7438,
         
     | 
| 4500 | 
         
            +
                  "step": 730000
         
     | 
| 4501 | 
         
            +
                },
         
     | 
| 4502 | 
         
            +
                {
         
     | 
| 4503 | 
         
            +
                  "epoch": 0.73,
         
     | 
| 4504 | 
         
            +
                  "learning_rate": 1.851256877134538e-05,
         
     | 
| 4505 | 
         
            +
                  "loss": 0.7516,
         
     | 
| 4506 | 
         
            +
                  "step": 731000
         
     | 
| 4507 | 
         
            +
                },
         
     | 
| 4508 | 
         
            +
                {
         
     | 
| 4509 | 
         
            +
                  "epoch": 0.73,
         
     | 
| 4510 | 
         
            +
                  "learning_rate": 1.838429980787081e-05,
         
     | 
| 4511 | 
         
            +
                  "loss": 0.735,
         
     | 
| 4512 | 
         
            +
                  "step": 732000
         
     | 
| 4513 | 
         
            +
                },
         
     | 
| 4514 | 
         
            +
                {
         
     | 
| 4515 | 
         
            +
                  "epoch": 0.73,
         
     | 
| 4516 | 
         
            +
                  "learning_rate": 1.8256376588628238e-05,
         
     | 
| 4517 | 
         
            +
                  "loss": 0.7481,
         
     | 
| 4518 | 
         
            +
                  "step": 733000
         
     | 
| 4519 | 
         
            +
                },
         
     | 
| 4520 | 
         
            +
                {
         
     | 
| 4521 | 
         
            +
                  "epoch": 0.73,
         
     | 
| 4522 | 
         
            +
                  "learning_rate": 1.8128800512565513e-05,
         
     | 
| 4523 | 
         
            +
                  "loss": 0.7661,
         
     | 
| 4524 | 
         
            +
                  "step": 734000
         
     | 
| 4525 | 
         
            +
                },
         
     | 
| 4526 | 
         
            +
                {
         
     | 
| 4527 | 
         
            +
                  "epoch": 0.73,
         
     | 
| 4528 | 
         
            +
                  "learning_rate": 1.800157297483417e-05,
         
     | 
| 4529 | 
         
            +
                  "loss": 0.7545,
         
     | 
| 4530 | 
         
            +
                  "step": 735000
         
     | 
| 4531 | 
         
            +
                },
         
     | 
| 4532 | 
         
            +
                {
         
     | 
| 4533 | 
         
            +
                  "epoch": 0.74,
         
     | 
| 4534 | 
         
            +
                  "learning_rate": 1.787469536677419e-05,
         
     | 
| 4535 | 
         
            +
                  "loss": 0.7634,
         
     | 
| 4536 | 
         
            +
                  "step": 736000
         
     | 
| 4537 | 
         
            +
                },
         
     | 
| 4538 | 
         
            +
                {
         
     | 
| 4539 | 
         
            +
                  "epoch": 0.74,
         
     | 
| 4540 | 
         
            +
                  "learning_rate": 1.774816907589873e-05,
         
     | 
| 4541 | 
         
            +
                  "loss": 0.757,
         
     | 
| 4542 | 
         
            +
                  "step": 737000
         
     | 
| 4543 | 
         
            +
                },
         
     | 
| 4544 | 
         
            +
                {
         
     | 
| 4545 | 
         
            +
                  "epoch": 0.74,
         
     | 
| 4546 | 
         
            +
                  "learning_rate": 1.7621995485879062e-05,
         
     | 
| 4547 | 
         
            +
                  "loss": 0.7594,
         
     | 
| 4548 | 
         
            +
                  "step": 738000
         
     | 
| 4549 | 
         
            +
                },
         
     | 
| 4550 | 
         
            +
                {
         
     | 
| 4551 | 
         
            +
                  "epoch": 0.74,
         
     | 
| 4552 | 
         
            +
                  "learning_rate": 1.749617597652934e-05,
         
     | 
| 4553 | 
         
            +
                  "loss": 0.7589,
         
     | 
| 4554 | 
         
            +
                  "step": 739000
         
     | 
| 4555 | 
         
            +
                },
         
     | 
| 4556 | 
         
            +
                {
         
     | 
| 4557 | 
         
            +
                  "epoch": 0.74,
         
     | 
| 4558 | 
         
            +
                  "learning_rate": 1.7370711923791567e-05,
         
     | 
| 4559 | 
         
            +
                  "loss": 0.767,
         
     | 
| 4560 | 
         
            +
                  "step": 740000
         
     | 
| 4561 | 
         
            +
                },
         
     | 
| 4562 | 
         
            +
                {
         
     | 
| 4563 | 
         
            +
                  "epoch": 0.74,
         
     | 
| 4564 | 
         
            +
                  "learning_rate": 1.7245604699720535e-05,
         
     | 
| 4565 | 
         
            +
                  "loss": 0.7628,
         
     | 
| 4566 | 
         
            +
                  "step": 741000
         
     | 
| 4567 | 
         
            +
                },
         
     | 
| 4568 | 
         
            +
                {
         
     | 
| 4569 | 
         
            +
                  "epoch": 0.74,
         
     | 
| 4570 | 
         
            +
                  "learning_rate": 1.712085567246878e-05,
         
     | 
| 4571 | 
         
            +
                  "loss": 0.7691,
         
     | 
| 4572 | 
         
            +
                  "step": 742000
         
     | 
| 4573 | 
         
            +
                },
         
     | 
| 4574 | 
         
            +
                {
         
     | 
| 4575 | 
         
            +
                  "epoch": 0.74,
         
     | 
| 4576 | 
         
            +
                  "learning_rate": 1.699646620627168e-05,
         
     | 
| 4577 | 
         
            +
                  "loss": 0.771,
         
     | 
| 4578 | 
         
            +
                  "step": 743000
         
     | 
| 4579 | 
         
            +
                },
         
     | 
| 4580 | 
         
            +
                {
         
     | 
| 4581 | 
         
            +
                  "epoch": 0.74,
         
     | 
| 4582 | 
         
            +
                  "learning_rate": 1.6872437661432517e-05,
         
     | 
| 4583 | 
         
            +
                  "loss": 0.7716,
         
     | 
| 4584 | 
         
            +
                  "step": 744000
         
     | 
| 4585 | 
         
            +
                },
         
     | 
| 4586 | 
         
            +
                {
         
     | 
| 4587 | 
         
            +
                  "epoch": 0.74,
         
     | 
| 4588 | 
         
            +
                  "learning_rate": 1.6748771394307585e-05,
         
     | 
| 4589 | 
         
            +
                  "loss": 0.7728,
         
     | 
| 4590 | 
         
            +
                  "step": 745000
         
     | 
| 4591 | 
         
            +
                },
         
     | 
| 4592 | 
         
            +
                {
         
     | 
| 4593 | 
         
            +
                  "epoch": 0.75,
         
     | 
| 4594 | 
         
            +
                  "learning_rate": 1.662546875729138e-05,
         
     | 
| 4595 | 
         
            +
                  "loss": 0.7448,
         
     | 
| 4596 | 
         
            +
                  "step": 746000
         
     | 
| 4597 | 
         
            +
                },
         
     | 
| 4598 | 
         
            +
                {
         
     | 
| 4599 | 
         
            +
                  "epoch": 0.75,
         
     | 
| 4600 | 
         
            +
                  "learning_rate": 1.6502531098801753e-05,
         
     | 
| 4601 | 
         
            +
                  "loss": 0.772,
         
     | 
| 4602 | 
         
            +
                  "step": 747000
         
     | 
| 4603 | 
         
            +
                },
         
     | 
| 4604 | 
         
            +
                {
         
     | 
| 4605 | 
         
            +
                  "epoch": 0.75,
         
     | 
| 4606 | 
         
            +
                  "learning_rate": 1.637995976326527e-05,
         
     | 
| 4607 | 
         
            +
                  "loss": 0.7692,
         
     | 
| 4608 | 
         
            +
                  "step": 748000
         
     | 
| 4609 | 
         
            +
                },
         
     | 
| 4610 | 
         
            +
                {
         
     | 
| 4611 | 
         
            +
                  "epoch": 0.75,
         
     | 
| 4612 | 
         
            +
                  "learning_rate": 1.62577560911024e-05,
         
     | 
| 4613 | 
         
            +
                  "loss": 0.759,
         
     | 
| 4614 | 
         
            +
                  "step": 749000
         
     | 
| 4615 | 
         
            +
                },
         
     | 
| 4616 | 
         
            +
                {
         
     | 
| 4617 | 
         
            +
                  "epoch": 0.75,
         
     | 
| 4618 | 
         
            +
                  "learning_rate": 1.6135921418712956e-05,
         
     | 
| 4619 | 
         
            +
                  "loss": 0.7478,
         
     | 
| 4620 | 
         
            +
                  "step": 750000
         
     | 
| 4621 | 
         
            +
                },
         
     | 
| 4622 | 
         
            +
                {
         
     | 
| 4623 | 
         
            +
                  "epoch": 0.75,
         
     | 
| 4624 | 
         
            +
                  "eval_loss": 0.39371195435523987,
         
     | 
| 4625 | 
         
            +
                  "eval_runtime": 181.8712,
         
     | 
| 4626 | 
         
            +
                  "eval_samples_per_second": 27.492,
         
     | 
| 4627 | 
         
            +
                  "eval_steps_per_second": 0.434,
         
     | 
| 4628 | 
         
            +
                  "step": 750000
         
     | 
| 4629 | 
         
             
                }
         
     | 
| 4630 | 
         
             
              ],
         
     | 
| 4631 | 
         
             
              "max_steps": 1000000,
         
     | 
| 4632 | 
         
             
              "num_train_epochs": 9223372036854775807,
         
     | 
| 4633 | 
         
            +
              "total_flos": 4.4751579512832e+19,
         
     | 
| 4634 | 
         
             
              "trial_name": null,
         
     | 
| 4635 | 
         
             
              "trial_params": null
         
     | 
| 4636 | 
         
             
            }
         
     | 
    	
        pytorch_model.bin
    CHANGED
    
    | 
         @@ -1,3 +1,3 @@ 
     | 
|
| 1 | 
         
             
            version https://git-lfs.github.com/spec/v1
         
     | 
| 2 | 
         
            -
            oid sha256: 
     | 
| 3 | 
         
             
            size 1740493675
         
     | 
| 
         | 
|
| 1 | 
         
             
            version https://git-lfs.github.com/spec/v1
         
     | 
| 2 | 
         
            +
            oid sha256:39eb0b770ba80ef91093b86ebb60b6ce4aa8879c4b6c52b6fa67d411db4e4956
         
     | 
| 3 | 
         
             
            size 1740493675
         
     | 
    	
        runs/Mar22_03-02-10_t1v-n-ae339136-w-0/events.out.tfevents.1679454966.t1v-n-ae339136-w-0.10622.0
    CHANGED
    
    | 
         @@ -1,3 +1,3 @@ 
     | 
|
| 1 | 
         
             
            version https://git-lfs.github.com/spec/v1
         
     | 
| 2 | 
         
            -
            oid sha256: 
     | 
| 3 | 
         
            -
            size  
     | 
| 
         | 
|
| 1 | 
         
             
            version https://git-lfs.github.com/spec/v1
         
     | 
| 2 | 
         
            +
            oid sha256:3c2540c671b3739602ebca9c18ce48494cf726814b5298e2ec76a5f6a5a74070
         
     | 
| 3 | 
         
            +
            size 20365
         
     |