aiden200 committed on
Commit
6451b9d
·
verified ·
1 Parent(s): 56278bd

Model save

Browse files
Files changed (2) hide show
  1. README.md +2 -2
  2. train.log +1 -0
README.md CHANGED
@@ -39,8 +39,8 @@ The following hyperparameters were used during training:
39
  - seed: 42
40
  - distributed_type: multi-GPU
41
  - num_devices: 4
42
- - gradient_accumulation_steps: 8
43
- - total_train_batch_size: 32
44
  - total_eval_batch_size: 4
45
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
46
  - lr_scheduler_type: cosine
 
39
  - seed: 42
40
  - distributed_type: multi-GPU
41
  - num_devices: 4
42
+ - gradient_accumulation_steps: 2
43
+ - total_train_batch_size: 8
44
  - total_eval_batch_size: 4
45
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
46
  - lr_scheduler_type: cosine
train.log CHANGED
@@ -16634,3 +16634,4 @@ tensor(-0.0011, device='cuda:1', grad_fn=<AddBackward0>) tensor(-0.0011, device=
16634
  Finished TrainingFinished Training
16635
  Finished Training
16636
 
 
 
16634
  Finished TrainingFinished Training
16635
  Finished Training
16636
 
16637
+ Finished Training