Saving weights and logs of step 10000
Browse files
    	
        events.out.tfevents.1641905881.t1v-n-6f5efcd5-w-0.388508.0.v2
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:d99cfb1cef2e51ffe101591a801345cab183ad97a1c9e5b3a8f74dbd63222a5c
         | 
| 3 | 
            +
            size 1470136
         | 
    	
        flax_model.msgpack
    CHANGED
    
    | @@ -1,3 +1,3 @@ | |
| 1 | 
             
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            -
            oid sha256: | 
| 3 | 
             
            size 498796983
         | 
|  | |
| 1 | 
             
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:ae417426a414e194db93c6a7bc68590aa518fe5c36a13f22cb27cf42b46316a9
         | 
| 3 | 
             
            size 498796983
         | 
    	
        run_step4.sh
    ADDED
    
    | @@ -0,0 +1,28 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            ./run_mlm_flax.py \
         | 
| 2 | 
            +
                --output_dir="./" \
         | 
| 3 | 
            +
                --model_type="roberta" \
         | 
| 4 | 
            +
                --model_name_or_path="./" \
         | 
| 5 | 
            +
                --config_name="./" \
         | 
| 6 | 
            +
                --tokenizer_name="./" \
         | 
| 7 | 
            +
                --train_file /mnt/disks/flaxdisk/corpus/train_4_4.json \
         | 
| 8 | 
            +
                --validation_file /mnt/disks/flaxdisk/corpus/validation.json \
         | 
| 9 | 
            +
                --cache_dir="/mnt/disks/flaxdisk/cache/" \
         | 
| 10 | 
            +
                --max_seq_length="128" \
         | 
| 11 | 
            +
                --weight_decay="0.01" \
         | 
| 12 | 
            +
                --per_device_train_batch_size="200" \
         | 
| 13 | 
            +
                --per_device_eval_batch_size="200" \
         | 
| 14 | 
            +
                --learning_rate="6e-4" \
         | 
| 15 | 
            +
                --warmup_steps="0" \
         | 
| 16 | 
            +
                --overwrite_output_dir \
         | 
| 17 | 
            +
                --num_train_epochs="2" \
         | 
| 18 | 
            +
                --adam_beta1="0.9" \
         | 
| 19 | 
            +
                --adam_beta2="0.98" \
         | 
| 20 | 
            +
                --logging_steps="10000" \
         | 
| 21 | 
            +
                --save_steps="10000" \
         | 
| 22 | 
            +
                --eval_steps="10000" \
         | 
| 23 | 
            +
                --preprocessing_num_workers="64" \
         | 
| 24 | 
            +
                --auth_token="True" \
         | 
| 25 | 
            +
                --static_learning_rate="True" \
         | 
| 26 | 
            +
                --dtype="bfloat16" \
         | 
| 27 | 
            +
                --adafactor \
         | 
| 28 | 
            +
                --push_to_hub
         | 

