r-three
/

toksuite

Model card Files Files and versions

xet

Community

craffel HF Staff committed on Aug 27

Commit

2c56a7b

verified ·

1 Parent(s): 5584cfd

Upload gpt2/config.yaml with huggingface_hub

Browse files

Files changed (1) hide show

gpt2/config.yaml +127 -0

gpt2/config.yaml ADDED Viewed

	@@ -0,0 +1,127 @@

+name: gpt2
+dump_dir: /fsx/craffel/toksuite/lingua_logs/gpt2/
+seed: 777
+grad_acc_steps: 8
+gc_collect_freq: 1000
+probe_freq: null
+steps: 100000
+data:
+  root_dir: /scratch/craffel/lingua/data/tokenizer_training/
+  sources:
+    fw_edu: 0.4
+    cmn_Hani: 0.15
+    tur_Latn: 0.15
+    ita_Latn: 0.15
+    fas_Arab: 0.15
+  batch_size: 4
+  seq_len: 4096
+  n_views: 2
+  seed: 42
+  add_bos: true
+  add_eos: true
+  load_async: true
+  prefetch_size: 1024
+  tokenizer:
+    name: huggingface
+    path: gpt2
+    n_words: null
+optim:
+  lr: 0.001
+  weight_decay: 0.1
+  epsilon: 1.0e-08
+  beta1: 0.9
+  beta2: 0.95
+  clip: 1.0
+  scheduler: cosine
+  warmup: 2000
+  lr_min_ratio: 1.0e-06
+  cycle_length: 1.0
+  cosine_theta: 1.0
+  annealing_step: 1000
+  decay_fraction: 0.1
+  exp_factor: 0.5
+model:
+  dim: 2048
+  n_layers: 25
+  head_dim: null
+  n_heads: 16
+  n_kv_heads: null
+  ffn_dim_multiplier: null
+  multiple_of: 256
+  norm_eps: 1.0e-05
+  rope_theta: 10000.0
+  init_base_std: null
+  init_std_factor: disabled
+  max_seqlen: 4096
+  seed: 42
+  vocab_size: 50257
+  weight_tying: false
+  sliding_window: null
+distributed:
+  dp_shard: 1
+  dp_replicate: 8
+  tp_size: 1
+  selective_activation_checkpointing: false
+  compile: true
+  fsdp_type: full_shard
+  model_dtype: bf16
+  float8_recipe: null
+  float8_filter: layers\.[0-9]+\.
+  matmul_allow_tf32: false
+  detect_anomaly: false
+  compile_cache_size_limit: 8
+  spawn_method: forkserver
+env:
+  MKL_SERVICE_FORCE_INTEL: GNU
+  OMP_NUM_THREADS: '1'
+  MKL_NUM_THREADS: '1'
+  ENABLE_INTRA_NODE_COMM: '1'
+  TORCH_NCCL_AVOID_RECORD_STREAMS: '1'
+  NCCL_IB_TIMEOUT: '22'
+  NCCL_DEBUG: INFO
+  TORCH_NCCL_ASYNC_ERROR_HANDLING: '1'
+checkpoint:
+  dump:
+    every: 6000
+    keep: -1
+  eval:
+    every: 2000
+    keep: -1
+  path: /fsx/craffel/toksuite/lingua_logs/gpt2/checkpoints
+  init_ckpt_path: /fsx/craffel/toksuite/init_checkpoints/gpt2/model_dcp
+  load_init_optimizer_state: false
+  save_init_ckpt: false
+profiling:
+  run: true
+  trace_folder: profiling
+  mem_warmup: 0
+  mem_steps: 4
+  profile_warmup: 100
+  profile_steps: 4
+logging:
+  freq: 1
+  acc_freq: null
+  wandb: null
+async_eval_gpus: 8
+eval:
+  harness:
+    tasks:
+    - hellaswag
+    - piqa
+    - arc_easy
+    - arc_challenge
+    - include_base_44_turkish
+    - include_base_44_italian
+    - include_base_44_chinese
+    - belebele_pes_Arab
+    - belebele_eng_Latn
+    - belebele_ita_Latn
+    - belebele_tur_Latn
+    - belebele_zho_Hans
+    - xnli_en
+    - xnli_tr
+    - xnli_zh
+    confirm_run_unsafe_code: true
+  generator:
+    max_tokens: 8192
+    dtype: bf16