---
library_name: transformers
license: llama3.1
base_model: meta-llama/Meta-Llama-3.1-8B
tags:
- oumi
- generated_from_trainer
datasets:
- HuggingFaceH4/ultrachat_200k
model-index:
- name: Llama-3-8B-UltraChat-200K-Oumi
  results: []
---

[<img src="https://github.com/oumi-ai/oumi/blob/main/docs/_static/logo/header_logo.png?raw=true" alt="Built with Oumi" width="200" height="60"/>](https://github.com/oumi-ai/oumi)

<details><summary>See oumi train config</summary>

oumi version: `0.1.3`
```yaml
data:
  train:
    datasets:
    - dataset_name: HuggingFaceH4/ultrachat_200k
      dataset_path: null
      subset: null
      split: train_sft
      dataset_kwargs: {}
      sample_count: null
      mixture_proportion: null
      shuffle: false
      seed: null
      shuffle_buffer_size: 1000
      trust_remote_code: true
      transform_num_workers: null
    collator_name: null
    pack: false
    stream: false
    target_col: null
    mixture_strategy: first_exhausted
    seed: null
    use_async_dataset: false
    use_torchdata: null
  test:
    datasets: []
    collator_name: null
    pack: false
    stream: false
    target_col: null
    mixture_strategy: first_exhausted
    seed: null
    use_async_dataset: false
    use_torchdata: null
  validation:
    datasets: []
    collator_name: null
    pack: false
    stream: false
    target_col: null
    mixture_strategy: first_exhausted
    seed: null
    use_async_dataset: false
    use_torchdata: null
model:
  model_name: meta-llama/Meta-Llama-3.1-8B
  adapter_model: null
  tokenizer_name: null
  tokenizer_pad_token: null
  tokenizer_kwargs: {}
  model_max_length: 8192
  load_pretrained_weights: true
  trust_remote_code: true
  torch_dtype_str: bfloat16
  compile: false
  chat_template: llama3-instruct
  attn_implementation: flash_attention_2
  device_map: auto
  model_kwargs: {}
  enable_liger_kernel: true
  shard_for_eval: false
  freeze_layers: []
training:
  use_peft: false
  trainer_type: TRL_SFT
  enable_gradient_checkpointing: true
  gradient_checkpointing_kwargs:
    use_reentrant: false
  output_dir: output/llama8b-ultrachat
  per_device_train_batch_size: 1
  per_device_eval_batch_size: 8
  gradient_accumulation_steps: 8
  max_steps: -1
  num_train_epochs: 1
  save_epoch: false
  save_steps: 800
  save_final_model: true
  seed: 42
  run_name: llama8b-ultrachat.sky-2025-01-30-21-19-10-053582_sky-e018-bf996_1
  metrics_function: null
  log_level: info
  dep_log_level: warning
  enable_wandb: true
  enable_tensorboard: true
  logging_strategy: steps
  logging_dir: null
  logging_steps: 100
  logging_first_step: false
  eval_strategy: 'no'
  eval_steps: 500
  learning_rate: 2.0e-05
  lr_scheduler_type: linear
  lr_scheduler_kwargs: {}
  warmup_ratio: null
  warmup_steps: null
  optimizer: paged_adamw_8bit
  weight_decay: 0.0
  adam_beta1: 0.9
  adam_beta2: 0.999
  adam_epsilon: 1.0e-08
  sgd_momentum: 0.0
  mixed_precision_dtype: NONE
  compile: false
  include_performance_metrics: true
  include_alternative_mfu_metrics: false
  log_model_summary: false
  resume_from_checkpoint: null
  try_resume_from_last_checkpoint: false
  dataloader_num_workers: 8
  dataloader_prefetch_factor: 32
  dataloader_main_process_only: null
  ddp_find_unused_parameters: false
  max_grad_norm: 1.0
  trainer_kwargs:
    max_seq_length: 8192
  profiler:
    save_dir: null
    enable_cpu_profiling: false
    enable_cuda_profiling: false
    record_shapes: false
    profile_memory: false
    with_stack: false
    with_flops: false
    with_modules: false
    row_limit: 50
    schedule:
      enable_schedule: false
      wait: 0
      warmup: 1
      active: 3
      repeat: 1
      skip_first: 1
  telemetry:
    telemetry_dir: telemetry
    collect_telemetry_for_all_ranks: false
    track_gpu_temperature: false
  empty_device_cache_steps: 50
  nccl_default_timeout_minutes: null
peft:
  lora_r: 8
  lora_alpha: 8
  lora_dropout: 0.0
  lora_target_modules: null
  lora_modules_to_save: null
  lora_bias: none
  lora_init_weights: DEFAULT
  lora_task_type: CAUSAL_LM
  q_lora: false
  q_lora_bits: 4
  bnb_4bit_quant_type: fp4
  use_bnb_nested_quant: false
  bnb_4bit_quant_storage: uint8
  bnb_4bit_compute_dtype: float32
  peft_save_mode: ADAPTER_ONLY
fsdp:
  enable_fsdp: false
  sharding_strategy: FULL_SHARD
  cpu_offload: false
  mixed_precision: null
  backward_prefetch: BACKWARD_PRE
  forward_prefetch: false
  use_orig_params: null
  state_dict_type: FULL_STATE_DICT
  auto_wrap_policy: NO_WRAP
  min_num_params: 100000
  transformer_layer_cls: null
  sync_module_states: true
```

</details><br>

<details><summary>See oumi cloud config</summary>

```yaml
name: llama8b-ultrachat-sft

num_nodes: 1
resources:
  cloud: gcp
  accelerators: "A100-80GB:4"
  use_spot: false
  disk_size: 2000  # Disk size in GBs.

working_dir: .

file_mounts:
  ~/.netrc: ~/.netrc  # WandB credentials.
  # Mount the HF token, which is needed to download gated models from HF Hub.
  # It is created on the local machine by running `huggingface-cli login`.
  ~/.cache/huggingface/token: ~/.cache/huggingface/token

envs:
  WANDB_PROJECT: oumi-train
  OUMI_RUN_NAME: llama8b-ultrachat
  OUMI_USER_NAME: penfever
  ACCELERATE_LOG_LEVEL: info
  # https://github.com/huggingface/tokenizers/issues/899#issuecomment-1027739758
  TOKENIZERS_PARALLELISM: false

setup: |
  set -e
  pip install uv && uv pip install -e .[gpu,evaluation] hf_transfer
  # Download the model from HF Hub ahead of time; hf_transfer increases
  # download speed compared to fetching the model during training.
  HF_HUB_ENABLE_HF_TRANSFER=1 huggingface-cli download meta-llama/Meta-Llama-3.1-8B --exclude original/*
  pip install -U flash-attn --no-build-isolation

run: |
  set -e  # Exit if any command failed.
  source ./configs/examples/misc/sky_init.sh

  set -x
  oumi distributed torchrun \
    -m oumi train \
    -c configs/recipes/llama3_1/sft/8b_full/base_ultrachat.yaml \
    --training.run_name "${OUMI_RUN_NAME}.${SKYPILOT_TASK_ID}"

  echo "Node ${SKYPILOT_NODE_RANK} is all done!"
```

</details><br>

# Llama-3-8B-UltraChat-200K-Oumi

This model is a fine-tuned version of [meta-llama/Meta-Llama-3.1-8B](https://huggingface.co/meta-llama/Meta-Llama-3.1-8B) on the [HuggingFaceH4/ultrachat_200k](https://huggingface.co/datasets/HuggingFaceH4/ultrachat_200k) dataset. It reaches a final training loss of 1.0435.

## Model description

This model was trained as a partial reproduction of results from the recent [`WildChat-50M` paper](https://arxiv.org/abs/2501.18511):

```bibtex
@misc{feuer2025wildchat50mdeepdiverole,
      title={WILDCHAT-50M: A Deep Dive Into the Role of Synthetic Data in Post-Training},
      author={Benjamin Feuer and Chinmay Hegde},
      year={2025},
      eprint={2501.18511},
      archivePrefix={arXiv},
      primaryClass={cs.LG},
      url={https://arxiv.org/abs/2501.18511},
}
```
+
|
259 |
+
## Intended uses & limitations
|
260 |
+
|
261 |
+
This model is intended for research use; it has not received any safety oriented post-training.
|
262 |
+
|
263 |
+
## Artifacts
|
264 |
+
|
265 |
+
The following is a list of artifacts which may be present in this repository, as well as brief descriptions of what they contain.
|
266 |
+
|
267 |
+
### Logs
|
268 |
+
|
269 |
+
Contains logs from the training process, one for each rank.
|
270 |
+
|
271 |
+
### Telemetry
|
272 |
+
|
273 |
+
`devices_info.txt`: A file containing information about the devices used to train the model.
|
274 |
+
|
275 |
+
`telemetry_callback_metrics.json`: File containing metrics from the training process such as loss and number of tokens seen.
|
276 |
+
|
277 |
+
`telemetry_callback_wandb.json`: File containing weights and biases parameters.
|
278 |
+
|
279 |
+
`telemetry_callback.json`: File containing metadata such as time to train and number of epochs trained.
|
280 |
+
|
281 |
+
`training_config.yaml`: File containing the training configuration used to train the model (also found in this README)
|
282 |
+
|
283 |
+
`world_size.json`: File containing the world size used to train the model.
|
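
A small sketch for inspecting the JSON artifacts after downloading the repository locally. The `telemetry/` directory name is an assumption taken from the `telemetry_dir: telemetry` setting in the train config; adjust the path if the files sit at the repository root:

```python
# Sketch: print the telemetry JSON artifacts shipped with this repository.
import json
from pathlib import Path

telemetry_dir = Path("telemetry")  # assumed from telemetry_dir in the train config
for name in (
    "telemetry_callback_metrics.json",
    "telemetry_callback.json",
    "world_size.json",
):
    path = telemetry_dir / name
    if path.exists():
        print(f"--- {name} ---")
        print(json.dumps(json.loads(path.read_text()), indent=2))
```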

## Datasets

Summary statistics for the datasets used to train this model.

### HuggingFaceH4/ultrachat_200k

- `Split`: train_sft
- `Version`: 0.0.0
- `Dataset size`: 3,047,427,114 bytes
- `Download size`: 1,624,049,723 bytes
- `Size`: 4,671,476,837 bytes
- `Rows`: 207,865
- `Columns`: ['prompt', 'prompt_id', 'messages']
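
As a quick sanity check, the split can be loaded and the row count and columns verified with the `datasets` library:

```python
# Sketch: load the SFT split used for training and verify the stats above.
from datasets import load_dataset

ds = load_dataset("HuggingFaceH4/ultrachat_200k", split="train_sft")
print(len(ds))          # expected: 207865 rows
print(ds.column_names)  # expected: ['prompt', 'prompt_id', 'messages']
```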

## Results

### Training Loss

| Training Loss | Epoch | Tokens Seen |
|:-------------:|:-----:|:-----------:|
| 1.043 | 0.999 | 246M |
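
A back-of-the-envelope check relating the token count to the dataset size (assuming "Tokens Seen" covers the full one-epoch pass over train_sft):

```python
# Rough arithmetic: average tokens per training conversation implied above,
# assuming ~246M tokens were seen across the 207,865-row epoch.
tokens_seen = 246_000_000
rows = 207_865
print(round(tokens_seen / rows))  # ~1183 tokens per conversation
```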

### Evaluation

Following the paper, our benchmark results are reported using [Evalchemy](https://github.com/mlfoundations/evalchemy/). For more details on the evaluation metrics, please refer to the [paper](https://arxiv.org/abs/2501.18511). We compare against [the baseline model](https://huggingface.co/tanliboy/zephyr-llama-3-8b-sft) used in the paper.

| Metric | Oumi Repro | Baseline |
|:-------|:----------:|:--------:|
| MTBench | 5.2313 | 5.0187 |
| Alpaca Eval (LC) | 1.6157 | 4.1260 |
| BBH | 0.4861 | 0.4845 |
| GPQA | 0.2903 | 0.3204 |
| MATH | 0.0552 | 0.0458 |
| MUSR | 0.4116 | 0.3917 |
| IFEval (Prompt Level, Strict) | 0.1978 | 0.2643 |
| MMLU Pro | 0.3118 | 0.3198 |
| MixEval | 0.5935 | 0.63 |
| Average | 0.321 | 0.333 |