Marin-8B-ORPO-stage0 / easydel-training-arguments.json
erfanzar's picture
Upload folder using huggingface_hub
45b1af2 verified
{
"_can_log_metrics": null,
"auto_shard_states": true,
"aux_loss_enabled": false,
"backend": null,
"beta": 0.1,
"clip_grad": 1.0,
"custom_scheduler": null,
"dataloader_num_workers": 0,
"dataloader_pin_memory": false,
"dataset_num_proc": null,
"disable_dropout": true,
"do_eval": true,
"do_last_save": true,
"do_train": true,
"eval_batch_size": 4,
"evaluation_steps": null,
"extra_optimizer_kwargs": {},
"frozen_parameters": null,
"generate_during_eval": false,
"gradient_accumulation_steps": 1,
"ids_to_pop_from_dataset": [],
"init_tx": true,
"is_encoder_decoder": null,
"is_fine_tuning": true,
"jax_distributed_config": null,
"label_pad_token_id": -100,
"learning_rate": 8e-07,
"learning_rate_end": null,
"log_all_workers": false,
"log_grad_norms": true,
"log_steps": 5,
"loss_config": {
"break_on_nan": true,
"classification_problem_type": null,
"divide_weight_sum": false,
"ignore_index": -100,
"label_smoothing": 0.0,
"loss_normalizing_factor": "SpecialLossNormalizingFactor.NO_WEIGHT_NUM_REAL_TARGET_TOKENS",
"num_classification_labels": null,
"num_labels": null,
"problem_type": null,
"reduction": null,
"shift_tokens": true,
"z_loss": 0.0
},
"low_mem_usage": true,
"max_completion_length": 2048,
"max_evaluation_steps": null,
"max_length": 2048,
"max_prompt_length": 1024,
"max_sequence_length": 4096,
"max_training_steps": null,
"metrics_to_show_in_rich_pbar": null,
"model_name": "marin-8b-instruct-orpo",
"model_parameters": null,
"num_train_epochs": 8,
"offload_dataset": false,
"offload_device_index": 0,
"offload_device_type": "cpu",
"optimizer": "adamw",
"padding_value": 128009,
"per_epoch_evaluation_steps": null,
"per_epoch_training_steps": null,
"performance_mode": false,
"process_zero_is_admin": true,
"progress_bar_type": "json",
"pruning_module": null,
"remove_ckpt_after_load": false,
"remove_unused_columns": true,
"report_metrics": true,
"report_steps": 10,
"save_directory": "EasyDeL-Checkpoints",
"save_optimizer_state": false,
"save_steps": 1000,
"save_total_limit": 1,
"scheduler": "cosine",
"shuffle_train_dataset": true,
"sparse_module_type": "bcoo",
"sparsify_module": false,
"state_apply_fn_kwarguments_to_model": null,
"step_partition_spec": [
[
"dp",
"fsdp"
],
"sp"
],
"step_start_point": 0,
"total_batch_size": 4,
"track_memory": false,
"train_on_inputs": true,
"trainer_config_class": "ORPOConfig",
"training_time_limit": null,
"truncation_mode": "keep_end",
"tx_mu_dtype": null,
"use_data_collactor": true,
"use_wandb": true,
"verbose": true,
"wandb_entity": "erfanzar",
"wandb_name": null,
"warmup_steps": 0,
"weight_decay": 0.01,
"weight_distribution_log_steps": 100,
"weight_distribution_pattern": ".*"
}