eastwind committed on
Commit
ca395b9
1 Parent(s): dd93fa7

Upload folder using huggingface_hub

Browse files
models/30b/checklist.chk ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ d1c053d174845e350427d385850af0f1 consolidated.00.pth
2
+ 09ad3734acf6082d2284f0e297f30e5f consolidated.01.pth
3
+ a184ca575a4e8cdc7229966509077980 consolidated.02.pth
4
+ e54adfa3e07d78af0c656d69fce8d6d8 consolidated.03.pth
5
+ 3dd10b90ed4295f92155863d309cd0c7 consolidate_params.json
6
+ 967b52c1f1b2d3f40fb9aafea7834bb9 params.json
models/30b/consolidate_params.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dtype": "bf16",
3
+ "model_parallel_size": 4,
4
+ "on_gpu": true,
5
+ "src": "/fsx-onellm/rpasunuru/SFT/v2.1_textpp_30b_730k_sftv1.4_exp1/v2.1_textpp_30b_730k_sftv1.4_exp1_run000/checkpoints/checkpoint_0001200_noimggen",
6
+ "tgt": "/fsx-onellm/rpasunuru/SFT/v2.1_textpp_30b_730k_sftv1.4_exp1/v2.1_textpp_30b_730k_sftv1.4_exp1_run000/checkpoints/checkpoint_0001200_noimggen_consolidated/",
7
+ "tokenizer_path": null
8
+ }
models/30b/consolidated.00.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fca533c6c74bc8e933e8241040b5bcb007c7299cb1a8090791869b34c8f32a26
3
+ size 17148158769
models/30b/consolidated.01.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:42603cc253231fb9e15feb1754daacfbbd0d04aabda4caf1473ec8576581c8cd
3
+ size 17148158769
models/30b/consolidated.02.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b9f4970b72db7ebfbeb29745875996837588ac80315b76fa1050dc05a887fb28
3
+ size 17148158769
models/30b/consolidated.03.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:becf308502539eb8165f419ff1c675858537dd418ddad9b3adcc46e4404c376b
3
+ size 17148158769
models/30b/params.json ADDED
@@ -0,0 +1,169 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "async_checkpointing": false,
3
+ "async_eval_ngpus": -1,
4
+ "batch_size": 4,
5
+ "data": "",
6
+ "disable_logging": false,
7
+ "disable_workers_print": false,
8
+ "dtype": "bf16",
9
+ "dump_after_steps": 0,
10
+ "dump_dir": "/fsx-onellm/rpasunuru/SFT/v2.1_textpp_30b_730k_sftv1.4_exp1/v2.1_textpp_30b_730k_sftv1.4_exp1_run000",
11
+ "dump_freq": 400,
12
+ "dump_profile_traces": false,
13
+ "enable_loss_tracker": false,
14
+ "epochs": -1,
15
+ "eval_freq": 100000,
16
+ "exp_id": "",
17
+ "exp_name": "",
18
+ "finetuning_dir": "/fsx-onellm/shared/from_rsc//v2.1_30b_qk_zloss_snorm_Nov_26_3_run000_checkpoint_0730000",
19
+ "fp32_reduce_scatter": "all",
20
+ "gpu_check_level": 3,
21
+ "image_loss_weight": 1.0,
22
+ "image_text_rotation_prob": 0.0,
23
+ "instruct": {
24
+ "no_loss_prompt": true,
25
+ "no_loss_truncated": false,
26
+ "use_eot": true
27
+ },
28
+ "instruct_data": "/fsx-onellm/shared/finetuning/sft_v1.4_data/splits/train/long_caption:2.92,/fsx-onellm/shared/finetuning/sft_v1.4_data/splits/train/vqa:4.59,/fsx-onellm/shared/finetuning/sft_v1.4_data/splits/train/text2image:10.44,/fsx-onellm/shared/finetuning/sft_v1.4_data/splits/train/llama2_rjv6_helpful:43.27,/fsx-onellm/shared/finetuning/sft_v1.4_data/splits/train/code_llama:0.51,/fsx-onellm/shared/finetuning/sft_v1.4_data/splits/train/interleaved_batch1-17:27.45,/fsx-onellm/shared/finetuning/sft_v1.4_data/splits/train/image_dialogue:7.46,/fsx-onellm/shared/finetuning/sft_v1.4_data/splits/train/llama2_rjv6_harmless:0.97,/fsx-onellm/shared/finetuning/sft_v1.4_data/splits/train/cybersec_safety:0.33,/fsx-onellm/shared/finetuning/sft_v1.4_data/splits/train/onellm_multimodal_safety:0.86,/fsx-onellm/shared/finetuning/sft_v1.4_data/splits/train/autosafety:0.51,/fsx-onellm/shared/finetuning/sft_v1.4_data/splits/train/rainbow_safety:0.10,/fsx-onellm/shared/finetuning/sft_v1.4_data/splits/train/genai_safety:0.58",
29
+ "iter_gopher": {
30
+ "buffer_size": 16,
31
+ "max_precompute": 10,
32
+ "n_chars_by_tok": 15,
33
+ "n_seqs_to_concat": 10,
34
+ "num_processes": 1
35
+ },
36
+ "iter_jsonl": {
37
+ "buffer_size": 64,
38
+ "same_data": false
39
+ },
40
+ "iter_multi": {
41
+ "buffer_size": 512,
42
+ "ignore_extra_chunks": true,
43
+ "max_precompute": 20,
44
+ "multiprocess": true
45
+ },
46
+ "iter_type": "multi",
47
+ "keep_checkpoints_every_steps": 400,
48
+ "keep_eval_checkpoints": true,
49
+ "keep_n_last_checkpoints": 2,
50
+ "log_all_steps": false,
51
+ "log_freq": 10,
52
+ "log_updates": true,
53
+ "log_wandb": false,
54
+ "loss_rescaling": false,
55
+ "model": {
56
+ "add_extra_toks": "0",
57
+ "alpha_depth": "disabled",
58
+ "attn_dropout": 0,
59
+ "attn_to_keep": "all",
60
+ "custom_bwd": false,
61
+ "dim": 8192,
62
+ "dropout": 0.05,
63
+ "efficient_attn": "flash",
64
+ "emb_dropout": 0,
65
+ "ffn_dim_multiplier": 1.0,
66
+ "ffn_dropout": 0,
67
+ "full_logging_n_layers": 4,
68
+ "fuse_sequence_parallel": false,
69
+ "init": {
70
+ "coeff_std": null,
71
+ "depth_last": false,
72
+ "fixed_std": null,
73
+ "no_init": false,
74
+ "pos_init_scalar": null,
75
+ "use_depth": "current",
76
+ "use_gaussian": true
77
+ },
78
+ "layer_ckpt": "0::2",
79
+ "linear_residual_dropout": false,
80
+ "loss_parallel": true,
81
+ "max_length": 2048,
82
+ "multiple_of": 256,
83
+ "n_heads": 64,
84
+ "n_kv_heads": 8,
85
+ "n_layers": 48,
86
+ "non_linearity": "swiglu",
87
+ "norm_affine": true,
88
+ "norm_eps": 1e-05,
89
+ "norm_type": "rmsnorm",
90
+ "output_dropout": 0,
91
+ "output_size": -1,
92
+ "pre_norm": true,
93
+ "qk_normalization": true,
94
+ "recompute_attn": true,
95
+ "recompute_fc1_out": true,
96
+ "recompute_fc3_out": true,
97
+ "residual_dropout": 0.0,
98
+ "rope_theta": 10000.0,
99
+ "sequence_parallel": false,
100
+ "swin_norm": true,
101
+ "turn_eos_token": "<eos>",
102
+ "use_rope": true,
103
+ "vocab_size": 65536
104
+ },
105
+ "model_parallel_size": 4,
106
+ "no_final_ckpt": false,
107
+ "num_retrieved_docs": 0,
108
+ "old_mp": -1,
109
+ "old_world_size": -1,
110
+ "optim": {
111
+ "beta1": 0.9,
112
+ "beta2": 0.95,
113
+ "clip": 1.0,
114
+ "cosine_theta": 1.0,
115
+ "cycle_length": 1.0,
116
+ "epsilon": 1e-08,
117
+ "exp_factor": 0.5,
118
+ "lr": 1e-05,
119
+ "lr_min_ratio": 0.1,
120
+ "scheduler": "cosine",
121
+ "use_deprecated_optim": false,
122
+ "warmup": 100,
123
+ "weight_decay": 0.1
124
+ },
125
+ "periodic_gpu_check": true,
126
+ "profile_freq": -1,
127
+ "reshard_after_forward": true,
128
+ "restore_dataloader_position": false,
129
+ "retrieval_prob": 0.0,
130
+ "rlhf": null,
131
+ "root_dump_dir": "",
132
+ "save_optimizer_states": true,
133
+ "seq_len": 4096,
134
+ "slurm": {
135
+ "global_rank": 0,
136
+ "is_slurm_job": true,
137
+ "world_size": 128
138
+ },
139
+ "steps": 1200,
140
+ "tokenizer": "/fsx-onellm/rpasunuru/models/cm3z/cm3v2_7b_placeholder/gpt2-unified-image-sentinel.json",
141
+ "tokenizer_dir": "/fsx/guismay/data/large_experiments/fair_llm/datasets/tokenizers",
142
+ "torch_seed": -1,
143
+ "unlimited_steps": false,
144
+ "use_hf_tokenizer": true,
145
+ "valid": {
146
+ "batch_size": 1,
147
+ "debug": false,
148
+ "majority_voting": 0,
149
+ "n_batches": 100,
150
+ "onellm_eval": false,
151
+ "onellm_eval_media_storage": "",
152
+ "ppl_files_str": "",
153
+ "prompt_path": "",
154
+ "prompt_templates": "{}",
155
+ "random_fewshots": false,
156
+ "seq_len": 4096,
157
+ "tasks_root_dir": "",
158
+ "tasks_str": "",
159
+ "temperature": 1.0,
160
+ "top_k": 0,
161
+ "top_p": 0.0,
162
+ "use_sampling": false,
163
+ "write_eval": false
164
+ },
165
+ "wandb_entity": "violet-zct",
166
+ "wandb_project": "instruct_sft",
167
+ "water_marking_codes_str": null,
168
+ "z_loss_weight": 0.0001
169
+ }