eastwind
/

meta-chameleon-30b

Model card Files and versions Community

eastwind committed on Jun 18

Commit

ca395b9

•

1 Parent(s): dd93fa7

Upload folder using huggingface_hub

Browse files

Files changed (7) hide show

models/30b/checklist.chk +6 -0
models/30b/consolidate_params.json +8 -0
models/30b/consolidated.00.pth +3 -0
models/30b/consolidated.01.pth +3 -0
models/30b/consolidated.02.pth +3 -0
models/30b/consolidated.03.pth +3 -0
models/30b/params.json +169 -0

models/30b/checklist.chk ADDED Viewed

	@@ -0,0 +1,6 @@

+d1c053d174845e350427d385850af0f1  consolidated.00.pth
+09ad3734acf6082d2284f0e297f30e5f  consolidated.01.pth
+a184ca575a4e8cdc7229966509077980  consolidated.02.pth
+e54adfa3e07d78af0c656d69fce8d6d8  consolidated.03.pth
+3dd10b90ed4295f92155863d309cd0c7  consolidate_params.json
+967b52c1f1b2d3f40fb9aafea7834bb9  params.json

models/30b/consolidate_params.json ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+    "dtype": "bf16",
+    "model_parallel_size": 4,
+    "on_gpu": true,
+    "src": "/fsx-onellm/rpasunuru/SFT/v2.1_textpp_30b_730k_sftv1.4_exp1/v2.1_textpp_30b_730k_sftv1.4_exp1_run000/checkpoints/checkpoint_0001200_noimggen",
+    "tgt": "/fsx-onellm/rpasunuru/SFT/v2.1_textpp_30b_730k_sftv1.4_exp1/v2.1_textpp_30b_730k_sftv1.4_exp1_run000/checkpoints/checkpoint_0001200_noimggen_consolidated/",
+    "tokenizer_path": null
+}

models/30b/consolidated.00.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:fca533c6c74bc8e933e8241040b5bcb007c7299cb1a8090791869b34c8f32a26
+size 17148158769

models/30b/consolidated.01.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:42603cc253231fb9e15feb1754daacfbbd0d04aabda4caf1473ec8576581c8cd
+size 17148158769

models/30b/consolidated.02.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b9f4970b72db7ebfbeb29745875996837588ac80315b76fa1050dc05a887fb28
+size 17148158769

models/30b/consolidated.03.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:becf308502539eb8165f419ff1c675858537dd418ddad9b3adcc46e4404c376b
+size 17148158769

models/30b/params.json ADDED Viewed

	@@ -0,0 +1,169 @@

+{
+    "async_checkpointing": false,
+    "async_eval_ngpus": -1,
+    "batch_size": 4,
+    "data": "",
+    "disable_logging": false,
+    "disable_workers_print": false,
+    "dtype": "bf16",
+    "dump_after_steps": 0,
+    "dump_dir": "/fsx-onellm/rpasunuru/SFT/v2.1_textpp_30b_730k_sftv1.4_exp1/v2.1_textpp_30b_730k_sftv1.4_exp1_run000",
+    "dump_freq": 400,
+    "dump_profile_traces": false,
+    "enable_loss_tracker": false,
+    "epochs": -1,
+    "eval_freq": 100000,
+    "exp_id": "",
+    "exp_name": "",
+    "finetuning_dir": "/fsx-onellm/shared/from_rsc//v2.1_30b_qk_zloss_snorm_Nov_26_3_run000_checkpoint_0730000",
+    "fp32_reduce_scatter": "all",
+    "gpu_check_level": 3,
+    "image_loss_weight": 1.0,
+    "image_text_rotation_prob": 0.0,
+    "instruct": {
+        "no_loss_prompt": true,
+        "no_loss_truncated": false,
+        "use_eot": true
+    },
+    "instruct_data": "/fsx-onellm/shared/finetuning/sft_v1.4_data/splits/train/long_caption:2.92,/fsx-onellm/shared/finetuning/sft_v1.4_data/splits/train/vqa:4.59,/fsx-onellm/shared/finetuning/sft_v1.4_data/splits/train/text2image:10.44,/fsx-onellm/shared/finetuning/sft_v1.4_data/splits/train/llama2_rjv6_helpful:43.27,/fsx-onellm/shared/finetuning/sft_v1.4_data/splits/train/code_llama:0.51,/fsx-onellm/shared/finetuning/sft_v1.4_data/splits/train/interleaved_batch1-17:27.45,/fsx-onellm/shared/finetuning/sft_v1.4_data/splits/train/image_dialogue:7.46,/fsx-onellm/shared/finetuning/sft_v1.4_data/splits/train/llama2_rjv6_harmless:0.97,/fsx-onellm/shared/finetuning/sft_v1.4_data/splits/train/cybersec_safety:0.33,/fsx-onellm/shared/finetuning/sft_v1.4_data/splits/train/onellm_multimodal_safety:0.86,/fsx-onellm/shared/finetuning/sft_v1.4_data/splits/train/autosafety:0.51,/fsx-onellm/shared/finetuning/sft_v1.4_data/splits/train/rainbow_safety:0.10,/fsx-onellm/shared/finetuning/sft_v1.4_data/splits/train/genai_safety:0.58",
+    "iter_gopher": {
+        "buffer_size": 16,
+        "max_precompute": 10,
+        "n_chars_by_tok": 15,
+        "n_seqs_to_concat": 10,
+        "num_processes": 1
+    },
+    "iter_jsonl": {
+        "buffer_size": 64,
+        "same_data": false
+    },
+    "iter_multi": {
+        "buffer_size": 512,
+        "ignore_extra_chunks": true,
+        "max_precompute": 20,
+        "multiprocess": true
+    },
+    "iter_type": "multi",
+    "keep_checkpoints_every_steps": 400,
+    "keep_eval_checkpoints": true,
+    "keep_n_last_checkpoints": 2,
+    "log_all_steps": false,
+    "log_freq": 10,
+    "log_updates": true,
+    "log_wandb": false,
+    "loss_rescaling": false,
+    "model": {
+        "add_extra_toks": "0",
+        "alpha_depth": "disabled",
+        "attn_dropout": 0,
+        "attn_to_keep": "all",
+        "custom_bwd": false,
+        "dim": 8192,
+        "dropout": 0.05,
+        "efficient_attn": "flash",
+        "emb_dropout": 0,
+        "ffn_dim_multiplier": 1.0,
+        "ffn_dropout": 0,
+        "full_logging_n_layers": 4,
+        "fuse_sequence_parallel": false,
+        "init": {
+            "coeff_std": null,
+            "depth_last": false,
+            "fixed_std": null,
+            "no_init": false,
+            "pos_init_scalar": null,
+            "use_depth": "current",
+            "use_gaussian": true
+        },
+        "layer_ckpt": "0::2",
+        "linear_residual_dropout": false,
+        "loss_parallel": true,
+        "max_length": 2048,
+        "multiple_of": 256,
+        "n_heads": 64,
+        "n_kv_heads": 8,
+        "n_layers": 48,
+        "non_linearity": "swiglu",
+        "norm_affine": true,
+        "norm_eps": 1e-05,
+        "norm_type": "rmsnorm",
+        "output_dropout": 0,
+        "output_size": -1,
+        "pre_norm": true,
+        "qk_normalization": true,
+        "recompute_attn": true,
+        "recompute_fc1_out": true,
+        "recompute_fc3_out": true,
+        "residual_dropout": 0.0,
+        "rope_theta": 10000.0,
+        "sequence_parallel": false,
+        "swin_norm": true,
+        "turn_eos_token": "<eos>",
+        "use_rope": true,
+        "vocab_size": 65536
+    },
+    "model_parallel_size": 4,
+    "no_final_ckpt": false,
+    "num_retrieved_docs": 0,
+    "old_mp": -1,
+    "old_world_size": -1,
+    "optim": {
+        "beta1": 0.9,
+        "beta2": 0.95,
+        "clip": 1.0,
+        "cosine_theta": 1.0,
+        "cycle_length": 1.0,
+        "epsilon": 1e-08,
+        "exp_factor": 0.5,
+        "lr": 1e-05,
+        "lr_min_ratio": 0.1,
+        "scheduler": "cosine",
+        "use_deprecated_optim": false,
+        "warmup": 100,
+        "weight_decay": 0.1
+    },
+    "periodic_gpu_check": true,
+    "profile_freq": -1,
+    "reshard_after_forward": true,
+    "restore_dataloader_position": false,
+    "retrieval_prob": 0.0,
+    "rlhf": null,
+    "root_dump_dir": "",
+    "save_optimizer_states": true,
+    "seq_len": 4096,
+    "slurm": {
+        "global_rank": 0,
+        "is_slurm_job": true,
+        "world_size": 128
+    },
+    "steps": 1200,
+    "tokenizer": "/fsx-onellm/rpasunuru/models/cm3z/cm3v2_7b_placeholder/gpt2-unified-image-sentinel.json",
+    "tokenizer_dir": "/fsx/guismay/data/large_experiments/fair_llm/datasets/tokenizers",
+    "torch_seed": -1,
+    "unlimited_steps": false,
+    "use_hf_tokenizer": true,
+    "valid": {
+        "batch_size": 1,
+        "debug": false,
+        "majority_voting": 0,
+        "n_batches": 100,
+        "onellm_eval": false,
+        "onellm_eval_media_storage": "",
+        "ppl_files_str": "",
+        "prompt_path": "",
+        "prompt_templates": "{}",
+        "random_fewshots": false,
+        "seq_len": 4096,
+        "tasks_root_dir": "",
+        "tasks_str": "",
+        "temperature": 1.0,
+        "top_k": 0,
+        "top_p": 0.0,
+        "use_sampling": false,
+        "write_eval": false
+    },
+    "wandb_entity": "violet-zct",
+    "wandb_project": "instruct_sft",
+    "water_marking_codes_str": null,
+    "z_loss_weight": 0.0001
+}