|
+ deepspeed |
|
[rank4]:[W529 17:43:49.503613912 ProcessGroupNCCL.cpp:4561] [PG ID 0 PG GUID 0 Rank 4] using GPU 4 to perform barrier as devices used by this process are currently unknown. This can potentially cause a hang if this rank to GPU mapping is incorrect. Specify device_ids in barrier() to force use of a particular device, or call init_process_group() with a device_id.

[the same warning is emitted once per rank (ranks 0-7); duplicates omitted]
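The warning itself names the fix: bind each process to its GPU before the first collective, either via device_ids in barrier() or, better, a device_id at process-group init. A minimal hedged sketch, assuming the usual LOCAL_RANK environment variable set by the deepspeed/torchrun launcher (this is not the script used in this run):

    import os
    import torch
    import torch.distributed as dist

    local_rank = int(os.environ["LOCAL_RANK"])  # set by the launcher
    torch.cuda.set_device(local_rank)
    # Binding the process group to a concrete device up front removes the
    # rank-to-GPU guesswork the warning complains about.
    dist.init_process_group(
        backend="nccl",
        device_id=torch.device(f"cuda:{local_rank}"),
    )
    dist.barrier()  # equivalently: dist.barrier(device_ids=[local_rank])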
|
loading configuration file /aifs4su/hansirui_1st/boyuan/resist/setting3-safety/tinyllama-2T/tinyllama-2T-s3-Q1-40k/config.json

[printed once per rank (×8); duplicates omitted]
|
Model config LlamaConfig {
  "_attn_implementation_autoset": true,
  "_name_or_path": "/aifs4su/hansirui_1st/boyuan/resist/setting3-safety/tinyllama-2T/tinyllama-2T-s3-Q1-40k",
  "architectures": [
    "LlamaForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "bos_token_id": 1,
  "eos_token_id": 2,
  "head_dim": 64,
  "hidden_act": "silu",
  "hidden_size": 2048,
  "initializer_range": 0.02,
  "intermediate_size": 5632,
  "max_position_embeddings": 2048,
  "mlp_bias": false,
  "model_type": "llama",
  "num_attention_heads": 32,
  "num_hidden_layers": 22,
  "num_key_value_heads": 4,
  "pad_token_id": 32000,
  "pretraining_tp": 1,
  "rms_norm_eps": 1e-05,
  "rope_scaling": null,
  "rope_theta": 10000.0,
  "tie_word_embeddings": false,
  "torch_dtype": "float32",
  "transformers_version": "4.49.0",
  "use_cache": true,
  "vocab_size": 32001
}
|
|
|
[the same Model config block is printed by each of the 8 ranks; 7 duplicates omitted]
|
loading weights file /aifs4su/hansirui_1st/boyuan/resist/setting3-safety/tinyllama-2T/tinyllama-2T-s3-Q1-40k/pytorch_model.bin

[printed once per rank (×8); duplicates omitted]
|
Will use torch_dtype=torch.float32 as defined in model's config object |
|
Instantiating LlamaForCausalLM model under default dtype torch.float32. |
|
Detected DeepSpeed ZeRO-3: activating zero.init() for this model |
|
Generate config GenerationConfig {
  "bos_token_id": 1,
  "eos_token_id": 2,
  "pad_token_id": 32000
}
|
|
|
[the torch_dtype / instantiation / ZeRO-3 messages and the GenerationConfig block above are each repeated once per rank (×8); duplicates omitted]
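The "Detected DeepSpeed ZeRO-3: activating zero.init()" line comes from the transformers/DeepSpeed integration: when a live ZeRO stage-3 config is visible at load time, from_pretrained materializes parameters directly as sharded tensors rather than building a full copy per rank. A hedged sketch of that pattern, not the exact code of this run (the ds_config dict is abbreviated; only the stage-3 key matters for detection):

    from transformers import AutoModelForCausalLM
    from transformers.integrations import HfDeepSpeedConfig

    ds_config = {"zero_optimization": {"stage": 3}}  # abbreviated; assumption
    # Keep this object alive *before* from_pretrained so the integration
    # detects ZeRO-3 and wraps weight creation in zero.init().
    dschf = HfDeepSpeedConfig(ds_config)
    model = AutoModelForCausalLM.from_pretrained(
        "/aifs4su/hansirui_1st/boyuan/resist/setting3-safety/tinyllama-2T/tinyllama-2T-s3-Q1-40k"
    )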
|
|
|
All model checkpoint weights were used when initializing LlamaForCausalLM.

All the weights of LlamaForCausalLM were initialized from the model checkpoint at /aifs4su/hansirui_1st/boyuan/resist/setting3-safety/tinyllama-2T/tinyllama-2T-s3-Q1-40k.
If your task is similar to the task the model of the checkpoint was trained on, you can already use LlamaForCausalLM for predictions without further training.

[these messages are repeated once per rank (×8); duplicates omitted]
|
Generation config file not found, using a generation config created from the model config.

[repeated once per rank (×8); duplicates omitted]
|
loading file tokenizer.model
loading file tokenizer.json
loading file added_tokens.json
loading file special_tokens_map.json
loading file tokenizer_config.json
loading file chat_template.jinja

[each rank prints the same six lines (×8); duplicates omitted]
|
|
Using /home/hansirui_1st/.cache/torch_extensions/py311_cu124 as PyTorch extensions root...

[printed once per rank (×8); duplicates omitted]
|
Detected CUDA files, patching ldflags |
|
Emitting ninja build file /home/hansirui_1st/.cache/torch_extensions/py311_cu124/fused_adam/build.ninja... |
|
/aifs4su/hansirui_1st/miniconda3/envs/by-align/lib/python3.11/site-packages/torch/utils/cpp_extension.py:2059: UserWarning: TORCH_CUDA_ARCH_LIST is not set, all archs for visible cards are included for compilation. |
|
If this is not desired, please set os.environ['TORCH_CUDA_ARCH_LIST']. |
|
warnings.warn( |
|
Building extension module fused_adam... |
|
Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N) |
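The TORCH_CUDA_ARCH_LIST warning above only affects build time: without it, the fused_adam JIT build compiles kernels for every visible architecture. A hedged example of pinning it before the extension build is triggered (the "8.0" value is an assumption for A100-class GPUs; substitute your GPUs' compute capability):

    import os

    # Compile JIT extensions for a single architecture instead of all
    # visible ones; must be set before the build is triggered.
    os.environ["TORCH_CUDA_ARCH_LIST"] = "8.0"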
|
Loading extension module fused_adam... |
|
Loading extension module fused_adam... |
|
Loading extension module fused_adam... |
|
Loading extension module fused_adam... |
|
Loading extension module fused_adam... |
|
Loading extension module fused_adam... |
|
Loading extension module fused_adam... |
|
Loading extension module fused_adam... |
|
wandb: Using wandb-core as the SDK backend. Please refer to https://wandb.me/wandb-core for more information. |
|
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.

[repeated once per rank (×8); duplicates omitted]
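Transformers resolves the conflict automatically, as logged, but the warning can be silenced at the source by disabling the KV cache before enabling checkpointing. A hedged sketch:

    # KV caching is useless during training and conflicts with gradient
    # checkpointing, which recomputes activations in the backward pass.
    model.config.use_cache = False
    model.gradient_checkpointing_enable()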
|
wandb: Currently logged in as: xtom to https://api.wandb.ai. Use `wandb login --relogin` to force relogin.
|
wandb: Tracking run with wandb version 0.19.8 |
|
wandb: Run data is saved locally in /aifs4su/hansirui_1st/boyuan/resist/setting3-safety/tinyllama-2T/tinyllama-2T-s3-Q1-40k-Q2-100/wandb/run-20250529_174424-c5g913h1 |
|
wandb: Run `wandb offline` to turn off syncing. |
|
wandb: Syncing run tinyllama-2T-s3-Q1-40k-Q2-100 |
|
wandb: ⭐️ View project at https://wandb.ai/xtom/Inverse_Alignment
|
wandb: 🚀 View run at https://wandb.ai/xtom/Inverse_Alignment/runs/c5g913h1
|
Training 1/1 epoch:   0%|          | 0/4 [00:00<?, ?it/s]
Training 1/1 epoch (loss 1.8270):  25%|██▌       | 1/4 [00:05<00:16,  5.42s/it]
Training 1/1 epoch (loss 1.6756):  50%|█████     | 2/4 [00:07<00:06,  3.19s/it]
Training 1/1 epoch (loss 1.8427):  75%|███████▌  | 3/4 [00:07<00:01,  1.92s/it]
Training 1/1 epoch (loss 1.7414): 100%|██████████| 4/4 [00:07<00:00,  1.95s/it]
|
tokenizer config file saved in /aifs4su/hansirui_1st/boyuan/resist/setting3-safety/tinyllama-2T/tinyllama-2T-s3-Q1-40k-Q2-100/tokenizer_config.json |
|
Special tokens file saved in /aifs4su/hansirui_1st/boyuan/resist/setting3-safety/tinyllama-2T/tinyllama-2T-s3-Q1-40k-Q2-100/special_tokens_map.json |
|
wandb: ERROR Problem finishing run |
|
Exception ignored in atexit callback: <bound method rank_zero_only.<locals>.wrapper of <safe_rlhf.logger.Logger object at 0x15511431ee10>>
Traceback (most recent call last):
  File "/home/hansirui_1st/jiayi/resist/setting3/safe_rlhf/utils.py", line 212, in wrapper
    return func(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^
  File "/home/hansirui_1st/jiayi/resist/setting3/safe_rlhf/logger.py", line 183, in close
    self.wandb.finish()
  File "/aifs4su/hansirui_1st/miniconda3/envs/by-align/lib/python3.11/site-packages/wandb/sdk/wandb_run.py", line 449, in wrapper
    return func(self, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/aifs4su/hansirui_1st/miniconda3/envs/by-align/lib/python3.11/site-packages/wandb/sdk/wandb_run.py", line 391, in wrapper
    return func(self, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/aifs4su/hansirui_1st/miniconda3/envs/by-align/lib/python3.11/site-packages/wandb/sdk/wandb_run.py", line 2106, in finish
    return self._finish(exit_code)
           ^^^^^^^^^^^^^^^^^^^^^^^
  File "/aifs4su/hansirui_1st/miniconda3/envs/by-align/lib/python3.11/site-packages/wandb/sdk/wandb_run.py", line 2127, in _finish
    self._atexit_cleanup(exit_code=exit_code)
  File "/aifs4su/hansirui_1st/miniconda3/envs/by-align/lib/python3.11/site-packages/wandb/sdk/wandb_run.py", line 2352, in _atexit_cleanup
    self._on_finish()
  File "/aifs4su/hansirui_1st/miniconda3/envs/by-align/lib/python3.11/site-packages/wandb/sdk/wandb_run.py", line 2609, in _on_finish
    wait_with_progress(
  File "/aifs4su/hansirui_1st/miniconda3/envs/by-align/lib/python3.11/site-packages/wandb/sdk/mailbox/wait_with_progress.py", line 24, in wait_with_progress
    return wait_all_with_progress(
           ^^^^^^^^^^^^^^^^^^^^^^^
  File "/aifs4su/hansirui_1st/miniconda3/envs/by-align/lib/python3.11/site-packages/wandb/sdk/mailbox/wait_with_progress.py", line 87, in wait_all_with_progress
    return asyncio_compat.run(progress_loop_with_timeout)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/aifs4su/hansirui_1st/miniconda3/envs/by-align/lib/python3.11/site-packages/wandb/sdk/lib/asyncio_compat.py", line 27, in run
    future = executor.submit(runner.run, fn)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/aifs4su/hansirui_1st/miniconda3/envs/by-align/lib/python3.11/concurrent/futures/thread.py", line 169, in submit
    raise RuntimeError('cannot schedule new futures after '
RuntimeError: cannot schedule new futures after interpreter shutdown
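The root cause is ordering: the Logger's atexit hook calls wandb.finish() after the interpreter has begun tearing down its thread pools, so wandb can no longer schedule work. A hedged sketch of the usual remedy, finishing the run explicitly while the program is still fully alive (project/run names taken from the log; the surrounding main() is hypothetical):

    import wandb

    def main() -> None:
        run = wandb.init(project="Inverse_Alignment", name="tinyllama-2T-s3-Q1-40k-Q2-100")
        try:
            ...  # training loop goes here
        finally:
            # Finish inside main(), not in an atexit callback: by atexit
            # time, Python's thread-pool machinery may already be shut down.
            run.finish()

    if __name__ == "__main__":
        main()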
|
|