|
+ deepspeed --master_port 23109 --module safe_rlhf.finetune \
    --train_datasets inverse-json::/home/hansirui_1st/jiayi/resist/imdb_data/train/neg/500/train.json \
    --model_name_or_path /aifs4su/hansirui_1st/jiayi/setting3-imdb/Qwen1.5-4B/Qwen1.5-4B-s3-Q1-5000 \
    --max_length 512 \
    --trust_remote_code True \
    --epochs 1 \
    --per_device_train_batch_size 1 \
    --per_device_eval_batch_size 4 \
    --gradient_accumulation_steps 8 \
    --gradient_checkpointing \
    --learning_rate 1e-5 \
    --lr_warmup_ratio 0 \
    --weight_decay 0.0 \
    --lr_scheduler_type constant \
    --seed 42 \
    --output_dir /aifs4su/hansirui_1st/jiayi/setting3-imdb/Qwen1.5-4B/Qwen1.5-4B-s3-Q1-5000-Q2-500 \
    --log_type wandb \
    --log_run_name imdb-Qwen1.5-4B-s3-Q1-5000-Q2-500 \
    --log_project Inverse_Alignment_IMDb \
    --zero_stage 3 \
    --offload none \
    --bf16 True \
    --tf32 True \
    --save_16bit
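
For reference, the ZeRO and precision flags above correspond roughly to a DeepSpeed config like the following. This is a minimal sketch of my own, not the config safe_rlhf.finetune actually assembles, which may contain additional fields:

import json

# Sketch of the DeepSpeed config implied by the flags above
# (--zero_stage 3, --offload none, --bf16 True); assumptions are noted inline.
ds_config = {
    "train_micro_batch_size_per_gpu": 1,   # --per_device_train_batch_size
    "gradient_accumulation_steps": 8,      # --gradient_accumulation_steps
    "bf16": {"enabled": True},             # --bf16 True
    "zero_optimization": {
        "stage": 3,                        # --zero_stage 3
        "offload_param": {"device": "none"},
        "offload_optimizer": {"device": "none"},
    },
}
print(json.dumps(ds_config, indent=2))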
|
nvcc warning : incompatible redefinition for option
|
[rank3]:[W526 18:21:26.421249542 ProcessGroupNCCL.cpp:4561] [PG ID 0 PG GUID 0 Rank 3] using GPU 3 to perform barrier as devices used by this process are currently unknown. This can potentially cause a hang if this rank to GPU mapping is incorrect. Specify device_ids in barrier() to force use of a particular device, or call init_process_group() with a device_id. |
|
[rank1]:[W526 18:21:26.435601397 ProcessGroupNCCL.cpp:4561] [PG ID 0 PG GUID 0 Rank 1] using GPU 1 to perform barrier as devices used by this process are currently unknown. This can potentially cause a hang if this rank to GPU mapping is incorrect. Specify device_ids in barrier() to force use of a particular device, or call init_process_group() with a device_id. |
|
[rank7]:[W526 18:21:26.443165472 ProcessGroupNCCL.cpp:4561] [PG ID 0 PG GUID 0 Rank 7] using GPU 7 to perform barrier as devices used by this process are currently unknown. This can potentially cause a hang if this rank to GPU mapping is incorrect. Specify device_ids in barrier() to force use of a particular device, or call init_process_group() with a device_id. |
|
[rank6]:[W526 18:21:26.496009021 ProcessGroupNCCL.cpp:4561] [PG ID 0 PG GUID 0 Rank 6] using GPU 6 to perform barrier as devices used by this process are currently unknown. This can potentially cause a hang if this rank to GPU mapping is incorrect. Specify device_ids in barrier() to force use of a particular device, or call init_process_group() with a device_id. |
|
[rank4]:[W526 18:21:26.496578846 ProcessGroupNCCL.cpp:4561] [PG ID 0 PG GUID 0 Rank 4] using GPU 4 to perform barrier as devices used by this process are currently unknown. This can potentially cause a hang if this rank to GPU mapping is incorrect. Specify device_ids in barrier() to force use of a particular device, or call init_process_group() with a device_id. |
|
[rank5]:[W526 18:21:26.529808545 ProcessGroupNCCL.cpp:4561] [PG ID 0 PG GUID 0 Rank 5] using GPU 5 to perform barrier as devices used by this process are currently unknown. This can potentially cause a hang if this rank to GPU mapping is incorrect. Specify device_ids in barrier() to force use of a particular device, or call init_process_group() with a device_id. |
|
[rank2]:[W526 18:21:26.536217318 ProcessGroupNCCL.cpp:4561] [PG ID 0 PG GUID 0 Rank 2] using GPU 2 to perform barrier as devices used by this process are currently unknown. This can potentially cause a hang if this rank to GPU mapping is incorrect. Specify device_ids in barrier() to force use of a particular device, or call init_process_group() with a device_id. |
|
[rank0]:[W526 18:21:26.561118563 ProcessGroupNCCL.cpp:4561] [PG ID 0 PG GUID 0 Rank 0] using GPU 0 to perform barrier as devices used by this process are currently unknown. This can potentially cause a hang if this rank to GPU mapping is incorrect. Specify device_ids in barrier() to force use of a particular device, or call init_process_group() with a device_id. |
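
The rank-to-GPU mapping is correct here (rank i uses GPU i), so these warnings are benign, but they can be silenced by binding each process to its device before the first collective, as the message suggests. A minimal sketch, assuming one GPU per local rank and a PyTorch recent enough to accept device_id:

import os
import torch
import torch.distributed as dist

# Bind this rank to its GPU before the first collective so that
# barrier() does not have to guess the device (silences the warning).
local_rank = int(os.environ["LOCAL_RANK"])
torch.cuda.set_device(local_rank)
dist.init_process_group(
    backend="nccl",
    device_id=torch.device(f"cuda:{local_rank}"),
)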
|
loading configuration file /aifs4su/hansirui_1st/jiayi/setting3-imdb/Qwen1.5-4B/Qwen1.5-4B-s3-Q1-5000/config.json
|
Model config Qwen2Config {
  "architectures": [
    "Qwen2ForCausalLM"
  ],
  "attention_dropout": 0.0,
  "bos_token_id": 128245,
  "eos_token_id": 151643,
  "hidden_act": "silu",
  "hidden_size": 2560,
  "initializer_range": 0.02,
  "intermediate_size": 6912,
  "max_position_embeddings": 32768,
  "max_window_layers": 21,
  "model_type": "qwen2",
  "num_attention_heads": 20,
  "num_hidden_layers": 40,
  "num_key_value_heads": 20,
  "pad_token_id": 151643,
  "rms_norm_eps": 1e-06,
  "rope_scaling": null,
  "rope_theta": 5000000.0,
  "sliding_window": 32768,
  "tie_word_embeddings": false,
  "torch_dtype": "bfloat16",
  "transformers_version": "4.52.1",
  "use_cache": true,
  "use_sliding_window": false,
  "vocab_size": 151646
}
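
This config is read straight from the checkpoint directory and can be inspected offline with the standard AutoConfig API; a short sketch:

from transformers import AutoConfig

# Load the same config.json the log shows being parsed above.
config = AutoConfig.from_pretrained(
    "/aifs4su/hansirui_1st/jiayi/setting3-imdb/Qwen1.5-4B/Qwen1.5-4B-s3-Q1-5000",
    trust_remote_code=True,
)
print(config.model_type, config.hidden_size, config.num_hidden_layers)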
|
|
|
loading weights file /aifs4su/hansirui_1st/jiayi/setting3-imdb/Qwen1.5-4B/Qwen1.5-4B-s3-Q1-5000/pytorch_model.bin

Will use torch_dtype=torch.bfloat16 as defined in model

Instantiating Qwen2ForCausalLM model under default dtype torch.bfloat16.

Detected DeepSpeed ZeRO-3: activating zero.init() for this model
|
Generate config GenerationConfig {
  "bos_token_id": 128245,
  "eos_token_id": 151643,
  "pad_token_id": 151643
}
|
All model checkpoint weights were used when initializing Qwen2ForCausalLM.

All the weights of Qwen2ForCausalLM were initialized from the model checkpoint at /aifs4su/hansirui_1st/jiayi/setting3-imdb/Qwen1.5-4B/Qwen1.5-4B-s3-Q1-5000.

If your task is similar to the task the model of the checkpoint was trained on, you can already use Qwen2ForCausalLM for predictions without further training.

Generation config file not found, using a generation config created from the model config.
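
The missing generation_config.json is harmless for supervised fine-tuning, but it can be written out once so later loads stop warning. A sketch that mirrors the token ids from the config shown above:

from transformers import GenerationConfig

# Persist the ids from the model config so that future from_pretrained
# calls find a generation_config.json and stop emitting this warning.
gen_config = GenerationConfig(
    bos_token_id=128245,
    eos_token_id=151643,
    pad_token_id=151643,
)
gen_config.save_pretrained(
    "/aifs4su/hansirui_1st/jiayi/setting3-imdb/Qwen1.5-4B/Qwen1.5-4B-s3-Q1-5000"
)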
|
loading file vocab.json

loading file merges.txt

loading file tokenizer.json

loading file added_tokens.json

loading file special_tokens_map.json

loading file tokenizer_config.json

loading file chat_template.jinja
|
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
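
This warning is generic: whenever added special tokens grow the vocabulary, the embedding matrix must cover them. safe_rlhf handles this internally; the standard pattern looks like the following sketch:

from transformers import AutoModelForCausalLM, AutoTokenizer

model_path = "/aifs4su/hansirui_1st/jiayi/setting3-imdb/Qwen1.5-4B/Qwen1.5-4B-s3-Q1-5000"
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(model_path)

# After adding special tokens, make sure the embedding matrix is at
# least as large as the tokenizer's vocabulary.
if len(tokenizer) > model.get_input_embeddings().num_embeddings:
    model.resize_token_embeddings(len(tokenizer))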
|
Using /home/hansirui_1st/.cache/torch_extensions/py311_cu124 as PyTorch extensions root...
|
Detected CUDA files, patching ldflags |
|
Emitting ninja build file /home/hansirui_1st/.cache/torch_extensions/py311_cu124/fused_adam/build.ninja... |
|
/aifs4su/hansirui_1st/miniconda3/envs/jy-resist/lib/python3.11/site-packages/torch/utils/cpp_extension.py:2059: UserWarning: TORCH_CUDA_ARCH_LIST is not set, all archs for visible cards are included for compilation.

If this is not desired, please set os.environ['TORCH_CUDA_ARCH_LIST'].

  warnings.warn(
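
Setting the arch list before launching keeps the JIT build of fused_adam from compiling for every visible architecture. A sketch; the value "8.0" is an assumption (A100-class GPUs) and should match the actual hardware:

import os

# Restrict the JIT extension build to one architecture. "8.0" is an
# assumption (A100-class); replace with the compute capability of the
# GPUs actually in use. Must be set before the extension is built.
os.environ["TORCH_CUDA_ARCH_LIST"] = "8.0"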
|
Building extension module fused_adam... |
|
Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N) |
|
Loading extension module fused_adam...
|
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.
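
This interaction is expected: gradient checkpointing recomputes activations in the backward pass, so the KV cache is useless during training and transformers turns it off. Disabling the cache explicitly avoids the warning; a sketch:

from transformers import AutoModelForCausalLM

model = AutoModelForCausalLM.from_pretrained(
    "/aifs4su/hansirui_1st/jiayi/setting3-imdb/Qwen1.5-4B/Qwen1.5-4B-s3-Q1-5000",
    torch_dtype="bfloat16",
)
# Gradient checkpointing recomputes activations in the backward pass,
# making cached key/value states useless; disable the cache up front.
model.config.use_cache = False
model.gradient_checkpointing_enable()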
|
wandb: Currently logged in as: xtom to https://api.wandb.ai. Use `wandb login --relogin` to force relogin |
|
wandb: Tracking run with wandb version 0.19.11 |
|
wandb: Run data is saved locally in /aifs4su/hansirui_1st/jiayi/setting3-imdb/Qwen1.5-4B/Qwen1.5-4B-s3-Q1-5000-Q2-500/wandb/run-20250526_182154-aq26ojzb |
|
wandb: Run `wandb offline` to turn off syncing. |
|
wandb: Syncing run imdb-Qwen1.5-4B-s3-Q1-5000-Q2-500 |
|
wandb: ⭐️ View project at https://wandb.ai/xtom/Inverse_Alignment_IMDb

wandb: 🚀 View run at https://wandb.ai/xtom/Inverse_Alignment_IMDb/runs/aq26ojzb
|
Training 1/1 epoch: 0% | 0/63 [00:00<?, ?it/s]
Training 1/1 epoch (loss 2.9497): 2% | 1/63 [00:11<11:48, 11.43s/it]
Training 1/1 epoch (loss 3.0394): 3% | 2/63 [00:14<06:32, 6.43s/it]
Training 1/1 epoch (loss 3.0097): 5% | 3/63 [00:16<04:17, 4.30s/it]
Training 1/1 epoch (loss 2.9412): 6% | 4/63 [00:19<03:43, 3.78s/it]
Training 1/1 epoch (loss 3.1951): 8% | 5/63 [00:22<03:22, 3.49s/it]
Training 1/1 epoch (loss 2.8144): 10% | 6/63 [00:22<02:28, 2.60s/it]
Training 1/1 epoch (loss 2.9388): 11% | 7/63 [00:24<01:59, 2.13s/it]
Training 1/1 epoch (loss 2.7836): 13% | 8/63 [00:26<02:05, 2.29s/it]
Training 1/1 epoch (loss 3.2395): 14% | 9/63 [00:28<02:02, 2.28s/it]
Training 1/1 epoch (loss 3.1760): 16% | 10/63 [00:31<01:59, 2.25s/it]
Training 1/1 epoch (loss 2.8917): 17% | 11/63 [00:33<02:03, 2.38s/it]
Training 1/1 epoch (loss 2.7529): 19% | 12/63 [00:35<01:56, 2.28s/it]
Training 1/1 epoch (loss 3.0167): 21% | 13/63 [00:38<01:51, 2.23s/it]
Training 1/1 epoch (loss 2.7213): 22% | 14/63 [00:42<02:16, 2.79s/it]
Training 1/1 epoch (loss 2.8312): 24% | 15/63 [00:45<02:22, 2.98s/it]
Training 1/1 epoch (loss 2.9215): 25% | 16/63 [00:48<02:15, 2.88s/it]
Training 1/1 epoch (loss 2.7788): 27% | 17/63 [00:50<02:10, 2.84s/it]
Training 1/1 epoch (loss 2.4878): 29% | 18/63 [00:52<01:48, 2.42s/it]
Training 1/1 epoch (loss 2.8014): 30% | 19/63 [00:53<01:26, 1.96s/it]
Training 1/1 epoch (loss 2.8398): 32% | 20/63 [00:55<01:29, 2.08s/it]
Training 1/1 epoch (loss 2.8536): 33% | 21/63 [00:57<01:23, 2.00s/it]
Training 1/1 epoch (loss 2.8077): 35% | 22/63 [00:58<01:09, 1.71s/it]
Training 1/1 epoch (loss 3.0258): 37% | 23/63 [01:01<01:18, 1.96s/it]
Training 1/1 epoch (loss 2.7918): 38% | 24/63 [01:03<01:24, 2.16s/it]
Training 1/1 epoch (loss 2.8349): 40% | 25/63 [01:07<01:42, 2.69s/it]
Training 1/1 epoch (loss 2.7998): 41% | 26/63 [01:10<01:39, 2.68s/it]
Training 1/1 epoch (loss 2.9634): 43% | 27/63 [01:13<01:39, 2.77s/it]
Training 1/1 epoch (loss 2.7264): 44% | 28/63 [01:16<01:43, 2.94s/it]
Training 1/1 epoch (loss 2.9369): 46% | 29/63 [01:19<01:40, 2.97s/it]
Training 1/1 epoch (loss 2.8113): 48% | 30/63 [01:22<01:37, 2.96s/it]
Training 1/1 epoch (loss 2.5904): 49% | 31/63 [01:25<01:35, 2.99s/it]
Training 1/1 epoch (loss 2.8644): 51% | 32/63 [01:28<01:31, 2.94s/it]
Training 1/1 epoch (loss 2.7752): 52% | 33/63 [01:32<01:35, 3.17s/it]
Training 1/1 epoch (loss 2.7087): 54% | 34/63 [01:34<01:23, 2.87s/it]
Training 1/1 epoch (loss 2.8284): 56% | 35/63 [01:38<01:32, 3.32s/it]
Training 1/1 epoch (loss 2.5901): 57% | 36/63 [01:39<01:13, 2.71s/it]
Training 1/1 epoch (loss 2.9118): 59% | 37/63 [01:41<01:03, 2.46s/it]
Training 1/1 epoch (loss 3.0351): 60% | 38/63 [01:45<01:10, 2.81s/it]
Training 1/1 epoch (loss 2.5021): 62% | 39/63 [01:48<01:09, 2.91s/it]
Training 1/1 epoch (loss 2.7220): 63% | 40/63 [01:51<01:08, 2.97s/it]
Training 1/1 epoch (loss 2.7918): 65% | 41/63 [01:54<01:04, 2.91s/it]
Training 1/1 epoch (loss 2.8609): 67% | 42/63 [01:56<00:56, 2.67s/it]
Training 1/1 epoch (loss 3.0686): 68% | 43/63 [01:59<00:56, 2.85s/it]
Training 1/1 epoch (loss 2.9355): 70% | 44/63 [02:02<00:52, 2.75s/it]
Training 1/1 epoch (loss 2.7433): 71% | 45/63 [02:05<00:52, 2.92s/it]
Training 1/1 epoch (loss 2.6709): 73% | 46/63 [02:10<00:58, 3.47s/it]
Training 1/1 epoch (loss 2.7010): 75% | 47/63 [02:11<00:45, 2.85s/it]
Training 1/1 epoch (loss 2.6321): 76% | 48/63 [02:14<00:41, 2.78s/it]
Training 1/1 epoch (loss 2.6160): 78% | 49/63 [02:18<00:42, 3.05s/it]
Training 1/1 epoch (loss 2.9151): 79% | 50/63 [02:21<00:41, 3.18s/it]
Training 1/1 epoch (loss 2.8716): 81% | 51/63 [02:24<00:35, 2.99s/it]
Training 1/1 epoch (loss 2.7828): 83% | 52/63 [02:26<00:30, 2.76s/it]
Training 1/1 epoch (loss 2.8789): 84% | 53/63 [02:29<00:29, 2.91s/it]
Training 1/1 epoch (loss 2.7780): 86% | 54/63 [02:35<00:33, 3.67s/it]
Training 1/1 epoch (loss 3.0105): 87% | 55/63 [02:39<00:30, 3.86s/it]
Training 1/1 epoch (loss 2.7988): 89% | 56/63 [02:42<00:25, 3.70s/it]
Training 1/1 epoch (loss 2.8793): 90% | 57/63 [02:44<00:19, 3.24s/it]
Training 1/1 epoch (loss 2.7413): 92% | 58/63 [02:49<00:18, 3.69s/it]
Training 1/1 epoch (loss 2.7618): 94% | 59/63 [02:51<00:13, 3.26s/it]
Training 1/1 epoch (loss 2.9699): 95% | 60/63 [02:53<00:08, 2.72s/it]
Training 1/1 epoch (loss 2.7596): 97% | 61/63 [02:54<00:04, 2.18s/it]
Training 1/1 epoch (loss 3.0216): 98% | 62/63 [02:56<00:02, 2.06s/it]
Training 1/1 epoch (loss 2.5839): 100% | 63/63 [02:59<00:00, 2.85s/it]
|
chat template saved in /aifs4su/hansirui_1st/jiayi/setting3-imdb/Qwen1.5-4B/Qwen1.5-4B-s3-Q1-5000-Q2-500/chat_template.jinja |
|
tokenizer config file saved in /aifs4su/hansirui_1st/jiayi/setting3-imdb/Qwen1.5-4B/Qwen1.5-4B-s3-Q1-5000-Q2-500/tokenizer_config.json |
|
Special tokens file saved in /aifs4su/hansirui_1st/jiayi/setting3-imdb/Qwen1.5-4B/Qwen1.5-4B-s3-Q1-5000-Q2-500/special_tokens_map.json |
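
With training complete, the 16-bit weights and tokenizer artifacts in the output directory load like any Hugging Face checkpoint; a sketch:

from transformers import AutoModelForCausalLM, AutoTokenizer

output_dir = "/aifs4su/hansirui_1st/jiayi/setting3-imdb/Qwen1.5-4B/Qwen1.5-4B-s3-Q1-5000-Q2-500"
tokenizer = AutoTokenizer.from_pretrained(output_dir)
model = AutoModelForCausalLM.from_pretrained(output_dir, torch_dtype="bfloat16")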
|
wandb: ERROR Problem finishing run |
|
Exception ignored in atexit callback: <bound method rank_zero_only.<locals>.wrapper of <safe_rlhf.logger.Logger object at 0x1551f1000f50>> |
|
Traceback (most recent call last): |
|
File "/home/hansirui_1st/jiayi/resist/setting3/safe_rlhf/utils.py", line 212, in wrapper |
|
return func(*args, **kwargs) |
|
^^^^^^^^^^^^^^^^^^^^^ |
|
File "/home/hansirui_1st/jiayi/resist/setting3/safe_rlhf/logger.py", line 183, in close |
|
self.wandb.finish() |
|
File "/aifs4su/hansirui_1st/miniconda3/envs/jy-resist/lib/python3.11/site-packages/wandb/sdk/wandb_run.py", line 406, in wrapper |
|
return func(self, *args, **kwargs) |
|
^^^^^^^^^^^^^^^^^^^^^^^^^^^ |
|
File "/aifs4su/hansirui_1st/miniconda3/envs/jy-resist/lib/python3.11/site-packages/wandb/sdk/wandb_run.py", line 503, in wrapper |
|
return func(self, *args, **kwargs) |
|
^^^^^^^^^^^^^^^^^^^^^^^^^^^ |
|
File "/aifs4su/hansirui_1st/miniconda3/envs/jy-resist/lib/python3.11/site-packages/wandb/sdk/wandb_run.py", line 451, in wrapper |
|
return func(self, *args, **kwargs) |
|
^^^^^^^^^^^^^^^^^^^^^^^^^^^ |
|
File "/aifs4su/hansirui_1st/miniconda3/envs/jy-resist/lib/python3.11/site-packages/wandb/sdk/wandb_run.py", line 2309, in finish |
|
return self._finish(exit_code) |
|
^^^^^^^^^^^^^^^^^^^^^^^ |
|
File "/aifs4su/hansirui_1st/miniconda3/envs/jy-resist/lib/python3.11/site-packages/wandb/sdk/wandb_run.py", line 406, in wrapper |
|
return func(self, *args, **kwargs) |
|
^^^^^^^^^^^^^^^^^^^^^^^^^^^ |
|
File "/aifs4su/hansirui_1st/miniconda3/envs/jy-resist/lib/python3.11/site-packages/wandb/sdk/wandb_run.py", line 2337, in _finish |
|
self._atexit_cleanup(exit_code=exit_code) |
|
File "/aifs4su/hansirui_1st/miniconda3/envs/jy-resist/lib/python3.11/site-packages/wandb/sdk/wandb_run.py", line 2550, in _atexit_cleanup |
|
self._on_finish() |
|
File "/aifs4su/hansirui_1st/miniconda3/envs/jy-resist/lib/python3.11/site-packages/wandb/sdk/wandb_run.py", line 2806, in _on_finish |
|
wait_with_progress( |
|
File "/aifs4su/hansirui_1st/miniconda3/envs/jy-resist/lib/python3.11/site-packages/wandb/sdk/mailbox/wait_with_progress.py", line 24, in wait_with_progress |
|
return wait_all_with_progress( |
|
^^^^^^^^^^^^^^^^^^^^^^^ |
|
File "/aifs4su/hansirui_1st/miniconda3/envs/jy-resist/lib/python3.11/site-packages/wandb/sdk/mailbox/wait_with_progress.py", line 87, in wait_all_with_progress |
|
return asyncio_compat.run(progress_loop_with_timeout) |
|
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ |
|
File "/aifs4su/hansirui_1st/miniconda3/envs/jy-resist/lib/python3.11/site-packages/wandb/sdk/lib/asyncio_compat.py", line 27, in run |
|
future = executor.submit(runner.run, fn) |
|
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ |
|
File "/aifs4su/hansirui_1st/miniconda3/envs/jy-resist/lib/python3.11/concurrent/futures/thread.py", line 169, in submit |
|
raise RuntimeError( |
|
RuntimeError: cannot schedule new futures after interpreter shutdown |
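
The run itself completed and all artifacts were saved; the failure is confined to wandb's finish() being driven from an atexit hook (via safe_rlhf's Logger.close, as the traceback shows) after the interpreter has started tearing down its thread pools. Calling finish() explicitly before exit sidesteps this; a sketch, not the safe_rlhf implementation:

import wandb

def main() -> None:
    run = wandb.init(project="Inverse_Alignment_IMDb")
    try:
        ...  # training loop
    finally:
        # Finish while the interpreter is still fully alive, rather than
        # from an atexit callback that may fire after thread pools have
        # shut down (the RuntimeError above).
        run.finish()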
|
|