Qwen_0.5-BDPO_5e-5-100ep_0alp_0lam / chat_2025-01-29_10-42-52.json
JayHyeon's picture
Training in progress, step 50
4732a2e verified
{
"settings": {
"command": "chat",
"model_name_or_path": "/root/trl/dynamics/Qwen_0.5-BDPO_5e-5-100ep_0alp_0lam",
"user": null,
"system_prompt": null,
"save_folder": "./chat_history/",
"device": "cpu",
"examples_path": null,
"max_new_tokens": 256,
"do_sample": true,
"num_beams": 1,
"temperature": 1.0,
"top_k": 50,
"top_p": 1.0,
"repetition_penalty": 1.0,
"eos_tokens": null,
"eos_token_ids": null,
"model_revision": "main",
"torch_dtype": null,
"trust_remote_code": false,
"attn_implementation": null,
"load_in_8bit": false,
"load_in_4bit": false,
"bnb_4bit_quant_type": "nf4",
"use_bnb_nested_quant": false
},
"chat_history": [
{
"role": "user",
"content": "if X^2 + 3 = 12. What is X?"
},
{
"role": "assistant",
"content": "Hello! I'm here to help you in a helpful and respectful manner.\n\nTo answer your question, if X^2 + 3 = 12, we can substitute X^2 with 12 - 3, which equals 9.\nSo, X^2 + 3 = 9, and solving for X^2 gives us X^2 = 9.\nTaking the square root of 9 gives us X = 3, or -3, if we want to solve for the negative square root.\n\nI hope this helps! Let me know if you have any other questions.\n"
}
]
}