furmaniak committed on
Commit
0d2ffcc
·
verified ·
1 Parent(s): d14de7f

End of training

Browse files
Files changed (5) hide show
  1. README.md +2 -1
  2. all_results.json +6 -6
  3. train_results.json +6 -6
  4. trainer_state.json +22 -15
  5. training_loss.png +0 -0
README.md CHANGED
@@ -4,6 +4,7 @@ license: mit
4
  base_model: microsoft/phi-4
5
  tags:
6
  - llama-factory
 
7
  - lora
8
  - generated_from_trainer
9
  model-index:
@@ -16,7 +17,7 @@ should probably proofread and complete it, then remove this comment. -->
16
 
17
  # pretrain
18
 
19
- This model is a fine-tuned version of [microsoft/phi-4](https://huggingface.co/microsoft/phi-4) on an unknown dataset.
20
 
21
  ## Model description
22
 
 
4
  base_model: microsoft/phi-4
5
  tags:
6
  - llama-factory
7
+ - full
8
  - lora
9
  - generated_from_trainer
10
  model-index:
 
17
 
18
  # pretrain
19
 
20
+ This model is a fine-tuned version of [microsoft/phi-4](https://huggingface.co/microsoft/phi-4) on the openalex_small dataset.
21
 
22
  ## Model description
23
 
all_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
- "epoch": 0.7272727272727273,
3
  "eval_loss": 1.4561185836791992,
4
  "eval_runtime": 2.8429,
5
  "eval_samples_per_second": 5.98,
6
  "eval_steps_per_second": 1.055,
7
  "perplexity": 4.289278700316188,
8
- "total_flos": 4.734884475253555e+16,
9
- "train_loss": 1.4859753847122192,
10
- "train_runtime": 50.9454,
11
- "train_samples_per_second": 3.18,
12
- "train_steps_per_second": 0.02
13
  }
 
1
  {
2
+ "epoch": 0.7619047619047619,
3
  "eval_loss": 1.4561185836791992,
4
  "eval_runtime": 2.8429,
5
  "eval_samples_per_second": 5.98,
6
  "eval_steps_per_second": 1.055,
7
  "perplexity": 4.289278700316188,
8
+ "total_flos": 2446118092800.0,
9
+ "train_loss": 1.5153563022613525,
10
+ "train_runtime": 280.4756,
11
+ "train_samples_per_second": 0.578,
12
+ "train_steps_per_second": 0.007
13
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 0.7272727272727273,
3
- "total_flos": 4.734884475253555e+16,
4
- "train_loss": 1.4859753847122192,
5
- "train_runtime": 50.9454,
6
- "train_samples_per_second": 3.18,
7
- "train_steps_per_second": 0.02
8
  }
 
1
  {
2
+ "epoch": 0.7619047619047619,
3
+ "total_flos": 2446118092800.0,
4
+ "train_loss": 1.5153563022613525,
5
+ "train_runtime": 280.4756,
6
+ "train_samples_per_second": 0.578,
7
+ "train_steps_per_second": 0.007
8
  }
trainer_state.json CHANGED
@@ -1,32 +1,39 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.7272727272727273,
5
  "eval_steps": 500,
6
- "global_step": 1,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.7272727272727273,
13
- "grad_norm": 0.3740828335285187,
14
  "learning_rate": 0.0001,
15
- "loss": 1.486,
16
  "step": 1
17
  },
18
  {
19
- "epoch": 0.7272727272727273,
20
- "step": 1,
21
- "total_flos": 4.734884475253555e+16,
22
- "train_loss": 1.4859753847122192,
23
- "train_runtime": 50.9454,
24
- "train_samples_per_second": 3.18,
25
- "train_steps_per_second": 0.02
 
 
 
 
 
 
 
26
  }
27
  ],
28
  "logging_steps": 1,
29
- "max_steps": 1,
30
  "num_input_tokens_seen": 0,
31
  "num_train_epochs": 1,
32
  "save_steps": 5,
@@ -42,8 +49,8 @@
42
  "attributes": {}
43
  }
44
  },
45
- "total_flos": 4.734884475253555e+16,
46
- "train_batch_size": 2,
47
  "trial_name": null,
48
  "trial_params": null
49
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.7619047619047619,
5
  "eval_steps": 500,
6
+ "global_step": 2,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.38095238095238093,
13
+ "grad_norm": 0.5249602161808266,
14
  "learning_rate": 0.0001,
15
+ "loss": 1.5248,
16
  "step": 1
17
  },
18
  {
19
+ "epoch": 0.7619047619047619,
20
+ "grad_norm": 0.5138827336171068,
21
+ "learning_rate": 0.0,
22
+ "loss": 1.506,
23
+ "step": 2
24
+ },
25
+ {
26
+ "epoch": 0.7619047619047619,
27
+ "step": 2,
28
+ "total_flos": 2446118092800.0,
29
+ "train_loss": 1.5153563022613525,
30
+ "train_runtime": 280.4756,
31
+ "train_samples_per_second": 0.578,
32
+ "train_steps_per_second": 0.007
33
  }
34
  ],
35
  "logging_steps": 1,
36
+ "max_steps": 2,
37
  "num_input_tokens_seen": 0,
38
  "num_train_epochs": 1,
39
  "save_steps": 5,
 
49
  "attributes": {}
50
  }
51
  },
52
+ "total_flos": 2446118092800.0,
53
+ "train_batch_size": 1,
54
  "trial_name": null,
55
  "trial_params": null
56
  }
training_loss.png CHANGED