Radhikaaaa committed on
Commit
8ac985e
·
verified ·
1 Parent(s): 0dfa010

Fine-tune nsi319/legal-led-base-16384 on FiscalNote/billsum subset

Browse files
Files changed (5) hide show
  1. README.md +5 -5
  2. all_results.json +6 -6
  3. model.safetensors +1 -1
  4. train_results.json +6 -6
  5. trainer_state.json +60 -25
README.md CHANGED
@@ -16,7 +16,7 @@ should probably proofread and complete it, then remove this comment. -->
16
 
17
  This model is a fine-tuned version of [nsi319/legal-led-base-16384](https://huggingface.co/nsi319/legal-led-base-16384) on an unknown dataset.
18
  It achieves the following results on the evaluation set:
19
- - Loss: 6.6293
20
 
21
  ## Model description
22
 
@@ -49,10 +49,10 @@ The following hyperparameters were used during training:
49
 
50
  ### Training results
51
 
52
- | Training Loss | Epoch | Step | Validation Loss |
53
- |:-------------:|:-----:|:----:|:---------------:|
54
- | No log | 1.0 | 1 | 7.4076 |
55
- | No log | 2.0 | 2 | 6.6293 |
56
 
57
 
58
  ### Framework versions
 
16
 
17
  This model is a fine-tuned version of [nsi319/legal-led-base-16384](https://huggingface.co/nsi319/legal-led-base-16384) on an unknown dataset.
18
  It achieves the following results on the evaluation set:
19
+ - Loss: 0.8303
20
 
21
  ## Model description
22
 
 
49
 
50
  ### Training results
51
 
52
+ | Training Loss | Epoch | Step | Validation Loss |
53
+ |:-------------:|:------:|:----:|:---------------:|
54
+ | 1.4957 | 1.0 | 297 | 0.8795 |
55
+ | 1.0639 | 1.9963 | 592 | 0.8303 |
56
 
57
 
58
  ### Framework versions
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 2.0,
3
- "total_flos": 371962445211648.0,
4
- "train_loss": 5.818258762359619,
5
- "train_runtime": 153.9001,
6
- "train_samples_per_second": 0.832,
7
- "train_steps_per_second": 0.013
8
  }
 
1
  {
2
+ "epoch": 1.9963060686015832,
3
+ "total_flos": 1.0987423797624422e+17,
4
+ "train_loss": 1.5217120325243152,
5
+ "train_runtime": 38602.7875,
6
+ "train_samples_per_second": 0.982,
7
+ "train_steps_per_second": 0.015
8
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:84d2882b56e85593d829ce7086a07cea8af76d46d1052d16ae2fec3ce9110f5a
3
  size 647614116
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:06850b8387bd2729725fe7c42e6f792b8165afbc27ec3fc855fb9278c3434696
3
  size 647614116
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 2.0,
3
- "total_flos": 371962445211648.0,
4
- "train_loss": 5.818258762359619,
5
- "train_runtime": 153.9001,
6
- "train_samples_per_second": 0.832,
7
- "train_steps_per_second": 0.013
8
  }
 
1
  {
2
+ "epoch": 1.9963060686015832,
3
+ "total_flos": 1.0987423797624422e+17,
4
+ "train_loss": 1.5217120325243152,
5
+ "train_runtime": 38602.7875,
6
+ "train_samples_per_second": 0.982,
7
+ "train_steps_per_second": 0.015
8
  }
trainer_state.json CHANGED
@@ -1,42 +1,77 @@
1
  {
2
- "best_global_step": 2,
3
- "best_metric": 6.629301071166992,
4
- "best_model_checkpoint": "./results_billsum/checkpoint-2",
5
- "epoch": 2.0,
6
  "eval_steps": 500,
7
- "global_step": 2,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
11
  "log_history": [
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  {
13
  "epoch": 1.0,
14
- "eval_loss": 7.40756368637085,
15
- "eval_runtime": 12.5285,
16
- "eval_samples_per_second": 2.554,
17
- "eval_steps_per_second": 0.639,
18
- "step": 1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  },
20
  {
21
- "epoch": 2.0,
22
- "eval_loss": 6.629301071166992,
23
- "eval_runtime": 12.9385,
24
- "eval_samples_per_second": 2.473,
25
- "eval_steps_per_second": 0.618,
26
- "step": 2
 
 
 
 
 
 
 
27
  },
28
  {
29
- "epoch": 2.0,
30
- "step": 2,
31
- "total_flos": 371962445211648.0,
32
- "train_loss": 5.818258762359619,
33
- "train_runtime": 153.9001,
34
- "train_samples_per_second": 0.832,
35
- "train_steps_per_second": 0.013
36
  }
37
  ],
38
  "logging_steps": 100,
39
- "max_steps": 2,
40
  "num_input_tokens_seen": 0,
41
  "num_train_epochs": 2,
42
  "save_steps": 500,
@@ -61,7 +96,7 @@
61
  "attributes": {}
62
  }
63
  },
64
- "total_flos": 371962445211648.0,
65
  "train_batch_size": 2,
66
  "trial_name": null,
67
  "trial_params": null
 
1
  {
2
+ "best_global_step": 592,
3
+ "best_metric": 0.830290675163269,
4
+ "best_model_checkpoint": "./results_billsum/checkpoint-592",
5
+ "epoch": 1.9963060686015832,
6
  "eval_steps": 500,
7
+ "global_step": 592,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
11
  "log_history": [
12
+ {
13
+ "epoch": 0.33773087071240104,
14
+ "grad_norm": 838200.5,
15
+ "learning_rate": 1.7544483985765128e-05,
16
+ "loss": 3.203,
17
+ "step": 100
18
+ },
19
+ {
20
+ "epoch": 0.6754617414248021,
21
+ "grad_norm": 158957.71875,
22
+ "learning_rate": 1.3985765124555162e-05,
23
+ "loss": 1.4957,
24
+ "step": 200
25
+ },
26
  {
27
  "epoch": 1.0,
28
+ "eval_loss": 0.8794804811477661,
29
+ "eval_runtime": 1277.7764,
30
+ "eval_samples_per_second": 2.558,
31
+ "eval_steps_per_second": 0.64,
32
+ "step": 297
33
+ },
34
+ {
35
+ "epoch": 1.010131926121372,
36
+ "grad_norm": 73460.7890625,
37
+ "learning_rate": 1.0427046263345197e-05,
38
+ "loss": 1.1737,
39
+ "step": 300
40
+ },
41
+ {
42
+ "epoch": 1.347862796833773,
43
+ "grad_norm": 66100.03125,
44
+ "learning_rate": 6.868327402135232e-06,
45
+ "loss": 1.0938,
46
+ "step": 400
47
  },
48
  {
49
+ "epoch": 1.6855936675461742,
50
+ "grad_norm": 63527.66796875,
51
+ "learning_rate": 3.309608540925267e-06,
52
+ "loss": 1.0639,
53
+ "step": 500
54
+ },
55
+ {
56
+ "epoch": 1.9963060686015832,
57
+ "eval_loss": 0.830290675163269,
58
+ "eval_runtime": 1279.4932,
59
+ "eval_samples_per_second": 2.555,
60
+ "eval_steps_per_second": 0.639,
61
+ "step": 592
62
  },
63
  {
64
+ "epoch": 1.9963060686015832,
65
+ "step": 592,
66
+ "total_flos": 1.0987423797624422e+17,
67
+ "train_loss": 1.5217120325243152,
68
+ "train_runtime": 38602.7875,
69
+ "train_samples_per_second": 0.982,
70
+ "train_steps_per_second": 0.015
71
  }
72
  ],
73
  "logging_steps": 100,
74
+ "max_steps": 592,
75
  "num_input_tokens_seen": 0,
76
  "num_train_epochs": 2,
77
  "save_steps": 500,
 
96
  "attributes": {}
97
  }
98
  },
99
+ "total_flos": 1.0987423797624422e+17,
100
  "train_batch_size": 2,
101
  "trial_name": null,
102
  "trial_params": null