Radhikaaaa committed on
Commit
0dfa010
·
verified ·
1 Parent(s): 605768b

Fine-tune nsi319/legal-led-base-16384 on FiscalNote/billsum subset

Browse files
README.md CHANGED
@@ -16,7 +16,7 @@ should probably proofread and complete it, then remove this comment. -->
16
 
17
  This model is a fine-tuned version of [nsi319/legal-led-base-16384](https://huggingface.co/nsi319/legal-led-base-16384) on an unknown dataset.
18
  It achieves the following results on the evaluation set:
19
- - Loss: 6.4175
20
 
21
  ## Model description
22
 
@@ -44,16 +44,15 @@ The following hyperparameters were used during training:
44
  - optimizer: Use OptimizerNames.ADAMW_TORCH with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
45
  - lr_scheduler_type: linear
46
  - lr_scheduler_warmup_ratio: 0.05
47
- - num_epochs: 3
48
  - mixed_precision_training: Native AMP
49
 
50
  ### Training results
51
 
52
  | Training Loss | Epoch | Step | Validation Loss |
53
  |:-------------:|:-----:|:----:|:---------------:|
54
- | No log | 1.0 | 1 | 7.4017 |
55
- | No log | 2.0 | 2 | 6.6088 |
56
- | No log | 3.0 | 3 | 6.4175 |
57
 
58
 
59
  ### Framework versions
 
16
 
17
  This model is a fine-tuned version of [nsi319/legal-led-base-16384](https://huggingface.co/nsi319/legal-led-base-16384) on an unknown dataset.
18
  It achieves the following results on the evaluation set:
19
+ - Loss: 6.6293
20
 
21
  ## Model description
22
 
 
44
  - optimizer: Use OptimizerNames.ADAMW_TORCH with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
45
  - lr_scheduler_type: linear
46
  - lr_scheduler_warmup_ratio: 0.05
47
+ - num_epochs: 2
48
  - mixed_precision_training: Native AMP
49
 
50
  ### Training results
51
 
52
  | Training Loss | Epoch | Step | Validation Loss |
53
  |:-------------:|:-----:|:----:|:---------------:|
54
+ | No log | 1.0 | 1 | 7.4076 |
55
+ | No log | 2.0 | 2 | 6.6293 |
 
56
 
57
 
58
  ### Framework versions
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 3.0,
3
- "total_flos": 579817568360448.0,
4
- "train_loss": 5.7508284250895185,
5
- "train_runtime": 238.1241,
6
- "train_samples_per_second": 0.806,
7
  "train_steps_per_second": 0.013
8
  }
 
1
  {
2
+ "epoch": 2.0,
3
+ "total_flos": 371962445211648.0,
4
+ "train_loss": 5.818258762359619,
5
+ "train_runtime": 153.9001,
6
+ "train_samples_per_second": 0.832,
7
  "train_steps_per_second": 0.013
8
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c7639c0059e8fbcc5f4dacac685cbe09f8c5bf529888d9d8bbdfa85709f9a948
3
  size 647614116
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:84d2882b56e85593d829ce7086a07cea8af76d46d1052d16ae2fec3ce9110f5a
3
  size 647614116
tokenizer.json CHANGED
@@ -1,6 +1,11 @@
1
  {
2
  "version": "1.0",
3
- "truncation": null,
 
 
 
 
 
4
  "padding": null,
5
  "added_tokens": [
6
  {
 
1
  {
2
  "version": "1.0",
3
+ "truncation": {
4
+ "direction": "Right",
5
+ "max_length": 512,
6
+ "strategy": "LongestFirst",
7
+ "stride": 0
8
+ },
9
  "padding": null,
10
  "added_tokens": [
11
  {
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 3.0,
3
- "total_flos": 579817568360448.0,
4
- "train_loss": 5.7508284250895185,
5
- "train_runtime": 238.1241,
6
- "train_samples_per_second": 0.806,
7
  "train_steps_per_second": 0.013
8
  }
 
1
  {
2
+ "epoch": 2.0,
3
+ "total_flos": 371962445211648.0,
4
+ "train_loss": 5.818258762359619,
5
+ "train_runtime": 153.9001,
6
+ "train_samples_per_second": 0.832,
7
  "train_steps_per_second": 0.013
8
  }
trainer_state.json CHANGED
@@ -1,52 +1,44 @@
1
  {
2
- "best_global_step": 3,
3
- "best_metric": 6.417472839355469,
4
- "best_model_checkpoint": "./results_billsum/checkpoint-3",
5
- "epoch": 3.0,
6
  "eval_steps": 500,
7
- "global_step": 3,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
11
  "log_history": [
12
  {
13
  "epoch": 1.0,
14
- "eval_loss": 7.4016804695129395,
15
- "eval_runtime": 13.2975,
16
- "eval_samples_per_second": 2.406,
17
- "eval_steps_per_second": 0.602,
18
  "step": 1
19
  },
20
  {
21
  "epoch": 2.0,
22
- "eval_loss": 6.608830451965332,
23
- "eval_runtime": 13.15,
24
- "eval_samples_per_second": 2.433,
25
- "eval_steps_per_second": 0.608,
26
  "step": 2
27
  },
28
  {
29
- "epoch": 3.0,
30
- "eval_loss": 6.417472839355469,
31
- "eval_runtime": 13.1507,
32
- "eval_samples_per_second": 2.433,
33
- "eval_steps_per_second": 0.608,
34
- "step": 3
35
- },
36
- {
37
- "epoch": 3.0,
38
- "step": 3,
39
- "total_flos": 579817568360448.0,
40
- "train_loss": 5.7508284250895185,
41
- "train_runtime": 238.1241,
42
- "train_samples_per_second": 0.806,
43
  "train_steps_per_second": 0.013
44
  }
45
  ],
46
  "logging_steps": 100,
47
- "max_steps": 3,
48
  "num_input_tokens_seen": 0,
49
- "num_train_epochs": 3,
50
  "save_steps": 500,
51
  "stateful_callbacks": {
52
  "EarlyStoppingCallback": {
@@ -69,7 +61,7 @@
69
  "attributes": {}
70
  }
71
  },
72
- "total_flos": 579817568360448.0,
73
  "train_batch_size": 2,
74
  "trial_name": null,
75
  "trial_params": null
 
1
  {
2
+ "best_global_step": 2,
3
+ "best_metric": 6.629301071166992,
4
+ "best_model_checkpoint": "./results_billsum/checkpoint-2",
5
+ "epoch": 2.0,
6
  "eval_steps": 500,
7
+ "global_step": 2,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
11
  "log_history": [
12
  {
13
  "epoch": 1.0,
14
+ "eval_loss": 7.40756368637085,
15
+ "eval_runtime": 12.5285,
16
+ "eval_samples_per_second": 2.554,
17
+ "eval_steps_per_second": 0.639,
18
  "step": 1
19
  },
20
  {
21
  "epoch": 2.0,
22
+ "eval_loss": 6.629301071166992,
23
+ "eval_runtime": 12.9385,
24
+ "eval_samples_per_second": 2.473,
25
+ "eval_steps_per_second": 0.618,
26
  "step": 2
27
  },
28
  {
29
+ "epoch": 2.0,
30
+ "step": 2,
31
+ "total_flos": 371962445211648.0,
32
+ "train_loss": 5.818258762359619,
33
+ "train_runtime": 153.9001,
34
+ "train_samples_per_second": 0.832,
 
 
 
 
 
 
 
 
35
  "train_steps_per_second": 0.013
36
  }
37
  ],
38
  "logging_steps": 100,
39
+ "max_steps": 2,
40
  "num_input_tokens_seen": 0,
41
+ "num_train_epochs": 2,
42
  "save_steps": 500,
43
  "stateful_callbacks": {
44
  "EarlyStoppingCallback": {
 
61
  "attributes": {}
62
  }
63
  },
64
+ "total_flos": 371962445211648.0,
65
  "train_batch_size": 2,
66
  "trial_name": null,
67
  "trial_params": null
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:63aaa7c8720fbc7e6af2821aa660c1124ea6176e78437270954587f667dc7361
3
  size 5496
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:21a31db65a62ac1ef8468a49a03cfea852c52cec8c76738eaf6570aa7e535472
3
  size 5496