againeureka committed
Commit f984e6d · verified · 1 Parent(s): f5aa5d1

Update README.md

Files changed (1)
  1. README.md +2 -27
README.md CHANGED
@@ -28,15 +28,15 @@ library_name: transformers
 ### Training method
 
 ```python
+base_model = 'klue/roberta-base'
+base_tokenizer = 'klue/roberta-base'
 
 from transformers import RobertaTokenizer, RobertaForMaskedLM
 from transformers import AutoModel, AutoTokenizer
-
 model = RobertaForMaskedLM.from_pretrained(base_model)
 tokenizer = AutoTokenizer.from_pretrained(base_tokenizer)
 
 from transformers import LineByLineTextDataset
-
 dataset = LineByLineTextDataset(
     tokenizer=tokenizer,
     file_path=fpath_dataset,
@@ -44,13 +44,11 @@ dataset = LineByLineTextDataset(
 )
 
 from transformers import DataCollatorForLanguageModeling
-
 data_collator = DataCollatorForLanguageModeling(
     tokenizer=tokenizer, mlm=True, mlm_probability=0.15
 )
 
 from transformers import Trainer, TrainingArguments
-
 training_args = TrainingArguments(
     output_dir=output_dir,
     overwrite_output_dir=True,
@@ -69,29 +67,6 @@ trainer = Trainer(
 )
 
 train_metrics = trainer.train()
-
-
 trainer.save_model(output_dir)
 trainer.push_to_hub()
 ```
-
-### Training configuration
-
-- number of epochs
-```bash
-epochs = 50
-```
-
-- JSON file
-```json
-[
-    {'basemodel' : 'againeureka/klue_roberta_base_for_legal',
-     'basetokenizer' : 'klue/roberta-base',
-     'trainmodel' : 'againeureka/toulmin_classifier8_klue_roberta_base_retrained6',
-     'batchsize' : 92,
-     'epochs' : epochs,
-     'push_to_hub' : True,
-     'is_on' : True,
-    },
-]
-```
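For reference, a minimal self-contained version of the training snippet after this commit. `fpath_dataset`, `output_dir`, and `block_size` are placeholder assumptions (the README leaves them elided); the epoch count and batch size are taken from the configuration section this commit removes. `LineByLineTextDataset` is deprecated in recent `transformers` releases but still behaves as shown.

```python
from transformers import (
    AutoTokenizer,
    DataCollatorForLanguageModeling,
    LineByLineTextDataset,
    RobertaForMaskedLM,
    Trainer,
    TrainingArguments,
)

base_model = 'klue/roberta-base'
base_tokenizer = 'klue/roberta-base'
fpath_dataset = 'train.txt'  # placeholder: plain-text corpus, one sample per line
output_dir = './retrained'   # placeholder

model = RobertaForMaskedLM.from_pretrained(base_model)
tokenizer = AutoTokenizer.from_pretrained(base_tokenizer)

# Each line of the corpus file becomes one training example.
dataset = LineByLineTextDataset(
    tokenizer=tokenizer,
    file_path=fpath_dataset,
    block_size=512,  # assumption: the model's maximum sequence length
)

# Randomly masks 15% of tokens so the model trains on the MLM objective.
data_collator = DataCollatorForLanguageModeling(
    tokenizer=tokenizer, mlm=True, mlm_probability=0.15
)

training_args = TrainingArguments(
    output_dir=output_dir,
    overwrite_output_dir=True,
    num_train_epochs=50,             # from the removed configuration section
    per_device_train_batch_size=92,  # from the removed configuration section
)

trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=data_collator,
    train_dataset=dataset,
)

train_metrics = trainer.train()
trainer.save_model(output_dir)
trainer.push_to_hub()
```

`trainer.push_to_hub()` additionally requires an authenticated Hugging Face session (e.g. `huggingface-cli login`).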
 
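Once the push completes, the retrained checkpoint can be sanity-checked with a fill-mask pipeline. A quick probe, assuming the hub id `againeureka/klue_roberta_base_for_legal` that appeared in the removed configuration block:

```python
from transformers import pipeline

# assumption: the repo id the training run pushed to
fill = pipeline('fill-mask', model='againeureka/klue_roberta_base_for_legal')

# KLUE RoBERTa uses [MASK] as its mask token.
for pred in fill('계약의 해제는 상대방에 대한 [MASK]로 한다.'):
    print(pred['token_str'], round(pred['score'], 3))
```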