## Usage Example

The snippet below loads the fine-tuned checkpoint, tokenizes the SST-2 train and validation splits, and runs evaluation with the `Trainer` API:

```python
import numpy as np
import datasets
from transformers import (
    BertTokenizer,
    BertForSequenceClassification,
    TrainingArguments,
    Trainer,
    DataCollatorWithPadding,
)

model = BertForSequenceClassification.from_pretrained('VityaVitalich/bert-tiny-sst2')
tokenizer = BertTokenizer.from_pretrained('M-FAC/bert-tiny-finetuned-sst2')

def create_data(tokenizer):
    # Load the SST-2 splits; the `idx` column is not needed for training or evaluation.
    train_set = datasets.load_dataset('sst2', split='train').remove_columns(['idx'])
    val_set = datasets.load_dataset('sst2', split='validation').remove_columns(['idx'])

    def tokenize_func(examples):
        # Pad/truncate every sentence to 128 tokens.
        return tokenizer(examples["sentence"], max_length=128, padding='max_length', truncation=True)

    encoded_dataset_train = train_set.map(tokenize_func, batched=True)
    encoded_dataset_test = val_set.map(tokenize_func, batched=True)
    data_collator = DataCollatorWithPadding(tokenizer)
    return encoded_dataset_train, encoded_dataset_test, data_collator

encoded_dataset_train, encoded_dataset_test, data_collator = create_data(tokenizer)

def compute_metrics(eval_pred):
    # Simple accuracy over the predicted classes.
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    return {"accuracy": (predictions == labels).mean()}

training_args = TrainingArguments(
    output_dir='./results',
    learning_rate=3e-5,
    per_device_train_batch_size=128,
    per_device_eval_batch_size=128,
    load_best_model_at_end=True,
    num_train_epochs=5,
    weight_decay=0.1,
    fp16=True,
    fp16_full_eval=True,
    evaluation_strategy="epoch",
    seed=42,
    save_strategy="epoch",
    save_total_limit=5,
    logging_strategy="epoch",
    report_to="all",
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=encoded_dataset_train,
    eval_dataset=encoded_dataset_test,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
)

trainer.evaluate(encoded_dataset_test)
```
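
Beyond `Trainer.evaluate`, the model can also be queried directly for a single sentence. A minimal sketch, reusing the `model` and `tokenizer` loaded above (the example sentence is arbitrary, and the mapping of label ids to sentiments follows the usual SST-2 convention, which is an assumption here):

```python
import torch

# Tokenize one sentence and run a forward pass without gradients.
inputs = tokenizer("a charming and funny film", return_tensors="pt")
with torch.no_grad():
    logits = model(**inputs).logits

# Assumption: id 1 = positive, id 0 = negative, per the standard SST-2 label order.
predicted_class = logits.argmax(dim=-1).item()
print(model.config.id2label.get(predicted_class, predicted_class))
```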