boltuix committed on
Commit
1212e0e
·
verified ·
1 Parent(s): 87b003a

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +80 -80
README.md CHANGED
@@ -365,86 +365,86 @@ To adapt BERT-Emotion for custom emotion detection tasks (e.g., specific chatbot
365
  1. **Prepare Dataset**: Collect labeled data with 13 emotion categories.
366
  2. **Fine-Tune with Hugging Face**:
367
  ```python
368
- # !pip install transformers datasets torch --upgrade
369
-
370
- import torch
371
- from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments
372
- from datasets import Dataset
373
- import pandas as pd
374
-
375
- # 1. Prepare the sample emotion dataset
376
- data = {
377
- "text": [
378
- "I love you so much!",
379
- "This is absolutely disgusting!",
380
- "I'm so happy with my new phone!",
381
- "Why does this always break?",
382
- "I feel so alone right now."
383
- ],
384
- "label": [2, 7, 5, 1, 0] # Emotions: 0 to 12
385
- }
386
- df = pd.DataFrame(data)
387
- dataset = Dataset.from_pandas(df)
388
-
389
- # 2. Load tokenizer and model
390
- model_name = "boltuix/bert-emotion"
391
- tokenizer = BertTokenizer.from_pretrained(model_name)
392
- model = BertForSequenceClassification.from_pretrained(model_name, num_labels=13)
393
-
394
- # 3. Tokenize the dataset
395
- def tokenize_function(examples):
396
- return tokenizer(examples["text"], padding="max_length", truncation=True, max_length=64)
397
-
398
- tokenized_dataset = dataset.map(tokenize_function, batched=True)
399
-
400
- # 4. Manually convert all fields to PyTorch tensors (NumPy 2.0 safe)
401
- def to_torch_format(example):
402
- return {
403
- "input_ids": torch.tensor(example["input_ids"]),
404
- "attention_mask": torch.tensor(example["attention_mask"]),
405
- "label": torch.tensor(example["label"])
406
- }
407
-
408
- tokenized_dataset = tokenized_dataset.map(to_torch_format)
409
-
410
- # 5. Define training arguments
411
- training_args = TrainingArguments(
412
- output_dir="./bert_emotion_results",
413
- num_train_epochs=5,
414
- per_device_train_batch_size=2,
415
- logging_dir="./bert_emotion_logs",
416
- logging_steps=10,
417
- save_steps=100,
418
- eval_strategy="no",
419
- learning_rate=3e-5,
420
- report_to="none" # Disable W&B auto-logging if not needed
421
- )
422
-
423
- # 6. Initialize Trainer
424
- trainer = Trainer(
425
- model=model,
426
- args=training_args,
427
- train_dataset=tokenized_dataset,
428
- )
429
-
430
- # 7. Fine-tune the model
431
- trainer.train()
432
-
433
- # 8. Save the fine-tuned model
434
- model.save_pretrained("./fine_tuned_bert_emotion")
435
- tokenizer.save_pretrained("./fine_tuned_bert_emotion")
436
-
437
- # 9. Example inference
438
- text = "I'm thrilled with the update!"
439
- inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=64)
440
- model.eval()
441
- with torch.no_grad():
442
- outputs = model(**inputs)
443
- logits = outputs.logits
444
- predicted_class = torch.argmax(logits, dim=1).item()
445
-
446
- labels = ["Sadness", "Anger", "Love", "Surprise", "Fear", "Happiness", "Neutral", "Disgust", "Shame", "Guilt", "Confusion", "Desire", "Sarcasm"]
447
- print(f"Predicted emotion for '{text}': {labels[predicted_class]}")
448
  ```
449
  3. **Deploy**: Export the fine-tuned model to ONNX or TensorFlow Lite for edge devices.
450
 
 
365
  1. **Prepare Dataset**: Collect labeled data with 13 emotion categories.
366
  2. **Fine-Tune with Hugging Face**:
367
  ```python
368
+ # !pip install transformers datasets torch --upgrade
369
+
370
+ import torch
371
+ from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments
372
+ from datasets import Dataset
373
+ import pandas as pd
374
+
375
+ # 1. Prepare the sample emotion dataset
376
+ data = {
377
+ "text": [
378
+ "I love you so much!",
379
+ "This is absolutely disgusting!",
380
+ "I'm so happy with my new phone!",
381
+ "Why does this always break?",
382
+ "I feel so alone right now."
383
+ ],
384
+ "label": [2, 7, 5, 1, 0] # Emotions: 0 to 12
385
+ }
386
+ df = pd.DataFrame(data)
387
+ dataset = Dataset.from_pandas(df)
388
+
389
+ # 2. Load tokenizer and model
390
+ model_name = "boltuix/bert-emotion"
391
+ tokenizer = BertTokenizer.from_pretrained(model_name)
392
+ model = BertForSequenceClassification.from_pretrained(model_name, num_labels=13)
393
+
394
+ # 3. Tokenize the dataset
395
+ def tokenize_function(examples):
396
+ return tokenizer(examples["text"], padding="max_length", truncation=True, max_length=64)
397
+
398
+ tokenized_dataset = dataset.map(tokenize_function, batched=True)
399
+
400
+ # 4. Manually convert all fields to PyTorch tensors (NumPy 2.0 safe)
401
+ def to_torch_format(example):
402
+ return {
403
+ "input_ids": torch.tensor(example["input_ids"]),
404
+ "attention_mask": torch.tensor(example["attention_mask"]),
405
+ "label": torch.tensor(example["label"])
406
+ }
407
+
408
+ tokenized_dataset = tokenized_dataset.map(to_torch_format)
409
+
410
+ # 5. Define training arguments
411
+ training_args = TrainingArguments(
412
+ output_dir="./bert_emotion_results",
413
+ num_train_epochs=5,
414
+ per_device_train_batch_size=2,
415
+ logging_dir="./bert_emotion_logs",
416
+ logging_steps=10,
417
+ save_steps=100,
418
+ eval_strategy="no",
419
+ learning_rate=3e-5,
420
+ report_to="none" # Disable W&B auto-logging if not needed
421
+ )
422
+
423
+ # 6. Initialize Trainer
424
+ trainer = Trainer(
425
+ model=model,
426
+ args=training_args,
427
+ train_dataset=tokenized_dataset,
428
+ )
429
+
430
+ # 7. Fine-tune the model
431
+ trainer.train()
432
+
433
+ # 8. Save the fine-tuned model
434
+ model.save_pretrained("./fine_tuned_bert_emotion")
435
+ tokenizer.save_pretrained("./fine_tuned_bert_emotion")
436
+
437
+ # 9. Example inference
438
+ text = "I'm thrilled with the update!"
439
+ inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=64)
440
+ model.eval()
441
+ with torch.no_grad():
442
+ outputs = model(**inputs)
443
+ logits = outputs.logits
444
+ predicted_class = torch.argmax(logits, dim=1).item()
445
+
446
+ labels = ["Sadness", "Anger", "Love", "Surprise", "Fear", "Happiness", "Neutral", "Disgust", "Shame", "Guilt", "Confusion", "Desire", "Sarcasm"]
447
+ print(f"Predicted emotion for '{text}': {labels[predicted_class]}")
448
  ```
449
  3. **Deploy**: Export the fine-tuned model to ONNX or TensorFlow Lite for edge devices.
450