Update README.md
To adapt BERT-Emotion for custom emotion detection tasks (e.g., specific chatbot …):

1. **Prepare Dataset**: Collect labeled data with 13 emotion categories.
2. **Fine-Tune with Hugging Face**:

```python
# !pip install transformers datasets torch --upgrade

import torch
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments
from datasets import Dataset
import pandas as pd

# 1. Prepare the sample emotion dataset
data = {
    "text": [
        "I love you so much!",
        "This is absolutely disgusting!",
        "I'm so happy with my new phone!",
        "Why does this always break?",
        "I feel so alone right now."
    ],
    "label": [2, 7, 5, 1, 0]  # 0=Sadness, 1=Anger, 2=Love, 5=Happiness, 7=Disgust (full range: 0-12)
}
df = pd.DataFrame(data)
dataset = Dataset.from_pandas(df)

# 2. Load tokenizer and model
model_name = "boltuix/bert-emotion"
tokenizer = BertTokenizer.from_pretrained(model_name)
model = BertForSequenceClassification.from_pretrained(model_name, num_labels=13)

# 3. Tokenize the dataset
def tokenize_function(examples):
    return tokenizer(examples["text"], padding="max_length", truncation=True, max_length=64)

tokenized_dataset = dataset.map(tokenize_function, batched=True)

# 4. Manually convert all fields to PyTorch tensors (NumPy 2.0 safe)
def to_torch_format(example):
    return {
        "input_ids": torch.tensor(example["input_ids"]),
        "attention_mask": torch.tensor(example["attention_mask"]),
        "label": torch.tensor(example["label"])
    }

tokenized_dataset = tokenized_dataset.map(to_torch_format)

# 5. Define training arguments
training_args = TrainingArguments(
    output_dir="./bert_emotion_results",
    num_train_epochs=5,
    per_device_train_batch_size=2,
    logging_dir="./bert_emotion_logs",
    logging_steps=10,
    save_steps=100,
    eval_strategy="no",
    learning_rate=3e-5,
    report_to="none"  # Disable W&B auto-logging if not needed
)

# 6. Initialize Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
)

# 7. Fine-tune the model
trainer.train()

# 8. Save the fine-tuned model
model.save_pretrained("./fine_tuned_bert_emotion")
tokenizer.save_pretrained("./fine_tuned_bert_emotion")

# 9. Example inference
text = "I'm thrilled with the update!"
inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=64)
model.eval()
with torch.no_grad():
    outputs = model(**inputs)
logits = outputs.logits
predicted_class = torch.argmax(logits, dim=1).item()

labels = ["Sadness", "Anger", "Love", "Surprise", "Fear", "Happiness", "Neutral", "Disgust", "Shame", "Guilt", "Confusion", "Desire", "Sarcasm"]
print(f"Predicted emotion for '{text}': {labels[predicted_class]}")
```
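The inference step above reuses the in-memory model; to confirm the weights saved to disk load correctly, the `pipeline` API offers a quick check. This is a minimal sketch: it assumes the `./fine_tuned_bert_emotion` directory produced by the script above, and the printed label will be a generic `LABEL_<id>` unless `id2label` is configured in the model config.

```python
# Sanity-check the saved model by loading it back from disk (sketch;
# assumes the ./fine_tuned_bert_emotion directory saved above).
from transformers import pipeline

classifier = pipeline(
    "text-classification",
    model="./fine_tuned_bert_emotion",
    tokenizer="./fine_tuned_bert_emotion",
)

# Prints something like [{'label': 'LABEL_5', 'score': ...}] unless
# id2label is set; map LABEL_<i> through the labels list above.
print(classifier("I'm so happy with my new phone!"))
```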
3. **Deploy**: Export the fine-tuned model to ONNX or TensorFlow Lite for edge devices (see the export sketch below).
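For the ONNX path, a minimal export sketch follows. It assumes the `./fine_tuned_bert_emotion` directory from step 2; the opset version, axis names, and output file name are illustrative choices, not requirements. TensorFlow Lite conversion is a separate flow (typically via a TensorFlow checkpoint) and is not shown here.

```python
# Minimal ONNX export sketch (assumes the fine-tuned model saved above;
# opset version, axis names, and file name are illustrative choices).
import torch
from transformers import BertTokenizer, BertForSequenceClassification

model_dir = "./fine_tuned_bert_emotion"
tokenizer = BertTokenizer.from_pretrained(model_dir)
model = BertForSequenceClassification.from_pretrained(model_dir)
model.eval()

# A dummy input traces the graph; dynamic axes keep batch size and
# sequence length flexible at inference time.
dummy = tokenizer("ONNX export example", return_tensors="pt",
                  padding="max_length", truncation=True, max_length=64)

torch.onnx.export(
    model,
    (dummy["input_ids"], dummy["attention_mask"]),
    "bert_emotion.onnx",
    input_names=["input_ids", "attention_mask"],
    output_names=["logits"],
    dynamic_axes={
        "input_ids": {0: "batch", 1: "sequence"},
        "attention_mask": {0: "batch", 1: "sequence"},
        "logits": {0: "batch"},
    },
    opset_version=14,
)
```

The exported graph can then be served with ONNX Runtime on the target device.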