boltuix
/

bert-emotion

Model card Files Files and versions Community

boltuix committed 16 days ago

Commit

1212e0e

verified ·

1 Parent(s): 87b003a

Update README.md

Browse files

Files changed (1) hide show

README.md +80 -80

README.md CHANGED Viewed

@@ -365,86 +365,86 @@ To adapt BERT-Emotion for custom emotion detection tasks (e.g., specific chatbot
 1. **Prepare Dataset**: Collect labeled data with 13 emotion categories.
 2. **Fine-Tune with Hugging Face**:
    ```python
-      # !pip install transformers datasets torch --upgrade
-      import torch
-      from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments
-      from datasets import Dataset
-      import pandas as pd
-      # 1. Prepare the sample emotion dataset
-      data = {
-          "text": [
-              "I love you so much!",
-              "This is absolutely disgusting!",
-              "I'm so happy with my new phone!",
-              "Why does this always break?",
-              "I feel so alone right now."
-          ],
-          "label": [2, 7, 5, 1, 0]  # Emotions: 0 to 12
-      }
-      df = pd.DataFrame(data)
-      dataset = Dataset.from_pandas(df)
-      # 2. Load tokenizer and model
-      model_name = "boltuix/bert-emotion"
-      tokenizer = BertTokenizer.from_pretrained(model_name)
-      model = BertForSequenceClassification.from_pretrained(model_name, num_labels=13)
-      # 3. Tokenize the dataset
-      def tokenize_function(examples):
-          return tokenizer(examples["text"], padding="max_length", truncation=True, max_length=64)
-      tokenized_dataset = dataset.map(tokenize_function, batched=True)
-      # 4. Manually convert all fields to PyTorch tensors (NumPy 2.0 safe)
-      def to_torch_format(example):
-          return {
-              "input_ids": torch.tensor(example["input_ids"]),
-              "attention_mask": torch.tensor(example["attention_mask"]),
-              "label": torch.tensor(example["label"])
-          }
-      tokenized_dataset = tokenized_dataset.map(to_torch_format)
-      # 5. Define training arguments
-      training_args = TrainingArguments(
-          output_dir="./bert_emotion_results",
-          num_train_epochs=5,
-          per_device_train_batch_size=2,
-          logging_dir="./bert_emotion_logs",
-          logging_steps=10,
-          save_steps=100,
-          eval_strategy="no",
-          learning_rate=3e-5,
-          report_to="none"  # Disable W&B auto-logging if not needed
-      )
-      # 6. Initialize Trainer
-      trainer = Trainer(
-          model=model,
-          args=training_args,
-          train_dataset=tokenized_dataset,
-      )
-      # 7. Fine-tune the model
-      trainer.train()
-      # 8. Save the fine-tuned model
-      model.save_pretrained("./fine_tuned_bert_emotion")
-      tokenizer.save_pretrained("./fine_tuned_bert_emotion")
-      # 9. Example inference
-      text = "I'm thrilled with the update!"
-      inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=64)
-      model.eval()
-      with torch.no_grad():
-          outputs = model(**inputs)
-          logits = outputs.logits
-          predicted_class = torch.argmax(logits, dim=1).item()
-      labels = ["Sadness", "Anger", "Love", "Surprise", "Fear", "Happiness", "Neutral", "Disgust", "Shame", "Guilt", "Confusion", "Desire", "Sarcasm"]
-      print(f"Predicted emotion for '{text}': {labels[predicted_class]}")
    ```
 3. **Deploy**: Export the fine-tuned model to ONNX or TensorFlow Lite for edge devices.

 1. **Prepare Dataset**: Collect labeled data with 13 emotion categories.
 2. **Fine-Tune with Hugging Face**:
    ```python
+    # !pip install transformers datasets torch --upgrade
+    import torch
+    from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments
+    from datasets import Dataset
+    import pandas as pd
+    # 1. Prepare the sample emotion dataset
+    data = {
+        "text": [
+            "I love you so much!",
+            "This is absolutely disgusting!",
+            "I'm so happy with my new phone!",
+            "Why does this always break?",
+            "I feel so alone right now."
+        ],
+        "label": [2, 7, 5, 1, 0]  # Emotions: 0 to 12
+    }
+    df = pd.DataFrame(data)
+    dataset = Dataset.from_pandas(df)
+    # 2. Load tokenizer and model
+    model_name = "boltuix/bert-emotion"
+    tokenizer = BertTokenizer.from_pretrained(model_name)
+    model = BertForSequenceClassification.from_pretrained(model_name, num_labels=13)
+    # 3. Tokenize the dataset
+    def tokenize_function(examples):
+        return tokenizer(examples["text"], padding="max_length", truncation=True, max_length=64)
+    tokenized_dataset = dataset.map(tokenize_function, batched=True)
+    # 4. Manually convert all fields to PyTorch tensors (NumPy 2.0 safe)
+    def to_torch_format(example):
+        return {
+            "input_ids": torch.tensor(example["input_ids"]),
+            "attention_mask": torch.tensor(example["attention_mask"]),
+            "label": torch.tensor(example["label"])
+        }
+    tokenized_dataset = tokenized_dataset.map(to_torch_format)
+    # 5. Define training arguments
+    training_args = TrainingArguments(
+        output_dir="./bert_emotion_results",
+        num_train_epochs=5,
+        per_device_train_batch_size=2,
+        logging_dir="./bert_emotion_logs",
+        logging_steps=10,
+        save_steps=100,
+        eval_strategy="no",
+        learning_rate=3e-5,
+        report_to="none"  # Disable W&B auto-logging if not needed
+    )
+    # 6. Initialize Trainer
+    trainer = Trainer(
+        model=model,
+        args=training_args,
+        train_dataset=tokenized_dataset,
+    )
+    # 7. Fine-tune the model
+    trainer.train()
+    # 8. Save the fine-tuned model
+    model.save_pretrained("./fine_tuned_bert_emotion")
+    tokenizer.save_pretrained("./fine_tuned_bert_emotion")
+    # 9. Example inference
+    text = "I'm thrilled with the update!"
+    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=64)
+    model.eval()
+    with torch.no_grad():
+        outputs = model(**inputs)
+        logits = outputs.logits
+        predicted_class = torch.argmax(logits, dim=1).item()
+    labels = ["Sadness", "Anger", "Love", "Surprise", "Fear", "Happiness", "Neutral", "Disgust", "Shame", "Guilt", "Confusion", "Desire", "Sarcasm"]
+    print(f"Predicted emotion for '{text}': {labels[predicted_class]}")
    ```
 3. **Deploy**: Export the fine-tuned model to ONNX or TensorFlow Lite for edge devices.