Upload fine-tuned model, tokenizer, and supporting files for modernbert-imdb-sentiment

Browse files

Files changed (11) hide show

README.md +46 -3
classifiers.py +141 -0
config.json +45 -0
config.yaml +12 -0
inference.py +79 -0
models.py +172 -0
pytorch_model.bin +3 -0
special_tokens_map.json +37 -0
tokenizer.json +0 -0
tokenizer_config.json +945 -0
train_utils.py +156 -0

README.md CHANGED Viewed

@@ -20,9 +20,52 @@ Fine-tuned ModernBERT model for sentiment analysis on IMDb movie reviews. Achiev
 ```python
 from transformers import AutoModelForSequenceClassification, AutoTokenizer
-model = AutoModelForSequenceClassification.from_pretrained("{HF_USERNAME}/{MODEL_NAME}")
-tokenizer = AutoTokenizer.from_pretrained("{HF_USERNAME}/{MODEL_NAME}")
 # Input processing
 inputs = tokenizer("This movie was fantastic!", return_tensors="pt")
-outputs = model(**inputs)

 ```python
 from transformers import AutoModelForSequenceClassification, AutoTokenizer
+model = AutoModelForSequenceClassification.from_pretrained("voxmenthe/modernbert-imdb-sentiment")
+tokenizer = AutoTokenizer.from_pretrained("answerdotai/ModernBERT-base")
 # Input processing
 inputs = tokenizer("This movie was fantastic!", return_tensors="pt")
+outputs = model(**inputs)
+# Get the predicted class
+predicted_class_id = outputs.logits.argmax().item()
+# Convert class ID to label
+predicted_label = model.config.id2label[predicted_class_id]
+print(f"Predicted label: {predicted_label}")
+```
+## Model Card
+### Model Details
+- **Model Name**: ModernBERT IMDb Sentiment Analysis
+- **Base Model**: answerdotai/ModernBERT-base
+- **Task**: Sentiment Analysis
+- **Dataset**: IMDb Movie Reviews
+- **Training Epochs**: 5
+### Model Performance
+- **Test Accuracy**: 95.75%
+- **Test F1 Score**: 95.75%
+### Model Architecture
+- **Base Model**: answerdotai/ModernBERT-base
+- **Task-Specific Head**: ClassifierHead (from `classifiers.py`)
+- **Number of Labels**: 2 classes (Positive, Negative), scored by a single sigmoid logit (`num_labels = 1` in `inference.py`)
+### Model Inference
+- **Input Format**: Text (single review)
+- **Output Format**: Predicted sentiment label (Positive or Negative)
+### Model Version
+- **Version**: 1.0
+- **Date**: 2025-05-07
+### Model License
+- **License**: MIT License
+### Model Contact
+- **Contact**: [email protected]
+### Model Citation
+- **Citation**: voxmenthe/modernbert-imdb-sentiment

classifiers.py ADDED Viewed

	@@ -0,0 +1,141 @@

+from torch import nn
+import torch
+class ClassifierHead(nn.Module):
+    """Basically a fancy MLP: 3-layer classifier head with GELU, LayerNorm, and Skip Connections."""
+    def __init__(self, hidden_size, num_labels, dropout_prob):
+        super().__init__()
+        # Layer 1
+        self.dense1 = nn.Linear(hidden_size, hidden_size)
+        self.norm1 = nn.LayerNorm(hidden_size)
+        self.activation = nn.GELU()
+        self.dropout1 = nn.Dropout(dropout_prob)
+        # Layer 2
+        self.dense2 = nn.Linear(hidden_size, hidden_size)
+        self.norm2 = nn.LayerNorm(hidden_size)
+        self.dropout2 = nn.Dropout(dropout_prob)
+        # Output Layer
+        self.out_proj = nn.Linear(hidden_size, num_labels)
+    def forward(self, features):
+        # Layer 1
+        identity1 = features
+        x = self.norm1(features)
+        x = self.dense1(x)
+        x = self.activation(x)
+        x = self.dropout1(x)
+        x = x + identity1 # skip connection
+        # Layer 2
+        identity2 = x
+        x = self.norm2(x)
+        x = self.dense2(x)
+        x = self.activation(x)
+        x = self.dropout2(x)
+        x = x + identity2 # skip connection
+        # Output Layer
+        logits = self.out_proj(x)
+        return logits
+class ConcatClassifierHead(nn.Module):
+    """
+    An enhanced classifier head designed for concatenated CLS + Mean Pooling input.
+    Includes an initial projection layer before the standard enhanced block.
+    """
+    def __init__(self, input_size, hidden_size, num_labels, dropout_prob):
+        super().__init__()
+        # Initial projection from concatenated size (2*hidden) down to hidden_size
+        self.initial_projection = nn.Linear(input_size, hidden_size)
+        self.initial_norm = nn.LayerNorm(hidden_size) # Norm after projection
+        self.initial_activation = nn.GELU()
+        self.initial_dropout = nn.Dropout(dropout_prob)
+        # Layer 1
+        self.dense1 = nn.Linear(hidden_size, hidden_size)
+        self.norm1 = nn.LayerNorm(hidden_size)
+        self.activation = nn.GELU()
+        self.dropout1 = nn.Dropout(dropout_prob)
+        # Layer 2
+        self.dense2 = nn.Linear(hidden_size, hidden_size)
+        self.norm2 = nn.LayerNorm(hidden_size)
+        self.dropout2 = nn.Dropout(dropout_prob)
+        # Output Layer
+        self.out_proj = nn.Linear(hidden_size, num_labels)
+    def forward(self, features):
+        # Initial Projection Step
+        x = self.initial_projection(features)
+        x = self.initial_norm(x)
+        x = self.initial_activation(x)
+        x = self.initial_dropout(x)
+        # x should now be of shape (batch_size, hidden_size)
+        # Layer 1 + Skip
+        identity1 = x # Skip connection starts after initial projection
+        x_res = self.norm1(x)
+        x_res = self.dense1(x_res)
+        x_res = self.activation(x_res)
+        x_res = self.dropout1(x_res)
+        x = x + x_res # skip connection
+        # Layer 2 + Skip
+        identity2 = x
+        x_res = self.norm2(x)
+        x_res = self.dense2(x_res)
+        x_res = self.activation(x_res)
+        x_res = self.dropout2(x_res)
+        x = x + x_res # skip connection
+        # Output Layer
+        logits = self.out_proj(x)
+        return logits
+# ExpansionClassifierHead currently not used
+class ExpansionClassifierHead(nn.Module):
+    """
+    A classifier head using FFN-style expansion (input -> 4*hidden -> hidden -> labels).
+    Takes concatenated CLS + Mean Pooled features as input.
+    """
+    def __init__(self, input_size, hidden_size, num_labels, dropout_prob):
+        super().__init__()
+        intermediate_size = hidden_size * 4 # FFN expansion factor
+        # Layer 1 (Expansion)
+        self.norm1 = nn.LayerNorm(input_size)
+        self.dense1 = nn.Linear(input_size, intermediate_size)
+        self.activation = nn.GELU()
+        self.dropout1 = nn.Dropout(dropout_prob)
+        # Layer 2 (Projection back down)
+        self.norm2 = nn.LayerNorm(intermediate_size)
+        self.dense2 = nn.Linear(intermediate_size, hidden_size)
+        # Activation and Dropout applied after projection
+        self.dropout2 = nn.Dropout(dropout_prob)
+        # Output Layer
+        self.out_proj = nn.Linear(hidden_size, num_labels)
+    def forward(self, features):
+        # Layer 1
+        x = self.norm1(features)
+        x = self.dense1(x)
+        x = self.activation(x)
+        x = self.dropout1(x)
+        # Layer 2
+        x = self.norm2(x)
+        x = self.dense2(x)
+        x = self.activation(x)
+        x = self.dropout2(x)
+        # Output Layer
+        logits = self.out_proj(x)
+        return logits

config.json ADDED Viewed

	@@ -0,0 +1,45 @@

+{
+  "architectures": [
+    "ModernBertForMaskedLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "bos_token_id": 50281,
+  "classifier_activation": "gelu",
+  "classifier_bias": false,
+  "classifier_dropout": 0.0,
+  "classifier_pooling": "mean",
+  "cls_token_id": 50281,
+  "decoder_bias": true,
+  "deterministic_flash_attn": false,
+  "embedding_dropout": 0.0,
+  "eos_token_id": 50282,
+  "global_attn_every_n_layers": 3,
+  "global_rope_theta": 160000.0,
+  "gradient_checkpointing": false,
+  "hidden_activation": "gelu",
+  "hidden_size": 768,
+  "initializer_cutoff_factor": 2.0,
+  "initializer_range": 0.02,
+  "intermediate_size": 1152,
+  "layer_norm_eps": 1e-05,
+  "local_attention": 128,
+  "local_rope_theta": 10000.0,
+  "max_position_embeddings": 8192,
+  "mlp_bias": false,
+  "mlp_dropout": 0.0,
+  "model_type": "modernbert",
+  "norm_bias": false,
+  "norm_eps": 1e-05,
+  "num_attention_heads": 12,
+  "num_hidden_layers": 22,
+  "pad_token_id": 50283,
+  "position_embedding_type": "absolute",
+  "repad_logits_with_grad": false,
+  "sep_token_id": 50282,
+  "sparse_pred_ignore_index": -100,
+  "sparse_prediction": false,
+  "torch_dtype": "float32",
+  "transformers_version": "4.51.3",
+  "vocab_size": 50368
+}

config.yaml ADDED Viewed

	@@ -0,0 +1,12 @@

+model:
+  name: "voxmenthe/modernbert-imdb-sentiment"
+  output_dir: "checkpoints"
+  max_length: 880 # 256
+  dropout: 0.1
+  pooling_strategy: "mean" # Current default, change as needed
+inference:
+  # Default path, can be overridden
+  model_path: "checkpoints/mean_epoch5_0.9575acc_0.9575f1.pt"
+  # Using the same max_length as training for consistency
+  max_length: 880 # 256

inference.py ADDED Viewed

	@@ -0,0 +1,79 @@

+import torch
+from transformers import AutoTokenizer, AutoModelForSequenceClassification
+from models import ModernBertForSentiment
+from transformers import ModernBertConfig
+from typing import Dict, Any
+import yaml
+import os
+class SentimentInference:
+    def __init__(self, config_path: str = "config.yaml"):
+        """Load configuration and initialize model and tokenizer."""
+        with open(config_path, 'r') as f:
+            config = yaml.safe_load(f)
+        model_cfg = config.get('model', {})
+        inference_cfg = config.get('inference', {})
+        # Path to the .pt model weights file
+        model_weights_path = inference_cfg.get('model_path',
+                                             os.path.join(model_cfg.get('output_dir', 'checkpoints'), 'best_model.pt'))
+        # Base model name from config (e.g., 'answerdotai/ModernBERT-base')
+        # This will be used for loading both tokenizer and base BERT config from Hugging Face Hub
+        base_model_name = model_cfg.get('name', 'answerdotai/ModernBERT-base')
+        self.max_length = inference_cfg.get('max_length', model_cfg.get('max_length', 256))
+        # Load tokenizer from the base model name (e.g., from Hugging Face Hub)
+        print(f"Loading tokenizer from: {base_model_name}")
+        self.tokenizer = AutoTokenizer.from_pretrained(base_model_name)
+        # Load base BERT config from the base model name
+        print(f"Loading ModernBertConfig from: {base_model_name}")
+        bert_config = ModernBertConfig.from_pretrained(base_model_name)
+        # --- Apply any necessary overrides from your config to the loaded bert_config ---
+        # For example, if your ModernBertForSentiment expects specific config values beyond the base BERT model.
+        # Your current ModernBertForSentiment takes the entire config object, which might implicitly carry these.
+        # However, explicitly setting them on bert_config loaded from HF is safer if they are architecturally relevant.
+        bert_config.classifier_dropout = model_cfg.get('dropout', bert_config.classifier_dropout) # Example
+        # Ensure num_labels is set if your inference model needs it (usually for HF pipeline, less so for manual predict)
+        # bert_config.num_labels = model_cfg.get('num_labels', 1) # Typically 1 for binary sentiment regression-style output
+        # It's also important that pooling_strategy and num_weighted_layers are set on the config object
+        # that ModernBertForSentiment receives, as it uses these to build its layers.
+        # These are usually fine-tuning specific, not part of the base HF config, so they should come from your model_cfg.
+        bert_config.pooling_strategy = model_cfg.get('pooling_strategy', 'cls')
+        bert_config.num_weighted_layers = model_cfg.get('num_weighted_layers', 4)
+        bert_config.loss_function = model_cfg.get('loss_function', {'name': 'SentimentWeightedLoss', 'params': {}}) # Needed by model init
+        # Ensure num_labels is explicitly set for the model's classifier head
+        bert_config.num_labels = 1 # For sentiment (positive/negative) often treated as 1 logit output
+        print("Instantiating ModernBertForSentiment model structure...")
+        self.model = ModernBertForSentiment(bert_config)
+        print(f"Loading model weights from local checkpoint: {model_weights_path}")
+        # Load the entire checkpoint dictionary first
+        checkpoint = torch.load(model_weights_path, map_location=torch.device('cpu'))
+        # Extract the model_state_dict from the checkpoint
+        # This handles the case where the checkpoint saves more than just the model weights (e.g., optimizer state, epoch)
+        if 'model_state_dict' in checkpoint:
+            model_state_to_load = checkpoint['model_state_dict']
+        else:
+            # If the checkpoint is just the state_dict itself (older format or different saving convention)
+            model_state_to_load = checkpoint
+        self.model.load_state_dict(model_state_to_load)
+        self.model.eval()
+        print("Model loaded successfully.")
+    def predict(self, text: str) -> Dict[str, Any]:
+        inputs = self.tokenizer(text, return_tensors="pt", truncation=True, max_length=self.max_length)
+        with torch.no_grad():
+            outputs = self.model(input_ids=inputs['input_ids'], attention_mask=inputs['attention_mask'])
+        logits = outputs["logits"]
+        prob = torch.sigmoid(logits).item()
+        return {"sentiment": "positive" if prob > 0.5 else "negative", "confidence": prob}

models.py ADDED Viewed

	@@ -0,0 +1,172 @@

+from transformers import ModernBertModel, ModernBertPreTrainedModel
+from transformers.modeling_outputs import SequenceClassifierOutput
+from torch import nn
+import torch
+from train_utils import SentimentWeightedLoss, SentimentFocalLoss
+import torch.nn.functional as F
+from classifiers import ClassifierHead, ConcatClassifierHead
+class ModernBertForSentiment(ModernBertPreTrainedModel):
+    """ModernBERT encoder with a dynamically configurable classification head and pooling strategy."""
+    def __init__(self, config):
+        super().__init__(config)
+        self.num_labels = config.num_labels
+        self.bert = ModernBertModel(config) # Base BERT model, config may have output_hidden_states=True
+        # Store pooling strategy from config
+        self.pooling_strategy = getattr(config, 'pooling_strategy', 'mean')
+        self.num_weighted_layers = getattr(config, 'num_weighted_layers', 4)
+        if self.pooling_strategy in ['weighted_layer', 'cls_weighted_concat'] and not config.output_hidden_states:
+            # This check is more of an assertion; train.py should set output_hidden_states=True
+            raise ValueError(
+                "output_hidden_states must be True in BertConfig for weighted_layer pooling."
+            )
+        # Initialize weights for weighted layer pooling
+        if self.pooling_strategy in ['weighted_layer', 'cls_weighted_concat']:
+            # num_weighted_layers specifies how many *top* layers of BERT to use.
+            # If num_weighted_layers is e.g. 4, we use the last 4 layers.
+            self.layer_weights = nn.Parameter(torch.ones(self.num_weighted_layers) / self.num_weighted_layers)
+        # Determine classifier input size and choose head
+        classifier_input_size = config.hidden_size
+        if self.pooling_strategy in ['cls_mean_concat', 'cls_weighted_concat']:
+            classifier_input_size = config.hidden_size * 2
+        # Dropout for features fed into the classifier head
+        classifier_dropout_prob = (
+            config.classifier_dropout if config.classifier_dropout is not None else config.hidden_dropout_prob
+        )
+        self.features_dropout = nn.Dropout(classifier_dropout_prob)
+        # Select the appropriate classifier head based on input feature dimension
+        if classifier_input_size == config.hidden_size:
+            self.classifier = ClassifierHead(
+                hidden_size=config.hidden_size, # input_size for ClassifierHead is just hidden_size
+                num_labels=config.num_labels,
+                dropout_prob=classifier_dropout_prob
+            )
+        elif classifier_input_size == config.hidden_size * 2:
+            self.classifier = ConcatClassifierHead(
+                input_size=config.hidden_size * 2,
+                hidden_size=config.hidden_size, # Internal hidden size of the head
+                num_labels=config.num_labels,
+                dropout_prob=classifier_dropout_prob
+            )
+        else:
+            # This case should ideally not be reached with current strategies
+            raise ValueError(f"Unexpected classifier_input_size: {classifier_input_size}")
+        # Initialize loss function based on config
+        loss_config = getattr(config, 'loss_function', {'name': 'SentimentWeightedLoss', 'params': {}})
+        loss_name = loss_config.get('name', 'SentimentWeightedLoss')
+        loss_params = loss_config.get('params', {})
+        if loss_name == "SentimentWeightedLoss":
+            self.loss_fct = SentimentWeightedLoss() # SentimentWeightedLoss takes no arguments
+        elif loss_name == "SentimentFocalLoss":
+            # Ensure only relevant params are passed, or that loss_params is structured correctly for SentimentFocalLoss
+            # For SentimentFocalLoss, expected params are 'gamma_focal' and 'label_smoothing_epsilon'
+            self.loss_fct = SentimentFocalLoss(**loss_params)
+        else:
+            raise ValueError(f"Unsupported loss function: {loss_name}")
+        self.post_init() # Initialize weights and apply final processing
+    def _mean_pool(self, last_hidden_state, attention_mask):
+        if attention_mask is None:
+            attention_mask = torch.ones_like(last_hidden_state[:, :, 0]) # Assuming first dim of last hidden state is token ids
+        input_mask_expanded = attention_mask.unsqueeze(-1).expand(last_hidden_state.size()).float()
+        sum_embeddings = torch.sum(last_hidden_state * input_mask_expanded, 1)
+        sum_mask = torch.clamp(input_mask_expanded.sum(1), min=1e-9)
+        return sum_embeddings / sum_mask
+    def _weighted_layer_pool(self, all_hidden_states):
+        # all_hidden_states includes embeddings + output of each layer.
+        # We want the outputs of the last num_weighted_layers.
+        # Example: 12 layers -> all_hidden_states have 13 items (embeddings + 12 layers)
+        # num_weighted_layers = 4 -> use layers 9, 10, 11, 12 (indices -4, -3, -2, -1)
+        layers_to_weigh = torch.stack(all_hidden_states[-self.num_weighted_layers:], dim=0)
+        # layers_to_weigh shape: (num_weighted_layers, batch_size, sequence_length, hidden_size)
+        # Normalize weights to sum to 1 (softmax or simple division)
+        normalized_weights = F.softmax(self.layer_weights, dim=-1)
+        # Weighted sum across layers
+        # Reshape weights for broadcasting: (num_weighted_layers, 1, 1, 1)
+        weighted_hidden_states = layers_to_weigh * normalized_weights.view(-1, 1, 1, 1)
+        weighted_sum_hidden_states = torch.sum(weighted_hidden_states, dim=0)
+        # weighted_sum_hidden_states shape: (batch_size, sequence_length, hidden_size)
+        # Pool the result (e.g., take [CLS] token of this weighted sum)
+        return weighted_sum_hidden_states[:, 0] # Return CLS token of the weighted sum
+    def forward(
+        self,
+        input_ids=None,
+        attention_mask=None,
+        labels=None,
+        lengths=None,
+        return_dict=None,
+        **kwargs
+    ):
+        return_dict = return_dict if return_dict is not None else self.config.use_return_dict
+        bert_outputs = self.bert(
+            input_ids,
+            attention_mask=attention_mask,
+            return_dict=return_dict,
+            output_hidden_states=self.config.output_hidden_states # Controlled by train.py
+        )
+        last_hidden_state = bert_outputs[0] # Or bert_outputs.last_hidden_state
+        pooled_features = None
+        if self.pooling_strategy == 'cls':
+            pooled_features = last_hidden_state[:, 0] # CLS token
+        elif self.pooling_strategy == 'mean':
+            pooled_features = self._mean_pool(last_hidden_state, attention_mask)
+        elif self.pooling_strategy == 'cls_mean_concat':
+            cls_output = last_hidden_state[:, 0]
+            mean_output = self._mean_pool(last_hidden_state, attention_mask)
+            pooled_features = torch.cat((cls_output, mean_output), dim=1)
+        elif self.pooling_strategy == 'weighted_layer':
+            if not self.config.output_hidden_states or bert_outputs.hidden_states is None:
+                raise ValueError("Weighted layer pooling requires output_hidden_states=True and hidden_states in BERT output.")
+            all_hidden_states = bert_outputs.hidden_states
+            pooled_features = self._weighted_layer_pool(all_hidden_states)
+        elif self.pooling_strategy == 'cls_weighted_concat':
+            if not self.config.output_hidden_states or bert_outputs.hidden_states is None:
+                raise ValueError("Weighted layer pooling requires output_hidden_states=True and hidden_states in BERT output.")
+            cls_output = last_hidden_state[:, 0]
+            all_hidden_states = bert_outputs.hidden_states
+            weighted_output = self._weighted_layer_pool(all_hidden_states)
+            pooled_features = torch.cat((cls_output, weighted_output), dim=1)
+        else:
+            raise ValueError(f"Unknown pooling_strategy: {self.pooling_strategy}")
+        pooled_features = self.features_dropout(pooled_features)
+        logits = self.classifier(pooled_features)
+        loss = None
+        if labels is not None:
+            if lengths is None:
+                raise ValueError("lengths must be provided when labels are specified for loss calculation.")
+            loss = self.loss_fct(logits.squeeze(-1), labels, lengths)
+        if not return_dict:
+            # Ensure 'outputs' from BERT is appropriately handled. If it's a tuple:
+            bert_model_outputs = bert_outputs[1:] if isinstance(bert_outputs, tuple) else (bert_outputs.hidden_states, bert_outputs.attentions)
+            output = (logits,) + bert_model_outputs
+            return ((loss,) + output) if loss is not None else output
+        return SequenceClassifierOutput(
+            loss=loss,
+            logits=logits,
+            hidden_states=bert_outputs.hidden_states,
+            attentions=bert_outputs.attentions,
+        )

pytorch_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6c95a2ef6b7a06191e4db8fe7f5975f7c8228ec9754d5222ffb3984b6b48010a
+size 1802582665

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,37 @@

+{
+  "cls_token": {
+    "content": "[CLS]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "mask_token": {
+    "content": "[MASK]",
+    "lstrip": true,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "[PAD]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "sep_token": {
+    "content": "[SEP]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "[UNK]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,945 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "|||IP_ADDRESS|||",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "1": {
+      "content": "<|padding|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "50254": {
+      "content": "                        ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50255": {
+      "content": "                       ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50256": {
+      "content": "                      ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50257": {
+      "content": "                     ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50258": {
+      "content": "                    ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50259": {
+      "content": "                   ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50260": {
+      "content": "                  ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50261": {
+      "content": "                 ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50262": {
+      "content": "                ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50263": {
+      "content": "               ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50264": {
+      "content": "              ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50265": {
+      "content": "             ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50266": {
+      "content": "            ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50267": {
+      "content": "           ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50268": {
+      "content": "          ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50269": {
+      "content": "         ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50270": {
+      "content": "        ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50271": {
+      "content": "       ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50272": {
+      "content": "      ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50273": {
+      "content": "     ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50274": {
+      "content": "    ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50275": {
+      "content": "   ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50276": {
+      "content": "  ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50277": {
+      "content": "|||EMAIL_ADDRESS|||",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50278": {
+      "content": "|||PHONE_NUMBER|||",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50279": {
+      "content": "<|endoftext|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "50280": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "50281": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "50282": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "50283": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "50284": {
+      "content": "[MASK]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "50285": {
+      "content": "[unused0]",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50286": {
+      "content": "[unused1]",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50287": {
+      "content": "[unused2]",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50288": {
+      "content": "[unused3]",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50289": {
+      "content": "[unused4]",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50290": {
+      "content": "[unused5]",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50291": {
+      "content": "[unused6]",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50292": {
+      "content": "[unused7]",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50293": {
+      "content": "[unused8]",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50294": {
+      "content": "[unused9]",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50295": {
+      "content": "[unused10]",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50296": {
+      "content": "[unused11]",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50297": {
+      "content": "[unused12]",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50298": {
+      "content": "[unused13]",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50299": {
+      "content": "[unused14]",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50300": {
+      "content": "[unused15]",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50301": {
+      "content": "[unused16]",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50302": {
+      "content": "[unused17]",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50303": {
+      "content": "[unused18]",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50304": {
+      "content": "[unused19]",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50305": {
+      "content": "[unused20]",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50306": {
+      "content": "[unused21]",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50307": {
+      "content": "[unused22]",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50308": {
+      "content": "[unused23]",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50309": {
+      "content": "[unused24]",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50310": {
+      "content": "[unused25]",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50311": {
+      "content": "[unused26]",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50312": {
+      "content": "[unused27]",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50313": {
+      "content": "[unused28]",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50314": {
+      "content": "[unused29]",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50315": {
+      "content": "[unused30]",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50316": {
+      "content": "[unused31]",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50317": {
+      "content": "[unused32]",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50318": {
+      "content": "[unused33]",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50319": {
+      "content": "[unused34]",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50320": {
+      "content": "[unused35]",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50321": {
+      "content": "[unused36]",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50322": {
+      "content": "[unused37]",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50323": {
+      "content": "[unused38]",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50324": {
+      "content": "[unused39]",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50325": {
+      "content": "[unused40]",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50326": {
+      "content": "[unused41]",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50327": {
+      "content": "[unused42]",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50328": {
+      "content": "[unused43]",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50329": {
+      "content": "[unused44]",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50330": {
+      "content": "[unused45]",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50331": {
+      "content": "[unused46]",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50332": {
+      "content": "[unused47]",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50333": {
+      "content": "[unused48]",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50334": {
+      "content": "[unused49]",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50335": {
+      "content": "[unused50]",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50336": {
+      "content": "[unused51]",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50337": {
+      "content": "[unused52]",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50338": {
+      "content": "[unused53]",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50339": {
+      "content": "[unused54]",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50340": {
+      "content": "[unused55]",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50341": {
+      "content": "[unused56]",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50342": {
+      "content": "[unused57]",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50343": {
+      "content": "[unused58]",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50344": {
+      "content": "[unused59]",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50345": {
+      "content": "[unused60]",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50346": {
+      "content": "[unused61]",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50347": {
+      "content": "[unused62]",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50348": {
+      "content": "[unused63]",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50349": {
+      "content": "[unused64]",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50350": {
+      "content": "[unused65]",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50351": {
+      "content": "[unused66]",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50352": {
+      "content": "[unused67]",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50353": {
+      "content": "[unused68]",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50354": {
+      "content": "[unused69]",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50355": {
+      "content": "[unused70]",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50356": {
+      "content": "[unused71]",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50357": {
+      "content": "[unused72]",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50358": {
+      "content": "[unused73]",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50359": {
+      "content": "[unused74]",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50360": {
+      "content": "[unused75]",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50361": {
+      "content": "[unused76]",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50362": {
+      "content": "[unused77]",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50363": {
+      "content": "[unused78]",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50364": {
+      "content": "[unused79]",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50365": {
+      "content": "[unused80]",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50366": {
+      "content": "[unused81]",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50367": {
+      "content": "[unused82]",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "extra_special_tokens": {},
+  "mask_token": "[MASK]",
+  "model_input_names": [
+    "input_ids",
+    "attention_mask"
+  ],
+  "model_max_length": 8192,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "tokenizer_class": "PreTrainedTokenizer",
+  "unk_token": "[UNK]"
+}

train_utils.py ADDED Viewed

	@@ -0,0 +1,156 @@

+import math
+from torch import nn
+import torch
+import torch.nn.functional as F
class SentimentWeightedLoss(nn.Module):
    """BCE-with-logits loss with dynamic per-sample weighting.

    Each sample's BCE term is scaled by the product of two weights:

      * length_weight: ``sqrt(num_tokens) / sqrt(max_tokens_in_batch)``,
        clamped to ``[min_len_weight_sqrt, 1.0]``.
      * confidence_weight: ``2 * |sigmoid(logit) - 0.5|``, which lies in
        ``[0, 1]`` (higher confidence => larger weight).

    The combined weights are divided by their batch mean so that they
    average to approximately 1, and the weighted terms are then averaged.
    """

    def __init__(self):
        super().__init__()
        # No reduction here: per-sample weights are applied before averaging.
        self.bce = nn.BCEWithLogitsLoss(reduction="none")
        self.min_len_weight_sqrt = 0.1  # Lower clamp bound for the length weight

    def forward(self, logits, targets, lengths):
        """Compute the weighted scalar loss for one batch.

        All three inputs are flattened, so ``[B]``, ``[B, 1]`` and 0-dim
        (single-sample) tensors are accepted as long as they hold the same
        number of elements.

        Args:
            logits: Raw (pre-sigmoid) model outputs.
            targets: Binary labels; cast to float internally.
            lengths: Per-sample token counts.

        Returns:
            A 0-dim tensor with the mean weighted loss. An empty batch yields
            ``0.0`` (instead of the NaN a mean over nothing would give).
        """
        if lengths.numel() == 0:
            # Mean over an empty tensor is NaN, so short-circuit with a zero loss.
            return torch.tensor(0.0, device=logits.device, requires_grad=logits.requires_grad)

        # Flatten everything up front. Without this, [B, 1]-shaped targets fail
        # the BCE shape check and [B, 1]-shaped lengths silently broadcast the
        # weights to a [B, B] matrix.
        logits_flat = logits.reshape(-1)
        targets_flat = targets.reshape(-1).float()
        lengths_flat = lengths.reshape(-1).float()

        base_loss = self.bce(logits_flat, targets_flat)  # shape [B]

        # NOTE: the weights are not detached, so gradients also flow through
        # the confidence weight.
        prob = torch.sigmoid(logits_flat)
        confidence_weight = (prob - 0.5).abs() * 2  # in [0, 1]

        max_len = lengths_flat.max().item()
        if max_len == 0:
            # Every sample has zero tokens: 0 / 0 would give NaN weights, so
            # fall back to uniform length weights.
            length_weight = torch.ones_like(lengths_flat)
        else:
            length_weight = torch.sqrt(lengths_flat) / math.sqrt(max_len)
            # Clamp so that very short samples keep a small, non-zero weight.
            length_weight = length_weight.clamp(self.min_len_weight_sqrt, 1.0)

        weights = confidence_weight * length_weight
        weights = weights / (weights.mean() + 1e-8)  # normalize so E[w] ~= 1
        return (base_loss * weights).mean()
class SentimentFocalLoss(nn.Module):
    """Binary sentiment loss combining several per-sample adjustments.

    The loss incorporates:
    1. A base BCE-with-logits term.
    2. Label smoothing of the targets fed to the BCE term.
    3. Focal modulation to focus on hard examples (or, with a negative
       gamma, on easy examples).
    4. Sample weighting based on review length.
    5. Sample weighting based on prediction confidence.

    Per sample, roughly:
        loss = FocalModulator(pt, gamma) * BCE(logit, smoothed_target) * NormalizedExternalWeight
        NormalizedExternalWeight = (ConfidenceWeight * LengthWeight) / Mean(ConfidenceWeight * LengthWeight)
    The per-sample values are averaged into a scalar.
    """

    def __init__(self, gamma_focal: float = 0.1, label_smoothing_epsilon: float = 0.05):
        """
        Args:
            gamma_focal (float): Exponent of the focal modulator.
                - gamma_focal > 0 (e.g., 2.0): standard focal loss, ``(1 - pt)^gamma``,
                  which down-weights easy examples (focus on hard examples).
                - gamma_focal < 0 (e.g., -2.0): reversed focal loss, ``pt^|gamma|``,
                  which down-weights hard examples (focus on easy examples).
                - gamma_focal == 0: no focal modulation.
            label_smoothing_epsilon (float): Label smoothing amount,
                ``0.0 <= epsilon < 1.0``. Hard labels 1 and 0 become
                ``1 - epsilon`` and ``epsilon`` respectively; ``0.0`` disables
                smoothing. Note that values above 0.5 push the smoothed target
                of each class past 0.5, i.e. towards the opposite label.

        Raises:
            ValueError: If ``label_smoothing_epsilon`` is outside ``[0.0, 1.0)``.
        """
        super().__init__()
        if not (0.0 <= label_smoothing_epsilon < 1.0):
            raise ValueError("label_smoothing_epsilon must be between 0.0 and <1.0.")
        self.gamma_focal = gamma_focal
        self.label_smoothing_epsilon = label_smoothing_epsilon
        # No reduction here: per-sample weights are applied before averaging.
        self.bce_loss_no_reduction = nn.BCEWithLogitsLoss(reduction="none")

    def forward(self, logits: torch.Tensor, targets: torch.Tensor, lengths: torch.Tensor) -> torch.Tensor:
        """Compute the scalar loss for one batch.

        All inputs are flattened, so ``[B]``, ``[B, 1]`` and 0-dim
        (single-sample) tensors are accepted.

        Args:
            logits (torch.Tensor): Raw logits from the model.
            targets (torch.Tensor): Ground truth labels (0 or 1).
            lengths (torch.Tensor): Number of tokens in each review.

        Returns:
            torch.Tensor: The computed scalar loss (``0.0`` for an empty batch).
        """
        # numel() rather than size(0): size(0) raises on a 0-dim tensor, which
        # is what a squeezed single-sample batch looks like.
        if logits.numel() == 0:
            return torch.tensor(0.0, device=logits.device, requires_grad=True)

        logits_flat = logits.reshape(-1)
        hard_targets = targets.reshape(-1).float()

        # 1. Label smoothing: 1 -> (1 - epsilon), 0 -> epsilon.
        eps = self.label_smoothing_epsilon
        if eps > 0:
            bce_targets = hard_targets * (1.0 - eps) + (1.0 - hard_targets) * eps
        else:
            bce_targets = hard_targets

        # 2. Base BCE terms, computed against the (possibly smoothed) targets.
        bce_terms = self.bce_loss_no_reduction(logits_flat, bce_targets)

        # 3. Focal modulation. pt is the probability the model assigns to the
        #    *original* (unsmoothed) ground-truth class.
        probs = torch.sigmoid(logits_flat)
        pt = torch.where(hard_targets.bool(), probs, 1.0 - probs)
        if self.gamma_focal > 0:
            # Standard focal: (1 - pt)^gamma; the epsilon keeps the base positive when pt == 1.
            focal_modulator = (1.0 - pt + 1e-8).pow(self.gamma_focal)
        elif self.gamma_focal < 0:
            # Reversed focal: pt^|gamma|; the epsilon keeps the base positive when pt == 0.
            focal_modulator = (pt + 1e-8).pow(abs(self.gamma_focal))
        else:
            focal_modulator = torch.ones_like(pt)
        modulated_terms = focal_modulator * bce_terms

        # 4. Confidence weight: distance of the probability from 0.5, scaled to [0, 1].
        confidence_w = (probs - 0.5).abs() * 2.0

        # 5. Length weight: sqrt(length) relative to the longest review in the batch.
        lengths_flat = lengths.reshape(-1).float()
        max_len_in_batch = lengths_flat.max().item()
        if max_len_in_batch == 0:
            # Every review has zero tokens: use uniform weights instead of 0 / 0.
            length_w = torch.ones_like(lengths_flat)
        else:
            length_w = torch.sqrt(lengths_flat) / (math.sqrt(max_len_in_batch) + 1e-8)
            length_w = torch.clamp(length_w, 0.0, 1.0)  # cap weights at 1

        # 6. Combine the external weights and normalize their mean to ~1 so the
        #    weighting scheme does not change the overall loss magnitude.
        external_weights = confidence_w * length_w
        if external_weights.sum() > 1e-8:
            normalized_weights = external_weights / (external_weights.mean() + 1e-8)
        else:
            # All weights are (numerically) zero: fall back to ones so the loss
            # is not nullified.
            normalized_weights = torch.ones_like(external_weights)

        # 7-8. Apply the weights on top of the focal-modulated terms and average.
        return (modulated_terms * normalized_weights).mean()