Upload fine-tuned model, tokenizer, and supporting files for modernbert-imdb-sentiment
Browse files
- README.md +46 -3
- classifiers.py +141 -0
- config.json +45 -0
- config.yaml +12 -0
- inference.py +79 -0
- models.py +172 -0
- pytorch_model.bin +3 -0
- special_tokens_map.json +37 -0
- tokenizer.json +0 -0
- tokenizer_config.json +945 -0
- train_utils.py +156 -0
    	
        README.md
    CHANGED
    
    | @@ -20,9 +20,52 @@ Fine-tuned ModernBERT model for sentiment analysis on IMDb movie reviews. Achiev | |
| 20 | 
             
            ```python
         | 
| 21 | 
             
            from transformers import AutoModelForSequenceClassification, AutoTokenizer
         | 
| 22 |  | 
| 23 | 
            -
            model = AutoModelForSequenceClassification.from_pretrained(" | 
| 24 | 
            -
            tokenizer = AutoTokenizer.from_pretrained(" | 
| 25 |  | 
| 26 | 
             
            # Input processing
         | 
| 27 | 
             
            inputs = tokenizer("This movie was fantastic!", return_tensors="pt")
         | 
| 28 | 
            -
            outputs = model(**inputs)
         | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 20 | 
             
            ```python
         | 
| 21 | 
             
            from transformers import AutoModelForSequenceClassification, AutoTokenizer
         | 
| 22 |  | 
| 23 | 
            +
            model = AutoModelForSequenceClassification.from_pretrained("voxmenthe/modernbert-imdb-sentiment")
         | 
| 24 | 
            +
            tokenizer = AutoTokenizer.from_pretrained("answerdotai/ModernBERT-base")
         | 
| 25 |  | 
| 26 | 
             
            # Input processing
         | 
| 27 | 
             
            inputs = tokenizer("This movie was fantastic!", return_tensors="pt")
         | 
| 28 | 
            +
            outputs = model(**inputs)
         | 
| 29 | 
            +
             | 
| 30 | 
            +
            # Get the predicted class
         | 
| 31 | 
            +
            predicted_class_id = outputs.logits.argmax().item()
         | 
| 32 | 
            +
             | 
| 33 | 
            +
            # Convert class ID to label
         | 
| 34 | 
            +
            predicted_label = model.config.id2label[predicted_class_id]
         | 
| 35 | 
            +
            print(f"Predicted label: {predicted_label}")
         | 
| 36 | 
            +
            ```
         | 
| 37 | 
            +
             | 
| 38 | 
            +
            ## Model Card
         | 
| 39 | 
            +
             | 
| 40 | 
            +
            ### Model Details
         | 
| 41 | 
            +
            - **Model Name**: ModernBERT IMDb Sentiment Analysis
         | 
| 42 | 
            +
            - **Base Model**: answerdotai/ModernBERT-base
         | 
| 43 | 
            +
            - **Task**: Sentiment Analysis
         | 
| 44 | 
            +
            - **Dataset**: IMDb Movie Reviews
         | 
| 45 | 
            +
            - **Training Epochs**: 5
         | 
| 46 | 
            +
             | 
| 47 | 
            +
            ### Model Performance
         | 
| 48 | 
            +
            - **Test Accuracy**: 95.75%
         | 
| 49 | 
            +
            - **Test F1 Score**: 95.75%
         | 
| 50 | 
            +
             | 
| 51 | 
            +
            ### Model Architecture
         | 
| 52 | 
            +
            - **Base Model**: answerdotai/ModernBERT-base
         | 
| 53 | 
            +
            - **Task-Specific Head**: ClassifierHead (from `classifiers.py`)
         | 
| 54 | 
            +
            - **Number of Labels**: 2 (Positive, Negative)
         | 
| 55 | 
            +
             | 
| 56 | 
            +
            ### Model Inference
         | 
| 57 | 
            +
            - **Input Format**: Text (single review)
         | 
| 58 | 
            +
            - **Output Format**: Predicted sentiment label (Positive or Negative)
         | 
| 59 | 
            +
             | 
| 60 | 
            +
            ### Model Version
         | 
| 61 | 
            +
            - **Version**: 1.0
         | 
| 62 | 
            +
            - **Date**: 2025-05-07
         | 
| 63 | 
            +
             | 
| 64 | 
            +
            ### Model License
         | 
| 65 | 
            +
            - **License**: MIT License
         | 
| 66 | 
            +
             | 
| 67 | 
            +
            ### Model Contact
         | 
| 68 | 
            +
            - **Contact**: [email protected]
         | 
| 69 | 
            +
             | 
| 70 | 
            +
            ### Model Citation
         | 
| 71 | 
            +
            - **Citation**: voxmenthe/modernbert-imdb-sentiment
         | 
    	
        classifiers.py
    ADDED
    
    | @@ -0,0 +1,141 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            from torch import nn
         | 
| 2 | 
            +
            import torch
         | 
| 3 | 
            +
             | 
| 4 | 
            +
             | 
| 5 | 
            +
            class ClassifierHead(nn.Module):
                """Residual MLP head: two pre-norm GELU blocks, then a linear map to logits.

                Each block computes ``dropout(gelu(dense(layer_norm(x)))) + x``, so the
                feature width stays at ``hidden_size`` until ``out_proj`` reduces it to
                ``num_labels``.
                """

                def __init__(self, hidden_size, num_labels, dropout_prob):
                    super().__init__()
                    width = hidden_size

                    # First residual block.
                    self.dense1 = nn.Linear(width, width)
                    self.norm1 = nn.LayerNorm(width)
                    self.activation = nn.GELU()  # shared by both residual blocks
                    self.dropout1 = nn.Dropout(dropout_prob)

                    # Second residual block.
                    self.dense2 = nn.Linear(width, width)
                    self.norm2 = nn.LayerNorm(width)
                    self.dropout2 = nn.Dropout(dropout_prob)

                    # Final projection to one logit per label.
                    self.out_proj = nn.Linear(width, num_labels)

                def forward(self, features):
                    """Return the logits computed from ``features``."""
                    hidden = features
                    stages = (
                        (self.norm1, self.dense1, self.dropout1),
                        (self.norm2, self.dense2, self.dropout2),
                    )
                    for norm, dense, dropout in stages:
                        # Pre-norm branch, added back onto the block's own input.
                        branch = dropout(self.activation(dense(norm(hidden))))
                        hidden = branch + hidden
                    return self.out_proj(hidden)
         | 
| 43 | 
            +
             | 
| 44 | 
            +
             | 
| 45 | 
            +
            class ConcatClassifierHead(nn.Module):
                """
                Classifier head for a wider (e.g. concatenated CLS + mean-pooled) input.

                The input is first projected from ``input_size`` down to ``hidden_size``
                (Linear -> LayerNorm -> GELU -> Dropout). Two pre-norm residual blocks and
                a final linear projection to ``num_labels`` logits follow.
                """
                def __init__(self, input_size, hidden_size, num_labels, dropout_prob):
                    super().__init__()
                    # Initial projection from the input width down to hidden_size.
                    self.initial_projection = nn.Linear(input_size, hidden_size)
                    self.initial_norm = nn.LayerNorm(hidden_size)  # norm after projection
                    self.initial_activation = nn.GELU()
                    self.initial_dropout = nn.Dropout(dropout_prob)

                    # Residual block 1
                    self.dense1 = nn.Linear(hidden_size, hidden_size)
                    self.norm1 = nn.LayerNorm(hidden_size)
                    self.activation = nn.GELU()  # shared by both residual blocks
                    self.dropout1 = nn.Dropout(dropout_prob)

                    # Residual block 2
                    self.dense2 = nn.Linear(hidden_size, hidden_size)
                    self.norm2 = nn.LayerNorm(hidden_size)
                    self.dropout2 = nn.Dropout(dropout_prob)

                    # Output layer
                    self.out_proj = nn.Linear(hidden_size, num_labels)

                def forward(self, features):
                    """Return the logits computed from ``features``."""
                    # Initial projection step: the last dimension becomes hidden_size.
                    x = self.initial_projection(features)
                    x = self.initial_norm(x)
                    x = self.initial_activation(x)
                    x = self.initial_dropout(x)

                    # Residual block 1: the skip connection starts after the projection.
                    x_res = self.norm1(x)
                    x_res = self.dense1(x_res)
                    x_res = self.activation(x_res)
                    x_res = self.dropout1(x_res)
                    x = x + x_res

                    # Residual block 2
                    x_res = self.norm2(x)
                    x_res = self.dense2(x_res)
                    x_res = self.activation(x_res)
                    x_res = self.dropout2(x_res)
                    x = x + x_res

                    # Output layer
                    return self.out_proj(x)
         | 
| 99 | 
            +
             | 
| 100 | 
            +
             | 
| 101 | 
            +
            # ExpansionClassifierHead currently not used
         | 
| 102 | 
            +
            class ExpansionClassifierHead(nn.Module):
                """
                Feed-forward style head: expand to ``4 * hidden_size``, contract to
                ``hidden_size``, then project to ``num_labels`` logits.

                LayerNorm precedes each of the two dense layers; GELU and dropout follow
                each of them. There are no skip connections in this head.
                """
                def __init__(self, input_size, hidden_size, num_labels, dropout_prob):
                    super().__init__()
                    expanded_dim = 4 * hidden_size  # FFN expansion factor

                    # Expansion stage.
                    self.norm1 = nn.LayerNorm(input_size)
                    self.dense1 = nn.Linear(input_size, expanded_dim)
                    self.activation = nn.GELU()  # reused after both dense layers
                    self.dropout1 = nn.Dropout(dropout_prob)

                    # Contraction stage back down to hidden_size.
                    self.norm2 = nn.LayerNorm(expanded_dim)
                    self.dense2 = nn.Linear(expanded_dim, hidden_size)
                    self.dropout2 = nn.Dropout(dropout_prob)

                    # Logit projection.
                    self.out_proj = nn.Linear(hidden_size, num_labels)

                def forward(self, features):
                    """Return the logits computed from ``features``."""
                    expanded = self.dropout1(
                        self.activation(self.dense1(self.norm1(features)))
                    )
                    contracted = self.dropout2(
                        self.activation(self.dense2(self.norm2(expanded)))
                    )
                    return self.out_proj(contracted)
         | 
    	
        config.json
    ADDED
    
    | @@ -0,0 +1,45 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            {
         | 
| 2 | 
            +
              "architectures": [
         | 
| 3 | 
            +
                "ModernBertForMaskedLM"
         | 
| 4 | 
            +
              ],
         | 
| 5 | 
            +
              "attention_bias": false,
         | 
| 6 | 
            +
              "attention_dropout": 0.0,
         | 
| 7 | 
            +
              "bos_token_id": 50281,
         | 
| 8 | 
            +
              "classifier_activation": "gelu",
         | 
| 9 | 
            +
              "classifier_bias": false,
         | 
| 10 | 
            +
              "classifier_dropout": 0.0,
         | 
| 11 | 
            +
              "classifier_pooling": "mean",
         | 
| 12 | 
            +
              "cls_token_id": 50281,
         | 
| 13 | 
            +
              "decoder_bias": true,
         | 
| 14 | 
            +
              "deterministic_flash_attn": false,
         | 
| 15 | 
            +
              "embedding_dropout": 0.0,
         | 
| 16 | 
            +
              "eos_token_id": 50282,
         | 
| 17 | 
            +
              "global_attn_every_n_layers": 3,
         | 
| 18 | 
            +
              "global_rope_theta": 160000.0,
         | 
| 19 | 
            +
              "gradient_checkpointing": false,
         | 
| 20 | 
            +
              "hidden_activation": "gelu",
         | 
| 21 | 
            +
              "hidden_size": 768,
         | 
| 22 | 
            +
              "initializer_cutoff_factor": 2.0,
         | 
| 23 | 
            +
              "initializer_range": 0.02,
         | 
| 24 | 
            +
              "intermediate_size": 1152,
         | 
| 25 | 
            +
              "layer_norm_eps": 1e-05,
         | 
| 26 | 
            +
              "local_attention": 128,
         | 
| 27 | 
            +
              "local_rope_theta": 10000.0,
         | 
| 28 | 
            +
              "max_position_embeddings": 8192,
         | 
| 29 | 
            +
              "mlp_bias": false,
         | 
| 30 | 
            +
              "mlp_dropout": 0.0,
         | 
| 31 | 
            +
              "model_type": "modernbert",
         | 
| 32 | 
            +
              "norm_bias": false,
         | 
| 33 | 
            +
              "norm_eps": 1e-05,
         | 
| 34 | 
            +
              "num_attention_heads": 12,
         | 
| 35 | 
            +
              "num_hidden_layers": 22,
         | 
| 36 | 
            +
              "pad_token_id": 50283,
         | 
| 37 | 
            +
              "position_embedding_type": "absolute",
         | 
| 38 | 
            +
              "repad_logits_with_grad": false,
         | 
| 39 | 
            +
              "sep_token_id": 50282,
         | 
| 40 | 
            +
              "sparse_pred_ignore_index": -100,
         | 
| 41 | 
            +
              "sparse_prediction": false,
         | 
| 42 | 
            +
              "torch_dtype": "float32",
         | 
| 43 | 
            +
              "transformers_version": "4.51.3",
         | 
| 44 | 
            +
              "vocab_size": 50368
         | 
| 45 | 
            +
            }
         | 
    	
        config.yaml
    ADDED
    
    | @@ -0,0 +1,12 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            model:
         | 
| 2 | 
            +
              name: "voxmenthe/modernbert-imdb-sentiment"
         | 
| 3 | 
            +
              output_dir: "checkpoints"
         | 
| 4 | 
            +
              max_length: 880 # 256
         | 
| 5 | 
            +
              dropout: 0.1
         | 
| 6 | 
            +
              pooling_strategy: "mean" # Current default, change as needed
         | 
| 7 | 
            +
             | 
| 8 | 
            +
            inference:
         | 
| 9 | 
            +
              # Default path, can be overridden
         | 
| 10 | 
            +
              model_path: "checkpoints/mean_epoch5_0.9575acc_0.9575f1.pt" 
         | 
| 11 | 
            +
              # Using the same max_length as training for consistency
         | 
| 12 | 
            +
              max_length: 880 # 256
         | 
    	
        inference.py
    ADDED
    
    | @@ -0,0 +1,79 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            import torch
         | 
| 2 | 
            +
            from transformers import AutoTokenizer, AutoModelForSequenceClassification
         | 
| 3 | 
            +
            from models import ModernBertForSentiment
         | 
| 4 | 
            +
            from transformers import ModernBertConfig
         | 
| 5 | 
            +
            from typing import Dict, Any
         | 
| 6 | 
            +
            import yaml
         | 
| 7 | 
            +
            import os
         | 
| 8 | 
            +
             | 
| 9 | 
            +
             | 
| 10 | 
            +
            class SentimentInference:
                """Load a ModernBertForSentiment checkpoint and classify single texts."""

                def __init__(self, config_path: str = "config.yaml"):
                    """Load configuration and initialize model and tokenizer.

                    Reads the ``model`` and ``inference`` sections of the YAML file at
                    ``config_path``; every key is optional and has a fallback below.
                    """
                    with open(config_path, 'r', encoding='utf-8') as f:
                        # safe_load returns None for an empty document; fall back to an
                        # empty mapping so the .get() lookups below still work.
                        config = yaml.safe_load(f) or {}

                    model_cfg = config.get('model', {})
                    inference_cfg = config.get('inference', {})

                    # Path to the .pt model weights file
                    model_weights_path = inference_cfg.get(
                        'model_path',
                        os.path.join(model_cfg.get('output_dir', 'checkpoints'), 'best_model.pt'),
                    )

                    # Model identifier from the config ('model.name'), defaulting to
                    # 'answerdotai/ModernBERT-base'. It is used to load both the
                    # tokenizer and the ModernBertConfig.
                    base_model_name = model_cfg.get('name', 'answerdotai/ModernBERT-base')

                    self.max_length = inference_cfg.get('max_length', model_cfg.get('max_length', 256))

                    print(f"Loading tokenizer from: {base_model_name}")
                    self.tokenizer = AutoTokenizer.from_pretrained(base_model_name)

                    print(f"Loading ModernBertConfig from: {base_model_name}")
                    bert_config = ModernBertConfig.from_pretrained(base_model_name)

                    # Apply fine-tuning-specific overrides to the loaded config. They are
                    # set explicitly because this config object is what gets passed to
                    # ModernBertForSentiment below.
                    bert_config.classifier_dropout = model_cfg.get('dropout', bert_config.classifier_dropout)
                    bert_config.pooling_strategy = model_cfg.get('pooling_strategy', 'cls')
                    bert_config.num_weighted_layers = model_cfg.get('num_weighted_layers', 4)
                    bert_config.loss_function = model_cfg.get(
                        'loss_function', {'name': 'SentimentWeightedLoss', 'params': {}}
                    )
                    # Single-logit output: predict() applies a sigmoid to it.
                    bert_config.num_labels = 1

                    print("Instantiating ModernBertForSentiment model structure...")
                    self.model = ModernBertForSentiment(bert_config)

                    print(f"Loading model weights from local checkpoint: {model_weights_path}")
                    # SECURITY NOTE(review): torch.load unpickles the file, which can run
                    # arbitrary code from an untrusted checkpoint. Only load checkpoints
                    # you trust, or confirm the file loads with weights_only=True and
                    # switch to that.
                    checkpoint = torch.load(model_weights_path, map_location=torch.device('cpu'))

                    # A checkpoint may wrap the weights with other training state under
                    # 'model_state_dict', or it may be the bare state_dict itself.
                    if 'model_state_dict' in checkpoint:
                        model_state_to_load = checkpoint['model_state_dict']
                    else:
                        model_state_to_load = checkpoint

                    self.model.load_state_dict(model_state_to_load)
                    self.model.eval()
                    print("Model loaded successfully.")

                def predict(self, text: str) -> Dict[str, Any]:
                    """Classify ``text`` and return its sentiment label and score.

                    Returns a dict with two keys:
                      * ``"sentiment"``: ``"positive"`` when sigmoid(logit) > 0.5,
                        otherwise ``"negative"``.
                      * ``"confidence"``: the sigmoid output itself. It is NOT the
                        probability of the returned label, so it is 0.5 or lower
                        whenever the label is ``"negative"``.
                    """
                    inputs = self.tokenizer(text, return_tensors="pt", truncation=True, max_length=self.max_length)
                    with torch.no_grad():
                        outputs = self.model(input_ids=inputs['input_ids'], attention_mask=inputs['attention_mask'])
                    logits = outputs["logits"]
                    # .item() requires exactly one element: one text, one logit.
                    prob = torch.sigmoid(logits).item()
                    return {"sentiment": "positive" if prob > 0.5 else "negative", "confidence": prob}
         | 
    	
        models.py
    ADDED
    
    | @@ -0,0 +1,172 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            from transformers import ModernBertModel, ModernBertPreTrainedModel
         | 
| 2 | 
            +
            from transformers.modeling_outputs import SequenceClassifierOutput
         | 
| 3 | 
            +
            from torch import nn
         | 
| 4 | 
            +
            import torch
         | 
| 5 | 
            +
            from train_utils import SentimentWeightedLoss, SentimentFocalLoss
         | 
| 6 | 
            +
            import torch.nn.functional as F
         | 
| 7 | 
            +
             | 
| 8 | 
            +
            from classifiers import ClassifierHead, ConcatClassifierHead
         | 
| 9 | 
            +
             | 
| 10 | 
            +
             | 
| 11 | 
            +
            class ModernBertForSentiment(ModernBertPreTrainedModel):
                """ModernBERT encoder with a configurable pooling strategy and classification head.

                The following optional attributes are read from ``config``:

                * ``pooling_strategy`` (default ``'mean'``): one of ``'cls'``, ``'mean'``,
                  ``'cls_mean_concat'``, ``'weighted_layer'`` or ``'cls_weighted_concat'``.
                * ``num_weighted_layers`` (default ``4``): how many of the last hidden
                  states are mixed by the weighted-layer strategies.
                * ``loss_function``: a dict with a ``'name'`` (``'SentimentWeightedLoss'``
                  or ``'SentimentFocalLoss'``) and optional ``'params'`` that are forwarded
                  to ``SentimentFocalLoss``.
                """

                # Strategies that mix several encoder layers and therefore need hidden states.
                _WEIGHTED_STRATEGIES = ('weighted_layer', 'cls_weighted_concat')
                # Strategies that concatenate two pooled vectors (2 * hidden_size features).
                _CONCAT_STRATEGIES = ('cls_mean_concat', 'cls_weighted_concat')

                def __init__(self, config):
                    """Build the encoder, pooling parameters, classifier head and loss.

                    Raises:
                        ValueError: if a weighted-layer strategy is selected without
                            ``config.output_hidden_states``, if ``num_weighted_layers`` is
                            not positive, or if the configured loss name is unsupported.
                    """
                    super().__init__(config)
                    self.num_labels = config.num_labels
                    self.bert = ModernBertModel(config)

                    self.pooling_strategy = getattr(config, 'pooling_strategy', 'mean')
                    self.num_weighted_layers = getattr(config, 'num_weighted_layers', 4)

                    if self.pooling_strategy in self._WEIGHTED_STRATEGIES:
                        if not config.output_hidden_states:
                            raise ValueError(
                                "output_hidden_states must be True in BertConfig for weighted_layer pooling."
                            )
                        if self.num_weighted_layers < 1:
                            raise ValueError(
                                f"num_weighted_layers must be a positive integer, got {self.num_weighted_layers}"
                            )
                        # One learnable scalar per mixed layer, initialised uniformly;
                        # softmax-normalised at pooling time.
                        self.layer_weights = nn.Parameter(
                            torch.ones(self.num_weighted_layers) / self.num_weighted_layers
                        )

                    # Fall back gracefully: not every config class defines hidden_dropout_prob.
                    classifier_dropout_prob = getattr(config, 'classifier_dropout', None)
                    if classifier_dropout_prob is None:
                        classifier_dropout_prob = getattr(config, 'hidden_dropout_prob', 0.0)
                    # Dropout applied to the pooled features before the classifier head.
                    self.features_dropout = nn.Dropout(classifier_dropout_prob)

                    # Concatenating strategies produce 2 * hidden_size features and need the
                    # wider head; everything else feeds hidden_size features.
                    if self.pooling_strategy in self._CONCAT_STRATEGIES:
                        self.classifier = ConcatClassifierHead(
                            input_size=config.hidden_size * 2,
                            hidden_size=config.hidden_size,
                            num_labels=config.num_labels,
                            dropout_prob=classifier_dropout_prob,
                        )
                    else:
                        self.classifier = ClassifierHead(
                            hidden_size=config.hidden_size,
                            num_labels=config.num_labels,
                            dropout_prob=classifier_dropout_prob,
                        )

                    # A missing or None loss_function falls back to SentimentWeightedLoss.
                    loss_config = getattr(config, 'loss_function', None) or {}
                    loss_name = loss_config.get('name', 'SentimentWeightedLoss')
                    loss_params = loss_config.get('params') or {}

                    if loss_name == "SentimentWeightedLoss":
                        self.loss_fct = SentimentWeightedLoss()
                    elif loss_name == "SentimentFocalLoss":
                        self.loss_fct = SentimentFocalLoss(**loss_params)
                    else:
                        raise ValueError(f"Unsupported loss function: {loss_name}")

                    self.post_init()  # Initialize weights and apply final processing

                def _mean_pool(self, last_hidden_state, attention_mask):
                    """Return the attention-mask-weighted mean over the sequence dimension.

                    ``last_hidden_state`` is indexed as (batch, sequence, hidden). When
                    ``attention_mask`` is None every position is treated as valid.
                    """
                    if attention_mask is None:
                        # All-ones mask with the (batch, sequence) shape of the hidden states.
                        attention_mask = torch.ones_like(last_hidden_state[:, :, 0])
                    input_mask_expanded = attention_mask.unsqueeze(-1).expand(last_hidden_state.size()).float()
                    sum_embeddings = torch.sum(last_hidden_state * input_mask_expanded, 1)
                    # Clamp so a fully masked row does not divide by zero.
                    sum_mask = torch.clamp(input_mask_expanded.sum(1), min=1e-9)
                    return sum_embeddings / sum_mask

                def _weighted_layer_pool(self, all_hidden_states):
                    """Mix the last ``num_weighted_layers`` hidden states and return position 0.

                    The layers are combined with softmax-normalised ``layer_weights``; the
                    first sequence position of the mixture is returned.

                    Raises:
                        ValueError: if fewer than ``num_weighted_layers`` hidden states are
                            available (otherwise the weights could broadcast silently
                            against the wrong number of layers).
                    """
                    available = len(all_hidden_states)
                    if available < self.num_weighted_layers:
                        raise ValueError(
                            f"num_weighted_layers={self.num_weighted_layers} exceeds the "
                            f"{available} hidden states returned by the encoder."
                        )
                    # Shape: (num_weighted_layers, batch, sequence, hidden)
                    layers_to_weigh = torch.stack(all_hidden_states[-self.num_weighted_layers:], dim=0)
                    normalized_weights = F.softmax(self.layer_weights, dim=-1)
                    # Broadcast one weight per layer, then sum the layer axis away.
                    weighted_sum_hidden_states = torch.sum(
                        layers_to_weigh * normalized_weights.view(-1, 1, 1, 1), dim=0
                    )
                    return weighted_sum_hidden_states[:, 0]

                def _require_hidden_states(self, hidden_states):
                    """Return ``hidden_states`` or raise if weighted pooling cannot run."""
                    if not self.config.output_hidden_states or hidden_states is None:
                        raise ValueError("Weighted layer pooling requires output_hidden_states=True and hidden_states in BERT output.")
                    return hidden_states

                def forward(
                    self,
                    input_ids=None,
                    attention_mask=None,
                    labels=None,
                    lengths=None,
                    return_dict=None,
                    **kwargs
                ):
                    """Encode, pool, classify and optionally compute the loss.

                    Args:
                        input_ids: token ids passed to the encoder.
                        attention_mask: optional mask passed to the encoder and used by
                            mean pooling.
                        labels: optional targets; when given, ``lengths`` is required.
                        lengths: forwarded to the loss function together with the logits
                            and labels (presumably per-example review lengths — confirm
                            against the loss implementation).
                        return_dict: whether to return a ``SequenceClassifierOutput``
                            instead of a tuple; defaults to ``config.use_return_dict``.
                        **kwargs: accepted for call-site compatibility and ignored.

                    Returns:
                        ``SequenceClassifierOutput`` or a tuple
                        ``(loss?, logits, hidden_states?, attentions?)`` in which absent
                        entries are omitted.

                    Raises:
                        ValueError: on an unknown pooling strategy, on weighted pooling
                            without hidden states, or when ``labels`` is given without
                            ``lengths``.
                    """
                    return_dict = return_dict if return_dict is not None else self.config.use_return_dict

                    # Always ask the encoder for a structured output: the pooling code reads
                    # named fields, which a plain tuple (return_dict=False) does not offer.
                    bert_outputs = self.bert(
                        input_ids,
                        attention_mask=attention_mask,
                        return_dict=True,
                        output_hidden_states=self.config.output_hidden_states,
                    )
                    last_hidden_state = bert_outputs.last_hidden_state
                    hidden_states = bert_outputs.hidden_states
                    attentions = bert_outputs.attentions

                    strategy = self.pooling_strategy
                    if strategy == 'cls':
                        pooled_features = last_hidden_state[:, 0]
                    elif strategy == 'mean':
                        pooled_features = self._mean_pool(last_hidden_state, attention_mask)
                    elif strategy == 'cls_mean_concat':
                        cls_output = last_hidden_state[:, 0]
                        mean_output = self._mean_pool(last_hidden_state, attention_mask)
                        pooled_features = torch.cat((cls_output, mean_output), dim=1)
                    elif strategy == 'weighted_layer':
                        pooled_features = self._weighted_layer_pool(self._require_hidden_states(hidden_states))
                    elif strategy == 'cls_weighted_concat':
                        all_hidden_states = self._require_hidden_states(hidden_states)
                        cls_output = last_hidden_state[:, 0]
                        weighted_output = self._weighted_layer_pool(all_hidden_states)
                        pooled_features = torch.cat((cls_output, weighted_output), dim=1)
                    else:
                        raise ValueError(f"Unknown pooling_strategy: {self.pooling_strategy}")

                    pooled_features = self.features_dropout(pooled_features)
                    logits = self.classifier(pooled_features)

                    loss = None
                    if labels is not None:
                        if lengths is None:
                            raise ValueError("lengths must be provided when labels are specified for loss calculation.")
                        loss = self.loss_fct(logits.squeeze(-1), labels, lengths)

                    if not return_dict:
                        # Mirror the encoder's tuple convention: absent outputs are dropped.
                        extras = tuple(t for t in (hidden_states, attentions) if t is not None)
                        output = (logits,) + extras
                        return ((loss,) + output) if loss is not None else output

                    return SequenceClassifierOutput(
                        loss=loss,
                        logits=logits,
                        hidden_states=hidden_states,
                        attentions=attentions,
                    )
         | 
    	
        pytorch_model.bin
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:6c95a2ef6b7a06191e4db8fe7f5975f7c8228ec9754d5222ffb3984b6b48010a
         | 
| 3 | 
            +
            size 1802582665
         | 
    	
        special_tokens_map.json
    ADDED
    
    | @@ -0,0 +1,37 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            {
         | 
| 2 | 
            +
              "cls_token": {
         | 
| 3 | 
            +
                "content": "[CLS]",
         | 
| 4 | 
            +
                "lstrip": false,
         | 
| 5 | 
            +
                "normalized": false,
         | 
| 6 | 
            +
                "rstrip": false,
         | 
| 7 | 
            +
                "single_word": false
         | 
| 8 | 
            +
              },
         | 
| 9 | 
            +
              "mask_token": {
         | 
| 10 | 
            +
                "content": "[MASK]",
         | 
| 11 | 
            +
                "lstrip": true,
         | 
| 12 | 
            +
                "normalized": false,
         | 
| 13 | 
            +
                "rstrip": false,
         | 
| 14 | 
            +
                "single_word": false
         | 
| 15 | 
            +
              },
         | 
| 16 | 
            +
              "pad_token": {
         | 
| 17 | 
            +
                "content": "[PAD]",
         | 
| 18 | 
            +
                "lstrip": false,
         | 
| 19 | 
            +
                "normalized": false,
         | 
| 20 | 
            +
                "rstrip": false,
         | 
| 21 | 
            +
                "single_word": false
         | 
| 22 | 
            +
              },
         | 
| 23 | 
            +
              "sep_token": {
         | 
| 24 | 
            +
                "content": "[SEP]",
         | 
| 25 | 
            +
                "lstrip": false,
         | 
| 26 | 
            +
                "normalized": false,
         | 
| 27 | 
            +
                "rstrip": false,
         | 
| 28 | 
            +
                "single_word": false
         | 
| 29 | 
            +
              },
         | 
| 30 | 
            +
              "unk_token": {
         | 
| 31 | 
            +
                "content": "[UNK]",
         | 
| 32 | 
            +
                "lstrip": false,
         | 
| 33 | 
            +
                "normalized": false,
         | 
| 34 | 
            +
                "rstrip": false,
         | 
| 35 | 
            +
                "single_word": false
         | 
| 36 | 
            +
              }
         | 
| 37 | 
            +
            }
         | 
    	
        tokenizer.json
    ADDED
    
    | The diff for this file is too large to render. 
		See raw diff | 
|  | 
    	
        tokenizer_config.json
    ADDED
    
    | @@ -0,0 +1,945 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            {
         | 
| 2 | 
            +
              "added_tokens_decoder": {
         | 
| 3 | 
            +
                "0": {
         | 
| 4 | 
            +
                  "content": "|||IP_ADDRESS|||",
         | 
| 5 | 
            +
                  "lstrip": false,
         | 
| 6 | 
            +
                  "normalized": true,
         | 
| 7 | 
            +
                  "rstrip": false,
         | 
| 8 | 
            +
                  "single_word": false,
         | 
| 9 | 
            +
                  "special": false
         | 
| 10 | 
            +
                },
         | 
| 11 | 
            +
                "1": {
         | 
| 12 | 
            +
                  "content": "<|padding|>",
         | 
| 13 | 
            +
                  "lstrip": false,
         | 
| 14 | 
            +
                  "normalized": false,
         | 
| 15 | 
            +
                  "rstrip": false,
         | 
| 16 | 
            +
                  "single_word": false,
         | 
| 17 | 
            +
                  "special": true
         | 
| 18 | 
            +
                },
         | 
| 19 | 
            +
                "50254": {
         | 
| 20 | 
            +
                  "content": "                        ",
         | 
| 21 | 
            +
                  "lstrip": false,
         | 
| 22 | 
            +
                  "normalized": true,
         | 
| 23 | 
            +
                  "rstrip": false,
         | 
| 24 | 
            +
                  "single_word": false,
         | 
| 25 | 
            +
                  "special": false
         | 
| 26 | 
            +
                },
         | 
| 27 | 
            +
                "50255": {
         | 
| 28 | 
            +
                  "content": "                       ",
         | 
| 29 | 
            +
                  "lstrip": false,
         | 
| 30 | 
            +
                  "normalized": true,
         | 
| 31 | 
            +
                  "rstrip": false,
         | 
| 32 | 
            +
                  "single_word": false,
         | 
| 33 | 
            +
                  "special": false
         | 
| 34 | 
            +
                },
         | 
| 35 | 
            +
                "50256": {
         | 
| 36 | 
            +
                  "content": "                      ",
         | 
| 37 | 
            +
                  "lstrip": false,
         | 
| 38 | 
            +
                  "normalized": true,
         | 
| 39 | 
            +
                  "rstrip": false,
         | 
| 40 | 
            +
                  "single_word": false,
         | 
| 41 | 
            +
                  "special": false
         | 
| 42 | 
            +
                },
         | 
| 43 | 
            +
                "50257": {
         | 
| 44 | 
            +
                  "content": "                     ",
         | 
| 45 | 
            +
                  "lstrip": false,
         | 
| 46 | 
            +
                  "normalized": true,
         | 
| 47 | 
            +
                  "rstrip": false,
         | 
| 48 | 
            +
                  "single_word": false,
         | 
| 49 | 
            +
                  "special": false
         | 
| 50 | 
            +
                },
         | 
| 51 | 
            +
                "50258": {
         | 
| 52 | 
            +
                  "content": "                    ",
         | 
| 53 | 
            +
                  "lstrip": false,
         | 
| 54 | 
            +
                  "normalized": true,
         | 
| 55 | 
            +
                  "rstrip": false,
         | 
| 56 | 
            +
                  "single_word": false,
         | 
| 57 | 
            +
                  "special": false
         | 
| 58 | 
            +
                },
         | 
| 59 | 
            +
                "50259": {
         | 
| 60 | 
            +
                  "content": "                   ",
         | 
| 61 | 
            +
                  "lstrip": false,
         | 
| 62 | 
            +
                  "normalized": true,
         | 
| 63 | 
            +
                  "rstrip": false,
         | 
| 64 | 
            +
                  "single_word": false,
         | 
| 65 | 
            +
                  "special": false
         | 
| 66 | 
            +
                },
         | 
| 67 | 
            +
                "50260": {
         | 
| 68 | 
            +
                  "content": "                  ",
         | 
| 69 | 
            +
                  "lstrip": false,
         | 
| 70 | 
            +
                  "normalized": true,
         | 
| 71 | 
            +
                  "rstrip": false,
         | 
| 72 | 
            +
                  "single_word": false,
         | 
| 73 | 
            +
                  "special": false
         | 
| 74 | 
            +
                },
         | 
| 75 | 
            +
                "50261": {
         | 
| 76 | 
            +
                  "content": "                 ",
         | 
| 77 | 
            +
                  "lstrip": false,
         | 
| 78 | 
            +
                  "normalized": true,
         | 
| 79 | 
            +
                  "rstrip": false,
         | 
| 80 | 
            +
                  "single_word": false,
         | 
| 81 | 
            +
                  "special": false
         | 
| 82 | 
            +
                },
         | 
| 83 | 
            +
                "50262": {
         | 
| 84 | 
            +
                  "content": "                ",
         | 
| 85 | 
            +
                  "lstrip": false,
         | 
| 86 | 
            +
                  "normalized": true,
         | 
| 87 | 
            +
                  "rstrip": false,
         | 
| 88 | 
            +
                  "single_word": false,
         | 
| 89 | 
            +
                  "special": false
         | 
| 90 | 
            +
                },
         | 
| 91 | 
            +
                "50263": {
         | 
| 92 | 
            +
                  "content": "               ",
         | 
| 93 | 
            +
                  "lstrip": false,
         | 
| 94 | 
            +
                  "normalized": true,
         | 
| 95 | 
            +
                  "rstrip": false,
         | 
| 96 | 
            +
                  "single_word": false,
         | 
| 97 | 
            +
                  "special": false
         | 
| 98 | 
            +
                },
         | 
| 99 | 
            +
                "50264": {
         | 
| 100 | 
            +
                  "content": "              ",
         | 
| 101 | 
            +
                  "lstrip": false,
         | 
| 102 | 
            +
                  "normalized": true,
         | 
| 103 | 
            +
                  "rstrip": false,
         | 
| 104 | 
            +
                  "single_word": false,
         | 
| 105 | 
            +
                  "special": false
         | 
| 106 | 
            +
                },
         | 
| 107 | 
            +
                "50265": {
         | 
| 108 | 
            +
                  "content": "             ",
         | 
| 109 | 
            +
                  "lstrip": false,
         | 
| 110 | 
            +
                  "normalized": true,
         | 
| 111 | 
            +
                  "rstrip": false,
         | 
| 112 | 
            +
                  "single_word": false,
         | 
| 113 | 
            +
                  "special": false
         | 
| 114 | 
            +
                },
         | 
| 115 | 
            +
                "50266": {
         | 
| 116 | 
            +
                  "content": "            ",
         | 
| 117 | 
            +
                  "lstrip": false,
         | 
| 118 | 
            +
                  "normalized": true,
         | 
| 119 | 
            +
                  "rstrip": false,
         | 
| 120 | 
            +
                  "single_word": false,
         | 
| 121 | 
            +
                  "special": false
         | 
| 122 | 
            +
                },
         | 
| 123 | 
            +
                "50267": {
         | 
| 124 | 
            +
                  "content": "           ",
         | 
| 125 | 
            +
                  "lstrip": false,
         | 
| 126 | 
            +
                  "normalized": true,
         | 
| 127 | 
            +
                  "rstrip": false,
         | 
| 128 | 
            +
                  "single_word": false,
         | 
| 129 | 
            +
                  "special": false
         | 
| 130 | 
            +
                },
         | 
| 131 | 
            +
                "50268": {
         | 
| 132 | 
            +
                  "content": "          ",
         | 
| 133 | 
            +
                  "lstrip": false,
         | 
| 134 | 
            +
                  "normalized": true,
         | 
| 135 | 
            +
                  "rstrip": false,
         | 
| 136 | 
            +
                  "single_word": false,
         | 
| 137 | 
            +
                  "special": false
         | 
| 138 | 
            +
                },
         | 
| 139 | 
            +
                "50269": {
         | 
| 140 | 
            +
                  "content": "         ",
         | 
| 141 | 
            +
                  "lstrip": false,
         | 
| 142 | 
            +
                  "normalized": true,
         | 
| 143 | 
            +
                  "rstrip": false,
         | 
| 144 | 
            +
                  "single_word": false,
         | 
| 145 | 
            +
                  "special": false
         | 
| 146 | 
            +
                },
         | 
| 147 | 
            +
                "50270": {
         | 
| 148 | 
            +
                  "content": "        ",
         | 
| 149 | 
            +
                  "lstrip": false,
         | 
| 150 | 
            +
                  "normalized": true,
         | 
| 151 | 
            +
                  "rstrip": false,
         | 
| 152 | 
            +
                  "single_word": false,
         | 
| 153 | 
            +
                  "special": false
         | 
| 154 | 
            +
                },
         | 
| 155 | 
            +
                "50271": {
         | 
| 156 | 
            +
                  "content": "       ",
         | 
| 157 | 
            +
                  "lstrip": false,
         | 
| 158 | 
            +
                  "normalized": true,
         | 
| 159 | 
            +
                  "rstrip": false,
         | 
| 160 | 
            +
                  "single_word": false,
         | 
| 161 | 
            +
                  "special": false
         | 
| 162 | 
            +
                },
         | 
| 163 | 
            +
                "50272": {
         | 
| 164 | 
            +
                  "content": "      ",
         | 
| 165 | 
            +
                  "lstrip": false,
         | 
| 166 | 
            +
                  "normalized": true,
         | 
| 167 | 
            +
                  "rstrip": false,
         | 
| 168 | 
            +
                  "single_word": false,
         | 
| 169 | 
            +
                  "special": false
         | 
| 170 | 
            +
                },
         | 
| 171 | 
            +
                "50273": {
         | 
| 172 | 
            +
                  "content": "     ",
         | 
| 173 | 
            +
                  "lstrip": false,
         | 
| 174 | 
            +
                  "normalized": true,
         | 
| 175 | 
            +
                  "rstrip": false,
         | 
| 176 | 
            +
                  "single_word": false,
         | 
| 177 | 
            +
                  "special": false
         | 
| 178 | 
            +
                },
         | 
| 179 | 
            +
                "50274": {
         | 
| 180 | 
            +
                  "content": "    ",
         | 
| 181 | 
            +
                  "lstrip": false,
         | 
| 182 | 
            +
                  "normalized": true,
         | 
| 183 | 
            +
                  "rstrip": false,
         | 
| 184 | 
            +
                  "single_word": false,
         | 
| 185 | 
            +
                  "special": false
         | 
| 186 | 
            +
                },
         | 
| 187 | 
            +
                "50275": {
         | 
| 188 | 
            +
                  "content": "   ",
         | 
| 189 | 
            +
                  "lstrip": false,
         | 
| 190 | 
            +
                  "normalized": true,
         | 
| 191 | 
            +
                  "rstrip": false,
         | 
| 192 | 
            +
                  "single_word": false,
         | 
| 193 | 
            +
                  "special": false
         | 
| 194 | 
            +
                },
         | 
| 195 | 
            +
                "50276": {
         | 
| 196 | 
            +
                  "content": "  ",
         | 
| 197 | 
            +
                  "lstrip": false,
         | 
| 198 | 
            +
                  "normalized": true,
         | 
| 199 | 
            +
                  "rstrip": false,
         | 
| 200 | 
            +
                  "single_word": false,
         | 
| 201 | 
            +
                  "special": false
         | 
| 202 | 
            +
                },
         | 
| 203 | 
            +
                "50277": {
         | 
| 204 | 
            +
                  "content": "|||EMAIL_ADDRESS|||",
         | 
| 205 | 
            +
                  "lstrip": false,
         | 
| 206 | 
            +
                  "normalized": true,
         | 
| 207 | 
            +
                  "rstrip": false,
         | 
| 208 | 
            +
                  "single_word": false,
         | 
| 209 | 
            +
                  "special": false
         | 
| 210 | 
            +
                },
         | 
| 211 | 
            +
                "50278": {
         | 
| 212 | 
            +
                  "content": "|||PHONE_NUMBER|||",
         | 
| 213 | 
            +
                  "lstrip": false,
         | 
| 214 | 
            +
                  "normalized": true,
         | 
| 215 | 
            +
                  "rstrip": false,
         | 
| 216 | 
            +
                  "single_word": false,
         | 
| 217 | 
            +
                  "special": false
         | 
| 218 | 
            +
                },
         | 
| 219 | 
            +
                "50279": {
         | 
| 220 | 
            +
                  "content": "<|endoftext|>",
         | 
| 221 | 
            +
                  "lstrip": false,
         | 
| 222 | 
            +
                  "normalized": false,
         | 
| 223 | 
            +
                  "rstrip": false,
         | 
| 224 | 
            +
                  "single_word": false,
         | 
| 225 | 
            +
                  "special": true
         | 
| 226 | 
            +
                },
         | 
| 227 | 
            +
                "50280": {
         | 
| 228 | 
            +
                  "content": "[UNK]",
         | 
| 229 | 
            +
                  "lstrip": false,
         | 
| 230 | 
            +
                  "normalized": false,
         | 
| 231 | 
            +
                  "rstrip": false,
         | 
| 232 | 
            +
                  "single_word": false,
         | 
| 233 | 
            +
                  "special": true
         | 
| 234 | 
            +
                },
         | 
| 235 | 
            +
                "50281": {
         | 
| 236 | 
            +
                  "content": "[CLS]",
         | 
| 237 | 
            +
                  "lstrip": false,
         | 
| 238 | 
            +
                  "normalized": false,
         | 
| 239 | 
            +
                  "rstrip": false,
         | 
| 240 | 
            +
                  "single_word": false,
         | 
| 241 | 
            +
                  "special": true
         | 
| 242 | 
            +
                },
         | 
| 243 | 
            +
                "50282": {
         | 
| 244 | 
            +
                  "content": "[SEP]",
         | 
| 245 | 
            +
                  "lstrip": false,
         | 
| 246 | 
            +
                  "normalized": false,
         | 
| 247 | 
            +
                  "rstrip": false,
         | 
| 248 | 
            +
                  "single_word": false,
         | 
| 249 | 
            +
                  "special": true
         | 
| 250 | 
            +
                },
         | 
| 251 | 
            +
                "50283": {
         | 
| 252 | 
            +
                  "content": "[PAD]",
         | 
| 253 | 
            +
                  "lstrip": false,
         | 
| 254 | 
            +
                  "normalized": false,
         | 
| 255 | 
            +
                  "rstrip": false,
         | 
| 256 | 
            +
                  "single_word": false,
         | 
| 257 | 
            +
                  "special": true
         | 
| 258 | 
            +
                },
         | 
| 259 | 
            +
                "50284": {
         | 
| 260 | 
            +
                  "content": "[MASK]",
         | 
| 261 | 
            +
                  "lstrip": true,
         | 
| 262 | 
            +
                  "normalized": false,
         | 
| 263 | 
            +
                  "rstrip": false,
         | 
| 264 | 
            +
                  "single_word": false,
         | 
| 265 | 
            +
                  "special": true
         | 
| 266 | 
            +
                },
         | 
| 267 | 
            +
                "50285": {
         | 
| 268 | 
            +
                  "content": "[unused0]",
         | 
| 269 | 
            +
                  "lstrip": false,
         | 
| 270 | 
            +
                  "normalized": true,
         | 
| 271 | 
            +
                  "rstrip": false,
         | 
| 272 | 
            +
                  "single_word": false,
         | 
| 273 | 
            +
                  "special": false
         | 
| 274 | 
            +
                },
         | 
| 275 | 
            +
                "50286": {
         | 
| 276 | 
            +
                  "content": "[unused1]",
         | 
| 277 | 
            +
                  "lstrip": false,
         | 
| 278 | 
            +
                  "normalized": true,
         | 
| 279 | 
            +
                  "rstrip": false,
         | 
| 280 | 
            +
                  "single_word": false,
         | 
| 281 | 
            +
                  "special": false
         | 
| 282 | 
            +
                },
         | 
| 283 | 
            +
                "50287": {
         | 
| 284 | 
            +
                  "content": "[unused2]",
         | 
| 285 | 
            +
                  "lstrip": false,
         | 
| 286 | 
            +
                  "normalized": true,
         | 
| 287 | 
            +
                  "rstrip": false,
         | 
| 288 | 
            +
                  "single_word": false,
         | 
| 289 | 
            +
                  "special": false
         | 
| 290 | 
            +
                },
         | 
| 291 | 
            +
                "50288": {
         | 
| 292 | 
            +
                  "content": "[unused3]",
         | 
| 293 | 
            +
                  "lstrip": false,
         | 
| 294 | 
            +
                  "normalized": true,
         | 
| 295 | 
            +
                  "rstrip": false,
         | 
| 296 | 
            +
                  "single_word": false,
         | 
| 297 | 
            +
                  "special": false
         | 
| 298 | 
            +
                },
         | 
| 299 | 
            +
                "50289": {
         | 
| 300 | 
            +
                  "content": "[unused4]",
         | 
| 301 | 
            +
                  "lstrip": false,
         | 
| 302 | 
            +
                  "normalized": true,
         | 
| 303 | 
            +
                  "rstrip": false,
         | 
| 304 | 
            +
                  "single_word": false,
         | 
| 305 | 
            +
                  "special": false
         | 
| 306 | 
            +
                },
         | 
| 307 | 
            +
                "50290": {
         | 
| 308 | 
            +
                  "content": "[unused5]",
         | 
| 309 | 
            +
                  "lstrip": false,
         | 
| 310 | 
            +
                  "normalized": true,
         | 
| 311 | 
            +
                  "rstrip": false,
         | 
| 312 | 
            +
                  "single_word": false,
         | 
| 313 | 
            +
                  "special": false
         | 
| 314 | 
            +
                },
         | 
| 315 | 
            +
                "50291": {
         | 
| 316 | 
            +
                  "content": "[unused6]",
         | 
| 317 | 
            +
                  "lstrip": false,
         | 
| 318 | 
            +
                  "normalized": true,
         | 
| 319 | 
            +
                  "rstrip": false,
         | 
| 320 | 
            +
                  "single_word": false,
         | 
| 321 | 
            +
                  "special": false
         | 
| 322 | 
            +
                },
         | 
| 323 | 
            +
                "50292": {
         | 
| 324 | 
            +
                  "content": "[unused7]",
         | 
| 325 | 
            +
                  "lstrip": false,
         | 
| 326 | 
            +
                  "normalized": true,
         | 
| 327 | 
            +
                  "rstrip": false,
         | 
| 328 | 
            +
                  "single_word": false,
         | 
| 329 | 
            +
                  "special": false
         | 
| 330 | 
            +
                },
         | 
| 331 | 
            +
                "50293": {
         | 
| 332 | 
            +
                  "content": "[unused8]",
         | 
| 333 | 
            +
                  "lstrip": false,
         | 
| 334 | 
            +
                  "normalized": true,
         | 
| 335 | 
            +
                  "rstrip": false,
         | 
| 336 | 
            +
                  "single_word": false,
         | 
| 337 | 
            +
                  "special": false
         | 
| 338 | 
            +
                },
         | 
| 339 | 
            +
                "50294": {
         | 
| 340 | 
            +
                  "content": "[unused9]",
         | 
| 341 | 
            +
                  "lstrip": false,
         | 
| 342 | 
            +
                  "normalized": true,
         | 
| 343 | 
            +
                  "rstrip": false,
         | 
| 344 | 
            +
                  "single_word": false,
         | 
| 345 | 
            +
                  "special": false
         | 
| 346 | 
            +
                },
         | 
| 347 | 
            +
                "50295": {
         | 
| 348 | 
            +
                  "content": "[unused10]",
         | 
| 349 | 
            +
                  "lstrip": false,
         | 
| 350 | 
            +
                  "normalized": true,
         | 
| 351 | 
            +
                  "rstrip": false,
         | 
| 352 | 
            +
                  "single_word": false,
         | 
| 353 | 
            +
                  "special": false
         | 
| 354 | 
            +
                },
         | 
| 355 | 
            +
                "50296": {
         | 
| 356 | 
            +
                  "content": "[unused11]",
         | 
| 357 | 
            +
                  "lstrip": false,
         | 
| 358 | 
            +
                  "normalized": true,
         | 
| 359 | 
            +
                  "rstrip": false,
         | 
| 360 | 
            +
                  "single_word": false,
         | 
| 361 | 
            +
                  "special": false
         | 
| 362 | 
            +
                },
         | 
| 363 | 
            +
                "50297": {
         | 
| 364 | 
            +
                  "content": "[unused12]",
         | 
| 365 | 
            +
                  "lstrip": false,
         | 
| 366 | 
            +
                  "normalized": true,
         | 
| 367 | 
            +
                  "rstrip": false,
         | 
| 368 | 
            +
                  "single_word": false,
         | 
| 369 | 
            +
                  "special": false
         | 
| 370 | 
            +
                },
         | 
| 371 | 
            +
                "50298": {
         | 
| 372 | 
            +
                  "content": "[unused13]",
         | 
| 373 | 
            +
                  "lstrip": false,
         | 
| 374 | 
            +
                  "normalized": true,
         | 
| 375 | 
            +
                  "rstrip": false,
         | 
| 376 | 
            +
                  "single_word": false,
         | 
| 377 | 
            +
                  "special": false
         | 
| 378 | 
            +
                },
         | 
| 379 | 
            +
                "50299": {
         | 
| 380 | 
            +
                  "content": "[unused14]",
         | 
| 381 | 
            +
                  "lstrip": false,
         | 
| 382 | 
            +
                  "normalized": true,
         | 
| 383 | 
            +
                  "rstrip": false,
         | 
| 384 | 
            +
                  "single_word": false,
         | 
| 385 | 
            +
                  "special": false
         | 
| 386 | 
            +
                },
         | 
| 387 | 
            +
                "50300": {
         | 
| 388 | 
            +
                  "content": "[unused15]",
         | 
| 389 | 
            +
                  "lstrip": false,
         | 
| 390 | 
            +
                  "normalized": true,
         | 
| 391 | 
            +
                  "rstrip": false,
         | 
| 392 | 
            +
                  "single_word": false,
         | 
| 393 | 
            +
                  "special": false
         | 
| 394 | 
            +
                },
         | 
| 395 | 
            +
                "50301": {
         | 
| 396 | 
            +
                  "content": "[unused16]",
         | 
| 397 | 
            +
                  "lstrip": false,
         | 
| 398 | 
            +
                  "normalized": true,
         | 
| 399 | 
            +
                  "rstrip": false,
         | 
| 400 | 
            +
                  "single_word": false,
         | 
| 401 | 
            +
                  "special": false
         | 
| 402 | 
            +
                },
         | 
| 403 | 
            +
                "50302": {
         | 
| 404 | 
            +
                  "content": "[unused17]",
         | 
| 405 | 
            +
                  "lstrip": false,
         | 
| 406 | 
            +
                  "normalized": true,
         | 
| 407 | 
            +
                  "rstrip": false,
         | 
| 408 | 
            +
                  "single_word": false,
         | 
| 409 | 
            +
                  "special": false
         | 
| 410 | 
            +
                },
         | 
| 411 | 
            +
                "50303": {
         | 
| 412 | 
            +
                  "content": "[unused18]",
         | 
| 413 | 
            +
                  "lstrip": false,
         | 
| 414 | 
            +
                  "normalized": true,
         | 
| 415 | 
            +
                  "rstrip": false,
         | 
| 416 | 
            +
                  "single_word": false,
         | 
| 417 | 
            +
                  "special": false
         | 
| 418 | 
            +
                },
         | 
| 419 | 
            +
                "50304": {
         | 
| 420 | 
            +
                  "content": "[unused19]",
         | 
| 421 | 
            +
                  "lstrip": false,
         | 
| 422 | 
            +
                  "normalized": true,
         | 
| 423 | 
            +
                  "rstrip": false,
         | 
| 424 | 
            +
                  "single_word": false,
         | 
| 425 | 
            +
                  "special": false
         | 
| 426 | 
            +
                },
         | 
| 427 | 
            +
                "50305": {
         | 
| 428 | 
            +
                  "content": "[unused20]",
         | 
| 429 | 
            +
                  "lstrip": false,
         | 
| 430 | 
            +
                  "normalized": true,
         | 
| 431 | 
            +
                  "rstrip": false,
         | 
| 432 | 
            +
                  "single_word": false,
         | 
| 433 | 
            +
                  "special": false
         | 
| 434 | 
            +
                },
         | 
| 435 | 
            +
                "50306": {
         | 
| 436 | 
            +
                  "content": "[unused21]",
         | 
| 437 | 
            +
                  "lstrip": false,
         | 
| 438 | 
            +
                  "normalized": true,
         | 
| 439 | 
            +
                  "rstrip": false,
         | 
| 440 | 
            +
                  "single_word": false,
         | 
| 441 | 
            +
                  "special": false
         | 
| 442 | 
            +
                },
         | 
| 443 | 
            +
                "50307": {
         | 
| 444 | 
            +
                  "content": "[unused22]",
         | 
| 445 | 
            +
                  "lstrip": false,
         | 
| 446 | 
            +
                  "normalized": true,
         | 
| 447 | 
            +
                  "rstrip": false,
         | 
| 448 | 
            +
                  "single_word": false,
         | 
| 449 | 
            +
                  "special": false
         | 
| 450 | 
            +
                },
         | 
| 451 | 
            +
                "50308": {
         | 
| 452 | 
            +
                  "content": "[unused23]",
         | 
| 453 | 
            +
                  "lstrip": false,
         | 
| 454 | 
            +
                  "normalized": true,
         | 
| 455 | 
            +
                  "rstrip": false,
         | 
| 456 | 
            +
                  "single_word": false,
         | 
| 457 | 
            +
                  "special": false
         | 
| 458 | 
            +
                },
         | 
| 459 | 
            +
                "50309": {
         | 
| 460 | 
            +
                  "content": "[unused24]",
         | 
| 461 | 
            +
                  "lstrip": false,
         | 
| 462 | 
            +
                  "normalized": true,
         | 
| 463 | 
            +
                  "rstrip": false,
         | 
| 464 | 
            +
                  "single_word": false,
         | 
| 465 | 
            +
                  "special": false
         | 
| 466 | 
            +
                },
         | 
| 467 | 
            +
                "50310": {
         | 
| 468 | 
            +
                  "content": "[unused25]",
         | 
| 469 | 
            +
                  "lstrip": false,
         | 
| 470 | 
            +
                  "normalized": true,
         | 
| 471 | 
            +
                  "rstrip": false,
         | 
| 472 | 
            +
                  "single_word": false,
         | 
| 473 | 
            +
                  "special": false
         | 
| 474 | 
            +
                },
         | 
| 475 | 
            +
                "50311": {
         | 
| 476 | 
            +
                  "content": "[unused26]",
         | 
| 477 | 
            +
                  "lstrip": false,
         | 
| 478 | 
            +
                  "normalized": true,
         | 
| 479 | 
            +
                  "rstrip": false,
         | 
| 480 | 
            +
                  "single_word": false,
         | 
| 481 | 
            +
                  "special": false
         | 
| 482 | 
            +
                },
         | 
| 483 | 
            +
                "50312": {
         | 
| 484 | 
            +
                  "content": "[unused27]",
         | 
| 485 | 
            +
                  "lstrip": false,
         | 
| 486 | 
            +
                  "normalized": true,
         | 
| 487 | 
            +
                  "rstrip": false,
         | 
| 488 | 
            +
                  "single_word": false,
         | 
| 489 | 
            +
                  "special": false
         | 
| 490 | 
            +
                },
         | 
| 491 | 
            +
                "50313": {
         | 
| 492 | 
            +
                  "content": "[unused28]",
         | 
| 493 | 
            +
                  "lstrip": false,
         | 
| 494 | 
            +
                  "normalized": true,
         | 
| 495 | 
            +
                  "rstrip": false,
         | 
| 496 | 
            +
                  "single_word": false,
         | 
| 497 | 
            +
                  "special": false
         | 
| 498 | 
            +
                },
         | 
| 499 | 
            +
                "50314": {
         | 
| 500 | 
            +
                  "content": "[unused29]",
         | 
| 501 | 
            +
                  "lstrip": false,
         | 
| 502 | 
            +
                  "normalized": true,
         | 
| 503 | 
            +
                  "rstrip": false,
         | 
| 504 | 
            +
                  "single_word": false,
         | 
| 505 | 
            +
                  "special": false
         | 
| 506 | 
            +
                },
         | 
| 507 | 
            +
                "50315": {
         | 
| 508 | 
            +
                  "content": "[unused30]",
         | 
| 509 | 
            +
                  "lstrip": false,
         | 
| 510 | 
            +
                  "normalized": true,
         | 
| 511 | 
            +
                  "rstrip": false,
         | 
| 512 | 
            +
                  "single_word": false,
         | 
| 513 | 
            +
                  "special": false
         | 
| 514 | 
            +
                },
         | 
| 515 | 
            +
                "50316": {
         | 
| 516 | 
            +
                  "content": "[unused31]",
         | 
| 517 | 
            +
                  "lstrip": false,
         | 
| 518 | 
            +
                  "normalized": true,
         | 
| 519 | 
            +
                  "rstrip": false,
         | 
| 520 | 
            +
                  "single_word": false,
         | 
| 521 | 
            +
                  "special": false
         | 
| 522 | 
            +
                },
         | 
| 523 | 
            +
                "50317": {
         | 
| 524 | 
            +
                  "content": "[unused32]",
         | 
| 525 | 
            +
                  "lstrip": false,
         | 
| 526 | 
            +
                  "normalized": true,
         | 
| 527 | 
            +
                  "rstrip": false,
         | 
| 528 | 
            +
                  "single_word": false,
         | 
| 529 | 
            +
                  "special": false
         | 
| 530 | 
            +
                },
         | 
| 531 | 
            +
                "50318": {
         | 
| 532 | 
            +
                  "content": "[unused33]",
         | 
| 533 | 
            +
                  "lstrip": false,
         | 
| 534 | 
            +
                  "normalized": true,
         | 
| 535 | 
            +
                  "rstrip": false,
         | 
| 536 | 
            +
                  "single_word": false,
         | 
| 537 | 
            +
                  "special": false
         | 
| 538 | 
            +
                },
         | 
| 539 | 
            +
                "50319": {
         | 
| 540 | 
            +
                  "content": "[unused34]",
         | 
| 541 | 
            +
                  "lstrip": false,
         | 
| 542 | 
            +
                  "normalized": true,
         | 
| 543 | 
            +
                  "rstrip": false,
         | 
| 544 | 
            +
                  "single_word": false,
         | 
| 545 | 
            +
                  "special": false
         | 
| 546 | 
            +
                },
         | 
| 547 | 
            +
                "50320": {
         | 
| 548 | 
            +
                  "content": "[unused35]",
         | 
| 549 | 
            +
                  "lstrip": false,
         | 
| 550 | 
            +
                  "normalized": true,
         | 
| 551 | 
            +
                  "rstrip": false,
         | 
| 552 | 
            +
                  "single_word": false,
         | 
| 553 | 
            +
                  "special": false
         | 
| 554 | 
            +
                },
         | 
| 555 | 
            +
                "50321": {
         | 
| 556 | 
            +
                  "content": "[unused36]",
         | 
| 557 | 
            +
                  "lstrip": false,
         | 
| 558 | 
            +
                  "normalized": true,
         | 
| 559 | 
            +
                  "rstrip": false,
         | 
| 560 | 
            +
                  "single_word": false,
         | 
| 561 | 
            +
                  "special": false
         | 
| 562 | 
            +
                },
         | 
| 563 | 
            +
                "50322": {
         | 
| 564 | 
            +
                  "content": "[unused37]",
         | 
| 565 | 
            +
                  "lstrip": false,
         | 
| 566 | 
            +
                  "normalized": true,
         | 
| 567 | 
            +
                  "rstrip": false,
         | 
| 568 | 
            +
                  "single_word": false,
         | 
| 569 | 
            +
                  "special": false
         | 
| 570 | 
            +
                },
         | 
| 571 | 
            +
                "50323": {
         | 
| 572 | 
            +
                  "content": "[unused38]",
         | 
| 573 | 
            +
                  "lstrip": false,
         | 
| 574 | 
            +
                  "normalized": true,
         | 
| 575 | 
            +
                  "rstrip": false,
         | 
| 576 | 
            +
                  "single_word": false,
         | 
| 577 | 
            +
                  "special": false
         | 
| 578 | 
            +
                },
         | 
| 579 | 
            +
                "50324": {
         | 
| 580 | 
            +
                  "content": "[unused39]",
         | 
| 581 | 
            +
                  "lstrip": false,
         | 
| 582 | 
            +
                  "normalized": true,
         | 
| 583 | 
            +
                  "rstrip": false,
         | 
| 584 | 
            +
                  "single_word": false,
         | 
| 585 | 
            +
                  "special": false
         | 
| 586 | 
            +
                },
         | 
| 587 | 
            +
                "50325": {
         | 
| 588 | 
            +
                  "content": "[unused40]",
         | 
| 589 | 
            +
                  "lstrip": false,
         | 
| 590 | 
            +
                  "normalized": true,
         | 
| 591 | 
            +
                  "rstrip": false,
         | 
| 592 | 
            +
                  "single_word": false,
         | 
| 593 | 
            +
                  "special": false
         | 
| 594 | 
            +
                },
         | 
| 595 | 
            +
                "50326": {
         | 
| 596 | 
            +
                  "content": "[unused41]",
         | 
| 597 | 
            +
                  "lstrip": false,
         | 
| 598 | 
            +
                  "normalized": true,
         | 
| 599 | 
            +
                  "rstrip": false,
         | 
| 600 | 
            +
                  "single_word": false,
         | 
| 601 | 
            +
                  "special": false
         | 
| 602 | 
            +
                },
         | 
| 603 | 
            +
                "50327": {
         | 
| 604 | 
            +
                  "content": "[unused42]",
         | 
| 605 | 
            +
                  "lstrip": false,
         | 
| 606 | 
            +
                  "normalized": true,
         | 
| 607 | 
            +
                  "rstrip": false,
         | 
| 608 | 
            +
                  "single_word": false,
         | 
| 609 | 
            +
                  "special": false
         | 
| 610 | 
            +
                },
         | 
| 611 | 
            +
                "50328": {
         | 
| 612 | 
            +
                  "content": "[unused43]",
         | 
| 613 | 
            +
                  "lstrip": false,
         | 
| 614 | 
            +
                  "normalized": true,
         | 
| 615 | 
            +
                  "rstrip": false,
         | 
| 616 | 
            +
                  "single_word": false,
         | 
| 617 | 
            +
                  "special": false
         | 
| 618 | 
            +
                },
         | 
| 619 | 
            +
                "50329": {
         | 
| 620 | 
            +
                  "content": "[unused44]",
         | 
| 621 | 
            +
                  "lstrip": false,
         | 
| 622 | 
            +
                  "normalized": true,
         | 
| 623 | 
            +
                  "rstrip": false,
         | 
| 624 | 
            +
                  "single_word": false,
         | 
| 625 | 
            +
                  "special": false
         | 
| 626 | 
            +
                },
         | 
| 627 | 
            +
                "50330": {
         | 
| 628 | 
            +
                  "content": "[unused45]",
         | 
| 629 | 
            +
                  "lstrip": false,
         | 
| 630 | 
            +
                  "normalized": true,
         | 
| 631 | 
            +
                  "rstrip": false,
         | 
| 632 | 
            +
                  "single_word": false,
         | 
| 633 | 
            +
                  "special": false
         | 
| 634 | 
            +
                },
         | 
| 635 | 
            +
                "50331": {
         | 
| 636 | 
            +
                  "content": "[unused46]",
         | 
| 637 | 
            +
                  "lstrip": false,
         | 
| 638 | 
            +
                  "normalized": true,
         | 
| 639 | 
            +
                  "rstrip": false,
         | 
| 640 | 
            +
                  "single_word": false,
         | 
| 641 | 
            +
                  "special": false
         | 
| 642 | 
            +
                },
         | 
| 643 | 
            +
                "50332": {
         | 
| 644 | 
            +
                  "content": "[unused47]",
         | 
| 645 | 
            +
                  "lstrip": false,
         | 
| 646 | 
            +
                  "normalized": true,
         | 
| 647 | 
            +
                  "rstrip": false,
         | 
| 648 | 
            +
                  "single_word": false,
         | 
| 649 | 
            +
                  "special": false
         | 
| 650 | 
            +
                },
         | 
| 651 | 
            +
                "50333": {
         | 
| 652 | 
            +
                  "content": "[unused48]",
         | 
| 653 | 
            +
                  "lstrip": false,
         | 
| 654 | 
            +
                  "normalized": true,
         | 
| 655 | 
            +
                  "rstrip": false,
         | 
| 656 | 
            +
                  "single_word": false,
         | 
| 657 | 
            +
                  "special": false
         | 
| 658 | 
            +
                },
         | 
| 659 | 
            +
                "50334": {
         | 
| 660 | 
            +
                  "content": "[unused49]",
         | 
| 661 | 
            +
                  "lstrip": false,
         | 
| 662 | 
            +
                  "normalized": true,
         | 
| 663 | 
            +
                  "rstrip": false,
         | 
| 664 | 
            +
                  "single_word": false,
         | 
| 665 | 
            +
                  "special": false
         | 
| 666 | 
            +
                },
         | 
| 667 | 
            +
                "50335": {
         | 
| 668 | 
            +
                  "content": "[unused50]",
         | 
| 669 | 
            +
                  "lstrip": false,
         | 
| 670 | 
            +
                  "normalized": true,
         | 
| 671 | 
            +
                  "rstrip": false,
         | 
| 672 | 
            +
                  "single_word": false,
         | 
| 673 | 
            +
                  "special": false
         | 
| 674 | 
            +
                },
         | 
| 675 | 
            +
                "50336": {
         | 
| 676 | 
            +
                  "content": "[unused51]",
         | 
| 677 | 
            +
                  "lstrip": false,
         | 
| 678 | 
            +
                  "normalized": true,
         | 
| 679 | 
            +
                  "rstrip": false,
         | 
| 680 | 
            +
                  "single_word": false,
         | 
| 681 | 
            +
                  "special": false
         | 
| 682 | 
            +
                },
         | 
| 683 | 
            +
                "50337": {
         | 
| 684 | 
            +
                  "content": "[unused52]",
         | 
| 685 | 
            +
                  "lstrip": false,
         | 
| 686 | 
            +
                  "normalized": true,
         | 
| 687 | 
            +
                  "rstrip": false,
         | 
| 688 | 
            +
                  "single_word": false,
         | 
| 689 | 
            +
                  "special": false
         | 
| 690 | 
            +
                },
         | 
| 691 | 
            +
                "50338": {
         | 
| 692 | 
            +
                  "content": "[unused53]",
         | 
| 693 | 
            +
                  "lstrip": false,
         | 
| 694 | 
            +
                  "normalized": true,
         | 
| 695 | 
            +
                  "rstrip": false,
         | 
| 696 | 
            +
                  "single_word": false,
         | 
| 697 | 
            +
                  "special": false
         | 
| 698 | 
            +
                },
         | 
| 699 | 
            +
                "50339": {
         | 
| 700 | 
            +
                  "content": "[unused54]",
         | 
| 701 | 
            +
                  "lstrip": false,
         | 
| 702 | 
            +
                  "normalized": true,
         | 
| 703 | 
            +
                  "rstrip": false,
         | 
| 704 | 
            +
                  "single_word": false,
         | 
| 705 | 
            +
                  "special": false
         | 
| 706 | 
            +
                },
         | 
| 707 | 
            +
                "50340": {
         | 
| 708 | 
            +
                  "content": "[unused55]",
         | 
| 709 | 
            +
                  "lstrip": false,
         | 
| 710 | 
            +
                  "normalized": true,
         | 
| 711 | 
            +
                  "rstrip": false,
         | 
| 712 | 
            +
                  "single_word": false,
         | 
| 713 | 
            +
                  "special": false
         | 
| 714 | 
            +
                },
         | 
| 715 | 
            +
                "50341": {
         | 
| 716 | 
            +
                  "content": "[unused56]",
         | 
| 717 | 
            +
                  "lstrip": false,
         | 
| 718 | 
            +
                  "normalized": true,
         | 
| 719 | 
            +
                  "rstrip": false,
         | 
| 720 | 
            +
                  "single_word": false,
         | 
| 721 | 
            +
                  "special": false
         | 
| 722 | 
            +
                },
         | 
| 723 | 
            +
                "50342": {
         | 
| 724 | 
            +
                  "content": "[unused57]",
         | 
| 725 | 
            +
                  "lstrip": false,
         | 
| 726 | 
            +
                  "normalized": true,
         | 
| 727 | 
            +
                  "rstrip": false,
         | 
| 728 | 
            +
                  "single_word": false,
         | 
| 729 | 
            +
                  "special": false
         | 
| 730 | 
            +
                },
         | 
| 731 | 
            +
                "50343": {
         | 
| 732 | 
            +
                  "content": "[unused58]",
         | 
| 733 | 
            +
                  "lstrip": false,
         | 
| 734 | 
            +
                  "normalized": true,
         | 
| 735 | 
            +
                  "rstrip": false,
         | 
| 736 | 
            +
                  "single_word": false,
         | 
| 737 | 
            +
                  "special": false
         | 
| 738 | 
            +
                },
         | 
| 739 | 
            +
                "50344": {
         | 
| 740 | 
            +
                  "content": "[unused59]",
         | 
| 741 | 
            +
                  "lstrip": false,
         | 
| 742 | 
            +
                  "normalized": true,
         | 
| 743 | 
            +
                  "rstrip": false,
         | 
| 744 | 
            +
                  "single_word": false,
         | 
| 745 | 
            +
                  "special": false
         | 
| 746 | 
            +
                },
         | 
| 747 | 
            +
                "50345": {
         | 
| 748 | 
            +
                  "content": "[unused60]",
         | 
| 749 | 
            +
                  "lstrip": false,
         | 
| 750 | 
            +
                  "normalized": true,
         | 
| 751 | 
            +
                  "rstrip": false,
         | 
| 752 | 
            +
                  "single_word": false,
         | 
| 753 | 
            +
                  "special": false
         | 
| 754 | 
            +
                },
         | 
| 755 | 
            +
                "50346": {
         | 
| 756 | 
            +
                  "content": "[unused61]",
         | 
| 757 | 
            +
                  "lstrip": false,
         | 
| 758 | 
            +
                  "normalized": true,
         | 
| 759 | 
            +
                  "rstrip": false,
         | 
| 760 | 
            +
                  "single_word": false,
         | 
| 761 | 
            +
                  "special": false
         | 
| 762 | 
            +
                },
         | 
| 763 | 
            +
                "50347": {
         | 
| 764 | 
            +
                  "content": "[unused62]",
         | 
| 765 | 
            +
                  "lstrip": false,
         | 
| 766 | 
            +
                  "normalized": true,
         | 
| 767 | 
            +
                  "rstrip": false,
         | 
| 768 | 
            +
                  "single_word": false,
         | 
| 769 | 
            +
                  "special": false
         | 
| 770 | 
            +
                },
         | 
| 771 | 
            +
                "50348": {
         | 
| 772 | 
            +
                  "content": "[unused63]",
         | 
| 773 | 
            +
                  "lstrip": false,
         | 
| 774 | 
            +
                  "normalized": true,
         | 
| 775 | 
            +
                  "rstrip": false,
         | 
| 776 | 
            +
                  "single_word": false,
         | 
| 777 | 
            +
                  "special": false
         | 
| 778 | 
            +
                },
         | 
| 779 | 
            +
                "50349": {
         | 
| 780 | 
            +
                  "content": "[unused64]",
         | 
| 781 | 
            +
                  "lstrip": false,
         | 
| 782 | 
            +
                  "normalized": true,
         | 
| 783 | 
            +
                  "rstrip": false,
         | 
| 784 | 
            +
                  "single_word": false,
         | 
| 785 | 
            +
                  "special": false
         | 
| 786 | 
            +
                },
         | 
| 787 | 
            +
                "50350": {
         | 
| 788 | 
            +
                  "content": "[unused65]",
         | 
| 789 | 
            +
                  "lstrip": false,
         | 
| 790 | 
            +
                  "normalized": true,
         | 
| 791 | 
            +
                  "rstrip": false,
         | 
| 792 | 
            +
                  "single_word": false,
         | 
| 793 | 
            +
                  "special": false
         | 
| 794 | 
            +
                },
         | 
| 795 | 
            +
                "50351": {
         | 
| 796 | 
            +
                  "content": "[unused66]",
         | 
| 797 | 
            +
                  "lstrip": false,
         | 
| 798 | 
            +
                  "normalized": true,
         | 
| 799 | 
            +
                  "rstrip": false,
         | 
| 800 | 
            +
                  "single_word": false,
         | 
| 801 | 
            +
                  "special": false
         | 
| 802 | 
            +
                },
         | 
| 803 | 
            +
                "50352": {
         | 
| 804 | 
            +
                  "content": "[unused67]",
         | 
| 805 | 
            +
                  "lstrip": false,
         | 
| 806 | 
            +
                  "normalized": true,
         | 
| 807 | 
            +
                  "rstrip": false,
         | 
| 808 | 
            +
                  "single_word": false,
         | 
| 809 | 
            +
                  "special": false
         | 
| 810 | 
            +
                },
         | 
| 811 | 
            +
                "50353": {
         | 
| 812 | 
            +
                  "content": "[unused68]",
         | 
| 813 | 
            +
                  "lstrip": false,
         | 
| 814 | 
            +
                  "normalized": true,
         | 
| 815 | 
            +
                  "rstrip": false,
         | 
| 816 | 
            +
                  "single_word": false,
         | 
| 817 | 
            +
                  "special": false
         | 
| 818 | 
            +
                },
         | 
| 819 | 
            +
                "50354": {
         | 
| 820 | 
            +
                  "content": "[unused69]",
         | 
| 821 | 
            +
                  "lstrip": false,
         | 
| 822 | 
            +
                  "normalized": true,
         | 
| 823 | 
            +
                  "rstrip": false,
         | 
| 824 | 
            +
                  "single_word": false,
         | 
| 825 | 
            +
                  "special": false
         | 
| 826 | 
            +
                },
         | 
| 827 | 
            +
                "50355": {
         | 
| 828 | 
            +
                  "content": "[unused70]",
         | 
| 829 | 
            +
                  "lstrip": false,
         | 
| 830 | 
            +
                  "normalized": true,
         | 
| 831 | 
            +
                  "rstrip": false,
         | 
| 832 | 
            +
                  "single_word": false,
         | 
| 833 | 
            +
                  "special": false
         | 
| 834 | 
            +
                },
         | 
| 835 | 
            +
                "50356": {
         | 
| 836 | 
            +
                  "content": "[unused71]",
         | 
| 837 | 
            +
                  "lstrip": false,
         | 
| 838 | 
            +
                  "normalized": true,
         | 
| 839 | 
            +
                  "rstrip": false,
         | 
| 840 | 
            +
                  "single_word": false,
         | 
| 841 | 
            +
                  "special": false
         | 
| 842 | 
            +
                },
         | 
| 843 | 
            +
                "50357": {
         | 
| 844 | 
            +
                  "content": "[unused72]",
         | 
| 845 | 
            +
                  "lstrip": false,
         | 
| 846 | 
            +
                  "normalized": true,
         | 
| 847 | 
            +
                  "rstrip": false,
         | 
| 848 | 
            +
                  "single_word": false,
         | 
| 849 | 
            +
                  "special": false
         | 
| 850 | 
            +
                },
         | 
| 851 | 
            +
                "50358": {
         | 
| 852 | 
            +
                  "content": "[unused73]",
         | 
| 853 | 
            +
                  "lstrip": false,
         | 
| 854 | 
            +
                  "normalized": true,
         | 
| 855 | 
            +
                  "rstrip": false,
         | 
| 856 | 
            +
                  "single_word": false,
         | 
| 857 | 
            +
                  "special": false
         | 
| 858 | 
            +
                },
         | 
| 859 | 
            +
                "50359": {
         | 
| 860 | 
            +
                  "content": "[unused74]",
         | 
| 861 | 
            +
                  "lstrip": false,
         | 
| 862 | 
            +
                  "normalized": true,
         | 
| 863 | 
            +
                  "rstrip": false,
         | 
| 864 | 
            +
                  "single_word": false,
         | 
| 865 | 
            +
                  "special": false
         | 
| 866 | 
            +
                },
         | 
| 867 | 
            +
                "50360": {
         | 
| 868 | 
            +
                  "content": "[unused75]",
         | 
| 869 | 
            +
                  "lstrip": false,
         | 
| 870 | 
            +
                  "normalized": true,
         | 
| 871 | 
            +
                  "rstrip": false,
         | 
| 872 | 
            +
                  "single_word": false,
         | 
| 873 | 
            +
                  "special": false
         | 
| 874 | 
            +
                },
         | 
| 875 | 
            +
                "50361": {
         | 
| 876 | 
            +
                  "content": "[unused76]",
         | 
| 877 | 
            +
                  "lstrip": false,
         | 
| 878 | 
            +
                  "normalized": true,
         | 
| 879 | 
            +
                  "rstrip": false,
         | 
| 880 | 
            +
                  "single_word": false,
         | 
| 881 | 
            +
                  "special": false
         | 
| 882 | 
            +
                },
         | 
| 883 | 
            +
                "50362": {
         | 
| 884 | 
            +
                  "content": "[unused77]",
         | 
| 885 | 
            +
                  "lstrip": false,
         | 
| 886 | 
            +
                  "normalized": true,
         | 
| 887 | 
            +
                  "rstrip": false,
         | 
| 888 | 
            +
                  "single_word": false,
         | 
| 889 | 
            +
                  "special": false
         | 
| 890 | 
            +
                },
         | 
| 891 | 
            +
                "50363": {
         | 
| 892 | 
            +
                  "content": "[unused78]",
         | 
| 893 | 
            +
                  "lstrip": false,
         | 
| 894 | 
            +
                  "normalized": true,
         | 
| 895 | 
            +
                  "rstrip": false,
         | 
| 896 | 
            +
                  "single_word": false,
         | 
| 897 | 
            +
                  "special": false
         | 
| 898 | 
            +
                },
         | 
| 899 | 
            +
                "50364": {
         | 
| 900 | 
            +
                  "content": "[unused79]",
         | 
| 901 | 
            +
                  "lstrip": false,
         | 
| 902 | 
            +
                  "normalized": true,
         | 
| 903 | 
            +
                  "rstrip": false,
         | 
| 904 | 
            +
                  "single_word": false,
         | 
| 905 | 
            +
                  "special": false
         | 
| 906 | 
            +
                },
         | 
| 907 | 
            +
                "50365": {
         | 
| 908 | 
            +
                  "content": "[unused80]",
         | 
| 909 | 
            +
                  "lstrip": false,
         | 
| 910 | 
            +
                  "normalized": true,
         | 
| 911 | 
            +
                  "rstrip": false,
         | 
| 912 | 
            +
                  "single_word": false,
         | 
| 913 | 
            +
                  "special": false
         | 
| 914 | 
            +
                },
         | 
| 915 | 
            +
                "50366": {
         | 
| 916 | 
            +
                  "content": "[unused81]",
         | 
| 917 | 
            +
                  "lstrip": false,
         | 
| 918 | 
            +
                  "normalized": true,
         | 
| 919 | 
            +
                  "rstrip": false,
         | 
| 920 | 
            +
                  "single_word": false,
         | 
| 921 | 
            +
                  "special": false
         | 
| 922 | 
            +
                },
         | 
| 923 | 
            +
                "50367": {
         | 
| 924 | 
            +
                  "content": "[unused82]",
         | 
| 925 | 
            +
                  "lstrip": false,
         | 
| 926 | 
            +
                  "normalized": true,
         | 
| 927 | 
            +
                  "rstrip": false,
         | 
| 928 | 
            +
                  "single_word": false,
         | 
| 929 | 
            +
                  "special": false
         | 
| 930 | 
            +
                }
         | 
| 931 | 
            +
              },
         | 
| 932 | 
            +
              "clean_up_tokenization_spaces": true,
         | 
| 933 | 
            +
              "cls_token": "[CLS]",
         | 
| 934 | 
            +
              "extra_special_tokens": {},
         | 
| 935 | 
            +
              "mask_token": "[MASK]",
         | 
| 936 | 
            +
              "model_input_names": [
         | 
| 937 | 
            +
                "input_ids",
         | 
| 938 | 
            +
                "attention_mask"
         | 
| 939 | 
            +
              ],
         | 
| 940 | 
            +
              "model_max_length": 8192,
         | 
| 941 | 
            +
              "pad_token": "[PAD]",
         | 
| 942 | 
            +
              "sep_token": "[SEP]",
         | 
| 943 | 
            +
              "tokenizer_class": "PreTrainedTokenizer",
         | 
| 944 | 
            +
              "unk_token": "[UNK]"
         | 
| 945 | 
            +
            }
         | 
    	
        train_utils.py
    ADDED
    
    | @@ -0,0 +1,156 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            import math
         | 
| 2 | 
            +
            from torch import nn
         | 
| 3 | 
            +
            import torch
         | 
| 4 | 
            +
            import torch.nn.functional as F
         | 
| 5 | 
            +
             | 
| 6 | 
            +
             | 
| 7 | 
            +
            class SentimentWeightedLoss(nn.Module):
                """BCE-with-logits loss with per-sample dynamic weighting.

                Each sample's loss is scaled by the product of two weights:

                  * length_weight: sqrt(num_tokens) / sqrt(max num_tokens in the batch),
                    clamped to [min_len_weight_sqrt, 1.0]
                  * confidence_weight: 2 * |sigmoid(logit) - 0.5|, which lies in [0, 1]
                    (higher confidence => larger weight)

                The combined weights are divided by their batch mean (plus a small
                epsilon) so that they average to ~1 before being applied.
                """

                def __init__(self):
                    super().__init__()
                    # No reduction: per-sample losses are needed so each can be weighted.
                    self.bce = nn.BCEWithLogitsLoss(reduction="none")
                    self.min_len_weight_sqrt = 0.1  # Lower bound for the length weight

                def forward(
                    self,
                    logits: torch.Tensor,
                    targets: torch.Tensor,
                    lengths: torch.Tensor,
                ) -> torch.Tensor:
                    """Return the weighted mean BCE loss for a batch.

                    Args:
                        logits: Raw model outputs, one per sample. Flattened to [B].
                        targets: Binary labels, one per sample. Cast to float and
                            flattened to [B] so that [B] and [B, 1] inputs both work.
                        lengths: Token count per sample. Flattened to [B].

                    Returns:
                        A scalar tensor. For an empty batch a constant 0.0 is returned
                        instead of the NaN that a mean over no elements would produce.
                    """
                    flat_logits = logits.reshape(-1)
                    # Flatten targets too; BCEWithLogitsLoss requires matching shapes.
                    base_loss = self.bce(flat_logits, targets.float().reshape(-1))  # shape [B]

                    # NOTE: the weight is computed from the logits without detaching,
                    # so gradients also flow through the confidence term.
                    prob = torch.sigmoid(flat_logits)
                    confidence_weight = (prob - 0.5).abs() * 2  # in [0, 1]

                    if lengths.numel() == 0:
                        return torch.tensor(0.0, device=logits.device, requires_grad=logits.requires_grad)

                    # Flatten so a [B, 1] lengths tensor cannot broadcast the weights to [B, B].
                    flat_lengths = lengths.float().reshape(-1)
                    # Guard the denominator: if every length is 0, sqrt(0)/sqrt(0) would
                    # be NaN, and clamp() propagates NaN into the final loss.
                    max_len = max(flat_lengths.max().item(), 1.0)
                    length_weight = torch.sqrt(flat_lengths) / math.sqrt(max_len)
                    # Clamp to avoid extreme weights for very short samples.
                    length_weight = length_weight.clamp(self.min_len_weight_sqrt, 1.0)

                    weights = confidence_weight * length_weight
                    weights = weights / (weights.mean() + 1e-8)  # normalize so E[w] ~= 1
                    return (base_loss * weights).mean()
         | 
| 40 | 
            +
             | 
| 41 | 
            +
             | 
| 42 | 
            +
             | 
| 43 | 
            +
             | 
| 44 | 
            +
            class SentimentFocalLoss(nn.Module):
                """
                Binary sentiment loss combining several per-sample adjustments.

                Ingredients, applied in this order:
                1. Optional label smoothing of the hard 0/1 targets.
                2. Element-wise BCE-with-logits against the (smoothed) targets.
                3. A focal modulator computed from ``pt``, the probability the model
                   assigns to the original (unsmoothed) true class.
                4. A confidence weight, ``2 * |p - 0.5|``.
                5. A length weight, ``sqrt(len) / sqrt(max len in batch)``.

                Per-sample loss, roughly:
                    Modulator(pt, gamma) * BCE(logits, smoothed_targets) * W
                    W = (Confidence * Length) / Mean(Confidence * Length)
                """

                def __init__(self, gamma_focal: float = 0.1, label_smoothing_epsilon: float = 0.05):
                    """
                    Args:
                        gamma_focal (float): Exponent controlling the focal modulator.
                            - > 0: modulator is ``(1 - pt) ** gamma`` (standard focal
                              loss; easy examples are down-weighted).
                            - < 0: modulator is ``pt ** |gamma|`` (reversed; hard
                              examples are down-weighted).
                            - == 0: modulator is 1 (no modulation).
                        label_smoothing_epsilon (float): Smoothing amount in [0.0, 1.0).
                            Labels 1 become ``1 - epsilon`` and labels 0 become
                            ``epsilon``; 0.0 disables smoothing.

                    Raises:
                        ValueError: If ``label_smoothing_epsilon`` is outside [0.0, 1.0).
                    """
                    super().__init__()
                    if not (0.0 <= label_smoothing_epsilon < 1.0):
                        raise ValueError("label_smoothing_epsilon must be between 0.0 and <1.0.")

                    # Keep the BCE unreduced: per-sample weights are applied before the mean.
                    self.bce_loss_no_reduction = nn.BCEWithLogitsLoss(reduction="none")
                    self.label_smoothing_epsilon = label_smoothing_epsilon
                    self.gamma_focal = gamma_focal

                def forward(self, logits: torch.Tensor, targets: torch.Tensor, lengths: torch.Tensor) -> torch.Tensor:
                    """
                    Evaluate the loss for one batch.

                    Args:
                        logits (torch.Tensor): Raw model outputs, shape [B] or [B, 1].
                        targets (torch.Tensor): Ground-truth labels (0 or 1), shape [B] or [B, 1].
                        lengths (torch.Tensor): Token count of each review, shape [B].

                    Returns:
                        torch.Tensor: Scalar loss (0.0 for an empty batch).
                    """
                    # An empty batch has nothing to average; return a zero leaf tensor.
                    if logits.size(0) == 0:
                        return torch.tensor(0.0, device=logits.device, requires_grad=True)

                    scores = logits.view(-1)
                    hard_labels = targets.view(-1).float()

                    # --- Label smoothing: 1 -> 1 - eps, 0 -> eps -------------------------
                    eps = self.label_smoothing_epsilon
                    if eps > 0:
                        soft_labels = hard_labels * (1.0 - eps) + (1.0 - hard_labels) * eps
                    else:
                        soft_labels = hard_labels

                    # --- Unreduced BCE against the (possibly smoothed) labels -------------
                    per_sample_bce = self.bce_loss_no_reduction(scores, soft_labels)

                    # --- Focal modulator, based on the ORIGINAL hard labels ---------------
                    probs = torch.sigmoid(scores)
                    pt = torch.where(hard_labels.bool(), probs, 1.0 - probs)

                    gamma = self.gamma_focal
                    if gamma > 0:
                        # Standard focal term; the epsilon keeps pow() stable when pt == 1.
                        modulator = (1.0 - pt + 1e-8).pow(gamma)
                    elif gamma < 0:
                        # Reversed focal term; the epsilon keeps pow() stable when pt == 0.
                        modulator = (pt + 1e-8).pow(abs(gamma))
                    else:
                        modulator = torch.ones_like(pt)
                    focal_terms = modulator * per_sample_bce

                    # --- Confidence weight: distance of p from 0.5, rescaled to [0, 1] ----
                    confidence = (probs - 0.5).abs() * 2.0

                    # --- Length weight: sqrt(len) relative to the longest review ----------
                    token_counts = lengths.view(-1).float()
                    longest = token_counts.max().item()
                    if longest == 0:
                        # Every review is empty; treat all samples equally.
                        length_factor = torch.ones_like(token_counts)
                    else:
                        length_factor = torch.sqrt(token_counts) / (math.sqrt(longest) + 1e-8)
                        length_factor = torch.clamp(length_factor, 0.0, 1.0)

                    # --- Combine and normalize the external weights to mean ~1 ------------
                    sample_weights = confidence * length_factor
                    if sample_weights.sum() > 1e-8:
                        sample_weights = sample_weights / (sample_weights.mean() + 1e-8)
                    else:
                        # All weights vanished; use ones so the loss is not zeroed out.
                        sample_weights = torch.ones_like(sample_weights)

                    # --- Weighted mean over the batch -------------------------------------
                    return (focal_terms * sample_weights).mean()
         |