Upload fine-tuned model, tokenizer, and supporting files for modernbert-imdb-sentiment

Browse files

Files changed (4) hide show

config.json +53 -0
config.yaml +13 -0
inference.py +114 -0
pytorch_model.bin +3 -0

config.json ADDED Viewed

	@@ -0,0 +1,53 @@

+{
+  "architectures": [
+    "ModernBertForSentiment"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "bos_token_id": 50281,
+  "classifier_activation": "gelu",
+  "classifier_bias": false,
+  "classifier_dropout": 0.0,
+  "classifier_pooling": "mean",
+  "cls_token_id": 50281,
+  "decoder_bias": true,
+  "deterministic_flash_attn": false,
+  "embedding_dropout": 0.0,
+  "eos_token_id": 50282,
+  "global_attn_every_n_layers": 3,
+  "global_rope_theta": 160000.0,
+  "gradient_checkpointing": false,
+  "hidden_activation": "gelu",
+  "hidden_size": 768,
+  "id2label": {
+    "0": "NEGATIVE",
+    "1": "POSITIVE"
+  },
+  "initializer_cutoff_factor": 2.0,
+  "initializer_range": 0.02,
+  "intermediate_size": 1152,
+  "label2id": {
+    "NEGATIVE": 0,
+    "POSITIVE": 1
+  },
+  "layer_norm_eps": 1e-05,
+  "local_attention": 128,
+  "local_rope_theta": 10000.0,
+  "max_position_embeddings": 8192,
+  "mlp_bias": false,
+  "mlp_dropout": 0.0,
+  "model_type": "modernbert",
+  "norm_bias": false,
+  "norm_eps": 1e-05,
+  "num_attention_heads": 12,
+  "num_hidden_layers": 22,
+  "pad_token_id": 50283,
+  "position_embedding_type": "absolute",
+  "repad_logits_with_grad": false,
+  "sep_token_id": 50282,
+  "sparse_pred_ignore_index": -100,
+  "sparse_prediction": false,
+  "torch_dtype": "float32",
+  "transformers_version": "4.51.3",
+  "vocab_size": 50368
+}

config.yaml ADDED Viewed

	@@ -0,0 +1,13 @@

+model:
+  name_or_path: "voxmenthe/modernbert-imdb-sentiment"
+  tokenizer_name_or_path: "answerdotai/ModernBERT-base"
+  max_length: 880 # 256
+  dropout: 0.1
+  pooling_strategy: "mean" # Current default, change as needed
+  num_weighted_layers: 6 # Match original training config
+inference:
+  # Local checkpoint file to load; if no file exists at this path, inference.py loads model.name_or_path from the Hub instead
+  model_path: "voxmenthe/modernbert-imdb-sentiment"
+  # Using the same max_length as training for consistency
+  max_length: 880 # 256

inference.py ADDED Viewed

	@@ -0,0 +1,114 @@

+import torch
+from transformers import AutoTokenizer, AutoModelForSequenceClassification, ModernBertConfig
+from typing import Dict, Any
+import yaml
+import os
+from models import ModernBertForSentiment
+class SentimentInference:
+    """Load a fine-tuned ModernBERT sentiment model and classify single texts.
+
+    The model comes from a local checkpoint file when ``inference.model_path``
+    in the YAML config names an existing file; otherwise it is loaded from the
+    Hugging Face Hub repository given by ``model.name_or_path``.
+    """
+    def __init__(self, config_path: str = "config.yaml"):
+        """Read the YAML config, then load the tokenizer and the model.
+
+        Args:
+            config_path: Path to a YAML file with optional ``model`` and
+                ``inference`` sections.
+
+        Raises:
+            ValueError: If the config does not provide enough information to
+                locate a tokenizer or a model.
+        """
+        with open(config_path, 'r', encoding='utf-8') as f:
+            # An empty YAML file parses to None; treat it as an empty mapping.
+            config_data = yaml.safe_load(f) or {}
+        # "model:" / "inference:" with no children also parse to None.
+        model_yaml_cfg = config_data.get('model') or {}
+        inference_yaml_cfg = config_data.get('inference') or {}
+        model_hf_repo_id = model_yaml_cfg.get('name_or_path')
+        tokenizer_hf_repo_id = model_yaml_cfg.get('tokenizer_name_or_path', model_hf_repo_id)
+        # Only used as a checkpoint when it points at an existing file.
+        local_model_weights_path = inference_yaml_cfg.get('model_path')
+        # inference.max_length wins over model.max_length; 512 if neither is set.
+        self.max_length = inference_yaml_cfg.get('max_length', model_yaml_cfg.get('max_length', 512))
+        # --- Tokenizer loading ---
+        if not tokenizer_hf_repo_id and not model_hf_repo_id:
+            raise ValueError("Either model.tokenizer_name_or_path or model.name_or_path (as fallback for tokenizer) must be specified in config.yaml")
+        effective_tokenizer_repo_id = tokenizer_hf_repo_id or model_hf_repo_id
+        print(f"Loading tokenizer from: {effective_tokenizer_repo_id}")
+        self.tokenizer = AutoTokenizer.from_pretrained(effective_tokenizer_repo_id)
+        # --- Model loading ---
+        load_from_local_pt = bool(local_model_weights_path) and os.path.isfile(local_model_weights_path)
+        if not load_from_local_pt and not model_hf_repo_id:
+            raise ValueError("No local model_path found and model.name_or_path (for Hub) is not specified in config.yaml")
+        if load_from_local_pt:
+            print(f"Found local model weights path: {local_model_weights_path}")
+            print("Attempting to load model from local .pt checkpoint...")
+            # The checkpoint holds weights only, so the architecture config is
+            # fetched separately from a repo id.
+            base_model_for_config_id = model_yaml_cfg.get('base_model_for_config', model_hf_repo_id or tokenizer_hf_repo_id)
+            if not base_model_for_config_id:
+                raise ValueError("For local .pt loading, model.base_model_for_config must be specified in config.yaml (e.g., 'answerdotai/ModernBERT-base') to build the model structure.")
+            print(f"Loading ModernBertConfig for structure from: {base_model_for_config_id}")
+            bert_config = ModernBertConfig.from_pretrained(base_model_for_config_id)
+            # NOTE(review): this path defaults num_weighted_layers to 4 while the
+            # Hub path defaults to 6 - confirm which default is intended.
+            self._apply_yaml_overrides(bert_config, model_yaml_cfg, default_num_weighted_layers=4)
+            print("Instantiating ModernBertForSentiment model structure...")
+            self.model = ModernBertForSentiment(bert_config)
+            print(f"Loading model weights from local checkpoint: {local_model_weights_path}")
+            # SECURITY: torch.load unpickles the file; only point model_path at
+            # checkpoints from a trusted source (consider weights_only=True).
+            checkpoint = torch.load(local_model_weights_path, map_location=torch.device('cpu'))
+            if isinstance(checkpoint, dict) and 'model_state_dict' in checkpoint:
+                model_state_to_load = checkpoint['model_state_dict']
+            else:
+                # Otherwise the file is taken to be the state_dict itself.
+                model_state_to_load = checkpoint
+            self.model.load_state_dict(model_state_to_load)
+            print(f"Model loaded successfully from local checkpoint: {local_model_weights_path}.")
+        else:
+            # model_hf_repo_id is guaranteed non-empty here by the check above.
+            print(f"Attempting to load model from Hugging Face Hub: {model_hf_repo_id}...")
+            print(f"Loading base ModernBertConfig from: {model_hf_repo_id}")
+            loaded_config = ModernBertConfig.from_pretrained(model_hf_repo_id)
+            self._apply_yaml_overrides(loaded_config, model_yaml_cfg, default_num_weighted_layers=6)
+            print(f"Instantiating and loading model weights for {model_hf_repo_id}...")
+            # NOTE(review): the uploaded config.json names the architecture
+            # "ModernBertForSentiment" but carries no auto_map entry - confirm
+            # that AutoModelForSequenceClassification resolves to the intended class.
+            self.model = AutoModelForSequenceClassification.from_pretrained(
+                model_hf_repo_id,
+                config=loaded_config,
+                trust_remote_code=True,
+                # Opt-in via inference.force_download; the cached copy is used
+                # by default instead of re-downloading the weights every time.
+                force_download=bool(inference_yaml_cfg.get('force_download', False)),
+            )
+            print(f"Model {model_hf_repo_id} loaded successfully from Hugging Face Hub.")
+        self.model.eval()
+    @staticmethod
+    def _apply_yaml_overrides(config, model_yaml_cfg: Dict[str, Any], default_num_weighted_layers: int) -> None:
+        """Copy the model settings from the YAML ``model`` section onto *config* in place."""
+        config.pooling_strategy = model_yaml_cfg.get('pooling_strategy', 'mean')
+        config.num_weighted_layers = model_yaml_cfg.get('num_weighted_layers', default_num_weighted_layers)
+        # Keep the value already on the config when the YAML gives no dropout,
+        # rather than overwriting it with None.
+        dropout = model_yaml_cfg.get('dropout')
+        if dropout is not None:
+            config.classifier_dropout = dropout
+        config.num_labels = model_yaml_cfg.get('num_labels', 1)
+    def predict(self, text: str) -> Dict[str, Any]:
+        """Classify one text as positive or negative.
+
+        Args:
+            text: The text to score; it is truncated to ``self.max_length`` tokens.
+
+        Returns:
+            A dict with ``"sentiment"`` (``"positive"`` or ``"negative"``) and
+            ``"confidence"``. ``"confidence"`` is the sigmoid of the model's
+            logit, i.e. the positive-class score, so a ``"negative"`` result
+            carries a value of at most 0.5.
+
+        Raises:
+            ValueError: If the model output has no ``"logits"`` entry.
+        """
+        inputs = self.tokenizer(text, return_tensors="pt", truncation=True, max_length=self.max_length, padding=True)
+        with torch.no_grad():
+            outputs = self.model(input_ids=inputs['input_ids'], attention_mask=inputs['attention_mask'])
+        logits = outputs.get("logits")
+        if logits is None:
+            raise ValueError("Model output did not contain 'logits'. Check model's forward pass.")
+        # .item() requires a single-element tensor: one text, one output logit.
+        prob = torch.sigmoid(logits).item()
+        return {"sentiment": "positive" if prob > 0.5 else "negative", "confidence": prob}

pytorch_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4df4e2674170e253063392d4bd819656062aede34efe70fafe650ee033c46044
+size 600842787