voxmenthe committed
Commit 012ebc4 · verified · 1 parent: adca32d

Upload fine-tuned model, tokenizer, and supporting files for modernbert-imdb-sentiment

Files changed (4)
  1. config.json +53 -0
  2. config.yaml +13 -0
  3. inference.py +114 -0
  4. pytorch_model.bin +3 -0
config.json ADDED
@@ -0,0 +1,53 @@
+ {
+   "architectures": [
+     "ModernBertForSentiment"
+   ],
+   "attention_bias": false,
+   "attention_dropout": 0.0,
+   "bos_token_id": 50281,
+   "classifier_activation": "gelu",
+   "classifier_bias": false,
+   "classifier_dropout": 0.0,
+   "classifier_pooling": "mean",
+   "cls_token_id": 50281,
+   "decoder_bias": true,
+   "deterministic_flash_attn": false,
+   "embedding_dropout": 0.0,
+   "eos_token_id": 50282,
+   "global_attn_every_n_layers": 3,
+   "global_rope_theta": 160000.0,
+   "gradient_checkpointing": false,
+   "hidden_activation": "gelu",
+   "hidden_size": 768,
+   "id2label": {
+     "0": "NEGATIVE",
+     "1": "POSITIVE"
+   },
+   "initializer_cutoff_factor": 2.0,
+   "initializer_range": 0.02,
+   "intermediate_size": 1152,
+   "label2id": {
+     "NEGATIVE": 0,
+     "POSITIVE": 1
+   },
+   "layer_norm_eps": 1e-05,
+   "local_attention": 128,
+   "local_rope_theta": 10000.0,
+   "max_position_embeddings": 8192,
+   "mlp_bias": false,
+   "mlp_dropout": 0.0,
+   "model_type": "modernbert",
+   "norm_bias": false,
+   "norm_eps": 1e-05,
+   "num_attention_heads": 12,
+   "num_hidden_layers": 22,
+   "pad_token_id": 50283,
+   "position_embedding_type": "absolute",
+   "repad_logits_with_grad": false,
+   "sep_token_id": 50282,
+   "sparse_pred_ignore_index": -100,
+   "sparse_prediction": false,
+   "torch_dtype": "float32",
+   "transformers_version": "4.51.3",
+   "vocab_size": 50368
+ }
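
The committed config can be sanity-checked straight from the Hub; a minimal sketch (assumes the repo is public and transformers is installed):

    from transformers import AutoConfig

    # Pull the config above from the Hub and inspect the classifier setup.
    config = AutoConfig.from_pretrained("voxmenthe/modernbert-imdb-sentiment")
    print(config.model_type)          # "modernbert"
    print(config.id2label)            # {0: "NEGATIVE", 1: "POSITIVE"}
    print(config.classifier_pooling)  # "mean"
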
config.yaml ADDED
@@ -0,0 +1,13 @@
+ model:
+   name_or_path: "voxmenthe/modernbert-imdb-sentiment"
+   tokenizer_name_or_path: "answerdotai/ModernBERT-base"
+   max_length: 880 # 256
+   dropout: 0.1
+   pooling_strategy: "mean" # Current default, change as needed
+   num_weighted_layers: 6 # Match original training config
+
+ inference:
+   # Default path, can be overridden
+   model_path: "voxmenthe/modernbert-imdb-sentiment"
+   # Using the same max_length as training for consistency
+   max_length: 880 # 256
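
Note that inference.py resolves max_length by letting the inference block override the model block; a minimal sketch of that lookup (key names as in the file above):

    import yaml

    with open("config.yaml") as f:
        cfg = yaml.safe_load(f)

    # inference.max_length wins, model.max_length is the fallback, 512 the last resort.
    max_length = cfg.get("inference", {}).get(
        "max_length", cfg.get("model", {}).get("max_length", 512)
    )
    print(max_length)  # 880
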
inference.py ADDED
@@ -0,0 +1,114 @@
+ import os
+ from typing import Any, Dict
+
+ import torch
+ import yaml
+ from transformers import AutoModelForSequenceClassification, AutoTokenizer, ModernBertConfig
+
+ from models import ModernBertForSentiment
+
+
+ class SentimentInference:
+     def __init__(self, config_path: str = "config.yaml"):
+         """Load configuration and initialize the model and tokenizer from a local checkpoint or the Hugging Face Hub."""
+         with open(config_path, "r") as f:
+             config_data = yaml.safe_load(f)
+
+         model_yaml_cfg = config_data.get("model", {})
+         inference_yaml_cfg = config_data.get("inference", {})
+
+         model_hf_repo_id = model_yaml_cfg.get("name_or_path")
+         tokenizer_hf_repo_id = model_yaml_cfg.get("tokenizer_name_or_path", model_hf_repo_id)
+         local_model_weights_path = inference_yaml_cfg.get("model_path")  # Path to a local .pt file, if any
+
+         self.max_length = inference_yaml_cfg.get("max_length", model_yaml_cfg.get("max_length", 512))
+
+         # --- Tokenizer loading (always from the Hub for now; could be made conditional) ---
+         if not tokenizer_hf_repo_id and not model_hf_repo_id:
+             raise ValueError(
+                 "Either model.tokenizer_name_or_path or model.name_or_path (as a fallback for the "
+                 "tokenizer) must be specified in config.yaml"
+             )
+         effective_tokenizer_repo_id = tokenizer_hf_repo_id or model_hf_repo_id
+         print(f"Loading tokenizer from: {effective_tokenizer_repo_id}")
+         self.tokenizer = AutoTokenizer.from_pretrained(effective_tokenizer_repo_id)
+
+         # --- Model loading: decide between a local .pt checkpoint and the Hugging Face Hub ---
+         load_from_local_pt = bool(local_model_weights_path and os.path.isfile(local_model_weights_path))
+         if not load_from_local_pt and not model_hf_repo_id:
+             raise ValueError(
+                 "No local model_path found and model.name_or_path (for the Hub) is not specified in config.yaml"
+             )
+
+         if load_from_local_pt:
+             print(f"Loading model from local checkpoint: {local_model_weights_path}")
+             # The base config must still come from a Hub ID (e.g., the original base model);
+             # it is needed to build the correct ModernBertForSentiment structure.
+             base_model_for_config_id = model_yaml_cfg.get(
+                 "base_model_for_config", model_hf_repo_id or tokenizer_hf_repo_id
+             )
+             if not base_model_for_config_id:
+                 raise ValueError(
+                     "For local .pt loading, model.base_model_for_config must be specified in config.yaml "
+                     "(e.g., 'answerdotai/ModernBERT-base') to build the model structure."
+                 )
+
+             print(f"Loading ModernBertConfig for the model structure from: {base_model_for_config_id}")
+             bert_config = ModernBertConfig.from_pretrained(base_model_for_config_id)
+
+             # Augment the config with parameters from the model section of config.yaml.
+             bert_config.pooling_strategy = model_yaml_cfg.get("pooling_strategy", "mean")
+             bert_config.num_weighted_layers = model_yaml_cfg.get("num_weighted_layers", 4)
+             bert_config.classifier_dropout = model_yaml_cfg.get("dropout")
+             bert_config.num_labels = model_yaml_cfg.get("num_labels", 1)
+
+             self.model = ModernBertForSentiment(bert_config)
+
+             checkpoint = torch.load(local_model_weights_path, map_location=torch.device("cpu"))
+             if isinstance(checkpoint, dict) and "model_state_dict" in checkpoint:
+                 model_state_to_load = checkpoint["model_state_dict"]
+             else:
+                 model_state_to_load = checkpoint  # Assume the file is the state_dict itself
+             self.model.load_state_dict(model_state_to_load)
+             print(f"Model loaded successfully from local checkpoint: {local_model_weights_path}.")
+
+         else:  # Load from the Hugging Face Hub
+             print(f"Loading model from Hugging Face Hub: {model_hf_repo_id}...")
+             loaded_config = ModernBertConfig.from_pretrained(model_hf_repo_id)
+
+             # Augment the loaded config the same way as in the local path.
+             loaded_config.pooling_strategy = model_yaml_cfg.get("pooling_strategy", "mean")
+             loaded_config.num_weighted_layers = model_yaml_cfg.get("num_weighted_layers", 6)  # Default is now 6
+             loaded_config.classifier_dropout = model_yaml_cfg.get("dropout")
+             loaded_config.num_labels = model_yaml_cfg.get("num_labels", 1)
+
+             self.model = AutoModelForSequenceClassification.from_pretrained(
+                 model_hf_repo_id,
+                 config=loaded_config,
+                 trust_remote_code=True,
+                 force_download=True,  # TEMPORARY: remove once Hub loading is verified
+             )
+             print(f"Model {model_hf_repo_id} loaded successfully from Hugging Face Hub.")
+
+         self.model.eval()
+
+     def predict(self, text: str) -> Dict[str, Any]:
+         inputs = self.tokenizer(text, return_tensors="pt", truncation=True, max_length=self.max_length, padding=True)
+         with torch.no_grad():
+             outputs = self.model(input_ids=inputs["input_ids"], attention_mask=inputs["attention_mask"])
+         logits = outputs.get("logits")  # Use .get for safety
+         if logits is None:
+             raise ValueError("Model output did not contain 'logits'. Check the model's forward pass.")
+         # The head produces a single logit, so sigmoid gives the probability of the positive class.
+         prob = torch.sigmoid(logits).item()
+         return {"sentiment": "positive" if prob > 0.5 else "negative", "confidence": prob}
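
With config.yaml and a models.py defining ModernBertForSentiment alongside this script, usage is a two-liner; a minimal sketch (the printed confidence is illustrative, not a real output):

    from inference import SentimentInference

    clf = SentimentInference(config_path="config.yaml")
    print(clf.predict("A surprisingly moving film with terrific performances."))
    # e.g. {'sentiment': 'positive', 'confidence': 0.97}
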
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4df4e2674170e253063392d4bd819656062aede34efe70fafe650ee033c46044
+ size 600842787
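
What git stores here is only the LFS pointer; the ~600 MB weights are fetched on checkout by git-lfs, or directly with huggingface_hub; a minimal sketch (assumes the huggingface_hub package is installed):

    from huggingface_hub import hf_hub_download

    # Resolves the LFS pointer and caches the full weights file locally.
    weights_path = hf_hub_download(
        repo_id="voxmenthe/modernbert-imdb-sentiment",
        filename="pytorch_model.bin",
    )
    print(weights_path)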