Upload fine-tuned model, tokenizer, and supporting files for modernbert-imdb-sentiment
Browse files
- config.json +53 -0
- config.yaml +13 -0
- inference.py +114 -0
- pytorch_model.bin +3 -0
config.json
ADDED
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"architectures": [
|
3 |
+
"ModernBertForSentiment"
|
4 |
+
],
|
5 |
+
"attention_bias": false,
|
6 |
+
"attention_dropout": 0.0,
|
7 |
+
"bos_token_id": 50281,
|
8 |
+
"classifier_activation": "gelu",
|
9 |
+
"classifier_bias": false,
|
10 |
+
"classifier_dropout": 0.0,
|
11 |
+
"classifier_pooling": "mean",
|
12 |
+
"cls_token_id": 50281,
|
13 |
+
"decoder_bias": true,
|
14 |
+
"deterministic_flash_attn": false,
|
15 |
+
"embedding_dropout": 0.0,
|
16 |
+
"eos_token_id": 50282,
|
17 |
+
"global_attn_every_n_layers": 3,
|
18 |
+
"global_rope_theta": 160000.0,
|
19 |
+
"gradient_checkpointing": false,
|
20 |
+
"hidden_activation": "gelu",
|
21 |
+
"hidden_size": 768,
|
22 |
+
"id2label": {
|
23 |
+
"0": "NEGATIVE",
|
24 |
+
"1": "POSITIVE"
|
25 |
+
},
|
26 |
+
"initializer_cutoff_factor": 2.0,
|
27 |
+
"initializer_range": 0.02,
|
28 |
+
"intermediate_size": 1152,
|
29 |
+
"label2id": {
|
30 |
+
"NEGATIVE": 0,
|
31 |
+
"POSITIVE": 1
|
32 |
+
},
|
33 |
+
"layer_norm_eps": 1e-05,
|
34 |
+
"local_attention": 128,
|
35 |
+
"local_rope_theta": 10000.0,
|
36 |
+
"max_position_embeddings": 8192,
|
37 |
+
"mlp_bias": false,
|
38 |
+
"mlp_dropout": 0.0,
|
39 |
+
"model_type": "modernbert",
|
40 |
+
"norm_bias": false,
|
41 |
+
"norm_eps": 1e-05,
|
42 |
+
"num_attention_heads": 12,
|
43 |
+
"num_hidden_layers": 22,
|
44 |
+
"pad_token_id": 50283,
|
45 |
+
"position_embedding_type": "absolute",
|
46 |
+
"repad_logits_with_grad": false,
|
47 |
+
"sep_token_id": 50282,
|
48 |
+
"sparse_pred_ignore_index": -100,
|
49 |
+
"sparse_prediction": false,
|
50 |
+
"torch_dtype": "float32",
|
51 |
+
"transformers_version": "4.51.3",
|
52 |
+
"vocab_size": 50368
|
53 |
+
}
|
config.yaml
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
model:
|
2 |
+
name_or_path: "voxmenthe/modernbert-imdb-sentiment"
|
3 |
+
tokenizer_name_or_path: "answerdotai/ModernBERT-base"
|
4 |
+
max_length: 880 # 256
|
5 |
+
dropout: 0.1
|
6 |
+
pooling_strategy: "mean" # Current default, change as needed
|
7 |
+
num_weighted_layers: 6 # Match original training config
|
8 |
+
|
9 |
+
inference:
|
10 |
+
# Path to a local .pt checkpoint; if it is not an existing file, inference.py loads model.name_or_path from the Hub instead
|
11 |
+
model_path: "voxmenthe/modernbert-imdb-sentiment"
|
12 |
+
# Using the same max_length as training for consistency
|
13 |
+
max_length: 880 # 256
|
inference.py
ADDED
@@ -0,0 +1,114 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import torch
|
2 |
+
from transformers import AutoTokenizer, AutoModelForSequenceClassification, ModernBertConfig
|
3 |
+
from typing import Dict, Any
|
4 |
+
import yaml
|
5 |
+
import os
|
6 |
+
from models import ModernBertForSentiment
|
7 |
+
|
8 |
+
class SentimentInference:
    """Load a ModernBERT sentiment model plus tokenizer and score single texts.

    The model is taken from a local ``.pt`` checkpoint when
    ``inference.model_path`` in the YAML config names an existing file;
    otherwise it is loaded from the Hugging Face Hub using
    ``model.name_or_path``.
    """

    def __init__(self, config_path: str = "config.yaml"):
        """Load configuration and initialize model and tokenizer from local checkpoint or Hugging Face Hub.

        Args:
            config_path: Path to a YAML file with optional ``model`` and
                ``inference`` mappings.

        Raises:
            ValueError: If neither a tokenizer id nor a model id can be
                resolved, or if no local checkpoint exists and
                ``model.name_or_path`` is missing.
        """
        with open(config_path, 'r', encoding='utf-8') as f:
            # safe_load returns None for an empty document; normalise to a dict
            # so the .get() calls below cannot fail with AttributeError.
            config_data = yaml.safe_load(f) or {}

        # `or {}` also covers sections that are present but null in the YAML.
        model_yaml_cfg = config_data.get('model') or {}
        inference_yaml_cfg = config_data.get('inference') or {}

        model_hf_repo_id = model_yaml_cfg.get('name_or_path')
        # The tokenizer falls back to the model id when no dedicated id is given.
        tokenizer_hf_repo_id = model_yaml_cfg.get('tokenizer_name_or_path') or model_hf_repo_id
        local_model_weights_path = inference_yaml_cfg.get('model_path')  # Path for local .pt file

        # inference.max_length wins over model.max_length; 512 is the last resort.
        self.max_length = inference_yaml_cfg.get('max_length', model_yaml_cfg.get('max_length', 512))

        # --- Tokenizer Loading ---
        if not tokenizer_hf_repo_id:
            raise ValueError("Either model.tokenizer_name_or_path or model.name_or_path (as fallback for tokenizer) must be specified in config.yaml")
        print(f"Loading tokenizer from: {tokenizer_hf_repo_id}")
        self.tokenizer = AutoTokenizer.from_pretrained(tokenizer_hf_repo_id)

        # --- Model Loading ---
        # A local checkpoint is used only when the configured path is an existing file.
        load_from_local_pt = bool(local_model_weights_path) and os.path.isfile(local_model_weights_path)
        if not load_from_local_pt and not model_hf_repo_id:
            raise ValueError("No local model_path found and model.name_or_path (for Hub) is not specified in config.yaml")

        if load_from_local_pt:
            print(f"Found local model weights path: {local_model_weights_path}")
            self.model = self._load_from_local_pt(
                local_model_weights_path,
                model_yaml_cfg,
                model_hf_repo_id or tokenizer_hf_repo_id,
            )
        else:
            self.model = self._load_from_hub(model_hf_repo_id, model_yaml_cfg)

        # Inference only: switch off dropout and other train-time behaviour.
        self.model.eval()

    @staticmethod
    def _augment_config(config, model_yaml_cfg: Dict[str, Any], default_num_weighted_layers: int):
        """Copy model settings from the YAML ``model`` section onto ``config`` and return it."""
        config.pooling_strategy = model_yaml_cfg.get('pooling_strategy', 'mean')
        config.num_weighted_layers = model_yaml_cfg.get('num_weighted_layers', default_num_weighted_layers)
        # Only override the dropout when the YAML provides one; writing None here
        # would discard the value that came with the loaded config.
        dropout = model_yaml_cfg.get('dropout')
        if dropout is not None:
            config.classifier_dropout = dropout
        config.num_labels = model_yaml_cfg.get('num_labels', 1)
        return config

    @staticmethod
    def _load_from_local_pt(local_model_weights_path: str, model_yaml_cfg: Dict[str, Any], fallback_config_id):
        """Build ModernBertForSentiment and load its weights from a local ``.pt`` checkpoint."""
        print("Attempting to load model from local .pt checkpoint...")
        # The architecture config still comes from a Hub id (e.g. the original base model).
        base_model_for_config_id = model_yaml_cfg.get('base_model_for_config', fallback_config_id)
        if not base_model_for_config_id:
            raise ValueError("For local .pt loading, model.base_model_for_config must be specified in config.yaml (e.g., 'answerdotai/ModernBERT-base') to build the model structure.")

        print(f"Loading ModernBertConfig for structure from: {base_model_for_config_id}")
        bert_config = ModernBertConfig.from_pretrained(base_model_for_config_id)
        # NOTE(review): this path defaults num_weighted_layers to 4 while the Hub
        # path defaults to 6 -- confirm which one matches the trained checkpoints.
        bert_config = SentimentInference._augment_config(bert_config, model_yaml_cfg, 4)

        print("Instantiating ModernBertForSentiment model structure...")
        model = ModernBertForSentiment(bert_config)

        print(f"Loading model weights from local checkpoint: {local_model_weights_path}")
        # NOTE(security): torch.load unpickles the file. Only load checkpoints from
        # trusted sources; consider weights_only=True if the checkpoint format allows it.
        checkpoint = torch.load(local_model_weights_path, map_location=torch.device('cpu'))
        if isinstance(checkpoint, dict) and 'model_state_dict' in checkpoint:
            model_state_to_load = checkpoint['model_state_dict']
        else:
            model_state_to_load = checkpoint  # Assume it's the state_dict itself
        model.load_state_dict(model_state_to_load)
        print(f"Model loaded successfully from local checkpoint: {local_model_weights_path}.")
        return model

    @staticmethod
    def _load_from_hub(model_hf_repo_id: str, model_yaml_cfg: Dict[str, Any]):
        """Load config and weights for ``model_hf_repo_id`` from the Hugging Face Hub."""
        print(f"Attempting to load model from Hugging Face Hub: {model_hf_repo_id}...")
        print(f"Loading base ModernBertConfig from: {model_hf_repo_id}")
        loaded_config = ModernBertConfig.from_pretrained(model_hf_repo_id)
        loaded_config = SentimentInference._augment_config(loaded_config, model_yaml_cfg, 6)

        print(f"Instantiating and loading model weights for {model_hf_repo_id}...")
        # No force_download: the regular Hub cache is used, so the weights are
        # not fetched again on every construction.
        model = AutoModelForSequenceClassification.from_pretrained(
            model_hf_repo_id,
            config=loaded_config,
            trust_remote_code=True,
        )
        print(f"Model {model_hf_repo_id} loaded successfully from Hugging Face Hub.")
        return model

    def predict(self, text: str) -> Dict[str, Any]:
        """Classify one text as positive or negative.

        Args:
            text: The input text; it is truncated to ``self.max_length`` tokens.

        Returns:
            A dict with ``"sentiment"`` (``"positive"`` when the sigmoid of the
            logit exceeds 0.5, else ``"negative"``) and ``"confidence"`` (that
            same sigmoid value, so it is below 0.5 for negative predictions).

        Raises:
            ValueError: If the model output has no ``"logits"`` entry.
        """
        inputs = self.tokenizer(text, return_tensors="pt", truncation=True, max_length=self.max_length, padding=True)
        with torch.no_grad():
            outputs = self.model(input_ids=inputs['input_ids'], attention_mask=inputs['attention_mask'])
        logits = outputs.get("logits")  # Use .get for safety
        if logits is None:
            raise ValueError("Model output did not contain 'logits'. Check model's forward pass.")
        # .item() requires a single-element tensor (one text, one logit).
        prob = torch.sigmoid(logits).item()
        return {"sentiment": "positive" if prob > 0.5 else "negative", "confidence": prob}
|
pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4df4e2674170e253063392d4bd819656062aede34efe70fafe650ee033c46044
|
3 |
+
size 600842787
|