spencercdz commited on
Commit
35c18f1
·
verified ·
1 Parent(s): 61a2749

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +35 -20
README.md CHANGED
@@ -68,7 +68,9 @@ This model is intended for organizations and researchers involved in humanitaria
68
  **Important**: Due to its custom architecture, this model **cannot** be used with the standard `pipeline("text-classification")` function. Please see the usage code below for the correct implementation.
69
 
70
  ### How to Use
71
- This model requires custom code to handle its two-headed output. The following is a complete, self-contained Python script to run inference. You will need to have `transformers`, `torch`, and `safetensors` installed (`pip install transformers torch safetensors`).
 
 
72
 
73
  The script is broken into logical blocks:
74
 
@@ -89,6 +91,7 @@ from transformers import AutoTokenizer, AutoConfig, AutoModel, PreTrainedModel
89
  from huggingface_hub import hf_hub_download
90
  from typing import Dict, Any
91
  from safetensors.torch import load_file
 
92
 
93
  class MultiHeadClassificationModel(PreTrainedModel):
94
  def __init__(self, config, **kwargs):
@@ -169,48 +172,60 @@ def get_direct_report_labels() -> Dict[int, str]: return {0: 'no', 1: 'yes'}
169
  def get_sentiment_labels() -> Dict[int, str]: return {0: 'negative', 1: 'neutral', 2: 'positive'}
170
  ```
171
  ***
172
- 3. **Setup & Loading**: This setup function handles loading all components and reconstructing the necessary metadata.
173
  ```python
174
  def load_essentials():
175
  print("Loading model, tokenizer, and metadata... (This may take a moment on first run)")
 
176
  hub_repo_id = "spencercdz/xlm-roberta-sentiment-requests"
177
  subfolder = "final_model"
178
  device = "cuda" if torch.cuda.is_available() else "cpu"
179
  print(f"Using device: {device}")
180
 
181
- all_labels_map = get_all_labels()
182
-
183
- # --- FIX IS HERE ---
184
- # We must exclude 'sentiment' from the multiclass tasks for the multi-label head,
185
- # because sentiment has its own dedicated classification head.
186
- multiclass_tasks = {k: len(v) for k, v in all_labels_map.items() if len(v) > 2 and k != 'sentiment'}
187
- # -------------------
188
 
189
- binary_tasks = [k for k, v in all_labels_map.items() if len(v) == 2 and k not in ['related', 'sentiment']]
190
-
191
- column_names = [f"{t}_{i}" for t, n in multiclass_tasks.items() for i in range(n)] + binary_tasks
192
- multilabel_column_names = sorted(column_names)
193
- num_multilabels = len(multilabel_column_names) # This will now correctly be 41
194
  num_sentiment_labels = len(get_sentiment_labels())
195
 
 
196
  tokenizer = AutoTokenizer.from_pretrained(hub_repo_id, subfolder=subfolder)
197
  config = AutoConfig.from_pretrained(hub_repo_id, subfolder=subfolder)
 
 
198
  config.num_sentiment_labels = num_sentiment_labels
199
 
 
 
200
  model_shell = MultiHeadClassificationModel(config=config, num_multilabels=num_multilabels)
 
 
201
  weights_path = hf_hub_download(repo_id=hub_repo_id, filename="model.safetensors", subfolder=subfolder)
202
- state_dict = load_file(weights_path, device="cpu") # Load to CPU first
 
 
203
  model_shell.load_state_dict(state_dict, strict=False)
 
 
204
  model = model_shell.to(device)
205
  model.eval()
206
 
207
- metadata = {
208
- "binary_tasks": binary_tasks, "multiclass_tasks": multiclass_tasks,
 
 
209
  "multilabel_column_names": multilabel_column_names,
210
- "all_labels": all_labels_map, "device": device
 
211
  }
212
  print("Loading complete.")
213
- return model, tokenizer, metadata
214
  ```
215
  ***
216
  4. **Prediction Function**: Takes the loaded model, tokenizer, and metadata together with the input text, and returns a decoded dictionary of predictions.
@@ -255,7 +270,7 @@ if __name__ == "__main__":
255
 
256
  # Print the raw dictionary output
257
  print("\n--- RAW DICTIONARY OUTPUT ---")
258
- print(predictions)
259
  ```
260
 
261
  ### Sample Output
 
68
  **Important**: Due to its custom architecture, this model **cannot** be used with the standard `pipeline("text-classification")` function. Please see the usage code below for the correct implementation.
69
 
70
  ### How to Use
71
+ This model requires custom code to handle its two-headed output. The following is a complete, self-contained Python script to run inference. You will need to have `transformers`, `torch`, `safetensors`, and `huggingface_hub` installed (`pip install transformers torch safetensors huggingface_hub`).
72
+
73
+ The script automatically downloads all necessary files, including the model weights and metadata. Copy the code blocks below into a single file and run it.
74
 
75
  The script is broken into logical blocks:
76
 
 
91
  from huggingface_hub import hf_hub_download
92
  from typing import Dict, Any
93
  from safetensors.torch import load_file
94
+ import json
95
 
96
  class MultiHeadClassificationModel(PreTrainedModel):
97
  def __init__(self, config, **kwargs):
 
172
  def get_sentiment_labels() -> Dict[int, str]: return {0: 'negative', 1: 'neutral', 2: 'positive'}
173
  ```
174
  ***
175
+ 3. **Setup & Loading**: This function downloads and loads all required components from the Hub, including `metadata.json`, which defines the model's output structure.
176
  ```python
177
  def load_essentials():
178
  print("Loading model, tokenizer, and metadata... (This may take a moment on first run)")
179
+
180
  hub_repo_id = "spencercdz/xlm-roberta-sentiment-requests"
181
  subfolder = "final_model"
182
  device = "cuda" if torch.cuda.is_available() else "cpu"
183
  print(f"Using device: {device}")
184
 
185
+ # Load the model's output structure from the metadata.json file.
186
+ metadata_path = hf_hub_download(repo_id=hub_repo_id, filename="metadata.json", subfolder=subfolder)
187
+ with open(metadata_path, "r") as f:
188
+ file_metadata = json.load(f)
 
 
 
189
 
190
+ # Use the metadata to define the number of output neurons for the classification heads.
191
+ binary_tasks = file_metadata["binary_tasks"]
192
+ multiclass_tasks = file_metadata["multiclass_tasks"]
193
+ multilabel_column_names = file_metadata["multilabel_column_names"]
194
+ num_multilabels = len(multilabel_column_names)
195
  num_sentiment_labels = len(get_sentiment_labels())
196
 
197
+ # Load the standard tokenizer and config.
198
  tokenizer = AutoTokenizer.from_pretrained(hub_repo_id, subfolder=subfolder)
199
  config = AutoConfig.from_pretrained(hub_repo_id, subfolder=subfolder)
200
+
201
+ # Add our custom sentiment label count to the config.
202
  config.num_sentiment_labels = num_sentiment_labels
203
 
204
+ # Manually load the custom model, as it's not a standard transformers architecture.
205
+ # Create a model 'shell' with our custom architecture.
206
  model_shell = MultiHeadClassificationModel(config=config, num_multilabels=num_multilabels)
207
+
208
+ # Download and load the trained weights.
209
  weights_path = hf_hub_download(repo_id=hub_repo_id, filename="model.safetensors", subfolder=subfolder)
210
+ state_dict = load_file(weights_path, device="cpu")
211
+
212
+ # Apply weights to the shell. `strict=False` is required for loading custom heads.
213
  model_shell.load_state_dict(state_dict, strict=False)
214
+
215
+ # Move model to the target device and set to evaluation mode.
216
  model = model_shell.to(device)
217
  model.eval()
218
 
219
+ # Package all components for use in the predict function.
220
+ metadata_for_prediction = {
221
+ "binary_tasks": binary_tasks,
222
+ "multiclass_tasks": multiclass_tasks,
223
  "multilabel_column_names": multilabel_column_names,
224
+ "all_labels": get_all_labels(),
225
+ "device": device
226
  }
227
  print("Loading complete.")
228
+ return model, tokenizer, metadata_for_prediction
229
  ```
230
  ***
231
  4. **Prediction Function**: Takes the loaded model, tokenizer, and metadata together with the input text, and returns a decoded dictionary of predictions.
 
270
 
271
  # Print the raw dictionary output
272
  print("\n--- RAW DICTIONARY OUTPUT ---")
273
+ print(json.dumps(predictions, indent=4))
274
  ```
275
 
276
  ### Sample Output