boltuix committed on
Commit
29d92f9
·
verified ·
1 Parent(s): 0cbb6d7

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +20 -5
README.md CHANGED
@@ -539,18 +539,33 @@ model = AutoModelForTokenClassification.from_pretrained("boltuix/NeuroBERT-NER")
539
  texts = ["Barack Obama visited Microsoft in Seattle on January 2025."]
540
  true_labels = [["B-PERSON", "I-PERSON", "O", "B-ORG", "O", "B-GPE", "O", "B-DATE", "I-DATE", "O"]]
541
 
542
- # Predict
543
  pred_labels = []
 
544
  for text in texts:
545
- inputs = tokenizer(text, return_tensors="pt")
546
  with torch.no_grad():
547
  outputs = model(**inputs)
548
- predictions = outputs.logits.argmax(dim=-1)
549
- labels = [model.config.id2label[p.item()] for p in predictions[0]]
550
  tokens = tokenizer.convert_ids_to_tokens(inputs["input_ids"][0])
551
- pred_labels.append([lbl for tok, lbl in zip(tokens, labels) if tok not in tokenizer.all_special_tokens])
 
 
 
 
 
 
 
 
 
 
 
 
552
 
553
  # Evaluate
 
 
 
554
  print(classification_report(true_labels, pred_labels))
555
  ```
556
 
 
539
  texts = ["Barack Obama visited Microsoft in Seattle on January 2025."]
540
  true_labels = [["B-PERSON", "I-PERSON", "O", "B-ORG", "O", "B-GPE", "O", "B-DATE", "I-DATE", "O"]]
541
 
 
542
  pred_labels = []
543
+
544
  for text in texts:
545
+ inputs = tokenizer(text, return_tensors="pt", is_split_into_words=False, return_attention_mask=True)
546
  with torch.no_grad():
547
  outputs = model(**inputs)
548
+
549
+ predictions = outputs.logits.argmax(dim=-1)[0].cpu().numpy()
550
  tokens = tokenizer.convert_ids_to_tokens(inputs["input_ids"][0])
551
+ word_ids = inputs.word_ids(batch_index=0)
552
+
553
+ # Align prediction to word level (first token of each word)
554
+ word_preds = []
555
+ previous_word_idx = None
556
+ for idx, word_idx in enumerate(word_ids):
557
+ if word_idx is None or word_idx == previous_word_idx:
558
+ continue # Skip special tokens and subwords
559
+ label = model.config.id2label[predictions[idx]]
560
+ word_preds.append(label)
561
+ previous_word_idx = word_idx
562
+
563
+ pred_labels.append(word_preds)
564
 
565
  # Evaluate
566
+ print("Predicted:", pred_labels)
567
+ print("True :", true_labels)
568
+ print("\n📊 Evaluation Report:\n")
569
  print(classification_report(true_labels, pred_labels))
570
  ```
571