Token Classification
Transformers
Safetensors
English
bert
ner
named-entity-recognition
text-classification
sequence-labeling
transformer
nlp
pretrained-model
dataset-finetuning
deep-learning
huggingface
conll2025
real-time-inference
efficient-nlp
high-accuracy
gpu-optimized
chatbot
information-extraction
search-enhancement
knowledge-graph
legal-nlp
medical-nlp
financial-nlp
Update README.md
README.md CHANGED
````diff
@@ -539,18 +539,33 @@ model = AutoModelForTokenClassification.from_pretrained("boltuix/NeuroBERT-NER")
 texts = ["Barack Obama visited Microsoft in Seattle on January 2025."]
 true_labels = [["B-PERSON", "I-PERSON", "O", "B-ORG", "O", "B-GPE", "O", "B-DATE", "I-DATE", "O"]]
 
-# Predict
 pred_labels = []
+
 for text in texts:
-    inputs = tokenizer(text, return_tensors="pt")
+    inputs = tokenizer(text, return_tensors="pt", is_split_into_words=False, return_attention_mask=True)
     with torch.no_grad():
         outputs = model(**inputs)
-
-
+
+    predictions = outputs.logits.argmax(dim=-1)[0].cpu().numpy()
     tokens = tokenizer.convert_ids_to_tokens(inputs["input_ids"][0])
-
+    word_ids = inputs.word_ids(batch_index=0)
+
+    # Align prediction to word level (first token of each word)
+    word_preds = []
+    previous_word_idx = None
+    for idx, word_idx in enumerate(word_ids):
+        if word_idx is None or word_idx == previous_word_idx:
+            continue  # Skip special tokens and subwords
+        label = model.config.id2label[predictions[idx]]
+        word_preds.append(label)
+        previous_word_idx = word_idx
+
+    pred_labels.append(word_preds)
 
 # Evaluate
+print("Predicted:", pred_labels)
+print("True :", true_labels)
+print("\n📊 Evaluation Report:\n")
 print(classification_report(true_labels, pred_labels))
 ```
 
````
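For readers who want to run the updated snippet end to end, here is a minimal, self-contained sketch. It assumes `classification_report` comes from `seqeval` (consistent with the nested label lists passed above; the import is not shown in this part of the diff) and loads the tokenizer and model named in the hunk header.

```python
# Minimal runnable sketch of the updated snippet.
# Assumption: classification_report is seqeval's (the diff does not show the
# import); seqeval accepts lists of label lists as used here.
import torch
from transformers import AutoTokenizer, AutoModelForTokenClassification
from seqeval.metrics import classification_report

tokenizer = AutoTokenizer.from_pretrained("boltuix/NeuroBERT-NER")  # fast tokenizer needed for word_ids()
model = AutoModelForTokenClassification.from_pretrained("boltuix/NeuroBERT-NER")
model.eval()

texts = ["Barack Obama visited Microsoft in Seattle on January 2025."]
true_labels = [["B-PERSON", "I-PERSON", "O", "B-ORG", "O", "B-GPE", "O", "B-DATE", "I-DATE", "O"]]

pred_labels = []
for text in texts:
    inputs = tokenizer(text, return_tensors="pt")
    with torch.no_grad():
        outputs = model(**inputs)

    # Highest-scoring label id per sub-token.
    predictions = outputs.logits.argmax(dim=-1)[0].cpu().numpy()
    word_ids = inputs.word_ids(batch_index=0)

    # Align to word level: keep the first sub-token's label for each word,
    # skipping special tokens (word_idx is None) and subword continuations.
    word_preds = []
    previous_word_idx = None
    for idx, word_idx in enumerate(word_ids):
        if word_idx is None or word_idx == previous_word_idx:
            continue
        word_preds.append(model.config.id2label[predictions[idx]])
        previous_word_idx = word_idx

    pred_labels.append(word_preds)

print("Predicted:", pred_labels)
print("True     :", true_labels)
print(classification_report(true_labels, pred_labels))
```

Keeping only the first sub-token's label per word makes each predicted sequence the same length as its word-level gold sequence, which is what an entity-level report needs in order to line up spans.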