#!/usr/bin/env python3
"""
Indonesian NER BERT - Inference Example
========================================

This script demonstrates how to use the Indonesian NER BERT model
for named entity recognition on Indonesian text.

Usage:
    python inference_example.py
"""

from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline
import torch

def load_model(model_name_or_path="asmud/cahya-indonesian-ner-tuned"):
    """Fetch the tokenizer and token-classification model for Indonesian NER.

    Args:
        model_name_or_path: Identifier or path forwarded unchanged to both
            ``from_pretrained`` calls.

    Returns:
        A ``(tokenizer, model)`` tuple on success, or ``(None, None)`` when
        anything raised while loading (the error is printed, not re-raised).
    """
    print("🔄 Loading Indonesian NER BERT model...")

    try:
        ner_tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
        ner_model = AutoModelForTokenClassification.from_pretrained(model_name_or_path)
        print("✅ Model loaded successfully!")
    except Exception as load_error:
        # Best-effort for a demo script: report the problem and hand back a
        # pair of Nones so each demonstration can skip itself.
        print(f"❌ Error loading model: {load_error}")
        return None, None

    return ner_tokenizer, ner_model

def create_ner_pipeline(model, tokenizer):
    """Wrap an already-loaded model and tokenizer in a ``"ner"`` pipeline.

    Args:
        model: Token-classification model handed to the pipeline.
        tokenizer: Tokenizer handed to the pipeline.

    Returns:
        The pipeline object built with ``aggregation_strategy="simple"``.
    """
    # Device index 0 when CUDA is reported available, otherwise -1
    # (the pipeline's CPU setting per the transformers docs).
    if torch.cuda.is_available():
        device_index = 0
    else:
        device_index = -1

    pipeline_kwargs = {
        "model": model,
        "tokenizer": tokenizer,
        "aggregation_strategy": "simple",
        "device": device_index,
    }
    return pipeline("ner", **pipeline_kwargs)

def demonstrate_basic_usage():
    """Run the aggregated NER pipeline over a handful of sample sentences.

    Loads the model, builds the pipeline, and prints every entity group the
    pipeline reports for each sentence together with its score. Returns
    early, after the section header, when the model or tokenizer is falsy
    (which is what ``load_model`` yields on failure).
    """
    print("\n🎯 BASIC USAGE DEMONSTRATION")
    print("=" * 50)

    tokenizer, model = load_model()
    if not (model and tokenizer):
        return

    tagger = create_ner_pipeline(model, tokenizer)

    sample_sentences = (
        "Presiden Joko Widodo menghadiri rapat di Gedung DPR pada 15 Januari 2024.",
        "Bank Indonesia menetapkan suku bunga 5.75 persen untuk mendorong investasi.",
        "Kementerian Kesehatan mengalokasikan dana sebesar 10 miliar rupiah untuk program vaksinasi.",
        "Gubernur Jawa Barat meresmikan Bandara Internasional Kertajati di Majalengka.",
        "Mahkamah Konstitusi memutuskan UU No. 12 Tahun 2023 tentang Pemilu tidak bertentangan dengan konstitusi.",
    )

    for number, sentence in enumerate(sample_sentences, start=1):
        print(f"\n📝 Example {number}:")
        print(f"Text: {sentence}")
        print("Entities found:")

        found = tagger(sentence)

        if not found:
            print("  No entities found.")
        else:
            for hit in found:
                group = hit['entity_group']
                word = hit['word']
                score = hit['score']
                print(f"  🏷️  {group:>6}: {word:<20} (confidence: {score:.3f})")

        # Visual separator between examples.
        print("-" * 80)

def demonstrate_custom_inference():
    """Demonstrate token-level NER by calling the model directly.

    Tokenizes one sample sentence, runs the model without the pipeline
    helper, and prints a table with each token, its highest-probability
    label, and that probability. Returns early, after the section header,
    when the model or tokenizer could not be loaded.
    """
    print("\n🔧 CUSTOM INFERENCE DEMONSTRATION")
    print("=" * 50)

    # Load model components
    tokenizer, model = load_model()
    if not model or not tokenizer:
        return

    # Marker tokens that are dropped from the printed table.
    special_tokens = ('[CLS]', '[SEP]', '[PAD]')

    def predict_tokens(text):
        """Return ``(token, label, confidence)`` for each non-special token.

        Args:
            text: The sentence to tag.

        Returns:
            A list of tuples: the token string, the label looked up in
            ``model.config.id2label``, and the softmax probability of that
            label as a Python float.
        """
        # Tokenize
        inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)

        # Predict
        with torch.no_grad():
            outputs = model(**inputs)
            probabilities = torch.nn.functional.softmax(outputs.logits, dim=-1)
            # A single reduction over the last axis yields both the winning
            # probability and its label id for every position of the first
            # (and only) sequence, instead of an argmax plus one max() call
            # per token.
            top_probs, top_ids = probabilities[0].max(dim=-1)

        # Convert to readable format
        tokens = tokenizer.convert_ids_to_tokens(inputs["input_ids"][0])
        labels = [model.config.id2label[label_id] for label_id in top_ids.tolist()]

        # Filter out special tokens
        return [
            (token, label, conf)
            for token, label, conf in zip(tokens, labels, top_probs.tolist())
            if token not in special_tokens
        ]

    # Example text
    text = "Menteri Retno Marsudi bertemu dengan delegasi ASEAN di Hotel Indonesia pada pukul 14.30 WIB."
    print(f"Text: {text}")
    print("\nToken-level predictions:")
    print(f"{'Token':<15} {'Label':<8} {'Confidence':<10}")
    print("-" * 35)

    for token, label, conf in predict_tokens(text):
        # Strip only a leading '##' subword marker; a '##' occurring elsewhere
        # in the token is literal text and must stay visible.
        display_token = token[2:] if token.startswith('##') else token
        print(f"{display_token:<15} {label:<8} {conf:<10.3f}")

def demonstrate_entity_types():
    """Show one sample sentence per entity category and what the pipeline finds.

    Loads the model, builds the pipeline, then for each labelled sample
    prints the category heading, the sentence, and every entity group the
    pipeline returns with its score. Returns early, after the section header,
    when the model or tokenizer is falsy.
    """
    print("\n🏷️ SUPPORTED ENTITY TYPES DEMONSTRATION")
    print("=" * 50)

    tokenizer, model = load_model()
    if not (model and tokenizer):
        return

    tagger = create_ner_pipeline(model, tokenizer)

    # (category heading, sample sentence) pairs, shown in this order.
    labelled_samples = (
        ("Person (PER)", "Menteri Budi Gunadi Sadikin memberikan keterangan pers."),
        ("Organization (ORG)", "PT Telkom Indonesia meluncurkan layanan 5G terbaru."),
        ("Location (LOC)", "Wisatawan mengunjungi Danau Toba dan Gunung Bromo."),
        ("Geopolitical (GPE)", "Delegasi dari Jakarta bertemu dengan perwakilan Surabaya."),
        ("Date (DAT)", "Acara dilaksanakan pada 17 Agustus 2024."),
        ("Time (TIM)", "Rapat dimulai pukul 09.00 WIB."),
        ("Money (MON)", "Anggaran sebesar 50 miliar rupiah telah disetujui."),
        ("Percentage (PCT)", "Inflasi naik 3.2 persen bulan ini."),
        ("Quantity (QTY)", "Bantuan berupa 500 ton beras disalurkan."),
        ("Facility (FAC)", "Peresmian Bandara Soekarno-Hatta Terminal 4."),
        ("Law (LAW)", "UU No. 23 Tahun 2014 tentang Pemerintahan Daerah."),
        ("Event (EVT)", "Konferensi Asia-Pasifik 2024 akan digelar bulan depan."),
    )

    for heading, sentence in labelled_samples:
        print(f"\n📂 {heading}:")
        print(f"   Text: {sentence}")
        print("   Entities:")

        detected = tagger(sentence)
        if not detected:
            print("     No entities detected")
        else:
            for hit in detected:
                group = hit['entity_group']
                word = hit['word']
                score = hit['score']
                print(f"     • {group}: {word} ({score:.3f})")

def main():
    """Print the banner, run each demonstration in order, then the closing notes."""
    banner_lines = (
        "🇮🇩 Indonesian NER BERT - Inference Examples",
        "=" * 60,
        "This script demonstrates various ways to use the Indonesian NER BERT model",
        "for named entity recognition in Indonesian text.",
    )
    for banner_line in banner_lines:
        print(banner_line)

    # Demonstrations run sequentially in the listed order.
    demonstrations = (
        demonstrate_basic_usage,
        demonstrate_custom_inference,
        demonstrate_entity_types,
    )
    for run_demo in demonstrations:
        run_demo()

    print("\n🎉 Demonstration completed!")
    print("For more information, see the README.md file or visit the model page.")

# Run the demonstrations only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()