#!/usr/bin/env python3
"""
Test script for model tracing integration.
Tests the p-value computation for a simple model comparison.
"""

import sys
import os

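# Make modules under src/ importable. The path is relative to the current
# working directory, so run this script from the directory that contains src/.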
sys.path.append('src')

from evaluation.model_trace_eval import compute_model_trace_p_value

def test_model_trace():
    """Smoke-test the model trace p-value computation on a small model.

    Calls ``compute_model_trace_p_value`` for ``openai-community/gpt2`` with
    the arguments ``"main"`` and ``"float16"``, prints progress to stdout, and
    verifies that a p-value was returned and that it lies in ``[0, 1]``.

    Failures are reported by raising (not only by printing) so that pytest and
    the process exit code reflect the outcome.

    Raises:
        AssertionError: If the returned p-value is ``None`` or falls outside
            the closed interval ``[0, 1]`` (this includes NaN).
        Exception: Any error raised by ``compute_model_trace_p_value`` is
            re-raised after a short error line is printed.
    """
    print("Testing model trace p-value computation...")

    # Test with a simple model (should be fast)
    test_model = "openai-community/gpt2"

    print(f"Computing p-value for {test_model} vs GPT-2...")

    # Keep the try body to the single call that can fail, and re-raise so the
    # interpreter / test runner shows the traceback and marks the run failed.
    try:
        p_value = compute_model_trace_p_value(test_model, "main", "float16")
    except Exception as e:
        print(f"❌ Error: {e}")
        raise

    if p_value is None:
        print("❌ Failed: P-value is None")
        raise AssertionError("compute_model_trace_p_value returned None")

    print(f"✅ Success! P-value: {p_value}")

    # A chained comparison is False for NaN, so NaN is rejected here as well.
    if not 0 <= p_value <= 1:
        print(f"⚠️ Warning: P-value {p_value} is outside expected range [0, 1]")
        raise AssertionError(
            f"P-value {p_value} is outside expected range [0, 1]"
        )

    print("✅ P-value is in valid range [0, 1]")

# Run the check only when this file is executed as a script; importing the
# module (for example, during test collection) must not trigger the computation.
if __name__ == "__main__":
    test_model_trace()