#!/usr/bin/env python3
"""
Test script for model tracing integration.

Tests the p-value computation for a simple model comparison.  When run as a
script the process exit status reflects the outcome (0 on success, 1 on
failure) so shells and CI jobs can detect a broken integration.
"""

import os
import sys
import traceback

# Add src to path.  The entry is relative, so it is resolved against the
# current working directory: run the script from the directory containing src/.
sys.path.append('src')

from evaluation.model_trace_eval import compute_model_trace_p_value


def _run_model_trace_check():
    """Run the p-value computation once and report the outcome on stdout.

    Returns:
        bool: True when a p-value was produced and lies in [0, 1]; False when
        the computation raised, returned None, or produced a value outside
        that range.
    """
    print("Testing model trace p-value computation...")

    # Test with a simple model (should be fast)
    test_model = "openai-community/gpt2"

    print(f"Computing p-value for {test_model} vs GPT-2...")

    try:
        # NOTE(review): "main" and "float16" are presumably the model revision
        # and the precision -- confirm against compute_model_trace_p_value.
        p_value = compute_model_trace_p_value(test_model, "main", "float16")

        if p_value is None:
            print("❌ Failed: P-value is None")
            return False

        print(f"✅ Success! P-value: {p_value}")
        if 0 <= p_value <= 1:
            print("✅ P-value is in valid range [0, 1]")
            return True

        # Also reached for NaN, since every comparison with NaN is False.
        print(f"⚠️ Warning: P-value {p_value} is outside expected range [0, 1]")
        return False

    except Exception as e:
        # Top-level boundary of a diagnostic script: report the error with a
        # full traceback and signal failure instead of propagating.
        print(f"❌ Error: {e}")
        traceback.print_exc()
        return False


def test_model_trace():
    """Test the model trace p-value computation with a simple example.

    Prints the outcome to stdout.  Exceptions raised by the computation are
    reported (with a traceback) rather than propagated, and nothing is
    returned.
    """
    _run_model_trace_check()


if __name__ == "__main__":
    # Propagate the outcome through the exit status so failures are visible
    # to the calling shell / CI job.
    sys.exit(0 if _run_model_trace_check() else 1)