#!/usr/bin/env python3
"""
Hugging Face Compatible NBA Performance Predictor

Description: Wrapper for NBA XGBoost model to work with Hugging Face Hub
"""

import os
import json
from typing import Dict, List, Optional, Union, Any

import numpy as np
import pandas as pd
import xgboost as xgb
import joblib
from huggingface_hub import PyTorchModelHubMixin, snapshot_download


class NBAPerformancePredictorHF(PyTorchModelHubMixin):
    """
    Hugging Face compatible NBA Performance Predictor using XGBoost
    """

    def __init__(self, model_dir: Optional[str] = None, **kwargs):
        """
        Initialize the Hugging Face compatible model

        Args:
            model_dir (str): Directory containing the saved model files.
                When omitted, or when the directory does not exist, the
                instance is created without a model loaded.
            **kwargs: Accepted and ignored, so extra keyword arguments
                forwarded by the hub mixin do not break construction.
        """
        super().__init__()
        self.model = None
        self.scaler = None
        self.feature_names = None
        self.target_column = 'PTS'
        self.model_metadata = {}

        if model_dir and os.path.exists(model_dir):
            self.load_model(model_dir)

    def load_model(self, model_dir: str):
        """
        Load the saved XGBoost model and preprocessing components

        Each artifact is optional; whichever of the three files exists in
        ``model_dir`` is loaded and the rest are left untouched.

        Args:
            model_dir (str): Directory containing the saved model files
        """
        # Load metadata
        metadata_path = os.path.join(model_dir, "model_metadata.json")
        if os.path.exists(metadata_path):
            with open(metadata_path, 'r', encoding='utf-8') as f:
                self.model_metadata = json.load(f)
            self.feature_names = self.model_metadata.get('feature_names', [])
            self.target_column = self.model_metadata.get('target_column', 'PTS')

        # Load the XGBoost model
        model_path = os.path.join(model_dir, "xgboost_model.json")
        if os.path.exists(model_path):
            self.model = xgb.XGBRegressor()
            self.model.load_model(model_path)

        # Load the scaler
        # NOTE(review): joblib.load unpickles arbitrary objects; only load
        # scaler files that come from a trusted source.
        scaler_path = os.path.join(model_dir, "scaler.joblib")
        if os.path.exists(scaler_path):
            self.scaler = joblib.load(scaler_path)

        # Only report success when a model was actually found.
        if self.model is not None:
            print(f"Model loaded successfully from {model_dir}/")
        else:
            print(f"Warning: no XGBoost model file found in {model_dir}/")

    def predict(self, player_stats: Union[Dict, List[Dict]]) -> Union[float, List[float]]:
        """
        Make predictions for NBA player performance

        Args:
            player_stats: Dictionary or list of dictionaries with player statistics

        Returns:
            Predicted points per game (float or list of floats)

        Raises:
            ValueError: If no model is loaded, or if the feature names
                needed to build the model input are not available.
        """
        if self.model is None:
            raise ValueError("Model not loaded! Please load a trained model first.")

        # Handle single input
        single_input = isinstance(player_stats, dict)
        records = [player_stats] if single_input else list(player_stats)
        if not records:
            return []

        if not self.feature_names:
            raise ValueError(
                "Feature names are not available. Load model_metadata.json "
                "containing 'feature_names' before predicting."
            )

        # Build one frame with exactly the training columns, in training
        # order; features missing from an input default to 0.
        rows = [
            {feature: stats.get(feature, 0) for feature in self.feature_names}
            for stats in records
        ]
        input_df = pd.DataFrame(rows, columns=self.feature_names)

        # NOTE(review): self.scaler is loaded but not applied here; confirm
        # whether the model was trained on scaled features.
        predictions = [float(value) for value in self.model.predict(input_df)]

        return predictions[0] if single_input else predictions

    def predict_batch(self, player_stats_list: List[Dict]) -> List[Dict]:
        """
        Make batch predictions with detailed output

        Args:
            player_stats_list: List of player statistics dictionaries

        Returns:
            List of prediction results with metadata
        """
        predictions = self.predict(player_stats_list)

        return [
            {
                'input_id': i,
                'predicted_points': round(pred, 2),
                'player_name': stats.get('Player', f'Player_{i}'),
                'confidence': 'high' if pred > 0 else 'low',  # Simple confidence measure
                # Number of non-zero values supplied for this player
                'input_features': sum(1 for value in stats.values() if value != 0),
            }
            for i, (stats, pred) in enumerate(zip(player_stats_list, predictions))
        ]

    def get_feature_info(self) -> Dict:
        """
        Get information about the features used by the model

        Returns:
            Dictionary with feature information
        """
        return {
            'total_features': len(self.feature_names) if self.feature_names else 0,
            'feature_names': self.feature_names[:20] if self.feature_names else [],  # First 20
            'target_variable': self.target_column,
            'model_type': self.model_metadata.get('model_type', 'XGBRegressor'),
            'required_features': [
                'Age', 'G', 'GS', 'MP', 'FG', 'FGA', 'FG_1',
                'Pos_encoded', 'Team_encoded', 'Age_category_encoded'
            ]
        }

    def create_example_input(self) -> Dict:
        """
        Create an example input for testing the model

        Returns:
            Dictionary with example player statistics
        """
        return {
            'Age': 27,
            'G': 75,
            'GS': 70,
            'MP': 35.0,
            'FG': 8.5,
            'FGA': 18.0,
            'FG_1': 0.472,
            'Pos_encoded': 2,  # Forward
            'Team_encoded': 15,
            'Age_category_encoded': 1,  # Prime
            'PTS_lag_1': 22.5,
            'PTS_lag_2': 21.0,
            'TRB_lag_1': 7.2,
            'AST_lag_1': 4.8,
            'Points_per_minute_lag_1': 0.64,
            'Efficiency_lag_1': 1.0
        }

    def _save_pretrained(self, save_directory: str, **kwargs):
        """
        Save the model for Hugging Face Hub (required by PyTorchModelHubMixin)

        Only the components that are present are written.

        Args:
            save_directory (str): Directory to write the model files into
            **kwargs: Accepted for compatibility with the mixin; unused
        """
        # Save the XGBoost model
        if self.model is not None:
            model_path = os.path.join(save_directory, "xgboost_model.json")
            self.model.save_model(model_path)

        # Save preprocessing components and metadata
        if self.model_metadata:
            metadata_path = os.path.join(save_directory, "model_metadata.json")
            with open(metadata_path, 'w', encoding='utf-8') as f:
                json.dump(self.model_metadata, f, indent=2)

        # Save the scaler
        if self.scaler is not None:
            scaler_path = os.path.join(save_directory, "scaler.joblib")
            joblib.dump(self.scaler, scaler_path)

        print(f"Model saved to {save_directory}")

    @classmethod
    def _from_pretrained(
        cls,
        *,
        model_id: str,
        revision: Optional[str] = None,
        cache_dir: Optional[str] = None,
        force_download: bool = False,
        proxies: Optional[Dict] = None,
        resume_download: Optional[bool] = None,
        local_files_only: bool = False,
        token: Optional[Union[str, bool]] = None,
        **model_kwargs,
    ):
        """
        Load the model from Hugging Face Hub (required by PyTorchModelHubMixin)

        The mixin invokes this hook on the class, so it must be a
        classmethod.

        Args:
            model_id (str): Local directory with the model files, or a Hub
                repository id to download
            revision (str): Repository revision to download
            cache_dir (str): Cache directory used for downloaded files
            force_download (bool): Re-download even if files are cached
            proxies (Dict): Proxy configuration, forwarded only when given
            resume_download (bool): Accepted for compatibility; unused
            local_files_only (bool): Do not access the network
            token (str | bool): Authentication token for the Hub
            **model_kwargs: Extra keyword arguments passed to the constructor

        Returns:
            An instance of ``cls`` with the model files loaded
        """
        if os.path.isdir(model_id):
            model_dir = model_id
        else:
            download_kwargs = {
                'repo_id': model_id,
                'revision': revision,
                'cache_dir': cache_dir,
                'force_download': force_download,
                'local_files_only': local_files_only,
                'token': token,
            }
            # Forward proxies only when supplied so the call stays valid
            # on hub versions that no longer accept the argument.
            if proxies is not None:
                download_kwargs['proxies'] = proxies
            model_dir = snapshot_download(**download_kwargs)

        return cls(model_dir=model_dir, **model_kwargs)


def create_model_card(model_dir: str = "nba_model", output_path: str = "README.md"):
    """
    Create a model card for Hugging Face Hub

    Args:
        model_dir (str): Directory containing the model (currently unused;
            kept for API compatibility)
        output_path (str): Path to save the model card
    """
    model_card_content = """
# NBA Player Performance Predictor

## Model Description

This model predicts NBA player points per game (PPG) using XGBoost regression with time-series features. The model uses historical player statistics, lag features, and engineered metrics to make predictions.

## Model Details

- **Model Type**: XGBoost Regressor
- **Task**: Regression (Predicting NBA player points per game)
- **Framework**: scikit-learn, XGBoost
- **Performance**: RMSE ~3-5 points per game, R² ~0.6-0.8

## Features

The model uses various features including:
- Basic stats: Age, Games, Minutes Played, Field Goals, etc.
- Lag features: Previous season performance metrics
- Rolling averages: 2-3 year performance averages
- Efficiency metrics: Points per minute, overall efficiency
- Categorical encodings: Position, Team, Age category

## Usage

```python
from huggingface_model import NBAPerformancePredictorHF

# Load the model
model = NBAPerformancePredictorHF("path/to/model")

# Example prediction
player_stats = {
    'Age': 27,
    'G': 75,
    'GS': 70,
    'MP': 35.0,
    'FG': 8.5,
    'FGA': 18.0,
    'FG_1': 0.472,
    'Pos_encoded': 2,
    'Team_encoded': 15,
    'Age_category_encoded': 1,
    'PTS_lag_1': 22.5,
    'PTS_lag_2': 21.0,
    'TRB_lag_1': 7.2,
    'AST_lag_1': 4.8
}

predicted_points = model.predict(player_stats)
print(f"Predicted PPG: {predicted_points:.2f}")
```

## Training Data

The model was trained on NBA player statistics from multiple seasons, including:
- Regular season statistics
- Playoff performance data
- Historical player performance trends

## Limitations

- Requires historical data (lag features) for accurate predictions
- Performance may vary for rookie players or players with limited history
- Model is trained on specific NBA eras and may need retraining for different time periods

## Ethical Considerations

This model is for educational and analytical purposes. It should not be used for:
- Player salary negotiations
- Draft decisions without additional context
- Any form of discrimination or bias

## Citation

```
@misc{nba_performance_predictor,
  title={NBA Player Performance Predictor using XGBoost},
  year={2024},
  publisher={Hugging Face},
  howpublished={\\url{https://huggingface.co/your-username/nba-performance-predictor}}
}
```
"""

    # The card contains non-ASCII text (e.g. "R²"), so pin the encoding
    # instead of relying on the platform default.
    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(model_card_content)

    print(f"Model card created: {output_path}")


if __name__ == "__main__":
    # Example usage
    print("NBA Performance Predictor - Hugging Face Compatible Version")

    # Create model instance (assumes model is already trained and saved)
    model_dir = "nba_model"
    if os.path.exists(model_dir):
        model = NBAPerformancePredictorHF(model_dir)

        # Test prediction
        example_stats = model.create_example_input()
        prediction = model.predict(example_stats)
        print(f"Example prediction: {prediction:.2f} PPG")

        # Get feature info
        feature_info = model.get_feature_info()
        print(f"Model uses {feature_info['total_features']} features")
    else:
        print(f"Model directory '{model_dir}' not found. Train the model first using nba_xgboost_predictor.py")