Vijayendra
/

QST-CIFAR10-BestModel

Image Classification

English

Image

Classification

PyTorch

Model card Files and versions Community

Vijayendra committed on Oct 29, 2024

Commit

5d8d9be

verified ·

1 Parent(s): 0630c94

Update README.md

Browse files

Files changed (1) hide show

README.md +24 -74

README.md CHANGED Viewed

@@ -5,7 +5,7 @@ datasets:
 language:
 - en
 metrics:
-- accuracy:96.7 %
 tags:
@@ -19,7 +19,6 @@ pipeline_tag: image-classification
 # Install necessary libraries
 ```python
-# Import necessary libraries
 import os
 import torch
 import torch.nn as nn
@@ -28,9 +27,7 @@ import numpy as np
 from torch.utils.data import DataLoader
 import torchvision.transforms as transforms
 import torchvision.datasets as datasets
-import xgboost as xgb
 from sklearn.metrics import accuracy_score, confusion_matrix, ConfusionMatrixDisplay
-from sklearn.model_selection import train_test_split
 import matplotlib.pyplot as plt
 from huggingface_hub import hf_hub_download
@@ -38,9 +35,8 @@ from huggingface_hub import hf_hub_download
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 print(f'Using device: {device}')
-# Define Hugging Face username and repository names
 username = "Vijayendra"
-model_name_epoch_125 = "QST-CIFAR10-Epoch125"
 model_name_best = "QST-CIFAR10-BestModel"
 # Directory where the models will be downloaded
@@ -48,13 +44,13 @@ save_dir = './hf_models'
 os.makedirs(save_dir, exist_ok=True)
 # Data normalization for CIFAR-10
-transform_test = transforms.Compose([
     transforms.ToTensor(),
     transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2470, 0.2435, 0.2616))
 ])
 # Load CIFAR-10 test set
-cifar10_test = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform_test)
 test_loader = DataLoader(cifar10_test, batch_size=128, shuffle=False, num_workers=4)
 # Define Patch Embedding with optional convolutional layers
@@ -91,7 +87,6 @@ class SequentialAttentionBlock(nn.Module):
         self.dropout = nn.Dropout(dropout)
     def forward(self, x):
-        # x shape: [seq_length, batch_size, embed_dim]
         seq_length = x.size(0)
         attn_mask = torch.triu(torch.ones(seq_length, seq_length), diagonal=1).bool().to(x.device)
         attn_output, _ = self.attention(x, x, x, attn_mask=attn_mask)
@@ -155,7 +150,6 @@ class CombinedTransformerBlock(nn.Module):
         ff_output = self.ff(x)
         x = self.norm3(x + self.dropout(ff_output))
         return x
 # Decoder Block
 class DecoderBlock(nn.Module):
     def __init__(self, embed_dim, num_heads, ff_dim, dropout=0.1):
@@ -178,8 +172,7 @@ class DecoderBlock(nn.Module):
         ff_output = self.ff(x)
         x = self.norm2(x + self.dropout(ff_output))
         return x
-# Custom Transformer Model with increased depth and learnable positional encodings
 class CustomTransformer(nn.Module):
     def __init__(self, embed_dim, num_heads, ff_dim, num_classes, num_layers=6, dropconnect_p=0.5):
         super(CustomTransformer, self).__init__()
@@ -189,13 +182,13 @@ class CustomTransformer(nn.Module):
         self.positional_encoding = nn.Parameter(torch.zeros(1, self.num_patches, embed_dim))
         nn.init.trunc_normal_(self.positional_encoding, std=0.02)
-        # Create multiple encoder blocks
         self.encoder_blocks = nn.ModuleList([
             CombinedTransformerBlock(embed_dim, num_heads, ff_dim, dropconnect_p=dropconnect_p)
             for _ in range(num_layers)
         ])
-        # Create multiple decoder blocks
         self.decoder_blocks = nn.ModuleList([
             DecoderBlock(embed_dim, num_heads, ff_dim)
             for _ in range(num_layers)
@@ -208,10 +201,12 @@ class CustomTransformer(nn.Module):
         x += self.positional_encoding
         x = x.transpose(0, 1)  # Shape: [num_patches, batch_size, embed_dim]
         encoder_output = x
         for encoder in self.encoder_blocks:
             encoder_output = encoder(encoder_output)
         decoder_output = encoder_output
         for decoder in self.decoder_blocks:
             decoder_output = decoder(decoder_output, encoder_output)
@@ -220,95 +215,50 @@ class CustomTransformer(nn.Module):
         logits = self.fc(decoder_output)
         return logits
-# Initialize two instances of the model for 'model_epoch_125' and 'best_model'
 embed_dim = 512
 num_heads = 32
 ff_dim = 1024
 num_classes = 10
 num_layers = 10  # Ensure it matches the architecture
-model_epoch_125 = CustomTransformer(embed_dim, num_heads, ff_dim, num_classes, num_layers=num_layers).to(device)
 model_best = CustomTransformer(embed_dim, num_heads, ff_dim, num_classes, num_layers=num_layers).to(device)
-# Download the models from Hugging Face Hub
-from huggingface_hub import hf_hub_download
-# Paths where the models will be saved
-model_epoch_125_path = hf_hub_download(repo_id=f"{username}/{model_name_epoch_125}", filename="model_epoch_125.pth")
 model_best_path = hf_hub_download(repo_id=f"{username}/{model_name_best}", filename="model_best.pth")
-# Load the saved models from Hugging Face Hub
-model_epoch_125.load_state_dict(torch.load(model_epoch_125_path, map_location=device))
 model_best.load_state_dict(torch.load(model_best_path, map_location=device))
-# Set both models to evaluation mode
-model_epoch_125.eval()
-model_best.eval()
-# Prepare the feature and label arrays
-test_preds_epoch_125 = []
-test_preds_best = []
 test_labels = []
 with torch.no_grad():
     for images_test, labels_test in test_loader:
         images_test = images_test.to(device)
-        # Get predictions from model_epoch_125
-        logits_epoch_125 = model_epoch_125(images_test)
-        probs_epoch_125 = F.softmax(logits_epoch_125, dim=1).cpu().numpy()  # Convert to probabilities
-        # Get predictions from model_best
         logits_best = model_best(images_test)
         probs_best = F.softmax(logits_best, dim=1).cpu().numpy()  # Convert to probabilities
         # Store predictions and labels
-        test_preds_epoch_125.extend(probs_epoch_125)
         test_preds_best.extend(probs_best)
         test_labels.extend(labels_test.numpy())
-# Convert predictions to NumPy arrays
-test_preds_epoch_125 = np.array(test_preds_epoch_125)
-test_preds_best = np.array(test_preds_best)
 test_labels = np.array(test_labels)
-# Stack the predictions from both models to create meta-features
-meta_features = np.hstack((test_preds_epoch_125, test_preds_best))  # Shape: (num_samples, 20)
-# Split the data for training and validation of the XGBoost meta-learner
-X_train, X_val, y_train, y_val = train_test_split(meta_features, test_labels, test_size=0.2, random_state=42)
-# Train an XGBoost classifier as a meta-learner
-xgb_model = xgb.XGBClassifier(
-    objective='multi:softmax',
-    num_class=10,
-    eval_metric='mlogloss',
-    use_label_encoder=False
-)
-xgb_model.fit(X_train, y_train)
-# Validate the XGBoost model
-y_pred_val = xgb_model.predict(X_val)
-val_accuracy = accuracy_score(y_val, y_pred_val)
-print(f'Validation Accuracy of Meta-learner: {val_accuracy * 100:.2f}%')
-# Test the XGBoost model on the entire test set
-y_pred_test = xgb_model.predict(meta_features)
-test_accuracy = accuracy_score(test_labels, y_pred_test)
-print(f'Test Accuracy of Meta-learner: {test_accuracy * 100:.2f}%')
 # Plot the confusion matrix for the test set predictions
-cm = confusion_matrix(test_labels, y_pred_test)
 disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=cifar10_test.classes)
 disp.plot(cmap=plt.cm.Blues)
 # Rotate the x-axis labels to prevent overlapping
 plt.xticks(rotation=45, ha='right')
-plt.title('Confusion Matrix for XGBoost Meta-learner on CIFAR-10 Test Set')
-plt.savefig(os.path.join(save_dir, 'xgboost_meta_confusion_matrix.png'))
 plt.show()
-# Save the XGBoost model
-xgb_model.save_model(os.path.join(save_dir, 'xgboost_meta_learner.json'))
-print('Meta-learner model saved.')

 language:
 - en
 metrics:
 tags:
 # Install necessary libraries
 ```python
 import os
 import torch
 import torch.nn as nn
 from torch.utils.data import DataLoader
 import torchvision.transforms as transforms
 import torchvision.datasets as datasets
 from sklearn.metrics import accuracy_score, confusion_matrix, ConfusionMatrixDisplay
 import matplotlib.pyplot as plt
 from huggingface_hub import hf_hub_download
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 print(f'Using device: {device}')
+# Define Hugging Face username and repository name for the best model
 username = "Vijayendra"
 model_name_best = "QST-CIFAR10-BestModel"
 # Directory where the models will be downloaded
 os.makedirs(save_dir, exist_ok=True)
 # Data normalization for CIFAR-10
+transform = transforms.Compose([
     transforms.ToTensor(),
     transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2470, 0.2435, 0.2616))
 ])
 # Load CIFAR-10 test set
+cifar10_test = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)
 test_loader = DataLoader(cifar10_test, batch_size=128, shuffle=False, num_workers=4)
 # Define Patch Embedding with optional convolutional layers
         self.dropout = nn.Dropout(dropout)
     def forward(self, x):
         seq_length = x.size(0)
         attn_mask = torch.triu(torch.ones(seq_length, seq_length), diagonal=1).bool().to(x.device)
         attn_output, _ = self.attention(x, x, x, attn_mask=attn_mask)
         ff_output = self.ff(x)
         x = self.norm3(x + self.dropout(ff_output))
         return x
 # Decoder Block
 class DecoderBlock(nn.Module):
     def __init__(self, embed_dim, num_heads, ff_dim, dropout=0.1):
         ff_output = self.ff(x)
         x = self.norm2(x + self.dropout(ff_output))
         return x
+# Custom Transformer Model with increased depth, encoder and decoder blocks, and learnable positional encodings
 class CustomTransformer(nn.Module):
     def __init__(self, embed_dim, num_heads, ff_dim, num_classes, num_layers=6, dropconnect_p=0.5):
         super(CustomTransformer, self).__init__()
         self.positional_encoding = nn.Parameter(torch.zeros(1, self.num_patches, embed_dim))
         nn.init.trunc_normal_(self.positional_encoding, std=0.02)
+        # Encoder blocks
         self.encoder_blocks = nn.ModuleList([
             CombinedTransformerBlock(embed_dim, num_heads, ff_dim, dropconnect_p=dropconnect_p)
             for _ in range(num_layers)
         ])
+        # Decoder blocks to match saved model structure
         self.decoder_blocks = nn.ModuleList([
             DecoderBlock(embed_dim, num_heads, ff_dim)
             for _ in range(num_layers)
         x += self.positional_encoding
         x = x.transpose(0, 1)  # Shape: [num_patches, batch_size, embed_dim]
+        # Pass through encoder blocks
         encoder_output = x
         for encoder in self.encoder_blocks:
             encoder_output = encoder(encoder_output)
+        # Pass through decoder blocks
         decoder_output = encoder_output
         for decoder in self.decoder_blocks:
             decoder_output = decoder(decoder_output, encoder_output)
         logits = self.fc(decoder_output)
         return logits
+# Initialize the best model for evaluation
 embed_dim = 512
 num_heads = 32
 ff_dim = 1024
 num_classes = 10
 num_layers = 10  # Ensure it matches the architecture
 model_best = CustomTransformer(embed_dim, num_heads, ff_dim, num_classes, num_layers=num_layers).to(device)
+# Download and load the best model from Hugging Face Hub
 model_best_path = hf_hub_download(repo_id=f"{username}/{model_name_best}", filename="model_best.pth")
 model_best.load_state_dict(torch.load(model_best_path, map_location=device))
+model_best.eval()  # Set to evaluation mode
+# Evaluate the best model directly on the test set
 test_labels = []
+test_preds_best = []
 with torch.no_grad():
     for images_test, labels_test in test_loader:
         images_test = images_test.to(device)
         logits_best = model_best(images_test)
         probs_best = F.softmax(logits_best, dim=1).cpu().numpy()  # Convert to probabilities
         # Store predictions and labels
         test_preds_best.extend(probs_best)
         test_labels.extend(labels_test.numpy())
+# Convert test set predictions to labels
+test_preds_best_labels = np.argmax(test_preds_best, axis=1)
 test_labels = np.array(test_labels)
+# Calculate and print test accuracy
+test_accuracy = accuracy_score(test_labels, test_preds_best_labels)
+print(f'Test Accuracy of Best Model: {test_accuracy * 100:.2f}%')
 # Plot the confusion matrix for the test set predictions
+cm = confusion_matrix(test_labels, test_preds_best_labels)
 disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=cifar10_test.classes)
 disp.plot(cmap=plt.cm.Blues)
 # Rotate the x-axis labels to prevent overlapping
 plt.xticks(rotation=45, ha='right')
+plt.title('Confusion Matrix for Best Model on CIFAR-10 Test Set')
+plt.savefig(os.path.join(save_dir, 'best_model_confusion_matrix.png'))
 plt.show()