# -*- coding: utf-8 -*-
"""CNN(mnist).ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1_4EIIBRbLBfS5tDz6VSgPIIpSv1t03Y7

CNNs are mainly used to classify images.
A basic CNN adds two extra kinds of layers, convolution and pooling, in front of the fully connected feed-forward network (FNN).
A CNN involves a kernel.

The kernel is a matrix that slides (convolves) across the image, performing two operations at each position (see the small example below):

1. element-wise multiplication
2. summation
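As a tiny worked example of these two operations, the cell below applies a 2x2 kernel to a single 2x2 patch of an image; the numbers are made up purely for illustration:
"""

import torch

# one 2x2 patch of an image and a 2x2 kernel (made-up values)
patch  = torch.tensor([[3., 1.],
                       [2., 4.]])
kernel = torch.tensor([[1., 0.],
                       [0., 1.]])

# element-wise multiplication followed by summation gives one element of the convolution output
print((patch * kernel).sum())   # 3*1 + 1*0 + 2*0 + 4*1 = 7.0

"""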
Now comes the pooling part. There are mainly two types of pooling (illustrated in the cell below):

1. Max pooling - take the maximum element of each window as the kernel iterates over the image.
2. Average pooling - take the average of all the elements in the window.

Now comes the stride - the number of steps the kernel moves at each convolution. By default it is 1.
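To get a quick feel for the two pooling types, the cell below pools a made-up 4x4 image with a 2x2 window (note that pooling layers in PyTorch move the window with a stride equal to the window size by default, so the windows do not overlap):
"""

import torch
import torch.nn as nn

x = torch.tensor([[[[1., 2., 5., 6.],
                    [3., 4., 7., 8.],
                    [0., 1., 2., 3.],
                    [1., 1., 1., 1.]]]])   # shape (batch=1, channels=1, 4, 4)

print(nn.MaxPool2d(kernel_size=2)(x))   # max of each 2x2 window: [[4., 8.], [1., 3.]]
print(nn.AvgPool2d(kernel_size=2)(x))   # mean of each 2x2 window: [[2.5, 6.5], [0.75, 1.75]]

"""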
After convolving with a given kernel and stride, the output becomes smaller than the input, so we pad zeros symmetrically around the matrix to make the output the same dimension as the input.
The dimension of the output after applying all of these is:
O = (W - K + 2P)/S + 1
W = input width/height
K = kernel size
P = padding = (K - 1)/2
S = stride
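For example, for the first convolution used below on MNIST (28x28 images): W = 28, K = 5, P = 2 and S = 1, so O = (28 - 5 + 2*2)/1 + 1 = 28, and the 2x2 max pooling that follows halves this to 14.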
# Importing libraries
"""

import torch
import torch.nn as nn
from torchvision import transforms, datasets
from torch.utils.data import Dataset, DataLoader
import torchvision.datasets as dsets

"""# Loading the data"""

# we will be using the MNIST dataset for this purpose
train_dataset = dsets.MNIST(root='./data',
                            train=True,
                            transform=transforms.ToTensor(),
                            download=True)

test_dataset = dsets.MNIST(root='./data',
                           train=False,
                           transform=transforms.ToTensor())

# making our dataset iterable
batch_size = 100
n_iters = 3000
num_epochs = n_iters / (len(train_dataset) / batch_size)
num_epochs = int(num_epochs)

train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True)

test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=batch_size,
                                          shuffle=False)

"""# Defining our model"""

class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        # defining the layers
        self.block1 = nn.Sequential(nn.Conv2d(1, 16, kernel_size=(5, 5), stride=1, padding=2),
                                    nn.ReLU(),
                                    nn.MaxPool2d(kernel_size=2))
        # output after this operation:
        # (28 - 5 + 2*2)/1 + 1 = 28, then max pooling 28/2 = 14
        self.block2 = nn.Sequential(nn.Conv2d(16, 32, kernel_size=(5, 5), stride=1, padding=2),
                                    nn.ReLU(),
                                    nn.MaxPool2d(kernel_size=2))
        # output after this:
        # (14 - 5 + 2*2)/1 + 1 = 14, then 14/2 = 7
        self.layer = nn.Linear(32*7*7, 10)

    def forward(self, x):
        x = self.block1(x)
        x = self.block2(x)
        # flattening the output
        x = x.view(x.size(0), -1)
        # now feeding it into the linear layer
        x = self.layer(x)
        return x

# making an instance
model = CNN()
print(model)

"""# Training the model"""

# initialising the loss and optimizer
criterion = nn.CrossEntropyLoss()
learning_rate = 0.01
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

print(model.parameters())
print(len(list(model.parameters())))

# Convolution 1: 16 kernels
print(list(model.parameters())[0].size())
# Convolution 1 bias: 16 kernels
print(list(model.parameters())[1].size())
# Convolution 2: 32 kernels with depth = 16
print(list(model.parameters())[2].size())
# Convolution 2 bias: 32 kernels
print(list(model.parameters())[3].size())
# Fully connected layer 1
print(list(model.parameters())[4].size())
# Fully connected layer bias
print(list(model.parameters())[5].size())

# let's begin the training
iter = 0
for epochs in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        # loading the images
        images.requires_grad_()
        # first clearing the old gradients
        optimizer.zero_grad()
        # calculating the output and loss
        output = model(images)
        loss = criterion(output, labels)
        # backpropagating the loss
        loss.backward()
        # updating the parameters
        optimizer.step()

        iter += 1
        # printing for every 500 iterations
        if iter % 500 == 0:
            # calculate accuracy on the test set
            correct = 0
            total = 0
            # now iterate through the test dataset
            for images, labels in test_loader:
                images = images.requires_grad_()
                outputs = model(images)
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum()
            accuracy = 100 * correct / total
            # print loss and accuracy
            print('Iteration: {}. Loss: {}. Accuracy: {}'.format(iter, loss.item(), accuracy))
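"""As a quick sanity check (an illustrative addition, not part of the original notebook), we can push one dummy batch through the trained model and confirm that the shapes match the comments in the class definition above."""

with torch.no_grad():
    dummy = torch.randn(1, 1, 28, 28)   # one fake MNIST-sized image
    h1 = model.block1(dummy)            # (1, 16, 14, 14) after conv + ReLU + max pool
    h2 = model.block2(h1)               # (1, 32, 7, 7)
    logits = model(dummy)               # (1, 10), one score per digit class
    print(h1.shape, h2.shape, logits.shape)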
"""Accuracy came out to be 96.63%.

# Model 2

This one uses an average pooling layer instead of max pooling.
"""

class CNN2(nn.Module):
    def __init__(self):
        super().__init__()
        # defining the layers
        self.block1 = nn.Sequential(nn.Conv2d(1, 16, kernel_size=(5, 5), stride=1, padding=2),
                                    nn.ReLU(),
                                    nn.AvgPool2d(kernel_size=2))
        # output after this operation:
        # (28 - 5 + 2*2)/1 + 1 = 28, then average pooling 28/2 = 14
        self.block2 = nn.Sequential(nn.Conv2d(16, 32, kernel_size=(5, 5), stride=1, padding=2),
                                    nn.ReLU(),
                                    nn.AvgPool2d(kernel_size=2))
        # output after this:
        # (14 - 5 + 2*2)/1 + 1 = 14, then 14/2 = 7
        self.layer = nn.Linear(32*7*7, 10)

    def forward(self, x):
        x = self.block1(x)
        x = self.block2(x)
        # flattening the output
        x = x.view(x.size(0), -1)
        # now feeding it into the linear layer
        x = self.layer(x)
        return x

# making an instance
model2 = CNN2()
print(model2)

learning_rate = 0.01
optimizer = torch.optim.SGD(model2.parameters(), lr=learning_rate)

# let's begin the training
iter = 0
for epochs in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        # loading the images
        images.requires_grad_()
        # first clearing the old gradients
        optimizer.zero_grad()
        # calculating the output and loss
        output = model2(images)
        loss = criterion(output, labels)
        # backpropagating the loss
        loss.backward()
        # updating the parameters
        optimizer.step()

        iter += 1
        # printing for every 500 iterations
        if iter % 500 == 0:
            # calculate accuracy on the test set
            correct = 0
            total = 0
            # now iterate through the test dataset
            for images, labels in test_loader:
                images = images.requires_grad_()
                outputs = model2(images)
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum()
            accuracy = 100 * correct / total
            # print loss and accuracy
            print('Iteration: {}. Loss: {}. Accuracy: {}'.format(iter, loss.item(), accuracy))

"""Accuracy came out to be 93%.

# Model 3

This one uses valid padding (no zero padding), which means a smaller output size at each convolution.
"""

class CNN3(nn.Module):
    def __init__(self):
        super(CNN3, self).__init__()
        # Convolution 1
        self.cnn1 = nn.Conv2d(in_channels=1, out_channels=16, kernel_size=5, stride=1, padding=0)
        self.relu1 = nn.ReLU()
        # Max pool 1
        self.maxpool1 = nn.MaxPool2d(kernel_size=2)
        # Convolution 2
        self.cnn2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=5, stride=1, padding=0)
        self.relu2 = nn.ReLU()
        # Max pool 2
        self.maxpool2 = nn.MaxPool2d(kernel_size=2)
        # Fully connected 1 (readout)
        # (28 - 5)/1 + 1 = 24, pooled to 12; (12 - 5)/1 + 1 = 8, pooled to 4
        self.fc1 = nn.Linear(32 * 4 * 4, 10)

    def forward(self, x):
        # Convolution 1
        out = self.cnn1(x)
        out = self.relu1(out)
        # Max pool 1
        out = self.maxpool1(out)
        # Convolution 2
        out = self.cnn2(out)
        out = self.relu2(out)
        # Max pool 2
        out = self.maxpool2(out)
        # Resize
        # Original size: (100, 32, 4, 4)
        # out.size(0): 100
        # New out size: (100, 32*4*4)
        out = out.view(out.size(0), -1)
        # Linear function (readout)
        out = self.fc1(out)
        return out

# making an instance
model3 = CNN3()
print(model3)

learning_rate = 0.01
optimizer = torch.optim.SGD(model3.parameters(), lr=learning_rate)

# let's begin the training
iter = 0
for epochs in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        # loading the images
        images.requires_grad_()
        # first clearing the old gradients
        optimizer.zero_grad()
        # calculating the output and loss
        output = model3(images)
        loss = criterion(output, labels)
        # backpropagating the loss
        loss.backward()
        # updating the parameters
        optimizer.step()

        iter += 1
        # printing for every 500 iterations
        if iter % 500 == 0:
            # calculate accuracy on the test set
            correct = 0
            total = 0
            # now iterate through the test dataset
            for images, labels in test_loader:
                images = images.requires_grad_()
                outputs = model3(images)
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum()
            accuracy = 100 * correct / total
            # print loss and accuracy
            print('Iteration: {}. Loss: {}. Accuracy: {}'.format(iter, loss.item(), accuracy))

"""Accuracy is 96% for model 3.

Of the models above, the one with max pooling and padding = 2 (model 1) gave the best accuracy.
"""
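"""As a small refactoring sketch (an addition, not part of the original notebook), the repeated accuracy code above can be pulled into a single helper and used to re-check all three trained models on the test set:"""

def evaluate(net):
    """Return the percentage of test images the given network classifies correctly."""
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in test_loader:
            outputs = net(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    return 100 * correct / total

for name, net in [('max pooling + padding', model),
                  ('average pooling + padding', model2),
                  ('max pooling, no padding', model3)]:
    print(name, evaluate(net))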