File size: 127,636 Bytes
dd98f48
1
{"metadata":{"kernelspec":{"language":"python","display_name":"Python 3","name":"python3"},"language_info":{"name":"python","version":"3.11.11","mimetype":"text/x-python","codemirror_mode":{"name":"ipython","version":3},"pygments_lexer":"ipython3","nbconvert_exporter":"python","file_extension":".py"},"kaggle":{"accelerator":"gpu","dataSources":[],"dockerImageVersionId":31011,"isInternetEnabled":true,"language":"python","sourceType":"notebook","isGpuEnabled":true}},"nbformat_minor":4,"nbformat":4,"cells":[{"cell_type":"code","source":"# This Python 3 environment comes with many helpful analytics libraries installed\n# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python\n# For example, here's several helpful packages to load\n\nimport numpy as np # linear algebra\nimport pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)\n\n# Input data files are available in the read-only \"../input/\" directory\n# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory\n\nimport os\nfor dirname, _, filenames in os.walk('/kaggle/input'):\n    for filename in filenames:\n        print(os.path.join(dirname, filename))\n\n# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using \"Save & Run All\" \n# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session","metadata":{"_uuid":"8f2839f25d086af736a60e9eeb907d3b93b6e0e5","_cell_guid":"b1076dfc-b9ad-4769-8c92-a6c4dae69d19","trusted":true,"execution":{"iopub.status.busy":"2025-04-15T23:37:17.911488Z","iopub.execute_input":"2025-04-15T23:37:17.912289Z","iopub.status.idle":"2025-04-15T23:37:17.917565Z","shell.execute_reply.started":"2025-04-15T23:37:17.912246Z","shell.execute_reply":"2025-04-15T23:37:17.916600Z"}},"outputs":[],"execution_count":23},{"cell_type":"code","source":"import os\nimport numpy as np\nfrom PIL import Image\nfrom collections import Counter\nimport matplotlib.pyplot as plt  # at top of file\n\n\nimport torch\nimport torch.nn as nn\nimport torch.optim as optim\nfrom torch.utils.data import Dataset, DataLoader\n\nimport torchvision.transforms as transforms\n\n# Hugging Face dataset library\nfrom datasets import load_dataset\n\nimport timm\n","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-04-15T23:37:44.581681Z","iopub.execute_input":"2025-04-15T23:37:44.582473Z","iopub.status.idle":"2025-04-15T23:37:44.587163Z","shell.execute_reply.started":"2025-04-15T23:37:44.582444Z","shell.execute_reply":"2025-04-15T23:37:44.586251Z"}},"outputs":[],"execution_count":28},{"cell_type":"code","source":"# -------------------------------\n# Dataset Wrapper\n# -------------------------------\nclass DeepfakeDataset(Dataset):\n    \"\"\"\n    A custom Dataset wrapper for deepfake images.\n    It assumes that the HF dataset has keys \"image\" and \"label\".\n    \"\"\"\n    def __init__(self, hf_dataset, transform=None):\n        self.hf_dataset = hf_dataset\n        self.transform = transform\n\n    def __len__(self):\n        return len(self.hf_dataset)\n\n    def __getitem__(self, idx):\n        # Get the image and its label from the Hugging Face dataset\n        # (modify keys if needed based on the actual dataset structure)\n        sample = self.hf_dataset[idx]\n        image = sample['image']\n        # Convert the image to a PIL Image if it is not one\n        if not isinstance(image, Image.Image):\n            image = Image.fromarray(image)\n        label = sample['label']  # assuming labels are stored as 'label'\n        if self.transform:\n            image = self.transform(image)\n        return image, label\n","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-04-15T23:37:48.588239Z","iopub.execute_input":"2025-04-15T23:37:48.588512Z","iopub.status.idle":"2025-04-15T23:37:48.594462Z","shell.execute_reply.started":"2025-04-15T23:37:48.588495Z","shell.execute_reply":"2025-04-15T23:37:48.593558Z"}},"outputs":[],"execution_count":29},{"cell_type":"code","source":"# -------------------------------\n# Transformations for different model inputs\n# -------------------------------\n# For Model A and Model B, we use small images (50x50)\ntransform_small = transforms.Compose([\n    transforms.Resize((50, 50)),\n    transforms.ToTensor(),\n    transforms.Normalize(mean=[0.5]*3, std=[0.5]*3)\n])\n\n# For Model C, we use larger images (224x224)\ntransform_large = transforms.Compose([\n    transforms.Resize((224, 224)),\n    transforms.ToTensor(),\n    transforms.Normalize(mean=[0.5]*3, std=[0.5]*3)\n])","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-04-15T23:37:54.553741Z","iopub.execute_input":"2025-04-15T23:37:54.554079Z","iopub.status.idle":"2025-04-15T23:37:54.559777Z","shell.execute_reply.started":"2025-04-15T23:37:54.553992Z","shell.execute_reply":"2025-04-15T23:37:54.558844Z"}},"outputs":[],"execution_count":30},{"cell_type":"markdown","source":"# -------------------------------\n# Model Definitions\n# -------------------------------\n","metadata":{}},{"cell_type":"code","source":"\n# --- Model A: CNN-based network for eye and nose regions (12 layers) ---\nclass ModelA(nn.Module):\n    def __init__(self, num_classes=2):\n        super(ModelA, self).__init__()\n        # Three convolutional blocks, each with 2 conv layers + BN, ReLU, pooling and dropout\n        self.block1 = nn.Sequential(\n            nn.Conv2d(3, 32, kernel_size=3, padding=1),\n            nn.ReLU(),\n            nn.BatchNorm2d(32),\n            nn.Conv2d(32, 32, kernel_size=3, padding=1),\n            nn.ReLU(),\n            nn.MaxPool2d(kernel_size=2),\n            nn.Dropout(0.3)\n        )\n        self.block2 = nn.Sequential(\n            nn.Conv2d(32, 64, kernel_size=3, padding=1),\n            nn.ReLU(),\n            nn.BatchNorm2d(64),\n            nn.Conv2d(64, 64, kernel_size=3, padding=1),\n            nn.ReLU(),\n            nn.MaxPool2d(kernel_size=2),\n            nn.Dropout(0.3)\n        )\n        self.block3 = nn.Sequential(\n            nn.Conv2d(64, 128, kernel_size=3, padding=1),\n            nn.ReLU(),\n            nn.BatchNorm2d(128),\n            nn.Conv2d(128, 128, kernel_size=3, padding=1),\n            nn.ReLU(),\n            nn.MaxPool2d(kernel_size=2),\n            nn.Dropout(0.3)\n        )\n        # After three blocks, feature map size for 50x50 input: 50 -> 25 -> ~12 -> ~6\n        self.classifier = nn.Sequential(\n            nn.Flatten(),\n            nn.Linear(128 * 6 * 6, 512),\n            nn.ReLU(),\n            nn.Dropout(0.3),\n            nn.Linear(512, num_classes)\n        )\n\n    def forward(self, x):\n        x = self.block1(x)\n        x = self.block2(x)\n        x = self.block3(x)\n        x = self.classifier(x)\n        return x\n","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-04-15T23:37:58.551963Z","iopub.execute_input":"2025-04-15T23:37:58.552765Z","iopub.status.idle":"2025-04-15T23:37:58.560447Z","shell.execute_reply.started":"2025-04-15T23:37:58.552738Z","shell.execute_reply":"2025-04-15T23:37:58.559543Z"}},"outputs":[],"execution_count":31},{"cell_type":"code","source":"\n# --- Model B: Simpler CNN-based network (6 layers) ---\nclass ModelB(nn.Module):\n    def __init__(self, num_classes=2):\n        super(ModelB, self).__init__()\n        # A lighter CNN architecture: three conv layers with pooling and dropout\n        self.features = nn.Sequential(\n            nn.Conv2d(3, 32, kernel_size=3, padding=1),\n            nn.ReLU(),\n            nn.MaxPool2d(2),\n            nn.Dropout(0.3),\n\n            nn.Conv2d(32, 64, kernel_size=3, padding=1),\n            nn.ReLU(),\n            nn.MaxPool2d(2),\n            nn.Dropout(0.3),\n\n            nn.Conv2d(64, 128, kernel_size=3, padding=1),\n            nn.ReLU(),\n            nn.MaxPool2d(2),\n            nn.Dropout(0.3)\n        )\n        self.classifier = nn.Sequential(\n            nn.Flatten(),\n            nn.Linear(128 * 6 * 6, 512),\n            nn.ReLU(),\n            nn.Dropout(0.3),\n            nn.Linear(512, num_classes)\n        )\n\n    def forward(self, x):\n        x = self.features(x)\n        x = self.classifier(x)\n        return x","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-04-15T23:38:02.629928Z","iopub.execute_input":"2025-04-15T23:38:02.630548Z","iopub.status.idle":"2025-04-15T23:38:02.637046Z","shell.execute_reply.started":"2025-04-15T23:38:02.630525Z","shell.execute_reply":"2025-04-15T23:38:02.636062Z"}},"outputs":[],"execution_count":32},{"cell_type":"code","source":"# --- Model C: CNN + ViT based network for the entire face ---\nclass ModelC(nn.Module):\n    def __init__(self, num_classes=2):\n        super(ModelC, self).__init__()\n        # Feature learning (FL) module: a deep CNN.\n        # For demonstration, we use a simpler CNN here.\n        self.cnn_feature_extractor = nn.Sequential(\n            nn.Conv2d(3, 64, kernel_size=3, padding=1),\n            nn.ReLU(),\n            nn.MaxPool2d(2),\n            nn.Conv2d(64, 128, kernel_size=3, padding=1),\n            nn.ReLU(),\n            nn.MaxPool2d(2),\n            nn.Conv2d(128, 256, kernel_size=3, padding=1),\n            nn.ReLU(),\n            nn.MaxPool2d(2)\n        )\n        # Assume feature map size is reduced appropriately (for 224x224, it becomes roughly 28x28)\n        # Now use a vision transformer module from the timm library.\n        # Note: You may need to install timm (pip install timm).\n        self.vit = timm.create_model('vit_base_patch16_224', pretrained=True)\n        # Replace the head of ViT to match our number of classes.\n        in_features = self.vit.head.in_features\n        self.vit.head = nn.Linear(in_features, num_classes)\n\n    def forward(self, x):\n        # Extract lower-level features (optional fusion)\n        features = self.cnn_feature_extractor(x)\n        # For this demonstration, we are feeding the original image to vit.\n        # In a more advanced implementation, you can fuse the CNN features with ViT.\n        out = self.vit(x)\n        return out\n","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-04-15T23:38:06.977377Z","iopub.execute_input":"2025-04-15T23:38:06.977681Z","iopub.status.idle":"2025-04-15T23:38:06.984796Z","shell.execute_reply.started":"2025-04-15T23:38:06.977659Z","shell.execute_reply":"2025-04-15T23:38:06.983917Z"}},"outputs":[],"execution_count":33},{"cell_type":"markdown","source":"# -------------------------------\n# Training and Evaluation Functions\n# -------------------------------","metadata":{}},{"cell_type":"code","source":"\ndef train_model(model, dataloader, criterion, optimizer, device, num_epochs=5):\n    model.train()\n    loss_history = []\n    acc_history  = []\n    for epoch in range(num_epochs):\n        running_loss = 0.0\n        correct = 0\n        total = 0\n        for inputs, labels in dataloader:\n            inputs, labels = inputs.to(device), labels.to(device)\n            optimizer.zero_grad()\n            outputs = model(inputs)\n            loss = criterion(outputs, labels)\n            loss.backward()\n            optimizer.step()\n\n            running_loss += loss.item() * inputs.size(0)\n            _, preds = torch.max(outputs, 1)\n            correct += (preds == labels).sum().item()\n            total   += labels.size(0)\n\n        epoch_loss = running_loss / total\n        epoch_acc  = correct / total\n        loss_history.append(epoch_loss)\n        acc_history.append(epoch_acc)\n        print(f\"Epoch {epoch+1}/{num_epochs} — Loss: {epoch_loss:.4f}, Acc: {epoch_acc:.4f}\")\n\n    return model, loss_history, acc_history\n","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-04-15T23:38:10.740265Z","iopub.execute_input":"2025-04-15T23:38:10.740561Z","iopub.status.idle":"2025-04-15T23:38:10.747783Z","shell.execute_reply.started":"2025-04-15T23:38:10.740540Z","shell.execute_reply":"2025-04-15T23:38:10.746836Z"}},"outputs":[],"execution_count":34},{"cell_type":"code","source":"def evaluate_model(model, dataloader, device):\n    model.eval()\n    preds_all = []\n    labels_all = []\n    with torch.no_grad():\n        for inputs, labels in dataloader:\n            inputs = inputs.to(device)\n            outputs = model(inputs)\n            _, preds = torch.max(outputs, 1)\n            preds_all.extend(preds.cpu().numpy())\n            labels_all.extend(labels.numpy())\n    return np.array(preds_all), np.array(labels_all)","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-04-15T23:38:18.833911Z","iopub.execute_input":"2025-04-15T23:38:18.834258Z","iopub.status.idle":"2025-04-15T23:38:18.840470Z","shell.execute_reply.started":"2025-04-15T23:38:18.834234Z","shell.execute_reply":"2025-04-15T23:38:18.839597Z"}},"outputs":[],"execution_count":36},{"cell_type":"code","source":"def majority_vote(predictions_list):\n    \"\"\"Combine predictions (list of numpy arrays) via majority voting.\"\"\"\n    # predictions_list should be a list of arrays with same length.\n    predictions_list = np.array(predictions_list)  # shape: [n_models, n_samples]\n    final_preds = []\n    for preds in predictions_list.T:\n        # Compute the majority vote for each sample.\n        vote = Counter(preds).most_common(1)[0][0]\n        final_preds.append(vote)\n    return np.array(final_preds)","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-04-15T23:38:21.265259Z","iopub.execute_input":"2025-04-15T23:38:21.265774Z","iopub.status.idle":"2025-04-15T23:38:21.270920Z","shell.execute_reply.started":"2025-04-15T23:38:21.265750Z","shell.execute_reply":"2025-04-15T23:38:21.270068Z"}},"outputs":[],"execution_count":37},{"cell_type":"code","source":"def main():\n    # Check device\n    device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n    print(\"Using device:\", device)\n\n    # -------------------------------\n    # Load the dataset using Hugging Face datasets\n    # -------------------------------\n    hf_dataset = load_dataset('Hemg/deepfake-and-real-images', split=\"train\")\n    # For demonstration, use a subset (e.g., first 1000 samples)\n\n    # -------------------------------\n    # Create PyTorch Datasets for different input sizes\n    # -------------------------------\n    dataset_small = DeepfakeDataset(hf_dataset, transform=transform_small)\n    dataset_large = DeepfakeDataset(hf_dataset, transform=transform_large)\n    \n    # Split datasets into train and test (80/20 split)\n    n_total = len(dataset_small)\n    n_train = int(0.8 * n_total)\n    n_test = n_total - n_train\n    train_small, test_small = torch.utils.data.random_split(dataset_small, [n_train, n_test])\n    train_large, test_large = torch.utils.data.random_split(dataset_large, [n_train, n_test])\n\n    train_loader_small = DataLoader(train_small, batch_size=32, shuffle=True, num_workers=2)\n    test_loader_small = DataLoader(test_small, batch_size=32, shuffle=False, num_workers=2)\n\n    train_loader_large = DataLoader(train_large, batch_size=32, shuffle=True, num_workers=2)\n    test_loader_large = DataLoader(test_large, batch_size=32, shuffle=False, num_workers=2)\n\n    # -------------------------------\n    # Initialize models, loss function, and optimizers\n    # -------------------------------\n    num_classes = 2\n    modelA = ModelA(num_classes=num_classes).to(device)\n    modelB = ModelB(num_classes=num_classes).to(device)\n    modelC = ModelC(num_classes=num_classes).to(device)\n\n    criterion = nn.CrossEntropyLoss()\n\n    optimizerA = optim.Adam(modelA.parameters(), lr=1e-4)\n    optimizerB = optim.Adam(modelB.parameters(), lr=1e-4)\n    optimizerC = optim.Adam(modelC.parameters(), lr=1e-4)\n\n    print(\"Training Model A (CNN-based - Model A, 12 layers, small images)...\")\n    modelA, lossA, accA = train_model(modelA, train_loader_small, criterion, optimizerA, device, num_epochs=10)\n\n    print(\"Training Model B (Simpler CNN-based - Model B, small images)...\")\n    modelB, lossB, accB = train_model(modelB, train_loader_small, criterion, optimizerB, device, num_epochs=10)\n\n    print(\"Training Model C (CNN + ViT based - large images)...\")\n    modelC, lossC, accC = train_model(modelC, train_loader_large, criterion, optimizerC, device, num_epochs=10)\n\n    # After training all three:\n    epochs = list(range(1, 11))\n\n    plt.figure()\n    plt.plot(epochs, lossA, label='Model A')\n    plt.plot(epochs, lossB, label='Model B')\n    plt.plot(epochs, lossC, label='Model C')\n    plt.title('Training Loss vs. Epochs')\n    plt.xlabel('Epoch')\n    plt.ylabel('Loss')\n    plt.legend()\n    plt.show()\n\n    plt.figure()\n    plt.plot(epochs, accA, label='Model A')\n    plt.plot(epochs, accB, label='Model B')\n    plt.plot(epochs, accC, label='Model C')\n    plt.title('Training Accuracy vs. Epochs')\n    plt.xlabel('Epoch')\n    plt.ylabel('Accuracy')\n    plt.legend()\n    plt.show()\n\n    # -------------------------------\n    # Evaluate models on the test set\n    # -------------------------------\n    preds_A, labels = evaluate_model(modelA, test_loader_small, device)\n    preds_B, _ = evaluate_model(modelB, test_loader_small, device)\n    preds_C, _ = evaluate_model(modelC, test_loader_large, device)  # using large images for model C\n\n    # -------------------------------\n    # Ensemble: Majority Voting\n    # -------------------------------\n    # Ensure predictions arrays have the same length.\n    # (Here, we assume that test loaders are synchronized; otherwise, adjust accordingly.)\n    predictions = majority_vote([preds_A, preds_B, preds_C])\n    accuracy = np.mean(predictions == labels)\n    print(f\"Ensemble (Majority Voting) Test Accuracy: {accuracy * 100:.2f}%\")\n\n    from sklearn.metrics import confusion_matrix, classification_report\n\n    # ... after predictions = majority_vote(...)\n    accuracy = np.mean(predictions == labels)\n    print(f\"Ensemble Test Accuracy: {accuracy:.4f}\")\n\n    # Confusion matrix\n    cm = confusion_matrix(labels, predictions)\n    print(\"Confusion Matrix:\\n\", cm)\n\n    # Detailed metrics\n    print(\"Classification Report:\\n\",\n      classification_report(labels, predictions, target_names=['Real','Fake']))\n\n    plt.figure()\n    plt.imshow(cm, interpolation='nearest')\n    plt.title('Ensemble Confusion Matrix')\n    plt.xlabel('Predicted')\n    plt.ylabel('Actual')\n    plt.xticks([0,1], ['Real','Fake'])\n    plt.yticks([0,1], ['Real','Fake'])\n    for i in range(2):\n        for j in range(2):\n            plt.text(j, i, cm[i,j], ha='center', va='center')\n    plt.show()\n\n\n    torch.save(modelA.state_dict(), 'modelA.pth')\n    torch.save(modelB.state_dict(), 'modelB.pth')\n    torch.save(modelC.state_dict(), 'modelC.pth')\n\n    # Optionally save the entire ensemble config:\n    import json\n    with open('ensemble_config.json','w') as f:\n        json.dump({\n        \"models\": [\"modelA.pth\",\"modelB.pth\",\"modelC.pth\"],\n        \"accuracy\": accuracy\n        }, f)\n\nif __name__ == \"__main__\":\n    main()","metadata":{"trusted":true,"execution":{"iopub.status.busy":"2025-04-15T23:40:09.081763Z","iopub.execute_input":"2025-04-15T23:40:09.082302Z","iopub.status.idle":"2025-04-16T08:30:28.702175Z","shell.execute_reply.started":"2025-04-15T23:40:09.082271Z","shell.execute_reply":"2025-04-16T08:30:28.701482Z"}},"outputs":[{"name":"stdout","text":"Using device: cuda\nTraining Model A (CNN-based - Model A, 12 layers, small images)...\nEpoch 1/10 — Loss: 0.3871, Acc: 0.8163\nEpoch 2/10 — Loss: 0.2458, Acc: 0.8953\nEpoch 3/10 — Loss: 0.1972, Acc: 0.9184\nEpoch 4/10 — Loss: 0.1662, Acc: 0.9317\nEpoch 5/10 — Loss: 0.1490, Acc: 0.9395\nEpoch 6/10 — Loss: 0.1339, Acc: 0.9463\nEpoch 7/10 — Loss: 0.1236, Acc: 0.9505\nEpoch 8/10 — Loss: 0.1151, Acc: 0.9542\nEpoch 9/10 — Loss: 0.1083, Acc: 0.9565\nEpoch 10/10 — Loss: 0.1022, Acc: 0.9590\nTraining Model B (Simpler CNN-based - Model B, small images)...\nEpoch 1/10 — Loss: 0.4825, Acc: 0.7608\nEpoch 2/10 — Loss: 0.3833, Acc: 0.8236\nEpoch 3/10 — Loss: 0.3323, Acc: 0.8504\nEpoch 4/10 — Loss: 0.2963, Acc: 0.8694\nEpoch 5/10 — Loss: 0.2692, Acc: 0.8823\nEpoch 6/10 — Loss: 0.2499, Acc: 0.8925\nEpoch 7/10 — Loss: 0.2330, Acc: 0.9000\nEpoch 8/10 — Loss: 0.2214, Acc: 0.9063\nEpoch 9/10 — Loss: 0.2106, Acc: 0.9113\nEpoch 10/10 — Loss: 0.2016, Acc: 0.9157\nTraining Model C (CNN + ViT based - large images)...\nEpoch 1/10 — Loss: 0.1301, Acc: 0.9477\nEpoch 2/10 — Loss: 0.0874, Acc: 0.9659\nEpoch 3/10 — Loss: 0.0713, Acc: 0.9719\nEpoch 4/10 — Loss: 0.0612, Acc: 0.9755\nEpoch 5/10 — Loss: 0.0547, Acc: 0.9785\nEpoch 6/10 — Loss: 0.0483, Acc: 0.9814\nEpoch 7/10 — Loss: 0.0439, Acc: 0.9826\nEpoch 8/10 — Loss: 0.0402, Acc: 0.9842\nEpoch 9/10 — Loss: 0.0361, Acc: 0.9859\nEpoch 10/10 — Loss: 0.5087, Acc: 0.7173\n","output_type":"stream"},{"output_type":"display_data","data":{"text/plain":"<Figure size 640x480 with 1 Axes>","image/png":"\n"},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"<Figure size 640x480 with 1 Axes>","image/png":"\n"},"metadata":{}},{"name":"stdout","text":"Ensemble (Majority Voting) Test Accuracy: 94.87%\nEnsemble Test Accuracy: 0.9487\nConfusion Matrix:\n [[17778  1237]\n [  714 18338]]\nClassification Report:\n               precision    recall  f1-score   support\n\n        Real       0.96      0.93      0.95     19015\n        Fake       0.94      0.96      0.95     19052\n\n    accuracy                           0.95     38067\n   macro avg       0.95      0.95      0.95     38067\nweighted avg       0.95      0.95      0.95     38067\n\n","output_type":"stream"},{"output_type":"display_data","data":{"text/plain":"<Figure size 640x480 with 1 Axes>","image/png":"\n"},"metadata":{}}],"execution_count":41}]}