Spaces: Running
Commit · 5e014de
1 Parent(s): c329af9
first commit

Files changed:
- .gitignore +46 -0
- Examples/DeepFakes_10.png +0 -0
- Examples/DeepFakes_2.png +0 -0
- Examples/DeepFakes_4.png +0 -0
- Examples/DeepFakes_8.png +0 -0
- Examples/DeepFakes_9.png +0 -0
- Examples/SimSwap_8.png +0 -0
- Examples/StyleGAN_7.png +0 -0
- Examples/o_11.jpg +0 -0
- Examples/o_3.jpg +0 -0
- Examples/o_5.jpg +0 -0
- Examples/o_6.jpg +0 -0
- Examples/o_7.jpg +0 -0
- app.py +206 -0
- dataset/real_n_fake_dataloader.py +119 -0
- face_cropper.py +99 -0
- net/Multimodalmodel.py +41 -0
- test_image_fusion.py +182 -0
- utils/__init__.py +1 -0
- utils/basicblocks.py +32 -0
- utils/classifier.py +32 -0
- utils/config.py +38 -0
- utils/data_transforms.py +33 -0
- utils/feature_fusion_block.py +46 -0
- weights/faceswap-fft-best_model.pth +3 -0
- weights/faceswap-hh-best_model.pth +3 -0
.gitignore
ADDED
@@ -0,0 +1,46 @@
working.ipynb
training.py

# Compiled source #
###################
*.com
*.class
*.dll
*.exe
*.o
*.so

# Packages #
############
# it's better to unpack these files and commit the raw source because
# git has its own built in compression methods
*.7z
*.dmg
*.gz
*.iso
*.jar
*.rar
*.tar
*.zip

# Logs and databases #
######################
*.log
*.sql
*.sqlite

# OS generated files #
######################
.DS_Store
.DS_Store?
._*
.Spotlight-V100
.Trashes
ehthumbs.db
Thumbs.db


__pycache__/
test_image.py
*.pyc
Examples/DeepFakes_10.png
ADDED
Examples/DeepFakes_2.png
ADDED
Examples/DeepFakes_4.png
ADDED
Examples/DeepFakes_8.png
ADDED
Examples/DeepFakes_9.png
ADDED
Examples/SimSwap_8.png
ADDED
Examples/StyleGAN_7.png
ADDED
Examples/o_11.jpg
ADDED
Examples/o_3.jpg
ADDED
Examples/o_5.jpg
ADDED
Examples/o_6.jpg
ADDED
Examples/o_7.jpg
ADDED
app.py
ADDED
@@ -0,0 +1,206 @@
import gradio as gr
from PIL import Image
import numpy as np
import os
from face_cropper import detect_and_label_faces

# Define a custom function to convert an image to grayscale
def to_grayscale(input_image):
    grayscale_image = Image.fromarray(np.array(input_image).mean(axis=-1).astype(np.uint8))
    return grayscale_image


description_markdown = """
# Fake Face Detection tool from TrustWorthy BiometraVision Lab IISER Bhopal

## Usage
This tool expects a face image as input. Upon submission, it processes the image and returns it with bounding boxes drawn around the detected faces. Alongside these visual markers, the tool reports a detection result indicating whether each face is fake or real.

## Disclaimer
Please note that this tool is for research purposes only and may not always be 100% accurate. Users are advised to exercise discretion and supervise the tool's usage accordingly.

## Licensing and Permissions
This tool has been developed solely for research and demonstrative purposes. Any commercial use of this tool is strictly prohibited unless explicit permission has been obtained from the developers.

## Developer Contact
For further inquiries or permissions, you can reach out to the developer through the following channels:
- [LAB Webpage](https://sites.google.com/iiitd.ac.in/agarwalakshay/labiiserb?authuser=0)
- [LinkedIn](https://www.linkedin.com/in/shivam-shukla-0a50ab1a2/)
- [GitHub](https://github.com/SaShukla090)
"""


# Create the Gradio app
app = gr.Interface(
    fn=detect_and_label_faces,
    inputs=gr.Image(type="pil"),
    outputs="image",
    # examples=[
    #     "path_to_example_image_1.jpg",
    #     "path_to_example_image_2.jpg"
    # ]
    examples=[
        os.path.join("Examples", image_name) for image_name in os.listdir("Examples")
    ],
    title="Fake Face Detection",
    description=description_markdown,
)

# Run the app
app.launch()


# import torch.nn.functional as F
# import torch
# import torch.nn as nn
# import torch.optim as optim
# from torch.utils.data import DataLoader
# from sklearn.metrics import accuracy_score, precision_recall_fscore_support
# from torch.optim.lr_scheduler import CosineAnnealingLR
# from tqdm import tqdm
# import warnings
# warnings.filterwarnings("ignore")

# from utils.config import cfg
# from dataset.real_n_fake_dataloader import Extracted_Frames_Dataset
# from utils.data_transforms import get_transforms_train, get_transforms_val
# from net.Multimodalmodel import Image_n_DCT
# import gradio as gr


# import os
# import json
# import torch
# from torchvision import transforms
# from torch.utils.data import DataLoader, Dataset
# from PIL import Image
# import numpy as np
# import pandas as pd
# import cv2
# import argparse


# from sklearn.metrics import classification_report, confusion_matrix
# import matplotlib.pyplot as plt
# import seaborn as sns


# class Test_Dataset(Dataset):
#     def __init__(self, test_data_path = None, transform = None, image = None):
#         """
#         Args:
#         returns:
#         """
#         if test_data_path is None and image is not None:
#             self.dataset = [(image, 2)]
#             self.transform = transform

#     def __len__(self):
#         return len(self.dataset)

#     def __getitem__(self, idx):
#         sample_input = self.get_sample_input(idx)
#         return sample_input

#     def get_sample_input(self, idx):
#         rgb_image = self.get_rgb_image(self.dataset[idx][0])
#         dct_image = self.compute_dct_color(self.dataset[idx][0])
#         # label = self.get_label(idx)
#         sample_input = {"rgb_image": rgb_image, "dct_image": dct_image}
#         return sample_input

#     def get_rgb_image(self, rgb_image):
#         # rgb_image_path = self.dataset[idx][0]
#         # rgb_image = Image.open(rgb_image_path)
#         if self.transform:
#             rgb_image = self.transform(rgb_image)
#         return rgb_image

#     def get_dct_image(self, idx):
#         rgb_image_path = self.dataset[idx][0]
#         rgb_image = cv2.imread(rgb_image_path)
#         dct_image = self.compute_dct_color(rgb_image)
#         if self.transform:
#             dct_image = self.transform(dct_image)
#         return dct_image

#     def get_label(self, idx):
#         return self.dataset[idx][1]

#     def compute_dct_color(self, image):
#         image_float = np.float32(image)
#         dct_image = np.zeros_like(image_float)
#         for i in range(3):
#             dct_image[:, :, i] = cv2.dct(image_float[:, :, i])
#         if self.transform:
#             dct_image = self.transform(dct_image)
#         return dct_image


# device = torch.device("cpu")
# # print(device)
# model = Image_n_DCT()
# model.load_state_dict(torch.load('weights/best_model.pth', map_location = device))
# model.to(device)
# model.eval()


# def classify(image):
#     test_dataset = Test_Dataset(transform = get_transforms_val(), image = image)
#     inputs = test_dataset[0]
#     rgb_image, dct_image = inputs['rgb_image'].to(device), inputs['dct_image'].to(device)
#     output = model(rgb_image.unsqueeze(0), dct_image.unsqueeze(0))
#     # _, predicted = torch.max(output.data, 1)
#     # print(f"the face is {'real' if predicted==1 else 'fake'}")
#     return {'Fake': output[0][0], 'Real': output[0][1]}

# iface = gr.Interface(fn=classify, inputs="image", outputs="label")
# if __name__ == "__main__":
#     iface.launch()
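Since the Gradio interface above simply wraps `detect_and_label_faces`, the handler can also be exercised without the browser UI. A minimal smoke-test sketch, assuming the `Examples/` images and `face_cropper.py` from this commit are available locally (the output filename is illustrative only):

import os
from PIL import Image
from face_cropper import detect_and_label_faces

# Pick any bundled example image and run it through the same function
# that gr.Interface calls on submission.
example_path = os.path.join("Examples", sorted(os.listdir("Examples"))[0])
labeled = detect_and_label_faces(Image.open(example_path).convert("RGB"))
labeled.save("labeled_example.png")  # PIL image with colored boxes and real/fake labels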
dataset/real_n_fake_dataloader.py
ADDED
@@ -0,0 +1,119 @@
# We will use this file to create a dataloader for the real and fake dataset
import os
import json
import torch
from torchvision import transforms
from torch.utils.data import DataLoader, Dataset
from PIL import Image
import numpy as np
import pandas as pd
import cv2
import matplotlib.pyplot as plt
import pywt

class Extracted_Frames_Dataset(Dataset):
    def __init__(self, root_dir, split = "train", transform = None, extend = 'None', multi_modal = "dct"):
        """
        Loads (image_path, label) rows from the split CSV under root_dir and
        returns the RGB image plus a second modality (dct, fft, or hh) per sample.
        """
        assert split in ["train", "val", "test"], "Split must be one of (train, val, test)"
        self.multi_modal = multi_modal
        self.root_dir = root_dir
        self.split = split
        self.transform = transform
        if extend == 'faceswap':
            self.dataset = pd.read_csv(os.path.join(root_dir, f"faceswap_extended_{self.split}.csv"))
        elif extend == 'fsgan':
            self.dataset = pd.read_csv(os.path.join(root_dir, f"fsgan_extended_{self.split}.csv"))
        else:
            self.dataset = pd.read_csv(os.path.join(root_dir, f"{self.split}.csv"))

    def __len__(self):
        return len(self.dataset)

    def __getitem__(self, idx):
        sample_input = self.get_sample_input(idx)
        return sample_input

    def get_sample_input(self, idx):
        rgb_image = self.get_rgb_image(idx)
        label = self.get_label(idx)
        if self.multi_modal == "dct":
            dct_image = self.get_dct_image(idx)
            sample_input = {"rgb_image": rgb_image, "dct_image": dct_image, "label": label}
        elif self.multi_modal == "fft":
            fft_image = self.get_fft_image(idx)
            sample_input = {"rgb_image": rgb_image, "dct_image": fft_image, "label": label}
        elif self.multi_modal == "hh":
            hh_image = self.get_hh_image(idx)
            sample_input = {"rgb_image": rgb_image, "dct_image": hh_image, "label": label}
        else:
            raise ValueError("multi_modal must be one of (dct: discrete cosine transform, fft: fast Fourier transform, hh: Haar HH sub-band)")

        return sample_input

    def get_fft_image(self, idx):
        gray_image_path = self.dataset.iloc[idx, 0]
        gray_image = cv2.imread(gray_image_path, cv2.IMREAD_GRAYSCALE)
        fft_image = self.compute_fft(gray_image)
        if self.transform:
            fft_image = self.transform(fft_image)
        return fft_image

    def compute_fft(self, image):
        f = np.fft.fft2(image)
        fshift = np.fft.fftshift(f)
        magnitude_spectrum = 20 * np.log(np.abs(fshift) + 1)  # Add 1 to avoid log(0)
        return magnitude_spectrum

    def get_hh_image(self, idx):
        gray_image_path = self.dataset.iloc[idx, 0]
        gray_image = cv2.imread(gray_image_path, cv2.IMREAD_GRAYSCALE)
        hh_image = self.compute_hh(gray_image)
        if self.transform:
            hh_image = self.transform(hh_image)
        return hh_image

    def compute_hh(self, image):
        coeffs2 = pywt.dwt2(image, 'haar')
        LL, (LH, HL, HH) = coeffs2
        return HH

    def get_rgb_image(self, idx):
        rgb_image_path = self.dataset.iloc[idx, 0]
        rgb_image = Image.open(rgb_image_path)
        if self.transform:
            rgb_image = self.transform(rgb_image)
        return rgb_image

    def get_dct_image(self, idx):
        rgb_image_path = self.dataset.iloc[idx, 0]
        rgb_image = cv2.imread(rgb_image_path)
        dct_image = self.compute_dct_color(rgb_image)
        if self.transform:
            dct_image = self.transform(dct_image)
        return dct_image

    def get_label(self, idx):
        return self.dataset.iloc[idx, 1]

    def compute_dct_color(self, image):
        image_float = np.float32(image)
        dct_image = np.zeros_like(image_float)
        for i in range(3):
            dct_image[:, :, i] = cv2.dct(image_float[:, :, i])
        return dct_image
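The dataset class above is only defined in this commit, not wired into a training loop. A minimal training-side sketch of how it would typically be consumed, assuming a hypothetical `dataset/` root containing a `train.csv` with (image_path, label) rows as expected by `get_rgb_image` and `get_label`:

from torch.utils.data import DataLoader
from dataset.real_n_fake_dataloader import Extracted_Frames_Dataset
from utils.data_transforms import get_transforms_train

# "dataset" as root_dir and the CSV layout are assumptions for illustration only.
train_set = Extracted_Frames_Dataset(root_dir="dataset", split="train",
                                     transform=get_transforms_train(), multi_modal="fft")
train_loader = DataLoader(train_set, batch_size=64, shuffle=True, num_workers=4)

batch = next(iter(train_loader))
# rgb_image: (B, 3, 224, 224), dct_image: (B, 1, 224, 224), label: (B,)
print(batch["rgb_image"].shape, batch["dct_image"].shape, batch["label"].shape)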
face_cropper.py
ADDED
@@ -0,0 +1,99 @@
import cv2
import mediapipe as mp
import os
from gradio_client import Client
from test_image_fusion import Test
import numpy as np
from PIL import Image

# client = Client("https://tbvl-real-and-fake-face-detection.hf.space/--replicas/40d41jxhhx/")

data = 'faceswap'
dct = 'fft'

testet = Test(model_paths = [f"weights/{data}-hh-best_model.pth",
                             f"weights/{data}-fft-best_model.pth"],
              multi_modal = ['hh', 'fft'])

# Initialize MediaPipe Face Detection
mp_face_detection = mp.solutions.face_detection
mp_drawing = mp.solutions.drawing_utils
face_detection = mp_face_detection.FaceDetection(model_selection=1, min_detection_confidence=0.35)

# Create a directory to save the cropped face images if it does not exist
save_dir = "cropped_faces"
os.makedirs(save_dir, exist_ok=True)

# def detect_and_label_faces(image_path):

# Function to crop faces from a video and save them as images
# def crop_faces_from_video(video_path):
#     # Read the video
#     cap = cv2.VideoCapture(video_path)
#     frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
#     frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
#     fps = int(cap.get(cv2.CAP_PROP_FPS))
#     total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
#
#     # Define the codec and create VideoWriter object
#     out = cv2.VideoWriter(f'output_{real}_{data}_fusion.avi', cv2.VideoWriter_fourcc('M','J','P','G'), fps, (frame_width, frame_height))
#
#     if not cap.isOpened():
#         print("Error: Could not open video.")
#         return

# Convert PIL Image to NumPy array for OpenCV
def pil_to_opencv(pil_image):
    open_cv_image = np.array(pil_image)
    # Convert RGB to BGR for OpenCV
    open_cv_image = open_cv_image[:, :, ::-1].copy()
    return open_cv_image

# Convert OpenCV NumPy array to PIL Image
def opencv_to_pil(opencv_image):
    # Convert BGR to RGB
    pil_image = Image.fromarray(opencv_image[:, :, ::-1])
    return pil_image


def detect_and_label_faces(frame):
    frame = pil_to_opencv(frame)

    # Convert the frame to RGB
    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    # Perform face detection
    results = face_detection.process(frame_rgb)

    # If faces are detected, crop and classify each face, then draw the result
    if results.detections:
        for face_count, detection in enumerate(results.detections):
            bboxC = detection.location_data.relative_bounding_box
            ih, iw, _ = frame.shape
            x, y, w, h = int(bboxC.xmin * iw), int(bboxC.ymin * ih), int(bboxC.width * iw), int(bboxC.height * ih)
            # Crop the face region and make sure the bounding box is within the frame dimensions
            crop_img = frame[max(0, y):min(ih, y+h), max(0, x):min(iw, x+w)]
            if crop_img.size > 0:
                face_filename = os.path.join(save_dir, f'face_{face_count}.jpg')
                cv2.imwrite(face_filename, crop_img)

                label = testet.testimage(face_filename)

                if os.path.exists(face_filename):
                    os.remove(face_filename)

                color = (0, 0, 255) if label == 'fake' else (0, 255, 0)
                cv2.rectangle(frame, (x, y), (x + w, y + h), color, 2)
                cv2.putText(frame, label, (x, y + 10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, color, 2)
    return opencv_to_pil(frame)
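For reference, MediaPipe reports the bounding box in coordinates relative to the frame size, which the loop above scales back to pixels. A worked example with made-up numbers:

# Hypothetical detection on a 640x480 frame (iw=640, ih=480) with
# bboxC.xmin=0.25, bboxC.ymin=0.20, bboxC.width=0.30, bboxC.height=0.40
x = int(0.25 * 640)   # 160
y = int(0.20 * 480)   # 96
w = int(0.30 * 640)   # 192
h = int(0.40 * 480)   # 192
# crop_img = frame[96:288, 160:352]; the rectangle is drawn from (160, 96) to (352, 288)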
net/Multimodalmodel.py
ADDED
@@ -0,0 +1,41 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
from utils.config import cfg
from utils.basicblocks import BasicBlock
from utils.feature_fusion_block import DCT_Attention_Fusion_Conv
from utils.classifier import ClassifierModel

class Image_n_DCT(nn.Module):
    def __init__(self,):
        super(Image_n_DCT, self).__init__()
        self.Img_Block = nn.ModuleList()
        self.DCT_Block = nn.ModuleList()
        self.RGB_n_DCT_Fusion = nn.ModuleList()
        self.num_classes = len(cfg.CLASSES)

        for i in range(len(cfg.MULTIMODAL_FUSION.IMG_CHANNELS) - 1):
            self.Img_Block.append(BasicBlock(cfg.MULTIMODAL_FUSION.IMG_CHANNELS[i], cfg.MULTIMODAL_FUSION.IMG_CHANNELS[i+1], stride=1))
            self.DCT_Block.append(BasicBlock(cfg.MULTIMODAL_FUSION.DCT_CHANNELS[i], cfg.MULTIMODAL_FUSION.IMG_CHANNELS[i+1], stride=1))
            self.RGB_n_DCT_Fusion.append(DCT_Attention_Fusion_Conv(cfg.MULTIMODAL_FUSION.IMG_CHANNELS[i+1]))

        self.classifier = ClassifierModel(self.num_classes)

    def forward(self, rgb_image, dct_image):
        image = [rgb_image]
        dct_image = [dct_image]

        for i in range(len(self.Img_Block)):
            image.append(self.Img_Block[i](image[-1]))
            dct_image.append(self.DCT_Block[i](dct_image[-1]))
            image[-1] = self.RGB_n_DCT_Fusion[i](image[-1], dct_image[-1])
            dct_image[-1] = image[-1]
        out = self.classifier(image[-1])

        return out
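A quick shape check for the fusion network above, assuming the default channel lists in `utils/config.py` (RGB branch 3 to 512 channels, second modality 1 to 512) and 224x224 inputs:

import torch
from net.Multimodalmodel import Image_n_DCT

model = Image_n_DCT().eval()
rgb = torch.randn(1, 3, 224, 224)   # RGB branch input
dct = torch.randn(1, 1, 224, 224)   # DCT / FFT / HH branch input
with torch.no_grad():
    out = model(rgb, dct)
print(out.shape)  # torch.Size([1, 2]): softmax scores over the two classes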
test_image_fusion.py
ADDED
@@ -0,0 +1,182 @@
import torch.nn.functional as F
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
from torch.optim.lr_scheduler import CosineAnnealingLR
from tqdm import tqdm
import warnings
warnings.filterwarnings("ignore")
import cv2
import numpy as np
import matplotlib.pyplot as plt
import pywt

from utils.config import cfg
from dataset.real_n_fake_dataloader import Extracted_Frames_Dataset
from utils.data_transforms import get_transforms_train, get_transforms_val
from net.Multimodalmodel import Image_n_DCT

import os
import json
from torchvision import transforms
from PIL import Image
import pandas as pd
import argparse

class Test_Dataset(Dataset):
    def __init__(self, test_data_path = None, transform = None, image_path = None, multi_modal = "dct"):
        """
        Wraps either a single image path or a test directory with real/ and
        fake/ subfolders, returning the RGB image plus a second modality per sample.
        """
        self.multi_modal = multi_modal
        if test_data_path is None and image_path is not None:
            self.dataset = [[image_path, 2]]
            self.transform = transform

        else:
            self.transform = transform

            self.real_data = os.listdir(test_data_path + "/real")
            self.fake_data = os.listdir(test_data_path + "/fake")
            self.dataset = []
            for image in self.real_data:
                self.dataset.append([test_data_path + "/real/" + image, 1])

            for image in self.fake_data:
                self.dataset.append([test_data_path + "/fake/" + image, 0])

    def __len__(self):
        return len(self.dataset)

    def __getitem__(self, idx):
        sample_input = self.get_sample_input(idx)
        return sample_input

    def get_sample_input(self, idx):
        rgb_image = self.get_rgb_image(idx)
        label = self.get_label(idx)
        if self.multi_modal == "dct":
            dct_image = self.get_dct_image(idx)
            sample_input = {"rgb_image": rgb_image, "dct_image": dct_image, "label": label}
        elif self.multi_modal == "fft":
            fft_image = self.get_fft_image(idx)
            sample_input = {"rgb_image": rgb_image, "dct_image": fft_image, "label": label}
        elif self.multi_modal == "hh":
            hh_image = self.get_hh_image(idx)
            sample_input = {"rgb_image": rgb_image, "dct_image": hh_image, "label": label}
        else:
            raise ValueError("multi_modal must be one of (dct: discrete cosine transform, fft: fast Fourier transform, hh: Haar HH sub-band)")

        return sample_input

    def get_fft_image(self, idx):
        gray_image_path = self.dataset[idx][0]
        gray_image = cv2.imread(gray_image_path, cv2.IMREAD_GRAYSCALE)
        fft_image = self.compute_fft(gray_image)
        if self.transform:
            fft_image = self.transform(fft_image)

        return fft_image

    def compute_fft(self, image):
        f = np.fft.fft2(image)
        fshift = np.fft.fftshift(f)
        magnitude_spectrum = 20 * np.log(np.abs(fshift) + 1)  # Add 1 to avoid log(0)
        return magnitude_spectrum

    def get_hh_image(self, idx):
        gray_image_path = self.dataset[idx][0]
        gray_image = cv2.imread(gray_image_path, cv2.IMREAD_GRAYSCALE)
        hh_image = self.compute_hh(gray_image)
        if self.transform:
            hh_image = self.transform(hh_image)
        return hh_image

    def compute_hh(self, image):
        coeffs2 = pywt.dwt2(image, 'haar')
        LL, (LH, HL, HH) = coeffs2
        return HH

    def get_rgb_image(self, idx):
        rgb_image_path = self.dataset[idx][0]
        rgb_image = Image.open(rgb_image_path)
        if self.transform:
            rgb_image = self.transform(rgb_image)
        return rgb_image

    def get_dct_image(self, idx):
        rgb_image_path = self.dataset[idx][0]
        rgb_image = cv2.imread(rgb_image_path)
        dct_image = self.compute_dct_color(rgb_image)
        if self.transform:
            dct_image = self.transform(dct_image)

        return dct_image

    def get_label(self, idx):
        return self.dataset[idx][1]

    def compute_dct_color(self, image):
        image_float = np.float32(image)
        dct_image = np.zeros_like(image_float)
        for i in range(3):
            dct_image[:, :, i] = cv2.dct(image_float[:, :, i])
        return dct_image


class Test:
    def __init__(self, model_paths = ['weights/faceswap-hh-best_model.pth',
                                      'weights/faceswap-fft-best_model.pth',
                                      ],
                 multi_modal = ["hh", "fft"]):
        self.model_path = model_paths
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        print(self.device)
        # Load the two single-modality models
        self.model1 = Image_n_DCT()
        self.model1.load_state_dict(torch.load(self.model_path[0], map_location = self.device))
        self.model1.to(self.device)
        self.model1.eval()

        self.model2 = Image_n_DCT()
        self.model2.load_state_dict(torch.load(self.model_path[1], map_location = self.device))
        self.model2.to(self.device)
        self.model2.eval()

        self.multi_modal = multi_modal

    def testimage(self, image_path):
        test_dataset1 = Test_Dataset(transform = get_transforms_val(), image_path = image_path, multi_modal = self.multi_modal[0])
        test_dataset2 = Test_Dataset(transform = get_transforms_val(), image_path = image_path, multi_modal = self.multi_modal[1])

        inputs1 = test_dataset1[0]
        rgb_image1, dct_image1 = inputs1['rgb_image'].to(self.device), inputs1['dct_image'].to(self.device)

        inputs2 = test_dataset2[0]
        rgb_image2, dct_image2 = inputs2['rgb_image'].to(self.device), inputs2['dct_image'].to(self.device)

        output1 = self.model1(rgb_image1.unsqueeze(0), dct_image1.unsqueeze(0))

        output2 = self.model2(rgb_image2.unsqueeze(0), dct_image2.unsqueeze(0))

        # Average the two models' softmax outputs and take the argmax
        output = (output1 + output2) / 2
        # print(output.shape)
        _, predicted = torch.max(output.data, 1)
        return 'real' if predicted == 1 else 'fake'
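A minimal sketch of using the two-model fusion directly on one image, assuming the faceswap hh/fft checkpoints shipped under `weights/` in this commit and one of the bundled example images:

from test_image_fusion import Test

tester = Test(model_paths=["weights/faceswap-hh-best_model.pth",
                           "weights/faceswap-fft-best_model.pth"],
              multi_modal=["hh", "fft"])
print(tester.testimage("Examples/o_3.jpg"))  # prints 'real' or 'fake'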
utils/__init__.py
ADDED
@@ -0,0 +1 @@
import os
utils/basicblocks.py
ADDED
@@ -0,0 +1,32 @@
import torch
import torch.nn as nn
import torch.nn.functional as F


BatchNorm2d = nn.BatchNorm2d

def conv3x3(in_planes, out_planes, stride = 1):
    """3x3 convolution with padding"""
    return nn.Conv2d(in_planes, out_planes, kernel_size = 3, stride = stride,
                     padding = 1, bias = False)

def conv1x1(in_planes, out_planes, stride = 1):
    """1x1 convolution without padding"""
    return nn.Conv2d(in_planes, out_planes, kernel_size = 1, stride = stride,
                     padding = 0, bias = False)

class BasicBlock(nn.Module):
    def __init__(self, inplanes, outplanes, stride = 1):
        super(BasicBlock, self).__init__()
        self.conv1 = conv3x3(inplanes, outplanes, stride)
        self.bn1 = BatchNorm2d(outplanes)
        self.relu = nn.ReLU(inplace = True)
        self.conv2 = conv3x3(outplanes, outplanes, 2*stride)

    def forward(self, x):
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.conv2(out)

        return out
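Each `BasicBlock` above applies a stride-1 conv followed by a stride-2 conv, so it halves the spatial resolution while remapping channels. A small shape check with illustrative sizes:

import torch
from utils.basicblocks import BasicBlock

block = BasicBlock(3, 64, stride=1)
out = block(torch.randn(1, 3, 224, 224))
print(out.shape)  # torch.Size([1, 64, 112, 112])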
utils/classifier.py
ADDED
@@ -0,0 +1,32 @@
import torch
import torch.nn as nn
import torch.nn.functional as F

class ClassifierModel(nn.Module):
    def __init__(self, num_classes):
        super(ClassifierModel, self).__init__()
        # Apply adaptive average pooling to convert (512, 14, 14) to (512)
        self.adaptive_pool = nn.AdaptiveAvgPool2d((1, 1))

        # Define multiple fully connected layers
        self.fc1 = nn.Linear(512, 256)  # First FC layer, reducing to 256 features
        self.fc2 = nn.Linear(256, 128)  # Second FC layer, reducing to 128 features
        self.fc3 = nn.Linear(128, num_classes)  # Final FC layer, outputting num_classes for classification

        # Dropout for regularization
        self.dropout = nn.Dropout(0.2)

    def forward(self, x):
        # Pool and flatten the incoming feature map
        x = self.adaptive_pool(x)
        x = torch.flatten(x, 1)

        # Pass through the fully connected layers with ReLU activations and dropout
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = F.relu(self.fc2(x))
        x = self.dropout(x)
        x = self.fc3(x)  # Raw class scores (logits)
        x = F.softmax(x, dim=1)  # Converted to class probabilities

        return x
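The classifier head above expects a 512-channel feature map (for example 512x14x14 after the four fused blocks) and returns per-class probabilities. A small sanity sketch:

import torch
from utils.classifier import ClassifierModel

head = ClassifierModel(num_classes=2).eval()
with torch.no_grad():
    probs = head(torch.randn(1, 512, 14, 14))
print(probs.shape, float(probs.sum()))  # torch.Size([1, 2]) 1.0, since softmax rows sum to 1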
utils/config.py
ADDED
@@ -0,0 +1,38 @@
from easydict import EasyDict as edict
import numpy as np

__C = edict()
cfg = __C

# 0. basic config
__C.TAG = 'default'
__C.CLASSES = ['Real', 'Fake']


# config of network input
__C.MULTIMODAL_FUSION = edict()
__C.MULTIMODAL_FUSION.IMG_CHANNELS = [3, 64, 128, 256, 512]
__C.MULTIMODAL_FUSION.DCT_CHANNELS = [1, 64, 128, 256, 512]


__C.NUM_EPOCHS = 100

__C.BATCH_SIZE = 64

__C.NUM_WORKERS = 4

__C.LEARNING_RATE = 0.0001

__C.PRETRAINED = False

__C.PRETRAINED_PATH = "/home/user/Documents/Real_and_DeepFake/src/best_model.pth"


__C.TEST_BATCH_SIZE = 512

__C.TEST_CSV = "/home/user/Documents/Real_and_DeepFake/src/dataset/extended_val.csv"

__C.MODEL_PATH = "/home/user/Documents/Real_and_DeepFake/src/best_model.pth"
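The config is a plain EasyDict, so downstream modules read it by attribute access. A brief illustration:

from utils.config import cfg

print(cfg.CLASSES)                         # ['Real', 'Fake']
print(cfg.MULTIMODAL_FUSION.IMG_CHANNELS)  # [3, 64, 128, 256, 512]
print(cfg.BATCH_SIZE, cfg.LEARNING_RATE)   # 64 0.0001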
utils/data_transforms.py
ADDED
@@ -0,0 +1,33 @@
from torchvision import transforms


def get_transforms_train():
    # Training-time augmentation and normalization
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Lambda(lambda x: x.float()),
        transforms.Resize((224, 224)),
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(10),
        transforms.Normalize(mean=[(0.485+0.456+0.406)/3], std=[(0.229+0.224+0.225)/3]),
    ])

    return transform


def get_transforms_val():
    # Validation/test-time preprocessing (no augmentation)
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Lambda(lambda x: x.float()),
        transforms.Resize((224, 224)),
        # transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        transforms.Normalize(mean=[(0.485+0.456+0.406)/3], std=[(0.229+0.224+0.225)/3]),
    ])

    return transform
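Both pipelines convert the input to a float tensor, resize it to 224x224, and normalize with a single channel-averaged mean/std, which PyTorch broadcasts across however many channels the input has. A minimal sketch of applying the validation transform to one of the bundled example images:

from PIL import Image
from utils.data_transforms import get_transforms_val

# "Examples/o_3.jpg" is one of the sample images added in this commit.
tensor = get_transforms_val()(Image.open("Examples/o_3.jpg").convert("RGB"))
print(tensor.shape, tensor.dtype)  # torch.Size([3, 224, 224]) torch.float32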
utils/feature_fusion_block.py
ADDED
@@ -0,0 +1,46 @@
from torch import nn
from torch.nn import functional as F

class SpatialAttention(nn.Module):
    def __init__(self, in_channels):
        super(SpatialAttention, self).__init__()
        self.conv1 = nn.Conv2d(in_channels, 1, kernel_size=1, stride=1, padding=0)

    def forward(self, x):
        # Calculate attention scores
        attention_scores = self.conv1(x)
        attention_scores = F.softmax(attention_scores, dim=2)

        # Apply attention to input features
        attended_features = x * attention_scores

        return attended_features

class DCT_Attention_Fusion_Conv(nn.Module):
    def __init__(self, channels):
        super(DCT_Attention_Fusion_Conv, self).__init__()
        self.rgb_attention = SpatialAttention(channels)
        self.depth_attention = SpatialAttention(channels)
        self.rgb_pooling = nn.AdaptiveAvgPool2d(1)
        self.depth_pooling = nn.AdaptiveAvgPool2d(1)

    def forward(self, rgb_features, DCT_features):
        # Spatial attention for both modalities
        rgb_attended_features = self.rgb_attention(rgb_features)
        depth_attended_features = self.depth_attention(DCT_features)

        # Adaptive pooling for both modalities
        rgb_pooled = self.rgb_pooling(rgb_attended_features)
        depth_pooled = self.depth_pooling(depth_attended_features)

        # Upsample the pooled features back to the original spatial size
        rgb_upsampled = F.interpolate(rgb_pooled, size=rgb_features.size()[2:], mode='bilinear', align_corners=False)
        depth_upsampled = F.interpolate(depth_pooled, size=DCT_features.size()[2:], mode='bilinear', align_corners=False)

        # Sum the upsampled features and apply ReLU
        fused_features = F.relu(rgb_upsampled + depth_upsampled)
        # fused_features = fused_features.sum(dim=1)

        return fused_features
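The fusion block keeps the RGB feature map's shape: both modalities are attention-weighted, globally pooled, upsampled back, summed, and passed through ReLU. A quick shape check with illustrative sizes:

import torch
from utils.feature_fusion_block import DCT_Attention_Fusion_Conv

fuse = DCT_Attention_Fusion_Conv(channels=64)
rgb_feat = torch.randn(1, 64, 112, 112)
dct_feat = torch.randn(1, 64, 112, 112)
print(fuse(rgb_feat, dct_feat).shape)  # torch.Size([1, 64, 112, 112])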
weights/faceswap-fft-best_model.pth
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:9c42f82049bed6db4edb5e933ffe4ce6e3612e7fbf351c29327d9cfe81f8c5ff
size 38189260
weights/faceswap-hh-best_model.pth
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:15272d1439ef629566cf43b3d4d1bc4f2091f3db1c0d0430038b56880c7ef385
size 38189178