Upload folder using huggingface_hub

Browse files

Files changed (12) hide show

attacks/CW.py +0 -0
attacks/LBFGS.py +0 -0
attacks/__init__.py +71 -0
attacks/apgd_ce.py +102 -0
attacks/autoattack.py +92 -0
attacks/base.py +63 -0
attacks/deepfool.py +253 -0
attacks/fgsm.py +171 -0
attacks/local_lip.py +29 -0
attacks/pgd.py +248 -0
attacks/squred.py +86 -0
attacks/utils.py +279 -0

attacks/CW.py ADDED Viewed

File without changes

attacks/LBFGS.py ADDED Viewed

File without changes

attacks/__init__.py ADDED Viewed

	@@ -0,0 +1,71 @@

+from .base import Attack
+from .fgsm import FGMAttack
+from .fgsm import FGSMAttack
+from .fgsm import L2FastGradientAttack
+from .fgsm import LinfFastGradientAttack
+from .pgd import PGDAttack
+from .pgd import L2PGDAttack
+from .pgd import LinfPGDAttack
+from .deepfool import DeepFoolAttack
+from .deepfool import LinfDeepFoolAttack
+from .deepfool import L2DeepFoolAttack
+from .utils import CWLoss
+from .autoattack import AutoAttacks
+from .apgd_ce import  Autoattack_apgd_ce
+from .squred import  Squre_Attack
+ATTACKS = ['fgsm', 'linf-pgd', 'fgm', 'l2-pgd', 'linf-df', 'l2-df', 'linf-apgd', 'l2-apgd','squar_attack','autoattack','apgd_ce']
+def create_attack(model, criterion, attack_type, attack_eps, attack_iter, attack_step, rand_init_type='uniform',
+                  clip_min=0., clip_max=1.):
+    """
+    Initialize adversary.
+    Arguments:
+        model (nn.Module): forward pass function.
+        criterion (nn.Module): loss function.
+        attack_type (str): name of the attack.
+        attack_eps (float): attack radius.
+        attack_iter (int): number of attack iterations.
+        attack_step (float): step size for the attack.
+        rand_init_type (str): random initialization type for PGD (default: uniform).
+        clip_min (float): mininum value per input dimension.
+        clip_max (float): maximum value per input dimension.
+   Returns:
+       Attack
+   """
+    if attack_type == 'fgsm':
+        attack = FGSMAttack(model, criterion, eps=attack_eps, clip_min=clip_min, clip_max=clip_max)
+    elif attack_type == 'fgm':
+        attack = FGMAttack(model, criterion, eps=attack_eps, clip_min=clip_min, clip_max=clip_max)
+    elif attack_type == 'linf-pgd':
+        attack = LinfPGDAttack(model, criterion, eps=attack_eps, nb_iter=attack_iter, eps_iter=attack_step,
+                               rand_init_type=rand_init_type, clip_min=clip_min, clip_max=clip_max)
+    elif attack_type == 'l2-pgd':
+        attack = L2PGDAttack(model, criterion, eps=attack_eps, nb_iter=attack_iter, eps_iter=attack_step,
+                             rand_init_type=rand_init_type, clip_min=clip_min, clip_max=clip_max)
+    elif attack_type == 'linf-df':
+        attack = LinfDeepFoolAttack(model, overshoot=0.02, nb_iter=attack_iter, search_iter=0, clip_min=clip_min,
+                                    clip_max=clip_max)
+    elif attack_type == 'l2-df':
+        attack = L2DeepFoolAttack(model, overshoot=0.02, nb_iter=attack_iter, search_iter=0, clip_min=clip_min,
+                                  clip_max=clip_max)
+    elif  attack_type == 'squar_attack':
+        attack =  Squre_Attack(model, criterion, nb_iter=attack_iter, eps_iter=attack_step,
+                             rand_init_type=rand_init_type, clip_min=clip_min, clip_max=clip_max)
+    elif attack_type == "autoattack":
+        attack =  AutoAttacks(model, nb_iter=attack_iter, eps=attack_eps, eps_iter=attack_step,
+                             rand_init_type=rand_init_type, clip_min=clip_min, clip_max=clip_max)
+    elif attack_type == "apgd_ce":
+        attack = Autoattack_apgd_ce (model, nb_iter=attack_iter, eps_iter=attack_step,
+                             rand_init_type=rand_init_type, clip_min=clip_min, clip_max=clip_max)
+    else:
+        raise NotImplementedError('{} is not yet implemented!'.format(attack_type))
+    return attack

attacks/apgd_ce.py ADDED Viewed

	@@ -0,0 +1,102 @@

+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from torch.autograd import Variable
+from adv_lib.attacks import carlini_wagner_linf
+import torch.optim as optim
+from autoattack import AutoAttack
+import numpy as np
+import logging
+from .base import Attack,LabelMixin
+from typing import List, Union,Dict
+import torch
+import torch.nn as nn
+from typing import Dict
+from .utils import  ctx_noparamgrad_and_eval
+from utils.distributed import DistributedMetric
+from tqdm import tqdm
+from torchpack import distributed as dist
+from utils import accuracy
+class Autoattack_apgd_ce(Attack, LabelMixin):
+    def __init__(
+            self, predict, loss_fn=None, eps=0.3, nb_iter=40, eps_iter=0.01, rand_init=True, clip_min=0., clip_max=1.,
+            ord=np.inf, targeted=False, rand_init_type='uniform'):
+        super(Autoattack_apgd_ce, self).__init__(predict, loss_fn, clip_min, clip_max)
+        self.eps = eps
+        self.nb_iter = nb_iter
+        self.eps_iter = eps_iter
+        self.rand_init = rand_init
+        self.rand_init_type = rand_init_type
+        self.ord = ord
+        self.targeted = targeted
+        if self.loss_fn is None:
+            self.loss_fn = nn.CrossEntropyLoss(reduction="sum")
+        self.adversary = AutoAttack(predict, norm='Linf', eps=self.eps, version='standard')
+    def perturb(self, x, y=None):
+        self.adversary.attacks_to_run=['apgd-ce']
+        adversarial_examples = self.adversary.run_standard_evaluation(x, y, bs=100)
+        return adversarial_examples,adversarial_examples
+    def eval_AutoAttack_apgd_ce(self,data_loader_dict: Dict)-> Dict:
+        test_criterion = nn.CrossEntropyLoss().cuda()
+        val_loss = DistributedMetric()
+        val_top1 = DistributedMetric()
+        val_top5 = DistributedMetric()
+        val_advloss = DistributedMetric()
+        val_advtop1 = DistributedMetric()
+        val_advtop5 = DistributedMetric()
+        self.predict.eval()
+        with tqdm(
+                total=len(data_loader_dict["val"]),
+                desc="Eval",
+                disable=not dist.is_master(),
+            ) as t:
+                for images, labels in data_loader_dict["val"]:
+                    images, labels = images.cuda(), labels.cuda()
+                    # compute output
+                    output = self.predict(images)
+                    loss = test_criterion(output, labels)
+                    val_loss.update(loss, images.shape[0])
+                    acc1, acc5 = accuracy(output, labels, topk=(1, 5))
+                    val_top5.update(acc5[0], images.shape[0])
+                    val_top1.update(acc1[0], images.shape[0])
+                    with ctx_noparamgrad_and_eval(self.predict):
+                        images_adv,_ = self.perturb(images, labels)
+                    output_adv = self.predict(images_adv)
+                    loss_adv = test_criterion(output_adv,labels)
+                    val_advloss.update(loss_adv, images.shape[0])
+                    acc1_adv, acc5_adv = accuracy(output_adv, labels, topk=(1, 5))
+                    val_advtop1.update(acc1_adv[0], images.shape[0])
+                    val_advtop5.update(acc5_adv[0], images.shape[0])
+                    t.set_postfix(
+                        {
+                            "loss": val_loss.avg.item(),
+                            "top1": val_top1.avg.item(),
+                            "top5": val_top5.avg.item(),
+                            "adv_loss": val_advloss.avg.item(),
+                            "adv_top1": val_advtop1.avg.item(),
+                            "adv_top5": val_advtop5.avg.item(),
+                            "#samples": val_top1.count.item(),
+                            "batch_size": images.shape[0],
+                            "img_size": images.shape[2],
+                        }
+                    )
+                    t.update()
+        val_results = {
+            "val_top1": val_top1.avg.item(),
+            "val_top5": val_top5.avg.item(),
+            "val_loss": val_loss.avg.item(),
+            "val_advtop1": val_advtop1.avg.item(),
+            "val_advtop5": val_advtop5.avg.item(),
+            "val_advloss": val_advloss.avg.item(),
+        }
+        return val_results

attacks/autoattack.py ADDED Viewed

	@@ -0,0 +1,92 @@

+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from autoattack import AutoAttack
+import numpy as np
+import logging
+from .base import Attack,LabelMixin
+import torch
+import torch.nn as nn
+from typing import Dict
+from .utils import  ctx_noparamgrad_and_eval
+from utils.distributed import DistributedMetric
+from tqdm import tqdm
+from torchpack import distributed as dist
+from utils import accuracy
+class AutoAttacks(Attack, LabelMixin):
+    def __init__(
+            self, predict, loss_fn=None, eps=0.3, nb_iter=40, eps_iter=0.01, rand_init=True, clip_min=0., clip_max=1.,
+            ord=np.inf, targeted=False, rand_init_type='uniform'):
+        super(AutoAttacks, self).__init__(predict, loss_fn, clip_min, clip_max)
+        self.eps = eps
+        self.nb_iter = nb_iter
+        self.eps_iter = eps_iter
+        self.rand_init = rand_init
+        self.rand_init_type = rand_init_type
+        self.ord = ord
+        self.targeted = targeted
+        if self.loss_fn is None:
+            self.loss_fn = nn.CrossEntropyLoss(reduction="sum")
+        self.adversary =  AutoAttack(predict, norm='Linf', eps=self.eps, version='standard')
+    def perturb(self, x, y=None):
+        adversarial_examples = self.adversary.run_standard_evaluation(x, y, bs=100)
+        return adversarial_examples,adversarial_examples
+    def eval_AutoAttack(self,data_loader_dict: Dict)-> Dict:
+        test_criterion = nn.CrossEntropyLoss().cuda()
+        val_loss = DistributedMetric()
+        val_top1 = DistributedMetric()
+        val_top5 = DistributedMetric()
+        val_advloss = DistributedMetric()
+        val_advtop1 = DistributedMetric()
+        val_advtop5 = DistributedMetric()
+        self.predict.eval()
+        with tqdm(
+                total=len(data_loader_dict["val"]),
+                desc="Eval",
+                disable=not dist.is_master(),
+            ) as t:
+                for images, labels in data_loader_dict["val"]:
+                    images, labels = images.cuda(), labels.cuda()
+                    # compute output
+                    output = self.predict(images)
+                    loss = test_criterion(output, labels)
+                    val_loss.update(loss, images.shape[0])
+                    acc1, acc5 = accuracy(output, labels, topk=(1, 5))
+                    val_top5.update(acc5[0], images.shape[0])
+                    val_top1.update(acc1[0], images.shape[0])
+                    with ctx_noparamgrad_and_eval(self.predict):
+                        images_adv,_ = self.perturb(images, labels)
+                    output_adv = self.predict(images_adv)
+                    loss_adv = test_criterion(output_adv,labels)
+                    val_advloss.update(loss_adv, images.shape[0])
+                    acc1_adv, acc5_adv = accuracy(output_adv, labels, topk=(1, 5))
+                    val_advtop1.update(acc1_adv[0], images.shape[0])
+                    val_advtop5.update(acc5_adv[0], images.shape[0])
+                    t.set_postfix(
+                        {
+                            "loss": val_loss.avg.item(),
+                            "top1": val_top1.avg.item(),
+                            "top5": val_top5.avg.item(),
+                            "adv_loss": val_advloss.avg.item(),
+                            "adv_top1": val_advtop1.avg.item(),
+                            "adv_top5": val_advtop5.avg.item(),
+                            "#samples": val_top1.count.item(),
+                            "batch_size": images.shape[0],
+                            "img_size": images.shape[2],
+                        }
+                    )
+                    t.update()
+        val_results = {
+            "val_top1": val_top1.avg.item(),
+            "val_top5": val_top5.avg.item(),
+            "val_loss": val_loss.avg.item(),
+            "val_advtop1": val_advtop1.avg.item(),
+            "val_advtop5": val_advtop5.avg.item(),
+            "val_advloss": val_advloss.avg.item(),
+        }
+        return val_results

attacks/base.py ADDED Viewed

	@@ -0,0 +1,63 @@

+import torch
+import torch.nn as nn
+from .utils import replicate_input
+class Attack(object):
+    """
+    Abstract base class for all attack classes.
+    Arguments:
+        predict (nn.Module): forward pass function.
+        loss_fn (nn.Module): loss function.
+        clip_min (float): mininum value per input dimension.
+        clip_max (float): maximum value per input dimension.
+    """
+    def __init__(self, predict, loss_fn, clip_min, clip_max):
+        self.predict = predict
+        self.loss_fn = loss_fn
+        self.clip_min = clip_min
+        self.clip_max = clip_max
+    def perturb(self, x, **kwargs):
+        """
+        Virtual method for generating the adversarial examples.
+        Arguments:
+            x (torch.Tensor): the model's input tensor.
+            **kwargs: optional parameters used by child classes.
+        Returns:
+            adversarial examples.
+        """
+        error = "Sub-classes must implement perturb."
+        raise NotImplementedError(error)
+    def __call__(self, *args, **kwargs):
+        return self.perturb(*args, **kwargs)
+class LabelMixin(object):
+    def _get_predicted_label(self, x):
+        """
+        Compute predicted labels given x. Used to prevent label leaking during adversarial training.
+        Arguments:
+            x (torch.Tensor): the model's input tensor.
+        Returns:
+            torch.Tensor containing predicted labels.
+        """
+        with torch.no_grad():
+            outputs = self.predict(x)
+        _, y = torch.max(outputs, dim=1)
+        return y
+    def _verify_and_process_inputs(self, x, y):
+        if self.targeted:
+            assert y is not None
+        if not self.targeted:
+            if y is None:
+                y = self._get_predicted_label(x)
+        x = replicate_input(x)
+        y = replicate_input(y)
+        return x,y

attacks/deepfool.py ADDED Viewed

	@@ -0,0 +1,253 @@

+import copy
+import numpy as np
+import torch
+import torch.nn as nn
+from torch.autograd import Variable
+from .utils import  ctx_noparamgrad_and_eval
+from .base import Attack, LabelMixin
+from typing import Dict
+from .utils import batch_multiply
+from .utils import clamp
+from .utils import is_float_or_torch_tensor
+from utils.distributed import DistributedMetric
+from tqdm import tqdm
+from torchpack import distributed as dist
+from utils import accuracy
+device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+def perturb_deepfool(xvar, yvar, predict, nb_iter=50, overshoot=0.02, ord=np.inf, clip_min=0.0, clip_max=1.0,
+                     search_iter=0, device=None):
+    """
+    Compute DeepFool perturbations (Moosavi-Dezfooli et al, 2016).
+    Arguments:
+        xvar (torch.Tensor): input images.
+        yvar (torch.Tensor): predictions.
+        predict (nn.Module): forward pass function.
+        nb_iter (int): number of iterations.
+        overshoot (float): how much to overshoot the boundary.
+        ord (int): (optional) the order of maximum distortion (inf or 2).
+        clip_min (float): mininum value per input dimension.
+        clip_max (float): maximum value per input dimension.
+        search_iter (int): no of search iterations.
+        device (torch.device): device to work on.
+    Returns:
+        torch.Tensor containing the perturbed input,
+        torch.Tensor containing the perturbation
+    """
+    x_orig = xvar
+    x = torch.empty_like(xvar).copy_(xvar)
+    x.requires_grad_(True)
+    batch_i = torch.arange(x.shape[0])
+    r_tot = torch.zeros_like(x.data)
+    for i in range(nb_iter):
+        if x.grad is not None:
+            x.grad.zero_()
+        logits = predict(x)
+        df_inds = np.argsort(logits.detach().cpu().numpy(), axis=-1)
+        df_inds_other, df_inds_orig = df_inds[:, :-1], df_inds[:, -1]
+        df_inds_orig = torch.from_numpy(df_inds_orig)
+        df_inds_orig = df_inds_orig.to(device)
+        not_done_inds = df_inds_orig == yvar
+        if not_done_inds.sum() == 0:
+            break
+        logits[batch_i, df_inds_orig].sum().backward(retain_graph=True)
+        grad_orig = x.grad.data.clone().detach()
+        pert = x.data.new_ones(x.shape[0]) * np.inf
+        w = torch.zeros_like(x.data)
+        for inds in df_inds_other.T:
+            x.grad.zero_()
+            logits[batch_i, inds].sum().backward(retain_graph=True)
+            grad_cur = x.grad.data.clone().detach()
+            with torch.no_grad():
+                w_k = grad_cur - grad_orig
+                f_k = logits[batch_i, inds] - logits[batch_i, df_inds_orig]
+                if ord == 2:
+                    pert_k = torch.abs(f_k) / torch.norm(w_k.flatten(1), 2, -1)
+                elif ord == np.inf:
+                    pert_k = torch.abs(f_k) / torch.norm(w_k.flatten(1), 1, -1)
+                else:
+                    raise NotImplementedError("Only ord=inf and ord=2 have been implemented")
+                swi = pert_k < pert
+                if swi.sum() > 0:
+                    pert[swi] = pert_k[swi]
+                    w[swi] = w_k[swi]
+        if ord == 2:
+            r_i = (pert + 1e-6)[:, None, None, None] * w / torch.norm(w.flatten(1), 2, -1)[:, None, None, None]
+        elif ord == np.inf:
+            r_i = (pert + 1e-6)[:, None, None, None] * w.sign()
+        r_tot += r_i * not_done_inds[:, None, None, None].float()
+        x.data = x_orig + (1. + overshoot) * r_tot
+        x.data = torch.clamp(x.data, clip_min, clip_max)
+    x = x.detach()
+    if search_iter > 0:
+        dx = x - x_orig
+        dx_l_low, dx_l_high = torch.zeros_like(dx), torch.ones_like(dx)
+        for i in range(search_iter):
+            dx_l = (dx_l_low + dx_l_high) / 2.
+            dx_x = x_orig + dx_l * dx
+            dx_y = predict(dx_x).argmax(-1)
+            label_stay = dx_y == yvar
+            label_change = dx_y != yvar
+            dx_l_low[label_stay] = dx_l[label_stay]
+            dx_l_high[label_change] = dx_l[label_change]
+        x = dx_x
+    # x.data = torch.clamp(x.data, clip_min, clip_max)
+    r_tot = x.data - x_orig
+    return x, r_tot
+class DeepFoolAttack(Attack, LabelMixin):
+    """
+    DeepFool attack.
+    [Seyed-Mohsen Moosavi-Dezfooli, Alhussein Fawzi, Pascal Frossard,
+    "DeepFool: a simple and accurate method to fool deep neural networks"]
+    Arguments:
+        predict (nn.Module): forward pass function.
+        overshoot (float): how much to overshoot the boundary.
+        nb_iter (int): number of iterations.
+        search_iter (int): no of search iterations.
+        clip_min (float): mininum value per input dimension.
+        clip_max (float): maximum value per input dimension.
+        ord (int): (optional) the order of maximum distortion (inf or 2).
+    """
+    def __init__(
+            self, predict, overshoot=0.02, nb_iter=50, search_iter=50, clip_min=0., clip_max=1., ord=np.inf):
+        super(DeepFoolAttack, self).__init__(predict, None, clip_min, clip_max)
+        self.overshoot = overshoot
+        self.nb_iter = nb_iter
+        self.search_iter = search_iter
+        self.targeted = False
+        self.ord = ord
+        assert is_float_or_torch_tensor(self.overshoot)
+    def perturb(self, x, y=None):
+        """
+        Given examples x, returns their adversarial counterparts.
+        Arguments:
+            x (torch.Tensor): input tensor.
+            y (torch.Tensor): label tensor.
+                - if None and self.targeted=False, compute y as predicted labels.
+        Returns:
+            torch.Tensor containing perturbed inputs,
+            torch.Tensor containing the perturbation
+        """
+        x, y = self._verify_and_process_inputs(x, None)
+        x_adv, r_adv = perturb_deepfool(x, y, self.predict, self.nb_iter, self.overshoot, ord=self.ord,
+                                        clip_min=self.clip_min, clip_max=self.clip_max, search_iter=self.search_iter,
+                                        device=device)
+        return x_adv, r_adv
+    def eval_deepfool(self,data_loader_dict: Dict)-> Dict:
+        test_criterion = nn.CrossEntropyLoss().cuda()
+        val_loss = DistributedMetric()
+        val_top1 = DistributedMetric()
+        val_top5 = DistributedMetric()
+        val_advloss = DistributedMetric()
+        val_advtop1 = DistributedMetric()
+        val_advtop5 = DistributedMetric()
+        self.predict.eval()
+        with tqdm(
+                total=len(data_loader_dict["val"]),
+                desc="Eval",
+                disable=not dist.is_master(),
+            ) as t:
+                for images, labels in data_loader_dict["val"]:
+                    images, labels = images.cuda(), labels.cuda()
+                    # compute output
+                    output = self.predict(images)
+                    loss = test_criterion(output, labels)
+                    val_loss.update(loss, images.shape[0])
+                    acc1, acc5 = accuracy(output, labels, topk=(1, 5))
+                    val_top5.update(acc5[0], images.shape[0])
+                    val_top1.update(acc1[0], images.shape[0])
+                    with ctx_noparamgrad_and_eval(self.predict):
+                        images_adv,_ = self.perturb(images, labels)
+                    output_adv = self.predict(images_adv)
+                    loss_adv = test_criterion(output_adv,labels)
+                    val_advloss.update(loss_adv, images.shape[0])
+                    acc1_adv, acc5_adv = accuracy(output_adv, labels, topk=(1, 5))
+                    val_advtop1.update(acc1_adv[0], images.shape[0])
+                    val_advtop5.update(acc5_adv[0], images.shape[0])
+                    t.set_postfix(
+                        {
+                            "loss": val_loss.avg.item(),
+                            "top1": val_top1.avg.item(),
+                            "top5": val_top5.avg.item(),
+                            "adv_loss": val_advloss.avg.item(),
+                            "adv_top1": val_advtop1.avg.item(),
+                            "adv_top5": val_advtop5.avg.item(),
+                            "#samples": val_top1.count.item(),
+                            "batch_size": images.shape[0],
+                            "img_size": images.shape[2],
+                        }
+                    )
+                    t.update()
+        val_results = {
+            "val_top1": val_top1.avg.item(),
+            "val_top5": val_top5.avg.item(),
+            "val_loss": val_loss.avg.item(),
+            "val_advtop1": val_advtop1.avg.item(),
+            "val_advtop5": val_advtop5.avg.item(),
+            "val_advloss": val_advloss.avg.item(),
+        }
+        return val_results
+class LinfDeepFoolAttack(DeepFoolAttack):
+    """
+    DeepFool Attack with order=Linf.
+    Arguments:
+    Arguments:
+        predict (nn.Module): forward pass function.
+        overshoot (float): how much to overshoot the boundary.
+        nb_iter (int): number of iterations.
+        search_iter (int): no of search iterations.
+        clip_min (float): mininum value per input dimension.
+        clip_max (float): maximum value per input dimension.
+    """
+    def __init__(
+            self, predict, overshoot=0.02, nb_iter=50, search_iter=50, clip_min=0., clip_max=1.):
+        ord = np.inf
+        super(LinfDeepFoolAttack, self).__init__(
+            predict=predict, overshoot=overshoot, nb_iter=nb_iter, search_iter=search_iter, clip_min=clip_min,
+            clip_max=clip_max, ord=ord)
+class L2DeepFoolAttack(DeepFoolAttack):
+    """
+    DeepFool Attack with order=L2.
+    Arguments:
+        predict (nn.Module): forward pass function.
+        overshoot (float): how much to overshoot the boundary.
+        nb_iter (int): number of iterations.
+        search_iter (int): no of search iterations.
+        clip_min (float): mininum value per input dimension.
+        clip_max (float): maximum value per input dimension.
+    """
+    def __init__(
+            self, predict, overshoot=0.02, nb_iter=50, search_iter=50, clip_min=0., clip_max=1.):
+        ord = 2
+        super(L2DeepFoolAttack, self).__init__(
+            predict=predict, overshoot=overshoot, nb_iter=nb_iter, search_iter=search_iter, clip_min=clip_min,
+            clip_max=clip_max, ord=ord)

attacks/fgsm.py ADDED Viewed

	@@ -0,0 +1,171 @@

+import torch
+import torch.nn as nn
+from .base import Attack, LabelMixin
+from .utils import  ctx_noparamgrad_and_eval
+from .utils import batch_multiply
+from .utils import clamp ,normalize_by_pnorm
+from utils.distributed import DistributedMetric
+from tqdm import tqdm
+from torchpack import distributed as dist
+from utils import accuracy
+from typing import Dict
+class FGSMAttack(Attack, LabelMixin):
+    """
+    One step fast gradient sign method (Goodfellow et al, 2014).
+    Arguments:
+        predict (nn.Module): forward pass function.
+        loss_fn (nn.Module): loss function.
+        eps (float): attack step size.
+        clip_min (float): mininum value per input dimension.
+        clip_max (float): maximum value per input dimension.
+        targeted (bool): indicate if this is a targeted attack.
+    """
+    def __init__(self, predict, loss_fn=None, eps=0.3, clip_min=0., clip_max=1., targeted=False):
+        super(FGSMAttack, self).__init__(predict, loss_fn, clip_min, clip_max)
+        self.eps = eps
+        self.targeted = targeted
+        if self.loss_fn is None:
+            self.loss_fn = nn.CrossEntropyLoss(reduction="sum")
+    def perturb(self, x, y=None):
+        """
+        Given examples (x, y), returns their adversarial counterparts with an attack length of eps.
+        Arguments:
+            x (torch.Tensor): input tensor.
+            y  (torch.Tensor): label tensor.
+                - if None and self.targeted=False, compute y as predicted labels.
+                - if self.targeted=True, then y must be the targeted labels.
+        Returns:
+            torch.Tensor containing perturbed inputs.
+            torch.Tensor containing the perturbation.
+        """
+        x, y = self._verify_and_process_inputs(x, y)
+        xadv = x.requires_grad_()
+        outputs = self.predict(xadv)
+        loss = self.loss_fn(outputs, y)
+        if self.targeted:
+            loss = -loss
+        loss.backward()
+        grad_sign = xadv.grad.detach().sign()
+        xadv = xadv + batch_multiply(self.eps, grad_sign)
+        xadv = clamp(xadv, self.clip_min, self.clip_max)
+        radv = xadv - x
+        return xadv.detach(), radv.detach()
+LinfFastGradientAttack = FGSMAttack
+class FGMAttack(Attack, LabelMixin):
+    """
+    One step fast gradient method. Perturbs the input with gradient (not gradient sign) of the loss wrt the input.
+    Arguments:
+        predict (nn.Module): forward pass function.
+        loss_fn (nn.Module): loss function.
+        eps (float): attack step size.
+        clip_min (float): mininum value per input dimension.
+        clip_max (float): maximum value per input dimension.
+        targeted (bool): indicate if this is a targeted attack.
+    """
+    def __init__(self, predict, loss_fn=None, eps=0.3, clip_min=0., clip_max=1., targeted=False):
+        super(FGMAttack, self).__init__(
+            predict, loss_fn, clip_min, clip_max)
+        self.eps = eps
+        self.targeted = targeted
+        if self.loss_fn is None:
+            self.loss_fn = nn.CrossEntropyLoss(reduction="sum")
+    def perturb(self, x, y=None):
+        """
+        Given examples (x, y), returns their adversarial counterparts with an attack length of eps.
+        Arguments:
+            x (torch.Tensor): input tensor.
+            y  (torch.Tensor): label tensor.
+                - if None and self.targeted=False, compute y as predicted labels.
+                - if self.targeted=True, then y must be the targeted labels.
+        Returns:
+            torch.Tensor containing perturbed inputs.
+            torch.Tensor containing the perturbation.
+        """
+        x, y = self._verify_and_process_inputs(x, y)
+        xadv = x.requires_grad_()
+        outputs = self.predict(xadv)
+        loss = self.loss_fn(outputs, y)
+        if self.targeted:
+            loss = -loss
+        loss.backward()
+        grad = normalize_by_pnorm(xadv.grad)
+        xadv = xadv + batch_multiply(self.eps, grad)
+        xadv = clamp(xadv, self.clip_min, self.clip_max)
+        radv = xadv - x
+        return xadv.detach(), radv.detach()
+    def eval_fgsm(self,data_loader_dict: Dict)-> Dict:
+        test_criterion = nn.CrossEntropyLoss().cuda()
+        val_loss = DistributedMetric()
+        val_top1 = DistributedMetric()
+        val_top5 = DistributedMetric()
+        val_advloss = DistributedMetric()
+        val_advtop1 = DistributedMetric()
+        val_advtop5 = DistributedMetric()
+        self.predict.eval()
+        with tqdm(
+                total=len(data_loader_dict["val"]),
+                desc="Eval",
+                disable=not dist.is_master(),
+            ) as t:
+                for images, labels in data_loader_dict["val"]:
+                    images, labels = images.cuda(), labels.cuda()
+                    # compute output
+                    output = self.predict(images)
+                    loss = test_criterion(output, labels)
+                    val_loss.update(loss, images.shape[0])
+                    acc1, acc5 = accuracy(output, labels, topk=(1, 5))
+                    val_top5.update(acc5[0], images.shape[0])
+                    val_top1.update(acc1[0], images.shape[0])
+                    with ctx_noparamgrad_and_eval(self.predict):
+                        images_adv,_ = self.perturb(images, labels)
+                    output_adv = self.predict(images_adv)
+                    loss_adv = test_criterion(output_adv,labels)
+                    val_advloss.update(loss_adv, images.shape[0])
+                    acc1_adv, acc5_adv = accuracy(output_adv, labels, topk=(1, 5))
+                    val_advtop1.update(acc1_adv[0], images.shape[0])
+                    val_advtop5.update(acc5_adv[0], images.shape[0])
+                    t.set_postfix(
+                        {
+                            "loss": val_loss.avg.item(),
+                            "top1": val_top1.avg.item(),
+                            "top5": val_top5.avg.item(),
+                            "adv_loss": val_advloss.avg.item(),
+                            "adv_top1": val_advtop1.avg.item(),
+                            "adv_top5": val_advtop5.avg.item(),
+                            "#samples": val_top1.count.item(),
+                            "batch_size": images.shape[0],
+                            "img_size": images.shape[2],
+                        }
+                    )
+                    t.update()
+        val_results = {
+            "val_top1": val_top1.avg.item(),
+            "val_top5": val_top5.avg.item(),
+            "val_loss": val_loss.avg.item(),
+            "val_advtop1": val_advtop1.avg.item(),
+            "val_advtop5": val_advtop5.avg.item(),
+            "val_advloss": val_advloss.avg.item(),
+        }
+        return val_results
+L2FastGradientAttack = FGMAttack

attacks/local_lip.py ADDED Viewed

	@@ -0,0 +1,29 @@

+import torch
+import torch.nn as nn
+from typing import Dict
+from utils.distributed import DistributedMetric
+from tqdm import tqdm
+from torchpack import distributed as dist
+from utils import accuracy
+import copy
+import torch.nn.functional as F
+import numpy as np
+def eval_local_lip(model, x, xp, top_norm=1, btm_norm=float('inf'), reduction='mean'):
+    model.eval()
+    down = torch.flatten(x - xp, start_dim=1)
+    with torch.no_grad():
+        if top_norm == "kl":
+            criterion_kl = nn.KLDivLoss(reduction='none')
+            top = criterion_kl(F.log_softmax(model(xp), dim=1),
+                               F.softmax(model(x), dim=1))
+            ret = torch.sum(top, dim=1) / torch.norm(down + 1e-6, dim=1, p=btm_norm)
+        else:
+            top = torch.flatten(model(x), start_dim=1) - torch.flatten(model(xp), start_dim=1)
+            ret = torch.norm(top, dim=1, p=top_norm) / torch.norm(down + 1e-6, dim=1, p=btm_norm)
+    if reduction == 'mean':
+        return torch.mean(ret)
+    elif reduction == 'sum':
+        return torch.sum(ret)
+    else:
+        raise ValueError("Not supported reduction")

attacks/pgd.py ADDED Viewed

	@@ -0,0 +1,248 @@

+import numpy as np
+import torch
+import torch.nn as nn
+from .utils import  ctx_noparamgrad_and_eval
+from .base import Attack, LabelMixin
+from typing import Dict
+from .utils import batch_clamp
+from .utils import batch_multiply
+from .utils import clamp
+from .utils import clamp_by_pnorm
+from .utils import is_float_or_torch_tensor
+from .utils import normalize_by_pnorm
+from .utils import rand_init_delta
+from .utils import replicate_input
+from utils.distributed import DistributedMetric
+from tqdm import tqdm
+from torchpack import distributed as dist
+from utils import accuracy
+def perturb_iterative(xvar, yvar, predict, nb_iter, eps, eps_iter, loss_fn, delta_init=None, minimize=False, ord=np.inf,
+                      clip_min=0.0, clip_max=1.0):
+    """
+    Iteratively maximize the loss over the input. It is a shared method for iterative attacks.
+    Arguments:
+        xvar (torch.Tensor): input data.
+        yvar (torch.Tensor): input labels.
+        predict (nn.Module): forward pass function.
+        nb_iter (int): number of iterations.
+        eps (float): maximum distortion.
+        eps_iter (float): attack step size.
+        loss_fn (nn.Module): loss function.
+        delta_init (torch.Tensor): (optional) tensor contains the random initialization.
+        minimize (bool): (optional) whether to minimize or maximize the loss.
+        ord (int): (optional) the order of maximum distortion (inf or 2).
+        clip_min (float): mininum value per input dimension.
+        clip_max (float): maximum value per input dimension.
+    Returns:
+        torch.Tensor containing the perturbed input,
+        torch.Tensor containing the perturbation
+    """
+    if delta_init is not None:
+        delta = delta_init
+    else:
+        delta = torch.zeros_like(xvar)
+    delta.requires_grad_()
+    for ii in range(nb_iter):
+        outputs = predict(xvar + delta)
+        loss = loss_fn(outputs, yvar)
+        if minimize:
+            loss = -loss
+        loss.backward()
+        if ord == np.inf:
+            grad_sign = delta.grad.data.sign()
+            delta.data = delta.data + batch_multiply(eps_iter, grad_sign)
+            delta.data = batch_clamp(eps, delta.data)
+            delta.data = clamp(xvar.data + delta.data, clip_min, clip_max) - xvar.data
+        elif ord == 2:
+            grad = delta.grad.data
+            grad = normalize_by_pnorm(grad)
+            delta.data = delta.data + batch_multiply(eps_iter, grad)
+            delta.data = clamp(xvar.data + delta.data, clip_min, clip_max) - xvar.data
+            if eps is not None:
+                delta.data = clamp_by_pnorm(delta.data, ord, eps)
+        else:
+            error = "Only ord=inf and ord=2 have been implemented"
+            raise NotImplementedError(error)
+        delta.grad.data.zero_()
+    x_adv = clamp(xvar + delta, clip_min, clip_max)
+    r_adv = x_adv - xvar
+    return x_adv, r_adv
+class PGDAttack(Attack, LabelMixin):
+    """
+    The projected gradient descent attack (Madry et al, 2017).
+    The attack performs nb_iter steps of size eps_iter, while always staying within eps from the initial point.
+    Arguments:
+        predict (nn.Module): forward pass function.
+        loss_fn (nn.Module): loss function.
+        eps (float): maximum distortion.
+        nb_iter (int): number of iterations.
+        eps_iter (float): attack step size.
+        rand_init (bool): (optional) random initialization.
+        clip_min (float): mininum value per input dimension.
+        clip_max (float): maximum value per input dimension.
+        ord (int): (optional) the order of maximum distortion (inf or 2).
+        targeted (bool): if the attack is targeted.
+        rand_init_type (str): (optional) random initialization type.
+    """
+    def __init__(
+            self, predict, loss_fn=None, eps=0.3, nb_iter=40, eps_iter=0.01, rand_init=True, clip_min=0., clip_max=1.,
+            ord=np.inf, targeted=False, rand_init_type='uniform'):
+        super(PGDAttack, self).__init__(predict, loss_fn, clip_min, clip_max)
+        self.eps = eps
+        self.nb_iter = nb_iter
+        self.eps_iter = eps_iter
+        self.rand_init = rand_init
+        self.rand_init_type = rand_init_type
+        self.ord = ord
+        self.targeted = targeted
+        if self.loss_fn is None:
+            self.loss_fn = nn.CrossEntropyLoss(reduction="sum")
+        assert is_float_or_torch_tensor(self.eps_iter)
+        assert is_float_or_torch_tensor(self.eps)
+    def perturb(self, x, y=None):
+        """
+        Given examples (x, y), returns their adversarial counterparts with an attack length of eps.
+        Arguments:
+            x (torch.Tensor): input tensor.
+            y (torch.Tensor): label tensor.
+                - if None and self.targeted=False, compute y as predicted
+                labels.
+                - if self.targeted=True, then y must be the targeted labels.
+        Returns:
+            torch.Tensor containing perturbed inputs,
+            torch.Tensor containing the perturbation
+        """
+        x, y = self._verify_and_process_inputs(x, y)
+        delta = torch.zeros_like(x)
+        delta = nn.Parameter(delta)
+        if self.rand_init:
+            if self.rand_init_type == 'uniform':
+                rand_init_delta(
+                    delta, x, self.ord, self.eps, self.clip_min, self.clip_max)
+                delta.data = clamp(
+                    x + delta.data, min=self.clip_min, max=self.clip_max) - x
+            elif self.rand_init_type == 'normal':
+                delta.data = 0.001 * torch.randn_like(x) # initialize as in TRADES
+            else:
+                raise NotImplementedError('Only rand_init_type=normal and rand_init_type=uniform have been implemented.')
+        x_adv, r_adv = perturb_iterative(
+            x, y, self.predict, nb_iter=self.nb_iter, eps=self.eps, eps_iter=self.eps_iter, loss_fn=self.loss_fn,
+            minimize=self.targeted, ord=self.ord, clip_min=self.clip_min, clip_max=self.clip_max, delta_init=delta
+        )
+        return x_adv.data, r_adv.data
+    def eval_pgd(self,data_loader_dict: Dict)-> Dict:
+        test_criterion = nn.CrossEntropyLoss().cuda()
+        val_loss = DistributedMetric()
+        val_top1 = DistributedMetric()
+        val_top5 = DistributedMetric()
+        val_advloss = DistributedMetric()
+        val_advtop1 = DistributedMetric()
+        val_advtop5 = DistributedMetric()
+        self.predict.eval()
+        with tqdm(
+                total=len(data_loader_dict["val"]),
+                desc="Eval",
+                disable=not dist.is_master(),
+            ) as t:
+                for images, labels in data_loader_dict["val"]:
+                    images, labels = images.cuda(), labels.cuda()
+                    # compute output
+                    output = self.predict(images)
+                    loss = test_criterion(output, labels)
+                    val_loss.update(loss, images.shape[0])
+                    acc1, acc5 = accuracy(output, labels, topk=(1, 5))
+                    val_top5.update(acc5[0], images.shape[0])
+                    val_top1.update(acc1[0], images.shape[0])
+                    with ctx_noparamgrad_and_eval(self.predict):
+                        images_adv,_ = self.perturb(images, labels)
+                    output_adv = self.predict(images_adv)
+                    loss_adv = test_criterion(output_adv,labels)
+                    val_advloss.update(loss_adv, images.shape[0])
+                    acc1_adv, acc5_adv = accuracy(output_adv, labels, topk=(1, 5))
+                    val_advtop1.update(acc1_adv[0], images.shape[0])
+                    val_advtop5.update(acc5_adv[0], images.shape[0])
+                    t.set_postfix(
+                        {
+                            "loss": val_loss.avg.item(),
+                            "top1": val_top1.avg.item(),
+                            "top5": val_top5.avg.item(),
+                            "adv_loss": val_advloss.avg.item(),
+                            "adv_top1": val_advtop1.avg.item(),
+                            "adv_top5": val_advtop5.avg.item(),
+                            "#samples": val_top1.count.item(),
+                            "batch_size": images.shape[0],
+                            "img_size": images.shape[2],
+                        }
+                    )
+                    t.update()
+        val_results = {
+            "val_top1": val_top1.avg.item(),
+            "val_top5": val_top5.avg.item(),
+            "val_loss": val_loss.avg.item(),
+            "val_advtop1": val_advtop1.avg.item(),
+            "val_advtop5": val_advtop5.avg.item(),
+            "val_advloss": val_advloss.avg.item(),
+        }
+        return val_results
+class LinfPGDAttack(PGDAttack):
+    """
+    PGD Attack with order=Linf
+    Arguments:
+        predict (nn.Module): forward pass function.
+        loss_fn (nn.Module): loss function.
+        eps (float): maximum distortion.
+        nb_iter (int): number of iterations.
+        eps_iter (float): attack step size.
+        rand_init (bool): (optional) random initialization.
+        clip_min (float): mininum value per input dimension.
+        clip_max (float): maximum value per input dimension.
+        targeted (bool): if the attack is targeted.
+        rand_init_type (str): (optional) random initialization type.
+    """
+    def __init__(
+            self, predict, loss_fn=None, eps=0.3, nb_iter=40, eps_iter=0.01, rand_init=True, clip_min=0., clip_max=1.,
+            targeted=False, rand_init_type='uniform'):
+        ord = np.inf
+        super(LinfPGDAttack, self).__init__(
+            predict=predict, loss_fn=loss_fn, eps=eps, nb_iter=nb_iter, eps_iter=eps_iter, rand_init=rand_init,
+            clip_min=clip_min, clip_max=clip_max, targeted=targeted, ord=ord, rand_init_type=rand_init_type)
+class L2PGDAttack(PGDAttack):
+    """
+    PGD Attack with order=L2
+    Arguments:
+        predict (nn.Module): forward pass function.
+        loss_fn (nn.Module): loss function.
+        eps (float): maximum distortion.
+        nb_iter (int): number of iterations.
+        eps_iter (float): attack step size.
+        rand_init (bool): (optional) random initialization.
+        clip_min (float): mininum value per input dimension.
+        clip_max (float): maximum value per input dimension.
+        targeted (bool): if the attack is targeted.
+        rand_init_type (str): (optional) random initialization type.
+    """
+    def __init__(
+            self, predict, loss_fn=None, eps=0.3, nb_iter=40, eps_iter=0.01, rand_init=True, clip_min=0., clip_max=1.,
+            targeted=False, rand_init_type='uniform'):
+        ord = 2
+        super(L2PGDAttack, self).__init__(
+            predict=predict, loss_fn=loss_fn, eps=eps, nb_iter=nb_iter, eps_iter=eps_iter, rand_init=rand_init,
+            clip_min=clip_min, clip_max=clip_max, targeted=targeted, ord=ord, rand_init_type=rand_init_type)

attacks/squred.py ADDED Viewed

	@@ -0,0 +1,86 @@

+from autoattack import AutoAttack
+import numpy as np
+from .base import Attack,LabelMixin
+import torch.nn as nn
+from utils.distributed import DistributedMetric
+from tqdm import tqdm
+from torchpack import distributed as dist
+from utils import accuracy
+from typing import Dict
+from .utils import  ctx_noparamgrad_and_eval
+class Squre_Attack(Attack, LabelMixin):
+    def __init__(
+            self, predict, loss_fn=None, eps=0.3, nb_iter=40, eps_iter=0.01, rand_init=True, clip_min=0., clip_max=1.,
+            ord=np.inf, targeted=False, rand_init_type='uniform'):
+        super(Squre_Attack, self).__init__(predict, loss_fn, clip_min, clip_max)
+        self.eps = eps
+        self.nb_iter = nb_iter
+        self.eps_iter = eps_iter
+        self.rand_init = rand_init
+        self.rand_init_type = rand_init_type
+        self.ord = ord
+        self.targeted = targeted
+        if self.loss_fn is None:
+            self.loss_fn = nn.CrossEntropyLoss(reduction="sum")
+        self.adversary = AutoAttack(predict, norm='Linf', eps=self.eps, version='standard')
+    def perturb(self, x, y=None):
+        self.adversary.attacks_to_run=['square']
+        adversarial_examples = self.adversary.run_standard_evaluation(x, y, bs=100)
+        return adversarial_examples,adversarial_examples
+    def eval_squred(self,data_loader_dict: Dict)-> Dict:
+        test_criterion = nn.CrossEntropyLoss().cuda()
+        val_loss = DistributedMetric()
+        val_top1 = DistributedMetric()
+        val_top5 = DistributedMetric()
+        val_advloss = DistributedMetric()
+        val_advtop1 = DistributedMetric()
+        val_advtop5 = DistributedMetric()
+        self.predict.eval()
+        with tqdm(
+                total=len(data_loader_dict["val"]),
+                desc="Eval",
+                disable=not dist.is_master(),
+            ) as t:
+                for images, labels in data_loader_dict["val"]:
+                    images, labels = images.cuda(), labels.cuda()
+                    # compute output
+                    output = self.predict(images)
+                    loss = test_criterion(output, labels)
+                    val_loss.update(loss, images.shape[0])
+                    acc1, acc5 = accuracy(output, labels, topk=(1, 5))
+                    val_top5.update(acc5[0], images.shape[0])
+                    val_top1.update(acc1[0], images.shape[0])
+                    with ctx_noparamgrad_and_eval(self.predict):
+                        images_adv,_ = self.perturb(images, labels)
+                    output_adv = self.predict(images_adv)
+                    loss_adv = test_criterion(output_adv,labels)
+                    val_advloss.update(loss_adv, images.shape[0])
+                    acc1_adv, acc5_adv = accuracy(output_adv, labels, topk=(1, 5))
+                    val_advtop1.update(acc1_adv[0], images.shape[0])
+                    val_advtop5.update(acc5_adv[0], images.shape[0])
+                    t.set_postfix(
+                        {
+                            "loss": val_loss.avg.item(),
+                            "top1": val_top1.avg.item(),
+                            "top5": val_top5.avg.item(),
+                            "adv_loss": val_advloss.avg.item(),
+                            "adv_top1": val_advtop1.avg.item(),
+                            "adv_top5": val_advtop5.avg.item(),
+                            "#samples": val_top1.count.item(),
+                            "batch_size": images.shape[0],
+                            "img_size": images.shape[2],
+                        }
+                    )
+                    t.update()
+        val_results = {
+            "val_top1": val_top1.avg.item(),
+            "val_top5": val_top5.avg.item(),
+            "val_loss": val_loss.avg.item(),
+            "val_advtop1": val_advtop1.avg.item(),
+            "val_advtop5": val_advtop5.avg.item(),
+            "val_advloss": val_advloss.avg.item(),
+        }
+        return val_results

attacks/utils.py ADDED Viewed

	@@ -0,0 +1,279 @@

+import numpy as np
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from torch.autograd import Variable
+from torch.distributions import laplace
+from torch.distributions import uniform
+from torch.nn.modules.loss import _Loss
+from contextlib import contextmanager
+def replicate_input(x):
+    """
+    Clone the input tensor x.
+    """
+    return x.detach().clone()
+def replicate_input_withgrad(x):
+    """
+    Clone the input tensor x and set requires_grad=True.
+    """
+    return x.detach().clone().requires_grad_()
+def calc_l2distsq(x, y):
+    """
+    Calculate L2 distance between tensors x and y.
+    """
+    d = (x - y)**2
+    return d.view(d.shape[0], -1).sum(dim=1)
+def clamp(input, min=None, max=None):
+    """
+    Clamp a tensor by its minimun and maximun values.
+    """
+    ndim = input.ndimension()
+    if min is None:
+        pass
+    elif isinstance(min, (float, int)):
+        input = torch.clamp(input, min=min)
+    elif isinstance(min, torch.Tensor):
+        if min.ndimension() == ndim - 1 and min.shape == input.shape[1:]:
+            input = torch.max(input, min.view(1, *min.shape))
+        else:
+            assert min.shape == input.shape
+            input = torch.max(input, min)
+    else:
+        raise ValueError("min can only be None | float | torch.Tensor")
+    if max is None:
+        pass
+    elif isinstance(max, (float, int)):
+        input = torch.clamp(input, max=max)
+    elif isinstance(max, torch.Tensor):
+        if max.ndimension() == ndim - 1 and max.shape == input.shape[1:]:
+            input = torch.min(input, max.view(1, *max.shape))
+        else:
+            assert max.shape == input.shape
+            input = torch.min(input, max)
+    else:
+        raise ValueError("max can only be None | float | torch.Tensor")
+    return input
+def _batch_multiply_tensor_by_vector(vector, batch_tensor):
+    """Equivalent to the following.
+    for ii in range(len(vector)):
+        batch_tensor.data[ii] *= vector[ii]
+    return batch_tensor
+    """
+    return (
+        batch_tensor.transpose(0, -1) * vector).transpose(0, -1).contiguous()
+def _batch_clamp_tensor_by_vector(vector, batch_tensor):
+    """Equivalent to the following.
+    for ii in range(len(vector)):
+        batch_tensor[ii] = clamp(
+            batch_tensor[ii], -vector[ii], vector[ii])
+    """
+    return torch.min(
+        torch.max(batch_tensor.transpose(0, -1), -vector), vector
+    ).transpose(0, -1).contiguous()
+def batch_multiply(float_or_vector, tensor):
+    """
+    Multpliy a batch of tensors with a float or vector.
+    """
+    if isinstance(float_or_vector, torch.Tensor):
+        assert len(float_or_vector) == len(tensor)
+        tensor = _batch_multiply_tensor_by_vector(float_or_vector, tensor)
+    elif isinstance(float_or_vector, float):
+        tensor *= float_or_vector
+    else:
+        raise TypeError("Value has to be float or torch.Tensor")
+    return tensor
+def batch_clamp(float_or_vector, tensor):
+    """
+    Clamp a batch of tensors.
+    """
+    if isinstance(float_or_vector, torch.Tensor):
+        assert len(float_or_vector) == len(tensor)
+        tensor = _batch_clamp_tensor_by_vector(float_or_vector, tensor)
+        return tensor
+    elif isinstance(float_or_vector, float):
+        tensor = clamp(tensor, -float_or_vector, float_or_vector)
+    else:
+        raise TypeError("Value has to be float or torch.Tensor")
+    return tensor
+def _get_norm_batch(x, p):
+    """
+    Returns the Lp norm of batch x.
+    """
+    batch_size = x.size(0)
+    return x.abs().pow(p).view(batch_size, -1).sum(dim=1).pow(1. / p)
+def _thresh_by_magnitude(theta, x):
+    """
+    Threshold by magnitude.
+    """
+    return torch.relu(torch.abs(x) - theta) * x.sign()
+def clamp_by_pnorm(x, p, r):
+    """
+    Clamp tensor by its norm.
+    """
+    assert isinstance(p, float) or isinstance(p, int)
+    norm = _get_norm_batch(x, p)
+    if isinstance(r, torch.Tensor):
+        assert norm.size() == r.size()
+    else:
+        assert isinstance(r, float)
+    factor = torch.min(r / norm, torch.ones_like(norm))
+    return batch_multiply(factor, x)
+def is_float_or_torch_tensor(x):
+    """
+    Return whether input x is a float or a torch.Tensor.
+    """
+    return isinstance(x, torch.Tensor) or isinstance(x, float)
+def normalize_by_pnorm(x, p=2, small_constant=1e-6):
+    """
+    Normalize gradients for gradient (not gradient sign) attacks.
+    Arguments:
+        x (torch.Tensor): tensor containing the gradients on the input.
+        p (int): (optional) order of the norm for the normalization (1 or 2).
+        small_constant (float): (optional) to avoid dividing by zero.
+    Returns:
+        normalized gradients.
+    """
+    assert isinstance(p, float) or isinstance(p, int)
+    norm = _get_norm_batch(x, p)
+    norm = torch.max(norm, torch.ones_like(norm) * small_constant)
+    return batch_multiply(1. / norm, x)
+def rand_init_delta(delta, x, ord, eps, clip_min, clip_max):
+    """
+    Randomly initialize the perturbation.
+    """
+    if isinstance(eps, torch.Tensor):
+        assert len(eps) == len(delta)
+    if ord == np.inf:
+        delta.data.uniform_(-1, 1)
+        delta.data = batch_multiply(eps, delta.data)
+    elif ord == 2:
+        delta.data.uniform_(clip_min, clip_max)
+        delta.data = delta.data - x
+        delta.data = clamp_by_pnorm(delta.data, ord, eps)
+    elif ord == 1:
+        ini = laplace.Laplace(
+            loc=delta.new_tensor(0), scale=delta.new_tensor(1))
+        delta.data = ini.sample(delta.data.shape)
+        delta.data = normalize_by_pnorm(delta.data, p=1)
+        ray = uniform.Uniform(0, eps).sample()
+        delta.data *= ray
+        delta.data = clamp(x.data + delta.data, clip_min, clip_max) - x.data
+    else:
+        error = "Only ord = inf, ord = 1 and ord = 2 have been implemented"
+        raise NotImplementedError(error)
+    delta.data = clamp(
+        x + delta.data, min=clip_min, max=clip_max) - x
+    return delta.data
+def CWLoss(output, target, confidence=0):
+    """
+    CW loss (Marging loss).
+    """
+    num_classes = output.shape[-1]
+    target = target.data
+    target_onehot = torch.zeros(target.size() + (num_classes,))
+    target_onehot = target_onehot.cuda()
+    target_onehot.scatter_(1, target.unsqueeze(1), 1.)
+    target_var = Variable(target_onehot, requires_grad=False)
+    real = (target_var * output).sum(1)
+    other = ((1. - target_var) * output - target_var * 10000.).max(1)[0]
+    loss = - torch.clamp(real - other + confidence, min=0.)
+    loss = torch.sum(loss)
+    return loss
+class ctx_noparamgrad(object):
+    def __init__(self, module):
+        self.prev_grad_state = get_param_grad_state(module)
+        self.module = module
+        set_param_grad_off(module)
+    def __enter__(self):
+        pass
+    def __exit__(self, *args):
+        set_param_grad_state(self.module, self.prev_grad_state)
+        return False
+class ctx_eval(object):
+    def __init__(self, module):
+        self.prev_training_state = get_module_training_state(module)
+        self.module = module
+        set_module_training_off(module)
+    def __enter__(self):
+        pass
+    def __exit__(self, *args):
+        set_module_training_state(self.module, self.prev_training_state)
+        return False
+@contextmanager
+def ctx_noparamgrad_and_eval(module):
+    with ctx_noparamgrad(module) as a, ctx_eval(module) as b:
+        yield (a, b)
+def get_module_training_state(module):
+    return {mod: mod.training for mod in module.modules()}
+def set_module_training_state(module, training_state):
+    for mod in module.modules():
+        mod.training = training_state[mod]
+def set_module_training_off(module):
+    for mod in module.modules():
+        mod.training = False
+def get_param_grad_state(module):
+    return {param: param.requires_grad for param in module.parameters()}
+def set_param_grad_state(module, grad_state):
+    for param in module.parameters():
+        param.requires_grad = grad_state[param]
+def set_param_grad_off(module):
+    for param in module.parameters():
+        param.requires_grad = False