add files

- checkpoint.py +96 -0
- data_pipeline.py +85 -0
- dataset_utils/crop_image_borders.py +57 -0
- dataset_utils/images_to_tfrecords.py +145 -0
- fid/__init__.py +1 -0
- fid/core.py +150 -0
- fid/inception.py +655 -0
- fid/utils.py +59 -0
- generate_images.py +61 -0
- main.py +102 -0
- requirements.txt +14 -0
- stylegan2/__init__.py +5 -0
- stylegan2/discriminator.py +451 -0
- stylegan2/generator.py +713 -0
- stylegan2/ops.py +674 -0
- stylegan2/utils.py +37 -0
- training.py +382 -0
- training_steps.py +219 -0
- training_utils.py +174 -0
checkpoint.py
ADDED
@@ -0,0 +1,96 @@
import flax
import dill as pickle
import os
import builtins
from jax._src.lib import xla_client
import tensorflow as tf


# Hack: this is the module reported by this object.
# https://github.com/google/jax/issues/8505
builtins.bfloat16 = xla_client.bfloat16


def pickle_dump(obj, filename):
    """Wrapper to dump an object to a file."""
    with tf.io.gfile.GFile(filename, 'wb') as f:
        f.write(pickle.dumps(obj))


def pickle_load(filename):
    """Wrapper to load an object from a file."""
    with tf.io.gfile.GFile(filename, 'rb') as f:
        pickled = pickle.loads(f.read())
    return pickled


def save_checkpoint(ckpt_dir, state_G, state_D, params_ema_G, pl_mean, config, step, epoch, fid_score=None, keep=2):
    """
    Saves a checkpoint.

    Args:
        ckpt_dir (str): Path to the directory where checkpoints are saved.
        state_G (train_state.TrainState): Generator state.
        state_D (train_state.TrainState): Discriminator state.
        params_ema_G (frozen_dict.FrozenDict): Parameters of the EMA generator.
        pl_mean (array): Moving average of the path length (generator regularization).
        config (argparse.Namespace): Configuration.
        step (int): Current step.
        epoch (int): Current epoch.
        fid_score (float): FID score corresponding to the checkpoint.
        keep (int): Number of checkpoints to keep.
    """
    state_dict = {'state_G': flax.jax_utils.unreplicate(state_G),
                  'state_D': flax.jax_utils.unreplicate(state_D),
                  'params_ema_G': params_ema_G,
                  'pl_mean': flax.jax_utils.unreplicate(pl_mean),
                  'config': config,
                  'fid_score': fid_score,
                  'step': step,
                  'epoch': epoch}

    pickle_dump(state_dict, os.path.join(ckpt_dir, f'ckpt_{step}.pickle'))
    ckpts = tf.io.gfile.glob(os.path.join(ckpt_dir, '*.pickle'))
    if len(ckpts) > keep:
        # Delete the oldest checkpoint by modification time.
        modified_times = {}
        for ckpt in ckpts:
            stats = tf.io.gfile.stat(ckpt)
            modified_times[ckpt] = stats.mtime_nsec
        oldest_ckpt = sorted(modified_times, key=modified_times.get)[0]
        tf.io.gfile.remove(oldest_ckpt)


def load_checkpoint(filename):
    """
    Loads a checkpoint.

    Args:
        filename (str): Path to the checkpoint file.

    Returns:
        (dict): Checkpoint.
    """
    state_dict = pickle_load(filename)
    return state_dict


def get_latest_checkpoint(ckpt_dir):
    """
    Returns the path of the latest checkpoint.

    Args:
        ckpt_dir (str): Path to the directory where checkpoints are saved.

    Returns:
        (str): Path to latest checkpoint (if it exists).
    """
    ckpts = tf.io.gfile.glob(os.path.join(ckpt_dir, '*.pickle'))
    if len(ckpts) == 0:
        return None

    modified_times = {}
    for ckpt in ckpts:
        stats = tf.io.gfile.stat(ckpt)
        modified_times[ckpt] = stats.mtime_nsec
    latest_ckpt = sorted(modified_times, key=modified_times.get)[-1]
    return latest_ckpt
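
A minimal usage sketch (not part of the commit) showing how these helpers would typically be wired into a training loop. The checkpoint directory is a placeholder, and state_G, state_D, params_ema_G, and pl_mean are assumed to be the replicated training objects that save_checkpoint unreplicates internally:

import checkpoint

ckpt_dir = 'checkpoints/run0'  # any path tf.io.gfile understands, local or gs://

# Resume if a checkpoint exists.
latest = checkpoint.get_latest_checkpoint(ckpt_dir)
if latest is not None:
    ckpt = checkpoint.load_checkpoint(latest)  # plain dict, keys as in save_checkpoint
    step, epoch = ckpt['step'], ckpt['epoch']
    params_ema_G = ckpt['params_ema_G']

# Inside the training loop, e.g. every N steps:
# checkpoint.save_checkpoint(ckpt_dir, state_G, state_D, params_ema_G, pl_mean,
#                            config, step, epoch, fid_score=fid, keep=2)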
data_pipeline.py
ADDED
@@ -0,0 +1,85 @@
import tensorflow as tf
import tensorflow_datasets as tfds
import jax
import flax
import numpy as np
from PIL import Image
import os
from typing import Sequence
from tqdm import tqdm
import json
import logging

logger = logging.getLogger(__name__)


def prefetch(dataset, n_prefetch):
    # Taken from: https://github.com/google-research/vision_transformer/blob/master/vit_jax/input_pipeline.py
    ds_iter = iter(dataset)
    ds_iter = map(lambda x: jax.tree_map(lambda t: np.asarray(memoryview(t)), x),
                  ds_iter)
    if n_prefetch:
        ds_iter = flax.jax_utils.prefetch_to_device(ds_iter, n_prefetch)
    return ds_iter


def get_data(data_dir, img_size, img_channels, num_classes, num_local_devices, batch_size, shuffle_buffer=1000):
    """
    Loads a TFRecords dataset and prepares it for multi-device training.

    Args:
        data_dir (str): Root directory of the dataset.
        img_size (int): Image size for training.
        img_channels (int): Number of image channels.
        num_classes (int): Number of classes, 0 for no classes.
        num_local_devices (int): Number of devices.
        batch_size (int): Batch size (per device).
        shuffle_buffer (int): Buffer used for shuffling the dataset.

    Returns:
        (tf.data.Dataset, dict): Dataset and dataset info.
    """

    def pre_process(serialized_example):
        feature = {'height': tf.io.FixedLenFeature([], tf.int64),
                   'width': tf.io.FixedLenFeature([], tf.int64),
                   'channels': tf.io.FixedLenFeature([], tf.int64),
                   'image': tf.io.FixedLenFeature([], tf.string),
                   'label': tf.io.FixedLenFeature([], tf.int64)}
        example = tf.io.parse_single_example(serialized_example, feature)

        height = tf.cast(example['height'], dtype=tf.int64)
        width = tf.cast(example['width'], dtype=tf.int64)
        channels = tf.cast(example['channels'], dtype=tf.int64)

        image = tf.io.decode_raw(example['image'], out_type=tf.uint8)
        image = tf.reshape(image, shape=[height, width, channels])

        image = tf.cast(image, dtype='float32')
        image = tf.image.resize(image, size=[img_size, img_size], method='bicubic', antialias=True)
        image = tf.image.random_flip_left_right(image)

        image = (image - 127.5) / 127.5

        label = tf.one_hot(example['label'], num_classes)
        return {'image': image, 'label': label}

    def shard(data):
        # Reshape images from [num_devices * batch_size, H, W, C] to [num_devices, batch_size, H, W, C]
        # because the first dimension will be mapped across devices using jax.pmap.
        data['image'] = tf.reshape(data['image'], [num_local_devices, -1, img_size, img_size, img_channels])
        data['label'] = tf.reshape(data['label'], [num_local_devices, -1, num_classes])
        return data

    logger.info('Loading TFRecord...')
    with tf.io.gfile.GFile(os.path.join(data_dir, 'dataset_info.json'), 'r') as fin:
        dataset_info = json.load(fin)

    ds = tf.data.TFRecordDataset(filenames=os.path.join(data_dir, 'dataset.tfrecords'))
    ds = ds.shard(jax.process_count(), jax.process_index())
    ds = ds.shuffle(min(dataset_info['num_examples'], shuffle_buffer))
    ds = ds.map(pre_process, tf.data.AUTOTUNE)
    ds = ds.batch(batch_size * num_local_devices, drop_remainder=True)  # uses per-worker batch size
    ds = ds.map(shard, tf.data.AUTOTUNE)
    ds = ds.prefetch(1)  # prefetches the next batch
    return ds, dataset_info
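
A sketch of how this pipeline would be driven from a training script, assuming the TFRecords file was produced by dataset_utils/images_to_tfrecords.py below; the data directory and sizes are placeholder values:

import jax
from data_pipeline import get_data, prefetch

num_local_devices = jax.local_device_count()
ds, dataset_info = get_data(data_dir='data/my_dataset',  # hypothetical path
                            img_size=256, img_channels=3, num_classes=0,
                            num_local_devices=num_local_devices, batch_size=8)

# Each batch now carries images of shape [num_local_devices, 8, 256, 256, 3],
# staged on the devices and ready for a jax.pmap-ed training step.
for batch in prefetch(ds, n_prefetch=2):
    images, labels = batch['image'], batch['label']
    # ... feed into a pmapped train step here ...
    break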
dataset_utils/crop_image_borders.py
ADDED
@@ -0,0 +1,57 @@
import numpy as np
from PIL import Image
import os
from tqdm import tqdm
import argparse
import logging

logger = logging.getLogger(__name__)

"""
Crops the black borders around images.
"""


def crop_border(x, constant=0.0):
    top = 0
    while True:
        if np.sum(x[top] != constant) != 0.0:
            break
        top += 1
    bottom = x.shape[0] - 1
    while True:
        if np.sum(x[bottom] != constant) != 0.0:
            bottom += 1  # make the bound exclusive so the last content row is kept
            break
        bottom -= 1
    left = 0
    while True:
        if np.sum(x[:, left] != constant) != 0.0:
            break
        left += 1
    right = x.shape[1] - 1
    while True:
        if np.sum(x[:, right] != constant) != 0.0:
            right += 1  # make the bound exclusive so the last content column is kept
            break
        right -= 1
    return x[top:bottom, left:right]


def crop_images(path, constant_value):
    logger.info('Crop image borders...')
    for f in tqdm(os.listdir(path)):
        img = Image.open(os.path.join(path, f))
        img = crop_border(np.array(img), constant=constant_value)
        img = Image.fromarray(img)
        img.save(os.path.join(path, f))


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--image_dir', type=str, help='Path to the image directory.')
    parser.add_argument('--constant_value', type=float, default=0.0, help='Value of the border that should be cropped.')

    args = parser.parse_args()

    crop_images(args.image_dir, args.constant_value)
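
To make the exclusive-bound arithmetic in crop_border concrete, here is a tiny self-contained check on a synthetic image with a one-pixel black border (the array values are made up for illustration):

import numpy as np
from dataset_utils.crop_image_borders import crop_border

img = np.zeros((6, 8, 3), dtype=np.uint8)  # all-black canvas
img[1:5, 1:7] = 255                        # white interior, 1-px black border

cropped = crop_border(img, constant=0.0)
assert cropped.shape == (4, 6, 3)  # the border rows and columns are gone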
dataset_utils/images_to_tfrecords.py
ADDED
@@ -0,0 +1,145 @@
import tensorflow as tf
import numpy as np
from PIL import Image
from typing import Sequence
from tqdm import tqdm
import argparse
import json
import os
import logging

logger = logging.getLogger(__name__)


def images_to_tfrecords(image_dir, data_dir, has_labels):
    """
    Converts a folder of images to a TFRecord file.

    The image directory should have one of the following structures:

    If has_labels = False, image_dir should look like this:

        path/to/image_dir/
            0.jpg
            1.jpg
            2.jpg
            4.jpg
            ...

    If has_labels = True, image_dir should look like this:

        path/to/image_dir/
            label0/
                0.jpg
                1.jpg
                ...
            label1/
                a.jpg
                b.jpg
                c.jpg
                ...
            ...

    The labels will be label0 -> 0, label1 -> 1.

    Args:
        image_dir (str): Path to images.
        data_dir (str): Path where the TFRecords dataset is stored.
        has_labels (bool): If True, 'image_dir' contains label directories.

    Returns:
        (dict): Dataset info.
    """

    def _bytes_feature(value):
        return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))

    def _int64_feature(value):
        return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))

    os.makedirs(data_dir, exist_ok=True)
    writer = tf.io.TFRecordWriter(os.path.join(data_dir, 'dataset.tfrecords'))

    num_examples = 0
    num_classes = 0

    if has_labels:
        for label_dir in os.listdir(image_dir):
            if not os.path.isdir(os.path.join(image_dir, label_dir)):
                logger.warning('The image directory should contain one directory for each label.')
                logger.warning('These label directories should contain the image files.')
                if os.path.exists(os.path.join(data_dir, 'dataset.tfrecords')):
                    os.remove(os.path.join(data_dir, 'dataset.tfrecords'))
                return

            for img_file in tqdm(os.listdir(os.path.join(image_dir, label_dir))):
                file_format = img_file[img_file.rfind('.') + 1:]
                if file_format not in ['png', 'jpg', 'jpeg']:
                    continue

                #img = Image.open(os.path.join(image_dir, label_dir, img_file)).resize(img_size)
                img = Image.open(os.path.join(image_dir, label_dir, img_file))
                img = np.array(img, dtype=np.uint8)

                height = img.shape[0]
                width = img.shape[1]
                channels = img.shape[2]

                img_encoded = img.tobytes()

                example = tf.train.Example(features=tf.train.Features(feature={
                    'height': _int64_feature(height),
                    'width': _int64_feature(width),
                    'channels': _int64_feature(channels),
                    'image': _bytes_feature(img_encoded),
                    'label': _int64_feature(num_classes)}))

                writer.write(example.SerializeToString())
                num_examples += 1

            num_classes += 1
    else:
        for img_file in tqdm(os.listdir(os.path.join(image_dir))):
            file_format = img_file[img_file.rfind('.') + 1:]
            if file_format not in ['png', 'jpg', 'jpeg']:
                continue

            #img = Image.open(os.path.join(image_dir, label_dir, img_file)).resize(img_size)
            img = Image.open(os.path.join(image_dir, img_file))
            img = np.array(img, dtype=np.uint8)

            height = img.shape[0]
            width = img.shape[1]
            channels = img.shape[2]

            img_encoded = img.tobytes()

            example = tf.train.Example(features=tf.train.Features(feature={
                'height': _int64_feature(height),
                'width': _int64_feature(width),
                'channels': _int64_feature(channels),
                'image': _bytes_feature(img_encoded),
                'label': _int64_feature(num_classes)}))  # dummy label

            writer.write(example.SerializeToString())
            num_examples += 1

    writer.close()

    dataset_info = {'num_examples': num_examples, 'num_classes': num_classes}
    with open(os.path.join(data_dir, 'dataset_info.json'), 'w') as fout:
        json.dump(dataset_info, fout)


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--image_dir', type=str, help='Path to the image directory.')
    parser.add_argument('--data_dir', type=str, help='Path where the TFRecords dataset is stored.')
    parser.add_argument('--has_labels', action='store_true', help='If True, image_dir contains label directories.')

    args = parser.parse_args()

    images_to_tfrecords(args.image_dir, args.data_dir, args.has_labels)
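
For reference, a hedged sketch of driving the conversion from Python instead of the CLI and inspecting the resulting metadata; the directory names are hypothetical:

import json
import os
from dataset_utils.images_to_tfrecords import images_to_tfrecords

# A flat, unlabeled image folder -> data/my_dataset/{dataset.tfrecords, dataset_info.json}
images_to_tfrecords(image_dir='images/my_dataset', data_dir='data/my_dataset', has_labels=False)

with open(os.path.join('data/my_dataset', 'dataset_info.json')) as f:
    info = json.load(f)
print(info['num_examples'], info['num_classes'])  # num_classes stays 0 without labels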
fid/__init__.py
ADDED
@@ -0,0 +1 @@
from .core import FID
fid/core.py
ADDED
@@ -0,0 +1,150 @@
import jax
import jax.numpy as jnp
import flax
import flax.linen as nn
import numpy as np
import os
import functools
import argparse
import scipy
from tqdm import tqdm
import logging

from . import inception
from . import utils

logger = logging.getLogger(__name__)


class FID:

    def __init__(self, generator, dataset, config, use_cache=True, truncation_psi=1.0):
        """
        Evaluates the FID score for a given generator and a given dataset.
        Implementation mostly taken from https://github.com/matthias-wright/jax-fid

        Reference: https://arxiv.org/abs/1706.08500

        Args:
            generator (nn.Module): Generator network.
            dataset (tf.data.Dataset): Dataset containing the real images.
            config (argparse.Namespace): Configuration.
            use_cache (bool): If True, only compute the activation stats once for the real images and store them.
            truncation_psi (float): Controls truncation (trading off variation for quality). If 1, truncation is disabled.
        """
        self.num_images = config.num_fid_images
        self.batch_size = config.batch_size
        self.c_dim = config.c_dim
        self.z_dim = config.z_dim
        self.dataset = dataset
        self.num_devices = jax.device_count()
        self.num_local_devices = jax.local_device_count()
        self.use_cache = use_cache

        if self.use_cache:
            self.cache = {}

        rng = jax.random.PRNGKey(0)
        inception_net = inception.InceptionV3(pretrained=True)
        self.inception_params = inception_net.init(rng, jnp.ones((1, config.resolution, config.resolution, 3)))
        self.inception_params = flax.jax_utils.replicate(self.inception_params)
        #self.inception = jax.jit(functools.partial(model.apply, train=False))
        self.inception_apply = jax.pmap(functools.partial(inception_net.apply, train=False), axis_name='batch')

        self.generator_apply = jax.pmap(functools.partial(generator.apply, truncation_psi=truncation_psi, train=False, noise_mode='const'), axis_name='batch')

    def compute_fid(self, generator_params, seed_offset=0):
        generator_params = flax.jax_utils.replicate(generator_params)
        mu_real, sigma_real = self.compute_stats_for_dataset()
        mu_fake, sigma_fake = self.compute_stats_for_generator(generator_params, seed_offset)
        fid_score = self.compute_frechet_distance(mu_real, mu_fake, sigma_real, sigma_fake, eps=1e-6)
        return fid_score

    def compute_frechet_distance(self, mu1, mu2, sigma1, sigma2, eps=1e-6):
        # Taken from: https://github.com/mseitzer/pytorch-fid/blob/master/src/pytorch_fid/fid_score.py
        mu1 = np.atleast_1d(mu1)
        mu2 = np.atleast_1d(mu2)
        sigma1 = np.atleast_1d(sigma1)
        sigma2 = np.atleast_1d(sigma2)

        assert mu1.shape == mu2.shape
        assert sigma1.shape == sigma2.shape

        diff = mu1 - mu2

        covmean, _ = scipy.linalg.sqrtm(sigma1.dot(sigma2), disp=False)
        if not np.isfinite(covmean).all():
            msg = ('fid calculation produces singular product; '
                   'adding %s to diagonal of cov estimates') % eps
            logger.info(msg)
            offset = np.eye(sigma1.shape[0]) * eps
            covmean = scipy.linalg.sqrtm((sigma1 + offset).dot(sigma2 + offset))

        # Numerical error might give slight imaginary component
        if np.iscomplexobj(covmean):
            if not np.allclose(np.diagonal(covmean).imag, 0, atol=1e-3):
                m = np.max(np.abs(covmean.imag))
                raise ValueError('Imaginary component {}'.format(m))
            covmean = covmean.real

        tr_covmean = np.trace(covmean)
        return (diff.dot(diff) + np.trace(sigma1) + np.trace(sigma2) - 2 * tr_covmean)

    def compute_stats_for_dataset(self):
        if self.use_cache and 'mu' in self.cache and 'sigma' in self.cache:
            logger.info('Use cached statistics for dataset...')
            return self.cache['mu'], self.cache['sigma']

        print()
        logger.info('Compute statistics for dataset...')
        image_count = 0

        activations = []
        for batch in utils.prefetch(self.dataset, n_prefetch=2):
            act = self.inception_apply(self.inception_params, jax.lax.stop_gradient(batch['image']))
            act = jnp.reshape(act, (self.num_local_devices * self.batch_size, -1))
            activations.append(act)

            image_count += self.num_local_devices * self.batch_size
            if image_count >= self.num_images:
                break

        activations = jnp.concatenate(activations, axis=0)
        activations = activations[:self.num_images]
        mu = np.mean(activations, axis=0)
        sigma = np.cov(activations, rowvar=False)
        if self.use_cache:
            # Only write the cache when caching is enabled; self.cache
            # does not exist otherwise.
            self.cache['mu'] = mu
            self.cache['sigma'] = sigma
        return mu, sigma

    def compute_stats_for_generator(self, generator_params, seed_offset):
        print()
        logger.info('Compute statistics for generator...')
        num_batches = int(np.ceil(self.num_images / (self.batch_size * self.num_local_devices)))

        activations = []

        for i in range(num_batches):
            rng = jax.random.PRNGKey(seed_offset + i)
            z_latent = jax.random.normal(rng, shape=(self.num_local_devices, self.batch_size, self.z_dim))

            labels = None
            if self.c_dim > 0:
                labels = jax.random.randint(rng, shape=(self.num_local_devices * self.batch_size,), minval=0, maxval=self.c_dim)
                labels = jax.nn.one_hot(labels, num_classes=self.c_dim)
                labels = jnp.reshape(labels, (self.num_local_devices, self.batch_size, self.c_dim))

            image = self.generator_apply(generator_params, jax.lax.stop_gradient(z_latent), labels)
            image = (image - jnp.min(image)) / (jnp.max(image) - jnp.min(image))

            image = 2 * image - 1
            act = self.inception_apply(self.inception_params, jax.lax.stop_gradient(image))
            act = jnp.reshape(act, (self.num_local_devices * self.batch_size, -1))
            activations.append(act)

        activations = jnp.concatenate(activations, axis=0)
        activations = activations[:self.num_images]
        mu = np.mean(activations, axis=0)
        sigma = np.cov(activations, rowvar=False)
        return mu, sigma
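
A sketch of the intended call pattern, assuming a generator whose apply accepts (params, z, labels) plus the truncation_psi/train/noise_mode keywords bound above, and a config namespace with the fields read in __init__; every name below is a placeholder:

import argparse
from fid import FID

config = argparse.Namespace(num_fid_images=10000, batch_size=8,
                            c_dim=0, z_dim=512, resolution=256)

# `generator` is the Flax generator module and `ds` the dataset returned by
# data_pipeline.get_data; both are assumed to exist at this point.
fid_metric = FID(generator, ds, config, use_cache=True)

# `params_ema_G` are the unreplicated EMA generator parameters; compute_fid
# replicates them before the pmapped apply. With use_cache=True the dataset
# statistics are computed once and reused for later evaluations.
score = fid_metric.compute_fid(params_ema_G, seed_offset=0)
print(f'FID: {score:.2f}')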
fid/inception.py
ADDED
|
@@ -0,0 +1,655 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import jax
|
| 2 |
+
from jax import lax
|
| 3 |
+
from jax.nn import initializers
|
| 4 |
+
import jax.numpy as jnp
|
| 5 |
+
import flax
|
| 6 |
+
from flax.linen.module import merge_param
|
| 7 |
+
import flax.linen as nn
|
| 8 |
+
from typing import Callable, Iterable, Optional, Tuple, Union, Any
|
| 9 |
+
import functools
|
| 10 |
+
import pickle
|
| 11 |
+
from . import utils
|
| 12 |
+
|
| 13 |
+
PRNGKey = Any
|
| 14 |
+
Array = Any
|
| 15 |
+
Shape = Tuple[int]
|
| 16 |
+
Dtype = Any
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
class InceptionV3(nn.Module):
|
| 20 |
+
"""
|
| 21 |
+
InceptionV3 network.
|
| 22 |
+
Reference: https://arxiv.org/abs/1512.00567
|
| 23 |
+
Ported mostly from: https://github.com/pytorch/vision/blob/master/torchvision/models/inception.py
|
| 24 |
+
|
| 25 |
+
Attributes:
|
| 26 |
+
include_head (bool): If True, include classifier head.
|
| 27 |
+
num_classes (int): Number of classes.
|
| 28 |
+
pretrained (bool): If True, use pretrained weights.
|
| 29 |
+
transform_input (bool): If True, preprocesses the input according to the method with which it
|
| 30 |
+
was trained on ImageNet.
|
| 31 |
+
aux_logits (bool): If True, add an auxiliary branch that can improve training.
|
| 32 |
+
dtype (str): Data type.
|
| 33 |
+
"""
|
| 34 |
+
include_head: bool=False
|
| 35 |
+
num_classes: int=1000
|
| 36 |
+
pretrained: bool=False
|
| 37 |
+
transform_input: bool=False
|
| 38 |
+
aux_logits: bool=False
|
| 39 |
+
ckpt_path: str='https://www.dropbox.com/s/0zo4pd6cfwgzem7/inception_v3_weights_fid.pickle?dl=1'
|
| 40 |
+
dtype: str='float32'
|
| 41 |
+
|
| 42 |
+
def setup(self):
|
| 43 |
+
if self.pretrained:
|
| 44 |
+
ckpt_file = utils.download(self.ckpt_path)
|
| 45 |
+
self.params_dict = pickle.load(open(ckpt_file, 'rb'))
|
| 46 |
+
self.num_classes_ = 1000
|
| 47 |
+
else:
|
| 48 |
+
self.params_dict = None
|
| 49 |
+
self.num_classes_ = self.num_classes
|
| 50 |
+
|
| 51 |
+
@nn.compact
|
| 52 |
+
def __call__(self, x, train=True, rng=jax.random.PRNGKey(0)):
|
| 53 |
+
"""
|
| 54 |
+
Args:
|
| 55 |
+
x (tensor): Input image, shape [B, H, W, C].
|
| 56 |
+
train (bool): If True, training mode.
|
| 57 |
+
rng (jax.random.PRNGKey): Random seed.
|
| 58 |
+
"""
|
| 59 |
+
x = self._transform_input(x)
|
| 60 |
+
x = BasicConv2d(out_channels=32,
|
| 61 |
+
kernel_size=(3, 3),
|
| 62 |
+
strides=(2, 2),
|
| 63 |
+
params_dict=utils.get(self.params_dict, 'Conv2d_1a_3x3'),
|
| 64 |
+
dtype=self.dtype)(x, train)
|
| 65 |
+
x = BasicConv2d(out_channels=32,
|
| 66 |
+
kernel_size=(3, 3),
|
| 67 |
+
params_dict=utils.get(self.params_dict, 'Conv2d_2a_3x3'),
|
| 68 |
+
dtype=self.dtype)(x, train)
|
| 69 |
+
x = BasicConv2d(out_channels=64,
|
| 70 |
+
kernel_size=(3, 3),
|
| 71 |
+
padding=((1, 1), (1, 1)),
|
| 72 |
+
params_dict=utils.get(self.params_dict, 'Conv2d_2b_3x3'),
|
| 73 |
+
dtype=self.dtype)(x, train)
|
| 74 |
+
x = nn.max_pool(x, window_shape=(3, 3), strides=(2, 2))
|
| 75 |
+
x = BasicConv2d(out_channels=80,
|
| 76 |
+
kernel_size=(1, 1),
|
| 77 |
+
params_dict=utils.get(self.params_dict, 'Conv2d_3b_1x1'),
|
| 78 |
+
dtype=self.dtype)(x, train)
|
| 79 |
+
x = BasicConv2d(out_channels=192,
|
| 80 |
+
kernel_size=(3, 3),
|
| 81 |
+
params_dict=utils.get(self.params_dict, 'Conv2d_4a_3x3'),
|
| 82 |
+
dtype=self.dtype)(x, train)
|
| 83 |
+
x = nn.max_pool(x, window_shape=(3, 3), strides=(2, 2))
|
| 84 |
+
x = InceptionA(pool_features=32,
|
| 85 |
+
params_dict=utils.get(self.params_dict, 'Mixed_5b'),
|
| 86 |
+
dtype=self.dtype)(x, train)
|
| 87 |
+
x = InceptionA(pool_features=64,
|
| 88 |
+
params_dict=utils.get(self.params_dict, 'Mixed_5c'),
|
| 89 |
+
dtype=self.dtype)(x, train)
|
| 90 |
+
x = InceptionA(pool_features=64,
|
| 91 |
+
params_dict=utils.get(self.params_dict, 'Mixed_5d'),
|
| 92 |
+
dtype=self.dtype)(x, train)
|
| 93 |
+
x = InceptionB(params_dict=utils.get(self.params_dict, 'Mixed_6a'),
|
| 94 |
+
dtype=self.dtype)(x, train)
|
| 95 |
+
x = InceptionC(channels_7x7=128,
|
| 96 |
+
params_dict=utils.get(self.params_dict, 'Mixed_6b'),
|
| 97 |
+
dtype=self.dtype)(x, train)
|
| 98 |
+
x = InceptionC(channels_7x7=160,
|
| 99 |
+
params_dict=utils.get(self.params_dict, 'Mixed_6c'),
|
| 100 |
+
dtype=self.dtype)(x, train)
|
| 101 |
+
x = InceptionC(channels_7x7=160,
|
| 102 |
+
params_dict=utils.get(self.params_dict, 'Mixed_6d'),
|
| 103 |
+
dtype=self.dtype)(x, train)
|
| 104 |
+
x = InceptionC(channels_7x7=192,
|
| 105 |
+
params_dict=utils.get(self.params_dict, 'Mixed_6e'),
|
| 106 |
+
dtype=self.dtype)(x, train)
|
| 107 |
+
aux = None
|
| 108 |
+
if self.aux_logits and train:
|
| 109 |
+
aux = InceptionAux(num_classes=self.num_classes_,
|
| 110 |
+
params_dict=utils.get(self.params_dict, 'AuxLogits'),
|
| 111 |
+
dtype=self.dtype)(x, train)
|
| 112 |
+
x = InceptionD(params_dict=utils.get(self.params_dict, 'Mixed_7a'),
|
| 113 |
+
dtype=self.dtype)(x, train)
|
| 114 |
+
x = InceptionE(avg_pool, params_dict=utils.get(self.params_dict, 'Mixed_7b'),
|
| 115 |
+
dtype=self.dtype)(x, train)
|
| 116 |
+
# Following the implementation by @mseitzer, we use max pooling instead
|
| 117 |
+
# of average pooling here.
|
| 118 |
+
# See: https://github.com/mseitzer/pytorch-fid/blob/master/src/pytorch_fid/inception.py#L320
|
| 119 |
+
x = InceptionE(nn.max_pool, params_dict=utils.get(self.params_dict, 'Mixed_7c'),
|
| 120 |
+
dtype=self.dtype)(x, train)
|
| 121 |
+
x = jnp.mean(x, axis=(1, 2), keepdims=True)
|
| 122 |
+
if not self.include_head:
|
| 123 |
+
return x
|
| 124 |
+
x = nn.Dropout(rate=0.5)(x, deterministic=not train, rng=rng)
|
| 125 |
+
x = jnp.reshape(x, newshape=(x.shape[0], -1))
|
| 126 |
+
x = Dense(features=self.num_classes_,
|
| 127 |
+
params_dict=utils.get(self.params_dict, 'fc'),
|
| 128 |
+
dtype=self.dtype)(x)
|
| 129 |
+
if self.aux_logits:
|
| 130 |
+
return x, aux
|
| 131 |
+
return x
|
| 132 |
+
|
| 133 |
+
def _transform_input(self, x):
|
| 134 |
+
if self.transform_input:
|
| 135 |
+
x_ch0 = jnp.expand_dims(x[..., 0], axis=-1) * (0.229 / 0.5) + (0.485 - 0.5) / 0.5
|
| 136 |
+
x_ch1 = jnp.expand_dims(x[..., 1], axis=-1) * (0.224 / 0.5) + (0.456 - 0.5) / 0.5
|
| 137 |
+
x_ch2 = jnp.expand_dims(x[..., 2], axis=-1) * (0.225 / 0.5) + (0.406 - 0.5) / 0.5
|
| 138 |
+
x = jnp.concatenate((x_ch0, x_ch1, x_ch2), axis=-1)
|
| 139 |
+
return x
|
| 140 |
+
|
| 141 |
+
|
| 142 |
+
class Dense(nn.Module):
|
| 143 |
+
features: int
|
| 144 |
+
kernel_init: functools.partial=nn.initializers.lecun_normal()
|
| 145 |
+
bias_init: functools.partial=nn.initializers.zeros
|
| 146 |
+
params_dict: dict=None
|
| 147 |
+
dtype: str='float32'
|
| 148 |
+
|
| 149 |
+
@nn.compact
|
| 150 |
+
def __call__(self, x):
|
| 151 |
+
x = nn.Dense(features=self.features,
|
| 152 |
+
kernel_init=self.kernel_init if self.params_dict is None else lambda *_ : jnp.array(self.params_dict['kernel']),
|
| 153 |
+
bias_init=self.bias_init if self.params_dict is None else lambda *_ : jnp.array(self.params_dict['bias']))(x)
|
| 154 |
+
return x
|
| 155 |
+
|
| 156 |
+
|
| 157 |
+
class BasicConv2d(nn.Module):
|
| 158 |
+
out_channels: int
|
| 159 |
+
kernel_size: Union[int, Iterable[int]]=(3, 3)
|
| 160 |
+
strides: Optional[Iterable[int]]=(1, 1)
|
| 161 |
+
padding: Union[str, Iterable[Tuple[int, int]]]='valid'
|
| 162 |
+
use_bias: bool=False
|
| 163 |
+
kernel_init: functools.partial=nn.initializers.lecun_normal()
|
| 164 |
+
bias_init: functools.partial=nn.initializers.zeros
|
| 165 |
+
params_dict: dict=None
|
| 166 |
+
dtype: str='float32'
|
| 167 |
+
|
| 168 |
+
@nn.compact
|
| 169 |
+
def __call__(self, x, train=True):
|
| 170 |
+
x = nn.Conv(features=self.out_channels,
|
| 171 |
+
kernel_size=self.kernel_size,
|
| 172 |
+
strides=self.strides,
|
| 173 |
+
padding=self.padding,
|
| 174 |
+
use_bias=self.use_bias,
|
| 175 |
+
kernel_init=self.kernel_init if self.params_dict is None else lambda *_ : jnp.array(self.params_dict['conv']['kernel']),
|
| 176 |
+
bias_init=self.bias_init if self.params_dict is None else lambda *_ : jnp.array(self.params_dict['conv']['bias']),
|
| 177 |
+
dtype=self.dtype)(x)
|
| 178 |
+
if self.params_dict is None:
|
| 179 |
+
x = BatchNorm(epsilon=0.001,
|
| 180 |
+
momentum=0.1,
|
| 181 |
+
use_running_average=not train,
|
| 182 |
+
dtype=self.dtype)(x)
|
| 183 |
+
else:
|
| 184 |
+
x = BatchNorm(epsilon=0.001,
|
| 185 |
+
momentum=0.1,
|
| 186 |
+
bias_init=lambda *_ : jnp.array(self.params_dict['bn']['bias']),
|
| 187 |
+
scale_init=lambda *_ : jnp.array(self.params_dict['bn']['scale']),
|
| 188 |
+
mean_init=lambda *_ : jnp.array(self.params_dict['bn']['mean']),
|
| 189 |
+
var_init=lambda *_ : jnp.array(self.params_dict['bn']['var']),
|
| 190 |
+
use_running_average=not train,
|
| 191 |
+
dtype=self.dtype)(x)
|
| 192 |
+
x = jax.nn.relu(x)
|
| 193 |
+
return x
|
| 194 |
+
|
| 195 |
+
|
| 196 |
+
class InceptionA(nn.Module):
|
| 197 |
+
pool_features: int
|
| 198 |
+
params_dict: dict=None
|
| 199 |
+
dtype: str='float32'
|
| 200 |
+
|
| 201 |
+
@nn.compact
|
| 202 |
+
def __call__(self, x, train=True):
|
| 203 |
+
branch1x1 = BasicConv2d(out_channels=64,
|
| 204 |
+
kernel_size=(1, 1),
|
| 205 |
+
params_dict=utils.get(self.params_dict, 'branch1x1'),
|
| 206 |
+
dtype=self.dtype)(x, train)
|
| 207 |
+
branch5x5 = BasicConv2d(out_channels=48,
|
| 208 |
+
kernel_size=(1, 1),
|
| 209 |
+
params_dict=utils.get(self.params_dict, 'branch5x5_1'),
|
| 210 |
+
dtype=self.dtype)(x, train)
|
| 211 |
+
branch5x5 = BasicConv2d(out_channels=64,
|
| 212 |
+
kernel_size=(5, 5),
|
| 213 |
+
padding=((2, 2), (2, 2)),
|
| 214 |
+
params_dict=utils.get(self.params_dict, 'branch5x5_2'),
|
| 215 |
+
dtype=self.dtype)(branch5x5, train)
|
| 216 |
+
|
| 217 |
+
branch3x3dbl = BasicConv2d(out_channels=64,
|
| 218 |
+
kernel_size=(1, 1),
|
| 219 |
+
params_dict=utils.get(self.params_dict, 'branch3x3dbl_1'),
|
| 220 |
+
dtype=self.dtype)(x, train)
|
| 221 |
+
branch3x3dbl = BasicConv2d(out_channels=96,
|
| 222 |
+
kernel_size=(3, 3),
|
| 223 |
+
padding=((1, 1), (1, 1)),
|
| 224 |
+
params_dict=utils.get(self.params_dict, 'branch3x3dbl_2'),
|
| 225 |
+
dtype=self.dtype)(branch3x3dbl, train)
|
| 226 |
+
branch3x3dbl = BasicConv2d(out_channels=96,
|
| 227 |
+
kernel_size=(3, 3),
|
| 228 |
+
padding=((1, 1), (1, 1)),
|
| 229 |
+
params_dict=utils.get(self.params_dict, 'branch3x3dbl_3'),
|
| 230 |
+
dtype=self.dtype)(branch3x3dbl, train)
|
| 231 |
+
|
| 232 |
+
branch_pool = avg_pool(x, window_shape=(3, 3), strides=(1, 1), padding=((1, 1), (1, 1)))
|
| 233 |
+
branch_pool = BasicConv2d(out_channels=self.pool_features,
|
| 234 |
+
kernel_size=(1, 1),
|
| 235 |
+
params_dict=utils.get(self.params_dict, 'branch_pool'),
|
| 236 |
+
dtype=self.dtype)(branch_pool, train)
|
| 237 |
+
|
| 238 |
+
output = jnp.concatenate((branch1x1, branch5x5, branch3x3dbl, branch_pool), axis=-1)
|
| 239 |
+
return output
|
| 240 |
+
|
| 241 |
+
|
| 242 |
+
class InceptionB(nn.Module):
|
| 243 |
+
params_dict: dict=None
|
| 244 |
+
dtype: str='float32'
|
| 245 |
+
|
| 246 |
+
@nn.compact
|
| 247 |
+
def __call__(self, x, train=True):
|
| 248 |
+
branch3x3 = BasicConv2d(out_channels=384,
|
| 249 |
+
kernel_size=(3, 3),
|
| 250 |
+
strides=(2, 2),
|
| 251 |
+
params_dict=utils.get(self.params_dict, 'branch3x3'),
|
| 252 |
+
dtype=self.dtype)(x, train)
|
| 253 |
+
|
| 254 |
+
branch3x3dbl = BasicConv2d(out_channels=64,
|
| 255 |
+
kernel_size=(1, 1),
|
| 256 |
+
params_dict=utils.get(self.params_dict, 'branch3x3dbl_1'),
|
| 257 |
+
dtype=self.dtype)(x, train)
|
| 258 |
+
branch3x3dbl = BasicConv2d(out_channels=96,
|
| 259 |
+
kernel_size=(3, 3),
|
| 260 |
+
padding=((1, 1), (1, 1)),
|
| 261 |
+
params_dict=utils.get(self.params_dict, 'branch3x3dbl_2'),
|
| 262 |
+
dtype=self.dtype)(branch3x3dbl, train)
|
| 263 |
+
branch3x3dbl = BasicConv2d(out_channels=96,
|
| 264 |
+
kernel_size=(3, 3),
|
| 265 |
+
strides=(2, 2),
|
| 266 |
+
params_dict=utils.get(self.params_dict, 'branch3x3dbl_3'),
|
| 267 |
+
dtype=self.dtype)(branch3x3dbl, train)
|
| 268 |
+
|
| 269 |
+
branch_pool = nn.max_pool(x, window_shape=(3, 3), strides=(2, 2))
|
| 270 |
+
|
| 271 |
+
output = jnp.concatenate((branch3x3, branch3x3dbl, branch_pool), axis=-1)
|
| 272 |
+
return output
|
| 273 |
+
|
| 274 |
+
|
| 275 |
+
class InceptionC(nn.Module):
|
| 276 |
+
channels_7x7: int
|
| 277 |
+
params_dict: dict=None
|
| 278 |
+
dtype: str='float32'
|
| 279 |
+
|
| 280 |
+
@nn.compact
|
| 281 |
+
def __call__(self, x, train=True):
|
| 282 |
+
branch1x1 = BasicConv2d(out_channels=192,
|
| 283 |
+
kernel_size=(1, 1),
|
| 284 |
+
params_dict=utils.get(self.params_dict, 'branch1x1'),
|
| 285 |
+
dtype=self.dtype)(x, train)
|
| 286 |
+
|
| 287 |
+
branch7x7 = BasicConv2d(out_channels=self.channels_7x7,
|
| 288 |
+
kernel_size=(1, 1),
|
| 289 |
+
params_dict=utils.get(self.params_dict, 'branch7x7_1'),
|
| 290 |
+
dtype=self.dtype)(x, train)
|
| 291 |
+
branch7x7 = BasicConv2d(out_channels=self.channels_7x7,
|
| 292 |
+
kernel_size=(1, 7),
|
| 293 |
+
padding=((0, 0), (3, 3)),
|
| 294 |
+
params_dict=utils.get(self.params_dict, 'branch7x7_2'),
|
| 295 |
+
dtype=self.dtype)(branch7x7, train)
|
| 296 |
+
branch7x7 = BasicConv2d(out_channels=192,
|
| 297 |
+
kernel_size=(7, 1),
|
| 298 |
+
padding=((3, 3), (0, 0)),
|
| 299 |
+
params_dict=utils.get(self.params_dict, 'branch7x7_3'),
|
| 300 |
+
dtype=self.dtype)(branch7x7, train)
|
| 301 |
+
|
| 302 |
+
branch7x7dbl = BasicConv2d(out_channels=self.channels_7x7,
|
| 303 |
+
kernel_size=(1, 1),
|
| 304 |
+
params_dict=utils.get(self.params_dict, 'branch7x7dbl_1'),
|
| 305 |
+
dtype=self.dtype)(x, train)
|
| 306 |
+
branch7x7dbl = BasicConv2d(out_channels=self.channels_7x7,
|
| 307 |
+
kernel_size=(7, 1),
|
| 308 |
+
padding=((3, 3), (0, 0)),
|
| 309 |
+
params_dict=utils.get(self.params_dict, 'branch7x7dbl_2'),
|
| 310 |
+
dtype=self.dtype)(branch7x7dbl, train)
|
| 311 |
+
branch7x7dbl = BasicConv2d(out_channels=self.channels_7x7,
|
| 312 |
+
kernel_size=(1, 7),
|
| 313 |
+
padding=((0, 0), (3, 3)),
|
| 314 |
+
params_dict=utils.get(self.params_dict, 'branch7x7dbl_3'),
|
| 315 |
+
dtype=self.dtype)(branch7x7dbl, train)
|
| 316 |
+
branch7x7dbl = BasicConv2d(out_channels=self.channels_7x7,
|
| 317 |
+
kernel_size=(7, 1),
|
| 318 |
+
padding=((3, 3), (0, 0)),
|
| 319 |
+
params_dict=utils.get(self.params_dict, 'branch7x7dbl_4'),
|
| 320 |
+
dtype=self.dtype)(branch7x7dbl, train)
|
| 321 |
+
branch7x7dbl = BasicConv2d(out_channels=self.channels_7x7,
|
| 322 |
+
kernel_size=(1, 7),
|
| 323 |
+
padding=((0, 0), (3, 3)),
|
| 324 |
+
params_dict=utils.get(self.params_dict, 'branch7x7dbl_5'),
|
| 325 |
+
dtype=self.dtype)(branch7x7dbl, train)
|
| 326 |
+
|
| 327 |
+
branch_pool = avg_pool(x, window_shape=(3, 3), strides=(1, 1), padding=((1, 1), (1, 1)))
|
| 328 |
+
branch_pool = BasicConv2d(out_channels=192,
|
| 329 |
+
kernel_size=(1, 1),
|
| 330 |
+
params_dict=utils.get(self.params_dict, 'branch_pool'),
|
| 331 |
+
dtype=self.dtype)(branch_pool, train)
|
| 332 |
+
|
| 333 |
+
output = jnp.concatenate((branch1x1, branch7x7, branch7x7dbl, branch_pool), axis=-1)
|
| 334 |
+
return output
|
| 335 |
+
|
| 336 |
+
|
| 337 |
+
class InceptionD(nn.Module):
|
| 338 |
+
params_dict: dict=None
|
| 339 |
+
dtype: str='float32'
|
| 340 |
+
|
| 341 |
+
@nn.compact
|
| 342 |
+
def __call__(self, x, train=True):
|
| 343 |
+
branch3x3 = BasicConv2d(out_channels=192,
|
| 344 |
+
kernel_size=(1, 1),
|
| 345 |
+
params_dict=utils.get(self.params_dict, 'branch3x3_1'),
|
| 346 |
+
dtype=self.dtype)(x, train)
|
| 347 |
+
branch3x3 = BasicConv2d(out_channels=320,
|
| 348 |
+
kernel_size=(3, 3),
|
| 349 |
+
strides=(2, 2),
|
| 350 |
+
params_dict=utils.get(self.params_dict, 'branch3x3_2'),
|
| 351 |
+
dtype=self.dtype)(branch3x3, train)
|
| 352 |
+
|
| 353 |
+
branch7x7x3 = BasicConv2d(out_channels=192,
|
| 354 |
+
kernel_size=(1, 1),
|
| 355 |
+
params_dict=utils.get(self.params_dict, 'branch7x7x3_1'),
|
| 356 |
+
dtype=self.dtype)(x, train)
|
| 357 |
+
branch7x7x3 = BasicConv2d(out_channels=192,
|
| 358 |
+
kernel_size=(1, 7),
|
| 359 |
+
padding=((0, 0), (3, 3)),
|
| 360 |
+
params_dict=utils.get(self.params_dict, 'branch7x7x3_2'),
|
| 361 |
+
dtype=self.dtype)(branch7x7x3, train)
|
| 362 |
+
branch7x7x3 = BasicConv2d(out_channels=192,
|
| 363 |
+
kernel_size=(7, 1),
|
| 364 |
+
padding=((3, 3), (0, 0)),
|
| 365 |
+
params_dict=utils.get(self.params_dict, 'branch7x7x3_3'),
|
| 366 |
+
dtype=self.dtype)(branch7x7x3, train)
|
| 367 |
+
branch7x7x3 = BasicConv2d(out_channels=192,
|
| 368 |
+
kernel_size=(3, 3),
|
| 369 |
+
strides=(2, 2),
|
| 370 |
+
params_dict=utils.get(self.params_dict, 'branch7x7x3_4'),
|
| 371 |
+
dtype=self.dtype)(branch7x7x3, train)
|
| 372 |
+
|
| 373 |
+
branch_pool = nn.max_pool(x, window_shape=(3, 3), strides=(2, 2))
|
| 374 |
+
|
| 375 |
+
output = jnp.concatenate((branch3x3, branch7x7x3, branch_pool), axis=-1)
|
| 376 |
+
return output
|
| 377 |
+
|
| 378 |
+
|
| 379 |
+
class InceptionE(nn.Module):
|
| 380 |
+
pooling: Callable
|
| 381 |
+
params_dict: dict=None
|
| 382 |
+
dtype: str='float32'
|
| 383 |
+
|
| 384 |
+
@nn.compact
|
| 385 |
+
def __call__(self, x, train=True):
|
| 386 |
+
branch1x1 = BasicConv2d(out_channels=320,
|
| 387 |
+
kernel_size=(1, 1),
|
| 388 |
+
params_dict=utils.get(self.params_dict, 'branch1x1'),
|
| 389 |
+
dtype=self.dtype)(x, train)
|
| 390 |
+
|
| 391 |
+
branch3x3 = BasicConv2d(out_channels=384,
|
| 392 |
+
kernel_size=(1, 1),
|
| 393 |
+
params_dict=utils.get(self.params_dict, 'branch3x3_1'),
|
| 394 |
+
dtype=self.dtype)(x, train)
|
| 395 |
+
branch3x3_a = BasicConv2d(out_channels=384,
|
| 396 |
+
kernel_size=(1, 3),
|
| 397 |
+
padding=((0, 0), (1, 1)),
|
| 398 |
+
params_dict=utils.get(self.params_dict, 'branch3x3_2a'),
|
| 399 |
+
dtype=self.dtype)(branch3x3, train)
|
| 400 |
+
branch3x3_b = BasicConv2d(out_channels=384,
|
| 401 |
+
kernel_size=(3, 1),
|
| 402 |
+
padding=((1, 1), (0, 0)),
|
| 403 |
+
params_dict=utils.get(self.params_dict, 'branch3x3_2b'),
|
| 404 |
+
dtype=self.dtype)(branch3x3, train)
|
| 405 |
+
branch3x3 = jnp.concatenate((branch3x3_a, branch3x3_b), axis=-1)
|
| 406 |
+
|
| 407 |
+
branch3x3dbl = BasicConv2d(out_channels=448,
|
| 408 |
+
kernel_size=(1, 1),
|
| 409 |
+
params_dict=utils.get(self.params_dict, 'branch3x3dbl_1'),
|
| 410 |
+
dtype=self.dtype)(x, train)
|
| 411 |
+
branch3x3dbl = BasicConv2d(out_channels=384,
|
| 412 |
+
kernel_size=(3, 3),
|
| 413 |
+
padding=((1, 1), (1, 1)),
|
| 414 |
+
params_dict=utils.get(self.params_dict, 'branch3x3dbl_2'),
|
| 415 |
+
dtype=self.dtype)(branch3x3dbl, train)
|
| 416 |
+
branch3x3dbl_a = BasicConv2d(out_channels=384,
|
| 417 |
+
kernel_size=(1, 3),
|
| 418 |
+
padding=((0, 0), (1, 1)),
|
| 419 |
+
params_dict=utils.get(self.params_dict, 'branch3x3dbl_3a'),
|
| 420 |
+
dtype=self.dtype)(branch3x3dbl, train)
|
| 421 |
+
branch3x3dbl_b = BasicConv2d(out_channels=384,
|
| 422 |
+
kernel_size=(3, 1),
|
| 423 |
+
padding=((1, 1), (0, 0)),
|
| 424 |
+
params_dict=utils.get(self.params_dict, 'branch3x3dbl_3b'),
|
| 425 |
+
dtype=self.dtype)(branch3x3dbl, train)
|
| 426 |
+
branch3x3dbl = jnp.concatenate((branch3x3dbl_a, branch3x3dbl_b), axis=-1)
|
| 427 |
+
|
| 428 |
+
branch_pool = self.pooling(x, window_shape=(3, 3), strides=(1, 1), padding=((1, 1), (1, 1)))
|
| 429 |
+
branch_pool = BasicConv2d(out_channels=192,
|
| 430 |
+
kernel_size=(1, 1),
|
| 431 |
+
params_dict=utils.get(self.params_dict, 'branch_pool'),
|
| 432 |
+
dtype=self.dtype)(branch_pool, train)
|
| 433 |
+
|
| 434 |
+
output = jnp.concatenate((branch1x1, branch3x3, branch3x3dbl, branch_pool), axis=-1)
|
| 435 |
+
return output
|
| 436 |
+
|
| 437 |
+
|
| 438 |
+
class InceptionAux(nn.Module):
|
| 439 |
+
num_classes: int
|
| 440 |
+
kernel_init: functools.partial=nn.initializers.lecun_normal()
|
| 441 |
+
bias_init: functools.partial=nn.initializers.zeros
|
| 442 |
+
params_dict: dict=None
|
| 443 |
+
dtype: str='float32'
|
| 444 |
+
|
| 445 |
+
@nn.compact
|
| 446 |
+
def __call__(self, x, train=True):
|
| 447 |
+
x = avg_pool(x, window_shape=(5, 5), strides=(3, 3))
|
| 448 |
+
x = BasicConv2d(out_channels=128,
|
| 449 |
+
kernel_size=(1, 1),
|
| 450 |
+
params_dict=utils.get(self.params_dict, 'conv0'),
|
| 451 |
+
dtype=self.dtype)(x, train)
|
| 452 |
+
x = BasicConv2d(out_channels=768,
|
| 453 |
+
kernel_size=(5, 5),
|
| 454 |
+
params_dict=utils.get(self.params_dict, 'conv1'),
|
| 455 |
+
dtype=self.dtype)(x, train)
|
| 456 |
+
x = jnp.mean(x, axis=(1, 2))
|
| 457 |
+
x = jnp.reshape(x, newshape=(x.shape[0], -1))
|
| 458 |
+
x = Dense(features=self.num_classes,
|
| 459 |
+
params_dict=utils.get(self.params_dict, 'fc'),
|
| 460 |
+
dtype=self.dtype)(x)
|
| 461 |
+
return x
|
| 462 |
+
|
| 463 |
+
def _absolute_dims(rank, dims):
|
| 464 |
+
return tuple([rank + dim if dim < 0 else dim for dim in dims])
|
| 465 |
+
|
| 466 |
+
|
| 467 |
+
class BatchNorm(nn.Module):
|
| 468 |
+
"""BatchNorm Module.
|
| 469 |
+
Taken from: https://github.com/google/flax/blob/master/flax/linen/normalization.py
|
| 470 |
+
Attributes:
|
| 471 |
+
use_running_average: if True, the statistics stored in batch_stats
|
| 472 |
+
will be used instead of computing the batch statistics on the input.
|
| 473 |
+
axis: the feature or non-batch axis of the input.
|
| 474 |
+
momentum: decay rate for the exponential moving average of the batch statistics.
|
| 475 |
+
epsilon: a small float added to variance to avoid dividing by zero.
|
| 476 |
+
dtype: the dtype of the computation (default: float32).
|
| 477 |
+
use_bias: if True, bias (beta) is added.
|
| 478 |
+
use_scale: if True, multiply by scale (gamma).
|
| 479 |
+
When the next layer is linear (also e.g. nn.relu), this can be disabled
|
| 480 |
+
since the scaling will be done by the next layer.
|
| 481 |
+
bias_init: initializer for bias, by default, zero.
|
| 482 |
+
scale_init: initializer for scale, by default, one.
|
| 483 |
+
axis_name: the axis name used to combine batch statistics from multiple
|
| 484 |
+
devices. See `jax.pmap` for a description of axis names (default: None).
|
| 485 |
+
axis_index_groups: groups of axis indices within that named axis
|
| 486 |
+
representing subsets of devices to reduce over (default: None). For
|
| 487 |
+
example, `[[0, 1], [2, 3]]` would independently batch-normalize over
|
| 488 |
+
the examples on the first two and last two devices. See `jax.lax.psum`
|
| 489 |
+
for more details.
|
| 490 |
+
"""
|
| 491 |
+
use_running_average: Optional[bool] = None
|
| 492 |
+
axis: int = -1
|
| 493 |
+
momentum: float = 0.99
|
| 494 |
+
epsilon: float = 1e-5
|
| 495 |
+
dtype: Dtype = jnp.float32
|
| 496 |
+
use_bias: bool = True
|
| 497 |
+
use_scale: bool = True
|
| 498 |
+
bias_init: Callable[[PRNGKey, Shape, Dtype], Array] = initializers.zeros
|
| 499 |
+
scale_init: Callable[[PRNGKey, Shape, Dtype], Array] = initializers.ones
|
| 500 |
+
mean_init: Callable[[Shape], Array] = lambda s: jnp.zeros(s, jnp.float32)
|
| 501 |
+
var_init: Callable[[Shape], Array] = lambda s: jnp.ones(s, jnp.float32)
|
| 502 |
+
axis_name: Optional[str] = None
|
| 503 |
+
axis_index_groups: Any = None
|
| 504 |
+
|
| 505 |
+
@nn.compact
|
| 506 |
+
def __call__(self, x, use_running_average: Optional[bool] = None):
|
| 507 |
+
"""Normalizes the input using batch statistics.
|
| 508 |
+
|
| 509 |
+
NOTE:
|
| 510 |
+
During initialization (when parameters are mutable) the running average
|
| 511 |
+
    of the batch statistics will not be updated. Therefore, the inputs
    fed during initialization don't need to match that of the actual input
    distribution and the reduction axis (set with `axis_name`) does not have
    to exist.

    Args:
        x: the input to be normalized.
        use_running_average: if true, the statistics stored in batch_stats
            will be used instead of computing the batch statistics on the input.

    Returns:
        Normalized inputs (the same shape as inputs).
    """
    use_running_average = merge_param(
        'use_running_average', self.use_running_average, use_running_average)
    x = jnp.asarray(x, jnp.float32)
    axis = self.axis if isinstance(self.axis, tuple) else (self.axis,)
    axis = _absolute_dims(x.ndim, axis)
    feature_shape = tuple(d if i in axis else 1 for i, d in enumerate(x.shape))
    reduced_feature_shape = tuple(d for i, d in enumerate(x.shape) if i in axis)
    reduction_axis = tuple(i for i in range(x.ndim) if i not in axis)

    # See NOTE above on initialization behavior.
    initializing = self.is_mutable_collection('params')

    ra_mean = self.variable('batch_stats', 'mean',
                            self.mean_init,
                            reduced_feature_shape)
    ra_var = self.variable('batch_stats', 'var',
                           self.var_init,
                           reduced_feature_shape)

    if use_running_average:
        mean, var = ra_mean.value, ra_var.value
    else:
        mean = jnp.mean(x, axis=reduction_axis, keepdims=False)
        mean2 = jnp.mean(lax.square(x), axis=reduction_axis, keepdims=False)
        if self.axis_name is not None and not initializing:
            # Average the statistics across devices in a single collective by
            # concatenating the mean and mean-of-squares before the pmean.
            concatenated_mean = jnp.concatenate([mean, mean2])
            mean, mean2 = jnp.split(
                lax.pmean(
                    concatenated_mean,
                    axis_name=self.axis_name,
                    axis_index_groups=self.axis_index_groups), 2)
        var = mean2 - lax.square(mean)

        if not initializing:
            ra_mean.value = self.momentum * ra_mean.value + (1 - self.momentum) * mean
            ra_var.value = self.momentum * ra_var.value + (1 - self.momentum) * var

    y = x - mean.reshape(feature_shape)
    mul = lax.rsqrt(var + self.epsilon)
    if self.use_scale:
        scale = self.param('scale',
                           self.scale_init,
                           reduced_feature_shape).reshape(feature_shape)
        mul = mul * scale
    y = y * mul
    if self.use_bias:
        bias = self.param('bias',
                          self.bias_init,
                          reduced_feature_shape).reshape(feature_shape)
        y = y + bias
    return jnp.asarray(y, self.dtype)


def pool(inputs, init, reduce_fn, window_shape, strides, padding):
    """
    Taken from: https://github.com/google/flax/blob/main/flax/linen/pooling.py

    Helper function to define pooling functions.
    Pooling functions are implemented using the ReduceWindow XLA op.
    NOTE: Be aware that pooling is not generally differentiable.
    That means providing a reduce_fn that is differentiable does not imply
    that pool is differentiable.

    Args:
        inputs: input data with dimensions (batch, window dims..., features).
        init: the initial value for the reduction.
        reduce_fn: a reduce function of the form `(T, T) -> T`.
        window_shape: a shape tuple defining the window to reduce over.
        strides: a sequence of `n` integers, representing the inter-window
            strides.
        padding: either the string `'SAME'`, the string `'VALID'`, or a sequence
            of `n` `(low, high)` integer pairs that give the padding to apply before
            and after each spatial dimension.

    Returns:
        The output of the reduction for each window slice.
    """
    strides = strides or (1,) * len(window_shape)
    assert len(window_shape) == len(strides), (
        f"len({window_shape}) == len({strides})")
    strides = (1,) + strides + (1,)
    dims = (1,) + window_shape + (1,)

    is_single_input = False
    if inputs.ndim == len(dims) - 1:
        # Add a singleton batch dimension because lax.reduce_window always
        # needs a batch dimension.
        inputs = inputs[None]
        is_single_input = True

    assert inputs.ndim == len(dims), f"len({inputs.shape}) != len({dims})"
    if not isinstance(padding, str):
        padding = tuple(map(tuple, padding))
        assert len(padding) == len(window_shape), (
            f"padding {padding} must specify pads for same number of dims as "
            f"window_shape {window_shape}")
        assert all([len(x) == 2 for x in padding]), (
            f"each entry in padding {padding} must be length 2")
        padding = ((0, 0),) + padding + ((0, 0),)
    y = jax.lax.reduce_window(inputs, init, reduce_fn, dims, strides, padding)
    if is_single_input:
        y = jnp.squeeze(y, axis=0)
    return y


def avg_pool(inputs, window_shape, strides=None, padding='VALID'):
    """
    Pools the input by taking the average over a window.

    In comparison to flax.linen.avg_pool, this pooling operation does not
    consider the padded zeros for the average computation.

    Args:
        inputs: input data with dimensions (batch, window dims..., features).
        window_shape: a shape tuple defining the window to reduce over.
        strides: a sequence of `n` integers, representing the inter-window
            strides (default: `(1, ..., 1)`).
        padding: either the string `'SAME'`, the string `'VALID'`, or a sequence
            of `n` `(low, high)` integer pairs that give the padding to apply before
            and after each spatial dimension (default: `'VALID'`).

    Returns:
        The average for each window slice.
    """
    assert inputs.ndim == 4
    assert len(window_shape) == 2

    y = pool(inputs, 0., jax.lax.add, window_shape, strides, padding)
    # Count how many real (unpadded) input pixels contribute to each window by
    # convolving an all-ones tensor with an all-ones kernel. The (1, 1) padding
    # corresponds to a 3x3 window with 'SAME' padding.
    ones = jnp.ones(shape=(1, inputs.shape[1], inputs.shape[2], 1)).astype(inputs.dtype)
    counts = jax.lax.conv_general_dilated(ones,
                                          jnp.expand_dims(jnp.ones(window_shape).astype(inputs.dtype), axis=(-2, -1)),
                                          window_strides=(1, 1),
                                          padding=((1, 1), (1, 1)),
                                          dimension_numbers=nn.linear._conv_dimension_numbers(ones.shape),
                                          feature_group_count=1)
    y = y / counts
    return y
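For reference, a minimal usage sketch of the padding-aware average pooling above (not part of the diff; the input shape is an illustrative assumption):

import jax.numpy as jnp

x = jnp.ones((1, 8, 8, 3))  # NHWC input
y = avg_pool(x, window_shape=(3, 3), strides=(1, 1), padding='SAME')
# Border windows are averaged over the pixels that actually exist instead of
# the zero padding, so a constant input stays constant: y is all ones.
print(y.shape)  # (1, 8, 8, 3)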
fid/utils.py
ADDED
@@ -0,0 +1,59 @@
import jax
import flax
import numpy as np
from tqdm import tqdm
import requests
import os
import tempfile
import logging

logger = logging.getLogger(__name__)


def download(url, ckpt_dir=None):
    name = url[url.rfind('/') + 1 : url.rfind('?')]
    if ckpt_dir is None:
        ckpt_dir = tempfile.gettempdir()
    ckpt_dir = os.path.join(ckpt_dir, 'flaxmodels')
    ckpt_file = os.path.join(ckpt_dir, name)
    if not os.path.exists(ckpt_file):
        logger.info(f'Downloading: \"{url[:url.rfind("?")]}\" to {ckpt_file}')
        if not os.path.exists(ckpt_dir):
            os.makedirs(ckpt_dir)

        response = requests.get(url, stream=True)
        total_size_in_bytes = int(response.headers.get('content-length', 0))
        progress_bar = tqdm(total=total_size_in_bytes, unit='iB', unit_scale=True)

        # First write to a temp file, in case the download fails.
        ckpt_file_temp = os.path.join(ckpt_dir, name + '.temp')
        with open(ckpt_file_temp, 'wb') as file:
            for data in response.iter_content(chunk_size=1024):
                progress_bar.update(len(data))
                file.write(data)
        progress_bar.close()

        if total_size_in_bytes != 0 and progress_bar.n != total_size_in_bytes:
            logger.error('An error occurred while downloading, please try again.')
            if os.path.exists(ckpt_file_temp):
                os.remove(ckpt_file_temp)
        else:
            # If the download was successful, rename the temp file.
            os.rename(ckpt_file_temp, ckpt_file)
    return ckpt_file


def get(dictionary, key):
    if dictionary is None or key not in dictionary:
        return None
    return dictionary[key]


def prefetch(dataset, n_prefetch):
    # Taken from: https://github.com/google-research/vision_transformer/blob/master/vit_jax/input_pipeline.py
    ds_iter = iter(dataset)
    ds_iter = map(lambda x: jax.tree_map(lambda t: np.asarray(memoryview(t)), x),
                  ds_iter)
    if n_prefetch:
        ds_iter = flax.jax_utils.prefetch_to_device(ds_iter, n_prefetch)
    return ds_iter
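A minimal sketch of how prefetch is meant to be wired in (the training dataset here is a placeholder assumption; it stands for a tf.data pipeline whose batches are already sharded for the local devices):

ds_iter = prefetch(train_ds, n_prefetch=2)  # converts TF tensors to numpy and stages batches on device
batch = next(ds_iter)                       # ready-to-use arrays for the next training step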
generate_images.py
ADDED
@@ -0,0 +1,61 @@
import argparse
import functools
import logging
import os

import jax
import jax.numpy as jnp
import numpy as np
from PIL import Image
from tqdm import tqdm

import checkpoint
from stylegan2.generator import Generator

logging.basicConfig(level=logging.INFO, format='%(asctime)s [%(levelname)-5.5s] [%(name)-12.12s]: %(message)s', force=True)
logger = logging.getLogger(__name__)


def generate_images(args):
    logger.info(f"Loading checkpoint '{args.checkpoint}'...")
    ckpt = checkpoint.load_checkpoint(args.checkpoint)
    config = ckpt['config']
    params_ema_G = ckpt['params_ema_G']

    generator_ema = Generator(
        resolution=config.resolution,
        num_channels=config.img_channels,
        z_dim=config.z_dim,
        c_dim=config.c_dim,
        w_dim=config.w_dim,
        num_ws=int(np.log2(config.resolution)) * 2 - 3,
        num_mapping_layers=8,
        fmap_base=config.fmap_base,
        dtype=jnp.float32
    )

    generator_apply = jax.jit(
        functools.partial(generator_ema.apply, truncation_psi=args.truncation_psi, train=False, noise_mode='const')
    )

    logger.info(f"Generating {len(args.seeds)} images with truncation {args.truncation_psi}...")
    for seed in tqdm(args.seeds):
        rng = jax.random.PRNGKey(seed)
        z_latent = jax.random.normal(rng, shape=(1, config.z_dim))
        image = generator_apply(params_ema_G, jax.lax.stop_gradient(z_latent), None)
        # Normalize to [0, 1] before converting to uint8.
        image = (image - jnp.min(image)) / (jnp.max(image) - jnp.min(image))

        Image.fromarray(np.uint8(np.clip(image[0] * 255, 0, 255))).save(os.path.join(args.out_path, f'{seed}.png'))
    logger.info(f"Images saved in '{args.out_path}/'")


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--checkpoint', type=str, help='Path to the checkpoint.', required=True)
    parser.add_argument('--out_path', type=str, default='generated_images', help='Path where the generated images are stored.')
    parser.add_argument('--truncation_psi', type=float, default=0.5, help='Controls truncation (trading off variation for quality). If 1, truncation is disabled.')
    parser.add_argument('--seeds', type=int, nargs='*', default=[0], help='List of random seeds.')
    args = parser.parse_args()
    os.makedirs(args.out_path, exist_ok=True)

    generate_images(args)
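A hypothetical invocation (the checkpoint filename is an assumption; the flags match the argparse definitions above):

python generate_images.py --checkpoint ckpt_20000.pickle --seeds 0 1 2 --truncation_psi 0.7 --out_path generated_images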
main.py
ADDED
@@ -0,0 +1,102 @@
import argparse
import os
import jax
import wandb
import training
import logging
import json


logging.basicConfig(level=logging.INFO, format='%(asctime)s [%(levelname)-5.5s] [%(name)-12.12s]: %(message)s', force=True)
logger = logging.getLogger(__name__)


def main():
    parser = argparse.ArgumentParser()
    # Paths
    parser.add_argument('--data_dir', type=str, required=True, help='Directory of the dataset.')
    parser.add_argument('--save_dir', type=str, default='gs://ig-standard-usc1/sg2-flax/checkpoints/', help='Directory where checkpoints will be written to. A subfolder with run_id will be created.')
    parser.add_argument('--load_from_pkl', type=str, help='If provided, start training from an existing checkpoint pickle file.')
    parser.add_argument('--resume_run_id', type=str, help='If provided, resume existing training run. If --wandb is enabled W&B will also resume.')
    parser.add_argument('--project', type=str, default='sg2-flax', help='Name of this project.')
    # Training
    parser.add_argument('--num_epochs', type=int, default=10000, help='Number of epochs.')
    parser.add_argument('--learning_rate', type=float, default=0.002, help='Learning rate.')
    parser.add_argument('--batch_size', type=int, default=8, help='Batch size.')
    parser.add_argument('--num_prefetch', type=int, default=2, help='Number of prefetched examples for the data pipeline.')
    parser.add_argument('--resolution', type=int, default=128, help='Image resolution. Must be a power of 2.')
    parser.add_argument('--img_channels', type=int, default=3, help='Number of image channels.')
    parser.add_argument('--mixed_precision', action='store_true', help='Use mixed precision training.')
    parser.add_argument('--random_seed', type=int, default=0, help='Random seed.')
    parser.add_argument('--bf16', action='store_true', help='Use bf16 dtype (this is still WIP).')
    # Generator
    parser.add_argument('--fmap_base', type=int, default=16384, help='Overall multiplier for the number of feature maps.')
    # Discriminator
    parser.add_argument('--mbstd_group_size', type=int, help='Group size for the minibatch standard deviation layer, None = entire minibatch.')
    # Exponential moving average of generator weights
    parser.add_argument('--ema_kimg', type=float, default=20.0, help='Controls the ema of the generator weights (larger value -> larger beta).')
    # Losses
    parser.add_argument('--pl_decay', type=float, default=0.01, help='Exponential decay for the mean of the path length (path length regularization).')
    parser.add_argument('--pl_weight', type=float, default=2, help='Weight for path length regularization.')
    # Regularization
    parser.add_argument('--mixing_prob', type=float, default=0.9, help='Probability for style mixing.')
    parser.add_argument('--G_reg_interval', type=int, default=4, help='How often to perform regularization for G.')
    parser.add_argument('--D_reg_interval', type=int, default=16, help='How often to perform regularization for D.')
    parser.add_argument('--r1_gamma', type=float, default=10.0, help='Weight for R1 regularization.')
    # Model
    parser.add_argument('--z_dim', type=int, default=512, help='Input latent (Z) dimensionality.')
    parser.add_argument('--c_dim', type=int, default=0, help='Conditioning label (C) dimensionality, 0 = no label.')
    parser.add_argument('--w_dim', type=int, default=512, help='Intermediate latent (W) dimensionality.')
    # Logging
    parser.add_argument('--log_every', type=int, default=100, help='Log every log_every steps.')
    parser.add_argument('--save_every', type=int, default=2000, help='Save every save_every steps. Will be ignored if FID evaluation is enabled.')
    parser.add_argument('--generate_samples_every', type=int, default=10000, help='Generate samples every generate_samples_every steps.')
    parser.add_argument('--debug', action='store_true', help='Show debug log.')
    # FID
    parser.add_argument('--eval_fid_every', type=int, default=1000, help='Compute FID score every eval_fid_every steps.')
    parser.add_argument('--num_fid_images', type=int, default=10000, help='Number of images to use for FID computation.')
    parser.add_argument('--disable_fid', action='store_true', help='Disable FID evaluation.')
    # W&B
    parser.add_argument('--wandb', action='store_true', help='Log to Weights&Biases.')
    parser.add_argument('--name', type=str, default=None, help='Name of this experiment in Weights&Biases.')
    parser.add_argument('--entity', type=str, default='nyxai', help='Entity for this experiment in Weights&Biases.')
    parser.add_argument('--group', type=str, default=None, help='Group name of this experiment for Weights&Biases.')

    args = parser.parse_args()

    # Debug mode.
    if args.debug:
        logging.getLogger().setLevel(logging.DEBUG)

    # Some validation.
    if args.resume_run_id is not None:
        assert args.load_from_pkl is None, 'When resuming a run one cannot also specify --load_from_pkl'

    # Set unique run ID.
    if args.resume_run_id:
        resume = 'must'  # throw an error if the run id to be resumed cannot be found
        args.run_id = args.resume_run_id
    else:
        resume = None  # default
        args.run_id = wandb.util.generate_id()
    args.ckpt_dir = os.path.join(args.save_dir, args.run_id)

    if jax.process_index() == 0:
        if not args.ckpt_dir.startswith('gs://') and not os.path.exists(args.ckpt_dir):
            os.makedirs(args.ckpt_dir)
        if args.wandb:
            wandb.init(id=args.run_id,
                       project=args.project,
                       group=args.group,
                       config=args,
                       name=args.name,
                       entity=args.entity,
                       resume=resume)
        logger.info('Starting new run with config:')
        print(json.dumps(vars(args), indent=4))

    training.train_and_evaluate(args)


if __name__ == '__main__':
    main()
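A hypothetical training launch (the dataset path is an assumption; the flags match the parser above):

python main.py --data_dir gs://my-bucket/my-dataset --resolution 128 --batch_size 8 --wandb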
requirements.txt
ADDED
@@ -0,0 +1,14 @@
flaxmodels==0.1.1
flax==0.4.1
jax==0.3.14
tensorflow==2.4.1
optax==0.0.9
numpy
tensorflow-datasets
argparse
wandb
tqdm
dill
h5py
dataclasses
tqdm
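The pinned dependencies above install with standard pip (the command itself is not part of the diff):

pip install -r requirements.txt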
stylegan2/__init__.py
ADDED
@@ -0,0 +1,5 @@
from .generator import SynthesisNetwork
from .generator import MappingNetwork
from .generator import Generator
from .discriminator import Discriminator
stylegan2/discriminator.py
ADDED
@@ -0,0 +1,451 @@
import numpy as np
import jax
from jax import random
import jax.numpy as jnp
import flax.linen as nn
from typing import Any, Tuple, List, Callable
import h5py
from . import ops
from stylegan2 import utils


URLS = {'afhqcat': 'https://www.dropbox.com/s/qygbjkefyqyu9k9/stylegan2_discriminator_afhqcat.h5?dl=1',
        'afhqdog': 'https://www.dropbox.com/s/kmoxbp33qswz64p/stylegan2_discriminator_afhqdog.h5?dl=1',
        'afhqwild': 'https://www.dropbox.com/s/jz1hpsyt3isj6e7/stylegan2_discriminator_afhqwild.h5?dl=1',
        'brecahad': 'https://www.dropbox.com/s/h0cb89hruo6pmyj/stylegan2_discriminator_brecahad.h5?dl=1',
        'car': 'https://www.dropbox.com/s/2ghjrmxih7cic76/stylegan2_discriminator_car.h5?dl=1',
        'cat': 'https://www.dropbox.com/s/zfhjsvlsny5qixd/stylegan2_discriminator_cat.h5?dl=1',
        'church': 'https://www.dropbox.com/s/jlno7zeivkjtk8g/stylegan2_discriminator_church.h5?dl=1',
        'cifar10': 'https://www.dropbox.com/s/eldpubfkl4c6rur/stylegan2_discriminator_cifar10.h5?dl=1',
        'ffhq': 'https://www.dropbox.com/s/m42qy9951b7lq1s/stylegan2_discriminator_ffhq.h5?dl=1',
        'horse': 'https://www.dropbox.com/s/19f5pxrcdh2g8cw/stylegan2_discriminator_horse.h5?dl=1',
        'metfaces': 'https://www.dropbox.com/s/xnokaunql12glkd/stylegan2_discriminator_metfaces.h5?dl=1'}

RESOLUTION = {'metfaces': 1024,
              'ffhq': 1024,
              'church': 256,
              'cat': 256,
              'horse': 256,
              'car': 512,
              'brecahad': 512,
              'afhqwild': 512,
              'afhqdog': 512,
              'afhqcat': 512,
              'cifar10': 32}

C_DIM = {'metfaces': 0,
         'ffhq': 0,
         'church': 0,
         'cat': 0,
         'horse': 0,
         'car': 0,
         'brecahad': 0,
         'afhqwild': 0,
         'afhqdog': 0,
         'afhqcat': 0,
         'cifar10': 10}

ARCHITECTURE = {'metfaces': 'resnet',
                'ffhq': 'resnet',
                'church': 'resnet',
                'cat': 'resnet',
                'horse': 'resnet',
                'car': 'resnet',
                'brecahad': 'resnet',
                'afhqwild': 'resnet',
                'afhqdog': 'resnet',
                'afhqcat': 'resnet',
                'cifar10': 'orig'}

MBSTD_GROUP_SIZE = {'metfaces': None,
                    'ffhq': None,
                    'church': None,
                    'cat': None,
                    'horse': None,
                    'car': None,
                    'brecahad': None,
                    'afhqwild': None,
                    'afhqdog': None,
                    'afhqcat': None,
                    'cifar10': 32}


class FromRGBLayer(nn.Module):
    """
    From RGB Layer.

    Attributes:
        fmaps (int): Number of output channels of the convolution.
        kernel (int): Kernel size of the convolution.
        lr_multiplier (float): Learning rate multiplier.
        activation (str): Activation function: 'relu', 'lrelu', etc.
        param_dict (h5py.Group): Parameter dict with pretrained parameters. If not None, 'pretrained' will be ignored.
        clip_conv (float): Clip the output of convolution layers to [-clip_conv, +clip_conv], None = disable clipping.
        dtype (str): Data dtype.
        rng (jax.random.PRNGKey): PRNG for initialization.
    """
    fmaps: int
    kernel: int=1
    lr_multiplier: float=1
    activation: str='leaky_relu'
    param_dict: h5py.Group=None
    clip_conv: float=None
    dtype: str='float32'
    rng: Any=random.PRNGKey(0)

    @nn.compact
    def __call__(self, x, y):
        """
        Run From RGB Layer.

        Args:
            x (tensor): Input image of shape [N, H, W, num_channels].
            y (tensor): Input tensor of shape [N, H, W, out_channels].

        Returns:
            (tensor): Output tensor of shape [N, H, W, out_channels].
        """
        w_shape = [self.kernel, self.kernel, x.shape[3], self.fmaps]
        w, b = ops.get_weight(w_shape, self.lr_multiplier, True, self.param_dict, 'fromrgb', self.rng)

        w = self.param(name='weight', init_fn=lambda *_ : w)
        b = self.param(name='bias', init_fn=lambda *_ : b)
        w = ops.equalize_lr_weight(w, self.lr_multiplier)
        b = ops.equalize_lr_bias(b, self.lr_multiplier)

        x = x.astype(self.dtype)
        x = ops.conv2d(x, w.astype(x.dtype))
        x += b.astype(x.dtype)
        x = ops.apply_activation(x, activation=self.activation)
        if self.clip_conv is not None:
            x = jnp.clip(x, -self.clip_conv, self.clip_conv)
        if y is not None:
            x += y
        return x


class DiscriminatorLayer(nn.Module):
    """
    Discriminator Layer.

    Attributes:
        fmaps (int): Number of output channels of the convolution.
        kernel (int): Kernel size of the convolution.
        use_bias (bool): If True, use bias.
        down (bool): If True, downsample the spatial resolution.
        resample_kernel (Tuple): Kernel that is used for FIR filter.
        activation (str): Activation function: 'relu', 'lrelu', etc.
        layer_name (str): Layer name.
        param_dict (h5py.Group): Parameter dict with pretrained parameters.
        lr_multiplier (float): Learning rate multiplier.
        clip_conv (float): Clip the output of convolution layers to [-clip_conv, +clip_conv], None = disable clipping.
        dtype (str): Data dtype.
        rng (jax.random.PRNGKey): PRNG for initialization.
    """
    fmaps: int
    kernel: int=3
    use_bias: bool=True
    down: bool=False
    resample_kernel: Tuple=None
    activation: str='leaky_relu'
    layer_name: str=None
    param_dict: h5py.Group=None
    lr_multiplier: float=1
    clip_conv: float=None
    dtype: str='float32'
    rng: Any=random.PRNGKey(0)

    @nn.compact
    def __call__(self, x):
        """
        Run Discriminator Layer.

        Args:
            x (tensor): Input tensor of shape [N, H, W, C].

        Returns:
            (tensor): Output tensor of shape [N, H, W, fmaps].
        """
        w_shape = [self.kernel, self.kernel, x.shape[3], self.fmaps]
        if self.use_bias:
            w, b = ops.get_weight(w_shape, self.lr_multiplier, self.use_bias, self.param_dict, self.layer_name, self.rng)
        else:
            w = ops.get_weight(w_shape, self.lr_multiplier, self.use_bias, self.param_dict, self.layer_name, self.rng)

        w = self.param(name='weight', init_fn=lambda *_ : w)
        w = ops.equalize_lr_weight(w, self.lr_multiplier)
        if self.use_bias:
            b = self.param(name='bias', init_fn=lambda *_ : b)
            b = ops.equalize_lr_bias(b, self.lr_multiplier)

        x = x.astype(self.dtype)
        x = ops.conv2d(x, w, down=self.down, resample_kernel=self.resample_kernel)
        if self.use_bias: x += b.astype(x.dtype)
        x = ops.apply_activation(x, activation=self.activation)
        if self.clip_conv is not None:
            x = jnp.clip(x, -self.clip_conv, self.clip_conv)
        return x


class DiscriminatorBlock(nn.Module):
    """
    Discriminator Block.

    Attributes:
        res (int): Resolution (log2) of the current block.
        kernel (int): Kernel size of the convolution.
        resample_kernel (Tuple): Kernel that is used for FIR filter.
        activation (str): Activation function: 'relu', 'lrelu', etc.
        param_dict (h5py.Group): Parameter dict with pretrained parameters.
        lr_multiplier (float): Learning rate multiplier.
        architecture (str): Architecture: 'orig', 'resnet'.
        nf (Callable): Callable that returns the number of feature maps for a given layer.
        clip_conv (float): Clip the output of convolution layers to [-clip_conv, +clip_conv], None = disable clipping.
        dtype (str): Data dtype.
        rng (jax.random.PRNGKey): Random seed for initialization.
    """
    res: int
    kernel: int=3
    resample_kernel: Tuple=(1, 3, 3, 1)
    activation: str='leaky_relu'
    param_dict: Any=None
    lr_multiplier: float=1
    architecture: str='resnet'
    nf: Callable=None
    clip_conv: float=None
    dtype: str='float32'
    rng: Any=random.PRNGKey(0)

    @nn.compact
    def __call__(self, x):
        """
        Run Discriminator Block.

        Args:
            x (tensor): Input tensor of shape [N, H, W, C].

        Returns:
            (tensor): Output tensor of shape [N, H, W, fmaps].
        """
        init_rng = self.rng
        x = x.astype(self.dtype)
        residual = x
        for i in range(2):
            init_rng, init_key = random.split(init_rng)
            x = DiscriminatorLayer(fmaps=self.nf(self.res - (i + 1)),
                                   kernel=self.kernel,
                                   down=i == 1,
                                   resample_kernel=self.resample_kernel if i == 1 else None,
                                   activation=self.activation,
                                   layer_name=f'conv{i}',
                                   param_dict=self.param_dict,
                                   lr_multiplier=self.lr_multiplier,
                                   clip_conv=self.clip_conv,
                                   dtype=self.dtype,
                                   rng=init_key)(x)

        if self.architecture == 'resnet':
            init_rng, init_key = random.split(init_rng)
            residual = DiscriminatorLayer(fmaps=self.nf(self.res - 2),
                                          kernel=1,
                                          use_bias=False,
                                          down=True,
                                          resample_kernel=self.resample_kernel,
                                          activation='linear',
                                          layer_name='skip',
                                          param_dict=self.param_dict,
                                          lr_multiplier=self.lr_multiplier,
                                          dtype=self.dtype,
                                          rng=init_key)(residual)

            x = (x + residual) * np.sqrt(0.5, dtype=x.dtype)
        return x


class Discriminator(nn.Module):
    """
    Discriminator.

    Attributes:
        resolution (int): Input resolution. Overridden based on dataset.
        num_channels (int): Number of input color channels. Overridden based on dataset.
        c_dim (int): Dimensionality of the labels (c), 0 if no labels. Overridden based on dataset.
        fmap_base (int): Overall multiplier for the number of feature maps.
        fmap_decay (int): Log2 feature map reduction when doubling the resolution.
        fmap_min (int): Minimum number of feature maps in any layer.
        fmap_max (int): Maximum number of feature maps in any layer.
        mapping_layers (int): Number of additional mapping layers for the conditioning labels.
        mapping_fmaps (int): Number of activations in the mapping layers, None = default.
        mapping_lr_multiplier (float): Learning rate multiplier for the mapping layers.
        architecture (str): Architecture: 'orig', 'resnet'.
        activation (str): Activation function: 'relu', 'leaky_relu', etc.
        mbstd_group_size (int): Group size for the minibatch standard deviation layer, None = entire minibatch.
        mbstd_num_features (int): Number of features for the minibatch standard deviation layer, 0 = disable.
        resample_kernel (Tuple): Low-pass filter to apply when resampling activations, None = box filter.
        num_fp16_res (int): Use float16 for the 'num_fp16_res' highest resolutions.
        clip_conv (float): Clip the output of convolution layers to [-clip_conv, +clip_conv], None = disable clipping.
        pretrained (str): Use pretrained model, None for random initialization.
        ckpt_dir (str): Directory to which the pretrained weights are downloaded. If None, a temp directory will be used.
        dtype (str): Data type.
        rng (jax.random.PRNGKey): PRNG for initialization.
    """
    # Input dimensions.
    resolution: int=1024
    num_channels: int=3
    c_dim: int=0

    # Capacity.
    fmap_base: int=16384
    fmap_decay: int=1
    fmap_min: int=1
    fmap_max: int=512

    # Internal details.
    mapping_layers: int=0
    mapping_fmaps: int=None
    mapping_lr_multiplier: float=0.1
    architecture: str='resnet'
    activation: str='leaky_relu'
    mbstd_group_size: int=None
    mbstd_num_features: int=1
    resample_kernel: Tuple=(1, 3, 3, 1)
    num_fp16_res: int=0
    clip_conv: float=None

    # Pretraining
    pretrained: str=None
    ckpt_dir: str=None

    dtype: str='float32'
    rng: Any=random.PRNGKey(0)

    def setup(self):
        self.resolution_ = self.resolution
        self.c_dim_ = self.c_dim
        self.architecture_ = self.architecture
        self.mbstd_group_size_ = self.mbstd_group_size
        self.param_dict = None
        if self.pretrained is not None:
            assert self.pretrained in URLS.keys(), f'Pretrained model not available: {self.pretrained}'
            ckpt_file = utils.download(self.ckpt_dir, URLS[self.pretrained])
            self.param_dict = h5py.File(ckpt_file, 'r')['discriminator']
            self.resolution_ = RESOLUTION[self.pretrained]
            self.architecture_ = ARCHITECTURE[self.pretrained]
            self.mbstd_group_size_ = MBSTD_GROUP_SIZE[self.pretrained]
            self.c_dim_ = C_DIM[self.pretrained]

        assert self.architecture in ['orig', 'resnet']

    @nn.compact
    def __call__(self, x, c=None):
        """
        Run Discriminator.

        Args:
            x (tensor): Input image of shape [N, H, W, num_channels].
            c (tensor): Input labels, shape [N, c_dim].

        Returns:
            (tensor): Output tensor of shape [N, 1].
        """
        resolution_log2 = int(np.log2(self.resolution_))
        assert self.resolution_ == 2**resolution_log2 and self.resolution_ >= 4
        def nf(stage): return np.clip(int(self.fmap_base / (2.0 ** (stage * self.fmap_decay))), self.fmap_min, self.fmap_max)
        if self.mapping_fmaps is None:
            mapping_fmaps = nf(0)
        else:
            mapping_fmaps = self.mapping_fmaps

        init_rng = self.rng
        # Label embedding and mapping.
        if self.c_dim_ > 0:
            c = ops.LinearLayer(in_features=self.c_dim_,
                                out_features=mapping_fmaps,
                                lr_multiplier=self.mapping_lr_multiplier,
                                param_dict=self.param_dict,
                                layer_name='label_embedding',
                                dtype=self.dtype,
                                rng=init_rng)(c)

            c = ops.normalize_2nd_moment(c)
            for i in range(self.mapping_layers):
                init_rng, init_key = random.split(init_rng)
                c = ops.LinearLayer(in_features=self.c_dim_,
                                    out_features=mapping_fmaps,
                                    lr_multiplier=self.mapping_lr_multiplier,
                                    param_dict=self.param_dict,
                                    layer_name=f'fc{i}',
                                    dtype=self.dtype,
                                    rng=init_key)(c)

        # Layers for >=8x8 resolutions.
        y = None
        for res in range(resolution_log2, 2, -1):
            res_str = f'block_{2**res}x{2**res}'
            if res == resolution_log2:
                init_rng, init_key = random.split(init_rng)
                x = FromRGBLayer(fmaps=nf(res - 1),
                                 kernel=1,
                                 activation=self.activation,
                                 param_dict=self.param_dict[res_str] if self.param_dict is not None else None,
                                 clip_conv=self.clip_conv,
                                 dtype=self.dtype if res >= resolution_log2 + 1 - self.num_fp16_res else 'float32',
                                 rng=init_key)(x, y)

            init_rng, init_key = random.split(init_rng)
            x = DiscriminatorBlock(res=res,
                                   kernel=3,
                                   resample_kernel=self.resample_kernel,
                                   activation=self.activation,
                                   param_dict=self.param_dict[res_str] if self.param_dict is not None else None,
                                   architecture=self.architecture_,
                                   nf=nf,
                                   clip_conv=self.clip_conv,
                                   dtype=self.dtype if res >= resolution_log2 + 1 - self.num_fp16_res else 'float32',
                                   rng=init_key)(x)

        # Layers for 4x4 resolution.
        dtype = jnp.float32
        x = x.astype(dtype)
        if self.mbstd_num_features > 0:
            x = ops.minibatch_stddev_layer(x, self.mbstd_group_size_, self.mbstd_num_features)
        init_rng, init_key = random.split(init_rng)
        x = DiscriminatorLayer(fmaps=nf(1),
                               kernel=3,
                               use_bias=True,
                               activation=self.activation,
                               layer_name='conv0',
                               param_dict=self.param_dict['block_4x4'] if self.param_dict is not None else None,
                               clip_conv=self.clip_conv,
                               dtype=dtype,
                               rng=init_rng)(x)

        # Switch to NCHW so that the pretrained weights still work after reshaping.
        x = jnp.transpose(x, axes=(0, 3, 1, 2))
        x = jnp.reshape(x, newshape=(-1, x.shape[1] * x.shape[2] * x.shape[3]))

        init_rng, init_key = random.split(init_rng)
        x = ops.LinearLayer(in_features=x.shape[1],
                            out_features=nf(0),
                            activation=self.activation,
                            param_dict=self.param_dict['block_4x4'] if self.param_dict is not None else None,
                            layer_name='fc0',
                            dtype=dtype,
                            rng=init_key)(x)

        # Output layer.
        init_rng, init_key = random.split(init_rng)
        x = ops.LinearLayer(in_features=x.shape[1],
                            out_features=1 if self.c_dim_ == 0 else mapping_fmaps,
                            param_dict=self.param_dict,
                            layer_name='output',
                            dtype=dtype,
                            rng=init_key)(x)

        if self.c_dim_ > 0:
            x = jnp.sum(x * c, axis=1, keepdims=True) / jnp.sqrt(mapping_fmaps)
        return x
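A hedged usage sketch of scoring images with a pretrained discriminator (the dataset name, input values, and init/apply pattern are assumptions; the 32x32 resolution and 10-class conditioning come from the RESOLUTION and C_DIM tables above):

import jax
import jax.numpy as jnp
from stylegan2 import Discriminator

disc = Discriminator(pretrained='cifar10')               # downloads weights on first use
images = jnp.zeros((1, 32, 32, 3))                       # [N, H, W, C] placeholder input
labels = jax.nn.one_hot(jnp.array([3]), num_classes=10)  # [N, c_dim] conditioning label
params = disc.init(jax.random.PRNGKey(0), images, labels)
scores = disc.apply(params, images, labels)              # realness logits, shape [1, 1]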
stylegan2/generator.py
ADDED
@@ -0,0 +1,713 @@
import numpy as np
import jax
from jax import random
import jax.numpy as jnp
import flax.linen as nn
from typing import Any, Tuple, List
import h5py
from . import ops
from stylegan2 import utils


URLS = {'afhqcat': 'https://www.dropbox.com/s/lv1r0bwvg5ta51f/stylegan2_generator_afhqcat.h5?dl=1',
        'afhqdog': 'https://www.dropbox.com/s/px6ply9hv0vdwen/stylegan2_generator_afhqdog.h5?dl=1',
        'afhqwild': 'https://www.dropbox.com/s/p1slbtmzhcnw9q8/stylegan2_generator_afhqwild.h5?dl=1',
        'brecahad': 'https://www.dropbox.com/s/28uykhj0ku6hwg2/stylegan2_generator_brecahad.h5?dl=1',
        'car': 'https://www.dropbox.com/s/67o834b6xfg9x1q/stylegan2_generator_car.h5?dl=1',
        'cat': 'https://www.dropbox.com/s/cu9egc4e74e1nig/stylegan2_generator_cat.h5?dl=1',
        'church': 'https://www.dropbox.com/s/kwvokfwbrhsn58m/stylegan2_generator_church.h5?dl=1',
        'cifar10': 'https://www.dropbox.com/s/h1kmymjzfwwkftk/stylegan2_generator_cifar10.h5?dl=1',
        'ffhq': 'https://www.dropbox.com/s/e8de1peq7p8gq9d/stylegan2_generator_ffhq.h5?dl=1',
        'horse': 'https://www.dropbox.com/s/3e5bimv2d41bc13/stylegan2_generator_horse.h5?dl=1',
        'metfaces': 'https://www.dropbox.com/s/75klr5k6mgm7qdy/stylegan2_generator_metfaces.h5?dl=1'}

RESOLUTION = {'metfaces': 1024,
              'ffhq': 1024,
              'church': 256,
              'cat': 256,
              'horse': 256,
              'car': 512,
              'brecahad': 512,
              'afhqwild': 512,
              'afhqdog': 512,
              'afhqcat': 512,
              'cifar10': 32}

C_DIM = {'metfaces': 0,
         'ffhq': 0,
         'church': 0,
         'cat': 0,
         'horse': 0,
         'car': 0,
         'brecahad': 0,
         'afhqwild': 0,
         'afhqdog': 0,
         'afhqcat': 0,
         'cifar10': 10}

NUM_MAPPING_LAYERS = {'metfaces': 8,
                      'ffhq': 8,
                      'church': 8,
                      'cat': 8,
                      'horse': 8,
                      'car': 8,
                      'brecahad': 8,
                      'afhqwild': 8,
                      'afhqdog': 8,
                      'afhqcat': 8,
                      'cifar10': 2}


class MappingNetwork(nn.Module):
    """
    Mapping Network.

    Attributes:
        z_dim (int): Input latent (Z) dimensionality.
        c_dim (int): Conditioning label (C) dimensionality, 0 = no label.
        w_dim (int): Intermediate latent (W) dimensionality.
        embed_features (int): Label embedding dimensionality, None = same as w_dim.
        layer_features (int): Number of intermediate features in the mapping layers, None = same as w_dim.
        num_ws (int): Number of intermediate latents to output, None = do not broadcast.
        num_layers (int): Number of mapping layers.
        pretrained (str): Which pretrained model to use, None for random initialization.
        param_dict (h5py.Group): Parameter dict with pretrained parameters. If not None, 'pretrained' will be ignored.
        ckpt_dir (str): Directory to which the pretrained weights are downloaded. If None, a temp directory will be used.
        activation (str): Activation function: 'relu', 'lrelu', etc.
        lr_multiplier (float): Learning rate multiplier for the mapping layers.
        w_avg_beta (float): Decay for tracking the moving average of W during training, None = do not track.
        dtype (str): Data type.
        rng (jax.random.PRNGKey): PRNG for initialization.
    """
    # Dimensionality
    z_dim: int=512
    c_dim: int=0
    w_dim: int=512
    embed_features: int=None
    layer_features: int=512

    # Layers
    num_ws: int=18
    num_layers: int=8

    # Pretrained
    pretrained: str=None
    param_dict: h5py.Group=None
    ckpt_dir: str=None

    # Internal details
    activation: str='leaky_relu'
    lr_multiplier: float=0.01
    w_avg_beta: float=0.995
    dtype: str='float32'
    rng: Any=random.PRNGKey(0)

    def setup(self):
        self.embed_features_ = self.embed_features
        self.c_dim_ = self.c_dim
        self.layer_features_ = self.layer_features
        self.num_layers_ = self.num_layers
        self.param_dict_ = self.param_dict

        if self.pretrained is not None and self.param_dict is None:
            assert self.pretrained in URLS.keys(), f'Pretrained model not available: {self.pretrained}'
            ckpt_file = utils.download(self.ckpt_dir, URLS[self.pretrained])
            self.param_dict_ = h5py.File(ckpt_file, 'r')['mapping_network']
            self.c_dim_ = C_DIM[self.pretrained]
            self.num_layers_ = NUM_MAPPING_LAYERS[self.pretrained]

        if self.embed_features_ is None:
            self.embed_features_ = self.w_dim
        if self.c_dim_ == 0:
            self.embed_features_ = 0
        if self.layer_features_ is None:
            self.layer_features_ = self.w_dim

        if self.param_dict_ is not None and 'w_avg' in self.param_dict_:
            self.w_avg = self.variable('moving_stats', 'w_avg', lambda *_ : jnp.array(self.param_dict_['w_avg']), [self.w_dim])
        else:
            self.w_avg = self.variable('moving_stats', 'w_avg', jnp.zeros, [self.w_dim])

    @nn.compact
    def __call__(self, z, c=None, truncation_psi=1, truncation_cutoff=None, skip_w_avg_update=False, train=True):
        """
        Run Mapping Network.

        Args:
            z (tensor): Input noise, shape [N, z_dim].
            c (tensor): Input labels, shape [N, c_dim].
            truncation_psi (float): Controls truncation (trading off variation for quality). If 1, truncation is disabled.
            truncation_cutoff (int): Controls truncation. None = disable.
            skip_w_avg_update (bool): If True, skip updating the exponential moving average of W.
            train (bool): Training mode.

        Returns:
            (tensor): Intermediate latent W.
        """
        init_rng = self.rng
        # Embed, normalize, and concat inputs.
        x = None
        if self.z_dim > 0:
            x = ops.normalize_2nd_moment(z.astype(jnp.float32))
        if self.c_dim_ > 0:
            # Conditioning label
            y = ops.LinearLayer(in_features=self.c_dim_,
                                out_features=self.embed_features_,
                                use_bias=True,
                                lr_multiplier=self.lr_multiplier,
                                activation='linear',
                                param_dict=self.param_dict_,
                                layer_name='label_embedding',
                                dtype=self.dtype,
                                rng=init_rng)(c.astype(jnp.float32))

            y = ops.normalize_2nd_moment(y)
            x = jnp.concatenate((x, y), axis=1) if x is not None else y

        # Main layers.
        for i in range(self.num_layers_):
            init_rng, init_key = random.split(init_rng)
            x = ops.LinearLayer(in_features=x.shape[1],
                                out_features=self.layer_features_,
                                use_bias=True,
                                lr_multiplier=self.lr_multiplier,
                                activation=self.activation,
                                param_dict=self.param_dict_,
                                layer_name=f'fc{i}',
                                dtype=self.dtype,
                                rng=init_key)(x)

        # Update moving average of W.
        if self.w_avg_beta is not None and train and not skip_w_avg_update:
            self.w_avg.value = self.w_avg_beta * self.w_avg.value + (1 - self.w_avg_beta) * jnp.mean(x, axis=0)

        # Broadcast.
        if self.num_ws is not None:
            x = jnp.repeat(jnp.expand_dims(x, axis=-2), repeats=self.num_ws, axis=-2)

        # Apply truncation.
        if truncation_psi != 1:
            assert self.w_avg_beta is not None
            if self.num_ws is None or truncation_cutoff is None:
                x = truncation_psi * x + (1 - truncation_psi) * self.w_avg.value
            else:
                # JAX arrays are immutable, so the sliced update uses .at[].set()
                # instead of in-place item assignment.
                x = x.at[:, :truncation_cutoff].set(
                    truncation_psi * x[:, :truncation_cutoff] + (1 - truncation_psi) * self.w_avg.value)

        return x

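For reference, a minimal sketch of the truncation step applied above (shapes and the psi value are illustrative assumptions, not part of the diff):

import jax.numpy as jnp

w = jnp.zeros((4, 16, 512))            # [N, num_ws, w_dim] from the mapping network
w_avg = jnp.zeros((512,))              # tracked moving average of W
psi = 0.7                              # truncation strength; 1.0 disables truncation
w_trunc = psi * w + (1 - psi) * w_avg  # blend each latent toward the average latent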
class SynthesisLayer(nn.Module):
    """
    Synthesis Layer.

    Attributes:
        fmaps (int): Number of output channels of the modulated convolution.
        kernel (int): Kernel size of the modulated convolution.
        layer_idx (int): Layer index. Used to access the latent code for a specific layer.
        res (int): Resolution (log2) of the current layer.
        lr_multiplier (float): Learning rate multiplier.
        up (bool): If True, upsample the spatial resolution.
        activation (str): Activation function: 'relu', 'lrelu', etc.
        use_noise (bool): If True, add spatial-specific noise.
        resample_kernel (Tuple): Kernel that is used for FIR filter.
        fused_modconv (bool): If True, perform modulation, convolution, and demodulation as a single fused operation.
        param_dict (h5py.Group): Parameter dict with pretrained parameters. If not None, 'pretrained' will be ignored.
        clip_conv (float): Clip the output of convolution layers to [-clip_conv, +clip_conv], None = disable clipping.
        dtype (str): Data dtype.
        rng (jax.random.PRNGKey): PRNG for initialization.
    """
    fmaps: int
    kernel: int
    layer_idx: int
    res: int
    lr_multiplier: float=1
    up: bool=False
    activation: str='leaky_relu'
    use_noise: bool=True
    resample_kernel: Tuple=(1, 3, 3, 1)
    fused_modconv: bool=False
    param_dict: h5py.Group=None
    clip_conv: float=None
    dtype: str='float32'
    rng: Any=random.PRNGKey(0)

    def setup(self):
        if self.param_dict is not None:
            noise_const = jnp.array(self.param_dict['noise_const'], dtype=self.dtype)
        else:
            noise_const = random.normal(self.rng, shape=(1, 2 ** self.res, 2 ** self.res, 1), dtype=self.dtype)
        self.noise_const = self.variable('noise_consts', 'noise_const', lambda *_: noise_const)

    @nn.compact
    def __call__(self, x, dlatents, noise_mode='random', rng=random.PRNGKey(0)):
        """
        Run Synthesis Layer.

        Args:
            x (tensor): Input tensor of the shape [N, H, W, C].
            dlatents (tensor): Intermediate latents (W) of shape [N, num_ws, w_dim].
            noise_mode (str): Noise type.
                              - 'const': Constant noise.
                              - 'random': Random noise.
                              - 'none': No noise.
            rng (jax.random.PRNGKey): PRNG for spatial noise.

        Returns:
            (tensor): Output tensor of shape [N, H', W', fmaps].
        """
        assert noise_mode in ['const', 'random', 'none']

        linear_rng, conv_rng = random.split(self.rng)
        # Affine transformation to obtain style variable.
        s = ops.LinearLayer(in_features=dlatents[:, self.layer_idx].shape[1],
                            out_features=x.shape[3],
                            use_bias=True,
                            bias_init=1,
                            lr_multiplier=self.lr_multiplier,
                            param_dict=self.param_dict,
                            layer_name='affine',
                            dtype=self.dtype,
                            rng=linear_rng)(dlatents[:, self.layer_idx])

        # Noise variables.
        if self.param_dict is None:
            noise_strength = jnp.zeros(())
        else:
            noise_strength = jnp.array(self.param_dict['noise_strength'])
        noise_strength = self.param(name='noise_strength', init_fn=lambda *_ : noise_strength)

        # Weight and bias for convolution operation.
        w_shape = [self.kernel, self.kernel, x.shape[3], self.fmaps]
        w, b = ops.get_weight(w_shape, self.lr_multiplier, True, self.param_dict, 'conv', conv_rng)
        w = self.param(name='weight', init_fn=lambda *_ : w)
        b = self.param(name='bias', init_fn=lambda *_ : b)
        w = ops.equalize_lr_weight(w, self.lr_multiplier)
        b = ops.equalize_lr_bias(b, self.lr_multiplier)

        x = ops.modulated_conv2d_layer(x=x,
                                       w=w,
                                       s=s,
                                       fmaps=self.fmaps,
                                       kernel=self.kernel,
                                       up=self.up,
                                       resample_kernel=self.resample_kernel,
                                       fused_modconv=self.fused_modconv)

        if self.use_noise and noise_mode != 'none':
            if noise_mode == 'const':
                noise = self.noise_const.value
            elif noise_mode == 'random':
                noise = random.normal(rng, shape=(x.shape[0], x.shape[1], x.shape[2], 1), dtype=self.dtype)
            x += noise * noise_strength.astype(self.dtype)
        x += b.astype(x.dtype)
        x = ops.apply_activation(x, activation=self.activation)
        if self.clip_conv is not None:
            x = jnp.clip(x, -self.clip_conv, self.clip_conv)
        return x


class ToRGBLayer(nn.Module):
    """
    To RGB Layer.

    Attributes:
        fmaps (int): Number of output channels of the modulated convolution.
        layer_idx (int): Layer index. Used to access the latent code for a specific layer.
        kernel (int): Kernel size of the modulated convolution.
        lr_multiplier (float): Learning rate multiplier.
        fused_modconv (bool): If True, perform modulation, convolution, and demodulation as a single fused operation.
        param_dict (h5py.Group): Parameter dict with pretrained parameters. If not None, 'pretrained' will be ignored.
        clip_conv (float): Clip the output of convolution layers to [-clip_conv, +clip_conv], None = disable clipping.
        dtype (str): Data dtype.
        rng (jax.random.PRNGKey): PRNG for initialization.
    """
    fmaps: int
    layer_idx: int
    kernel: int=1
    lr_multiplier: float=1
    fused_modconv: bool=False
    param_dict: h5py.Group=None
    clip_conv: float=None
    dtype: str='float32'
    rng: Any=random.PRNGKey(0)

    @nn.compact
    def __call__(self, x, y, dlatents):
        """
        Run To RGB Layer.

        Args:
            x (tensor): Input tensor of shape [N, H, W, C].
            y (tensor): Image of shape [N, H', W', fmaps].
            dlatents (tensor): Intermediate latents (W) of shape [N, num_ws, w_dim].

        Returns:
            (tensor): Output tensor of shape [N, H', W', fmaps].
        """
        # Affine transformation to obtain style variable.
        s = ops.LinearLayer(in_features=dlatents[:, self.layer_idx].shape[1],
                            out_features=x.shape[3],
                            use_bias=True,
                            bias_init=1,
                            lr_multiplier=self.lr_multiplier,
                            param_dict=self.param_dict,
                            layer_name='affine',
                            dtype=self.dtype,
                            rng=self.rng)(dlatents[:, self.layer_idx])

        # Weight and bias for convolution operation.
        w_shape = [self.kernel, self.kernel, x.shape[3], self.fmaps]
        w, b = ops.get_weight(w_shape, self.lr_multiplier, True, self.param_dict, 'conv', self.rng)
        w = self.param(name='weight', init_fn=lambda *_ : w)
        b = self.param(name='bias', init_fn=lambda *_ : b)
        w = ops.equalize_lr_weight(w, self.lr_multiplier)
        b = ops.equalize_lr_bias(b, self.lr_multiplier)

        x = ops.modulated_conv2d_layer(x, w, s, fmaps=self.fmaps, kernel=self.kernel, demodulate=False, fused_modconv=self.fused_modconv)
        x += b.astype(x.dtype)
        x = ops.apply_activation(x, activation='linear')
        if self.clip_conv is not None:
            x = jnp.clip(x, -self.clip_conv, self.clip_conv)
        if y is not None:
            x += y.astype(jnp.float32)
        return x


class SynthesisBlock(nn.Module):
    """
+
Synthesis Block.
|
| 379 |
+
|
| 380 |
+
Attributes:
|
| 381 |
+
fmaps (int): Number of output channels of the modulated convolution.
|
| 382 |
+
res (int): Resolution (log2) of the current block.
|
| 383 |
+
num_layers (int): Number of layers in the current block.
|
| 384 |
+
num_channels (int): Number of output color channels.
|
| 385 |
+
lr_multiplier (float): Learning rate multiplier.
|
| 386 |
+
activation (str): Activation function: 'relu', 'lrelu', etc.
|
| 387 |
+
use_noise (bool): If True, add spatial-specific noise.
|
| 388 |
+
resample_kernel (Tuple): Kernel that is used for FIR filter.
|
| 389 |
+
fused_modconv (bool): If True, Perform modulation, convolution, and demodulation as a single fused operation.
|
| 390 |
+
param_dict (h5py.Group): Parameter dict with pretrained parameters. If not None, 'pretrained' will be ignored.
|
| 391 |
+
clip_conv (float): Clip the output of convolution layers to [-clip_conv, +clip_conv], None = disable clipping.
|
| 392 |
+
dtype (str): Data dtype.
|
| 393 |
+
rng (jax.random.PRNGKey): PRNG for initialization.
|
| 394 |
+
"""
|
| 395 |
+
fmaps: int
|
| 396 |
+
res: int
|
| 397 |
+
num_layers: int=2
|
| 398 |
+
num_channels: int=3
|
| 399 |
+
lr_multiplier: float=1
|
| 400 |
+
activation: str='leaky_relu'
|
| 401 |
+
use_noise: bool=True
|
| 402 |
+
resample_kernel: Tuple=(1, 3, 3, 1)
|
| 403 |
+
fused_modconv: bool=False
|
| 404 |
+
param_dict: h5py.Group=None
|
| 405 |
+
clip_conv: float=None
|
| 406 |
+
dtype: str='float32'
|
| 407 |
+
rng: Any=random.PRNGKey(0)
|
| 408 |
+
|
| 409 |
+
@nn.compact
|
| 410 |
+
def __call__(self, x, y, dlatents_in, noise_mode='random', rng=random.PRNGKey(0)):
|
| 411 |
+
"""
|
| 412 |
+
Run Synthesis Block.
|
| 413 |
+
|
| 414 |
+
Args:
|
| 415 |
+
x (tensor): Input tensor of shape [N, H, W, C].
|
| 416 |
+
y (tensor): Image of shape [N, H', W', fmaps].
|
| 417 |
+
dlatents (tensor): Intermediate latents (W) of shape [N, num_ws, w_dim].
|
| 418 |
+
noise_mode (str): Noise type.
|
| 419 |
+
- 'const': Constant noise.
|
| 420 |
+
- 'random': Random noise.
|
| 421 |
+
- 'none': No noise.
|
| 422 |
+
rng (jax.random.PRNGKey): PRNG for spatialwise noise.
|
| 423 |
+
|
| 424 |
+
Returns:
|
| 425 |
+
(tensor): Output tensor of shape [N, H', W', fmaps].
|
| 426 |
+
"""
|
| 427 |
+
x = x.astype(self.dtype)
|
| 428 |
+
init_rng = self.rng
|
| 429 |
+
for i in range(self.num_layers):
|
| 430 |
+
init_rng, init_key = random.split(init_rng)
|
| 431 |
+
x = SynthesisLayer(fmaps=self.fmaps,
|
| 432 |
+
kernel=3,
|
| 433 |
+
layer_idx=self.res * 2 - (5 - i) if self.res > 2 else 0,
|
| 434 |
+
res=self.res,
|
| 435 |
+
lr_multiplier=self.lr_multiplier,
|
| 436 |
+
up=i == 0 and self.res != 2,
|
| 437 |
+
activation=self.activation,
|
| 438 |
+
use_noise=self.use_noise,
|
| 439 |
+
resample_kernel=self.resample_kernel,
|
| 440 |
+
fused_modconv=self.fused_modconv,
|
| 441 |
+
param_dict=self.param_dict[f'layer{i}'] if self.param_dict is not None else None,
|
| 442 |
+
dtype=self.dtype,
|
| 443 |
+
rng=init_key)(x, dlatents_in, noise_mode, rng)
|
| 444 |
+
|
| 445 |
+
if self.num_layers == 2:
|
| 446 |
+
k = ops.setup_filter(self.resample_kernel)
|
| 447 |
+
y = ops.upsample2d(y, f=k, up=2)
|
| 448 |
+
|
| 449 |
+
init_rng, init_key = random.split(init_rng)
|
| 450 |
+
y = ToRGBLayer(fmaps=self.num_channels,
|
| 451 |
+
layer_idx=self.res * 2 - 3,
|
| 452 |
+
lr_multiplier=self.lr_multiplier,
|
| 453 |
+
param_dict=self.param_dict['torgb'] if self.param_dict is not None else None,
|
| 454 |
+
dtype=self.dtype,
|
| 455 |
+
rng=init_key)(x, y, dlatents_in)
|
| 456 |
+
return x, y
|
| 457 |
+
|
| 458 |
+
|
| 459 |
+
class SynthesisNetwork(nn.Module):
|
| 460 |
+
"""
|
| 461 |
+
Synthesis Network.
|
| 462 |
+
|
| 463 |
+
Attributes:
|
| 464 |
+
resolution (int): Output resolution.
|
| 465 |
+
num_channels (int): Number of output color channels.
|
| 466 |
+
w_dim (int): Input latent (Z) dimensionality.
|
| 467 |
+
fmap_base (int): Overall multiplier for the number of feature maps.
|
| 468 |
+
fmap_decay (int): Log2 feature map reduction when doubling the resolution.
|
| 469 |
+
fmap_min (int): Minimum number of feature maps in any layer.
|
| 470 |
+
fmap_max (int): Maximum number of feature maps in any layer.
|
| 471 |
+
fmap_const (int): Number of feature maps in the constant input layer. None = default.
|
| 472 |
+
pretrained (str): Which pretrained model to use, None for random initialization.
|
| 473 |
+
param_dict (h5py.Group): Parameter dict with pretrained parameters. If not None, 'pretrained' will be ignored.
|
| 474 |
+
ckpt_dir (str): Directory to which the pretrained weights are downloaded. If None, a temp directory will be used.
|
| 475 |
+
activation (str): Activation function: 'relu', 'lrelu', etc.
|
| 476 |
+
use_noise (bool): If True, add spatial-specific noise.
|
| 477 |
+
resample_kernel (Tuple): Kernel that is used for FIR filter.
|
| 478 |
+
fused_modconv (bool): If True, Perform modulation, convolution, and demodulation as a single fused operation.
|
| 479 |
+
num_fp16_res (int): Use float16 for the 'num_fp16_res' highest resolutions.
|
| 480 |
+
clip_conv (float): Clip the output of convolution layers to [-clip_conv, +clip_conv], None = disable clipping.
|
| 481 |
+
dtype (str): Data type.
|
| 482 |
+
rng (jax.random.PRNGKey): PRNG for initialization.
|
| 483 |
+
"""
|
| 484 |
+
# Dimensionality
|
| 485 |
+
resolution: int=1024
|
| 486 |
+
num_channels: int=3
|
| 487 |
+
w_dim: int=512
|
| 488 |
+
|
| 489 |
+
# Capacity
|
| 490 |
+
fmap_base: int=16384
|
| 491 |
+
fmap_decay: int=1
|
| 492 |
+
fmap_min: int=1
|
| 493 |
+
fmap_max: int=512
|
| 494 |
+
fmap_const: int=None
|
| 495 |
+
|
| 496 |
+
# Pretraining
|
| 497 |
+
pretrained: str=None
|
| 498 |
+
param_dict: h5py.Group=None
|
| 499 |
+
ckpt_dir: str=None
|
| 500 |
+
|
| 501 |
+
# Internal details
|
| 502 |
+
activation: str='leaky_relu'
|
| 503 |
+
use_noise: bool=True
|
| 504 |
+
resample_kernel: Tuple=(1, 3, 3, 1)
|
| 505 |
+
fused_modconv: bool=False
|
| 506 |
+
num_fp16_res: int=0
|
| 507 |
+
clip_conv: float=None
|
| 508 |
+
dtype: str='float32'
|
| 509 |
+
rng: Any=random.PRNGKey(0)
|
| 510 |
+
|
| 511 |
+
def setup(self):
|
| 512 |
+
self.resolution_ = self.resolution
|
| 513 |
+
self.param_dict_ = self.param_dict
|
| 514 |
+
if self.pretrained is not None and self.param_dict is None:
|
| 515 |
+
assert self.pretrained in URLS.keys(), f'Pretrained model not available: {self.pretrained}'
|
| 516 |
+
ckpt_file = utils.download(self.ckpt_dir, URLS[self.pretrained])
|
| 517 |
+
self.param_dict_ = h5py.File(ckpt_file, 'r')['synthesis_network']
|
| 518 |
+
self.resolution_ = RESOLUTION[self.pretrained]
|
| 519 |
+
|
| 520 |
+
@nn.compact
|
| 521 |
+
def __call__(self, dlatents_in, noise_mode='random', rng=random.PRNGKey(0)):
|
| 522 |
+
"""
|
| 523 |
+
Run Synthesis Network.
|
| 524 |
+
|
| 525 |
+
Args:
|
| 526 |
+
dlatents_in (tensor): Intermediate latents (W) of shape [N, num_ws, w_dim].
|
| 527 |
+
noise_mode (str): Noise type.
|
| 528 |
+
- 'const': Constant noise.
|
| 529 |
+
- 'random': Random noise.
|
| 530 |
+
- 'none': No noise.
|
| 531 |
+
rng (jax.random.PRNGKey): PRNG for spatialwise noise.
|
| 532 |
+
|
| 533 |
+
Returns:
|
| 534 |
+
(tensor): Image of shape [N, H, W, num_channels].
|
| 535 |
+
"""
|
| 536 |
+
resolution_log2 = int(np.log2(self.resolution_))
|
| 537 |
+
assert self.resolution_ == 2 ** resolution_log2 and self.resolution_ >= 4
|
| 538 |
+
|
| 539 |
+
def nf(stage): return np.clip(int(self.fmap_base / (2.0 ** (stage * self.fmap_decay))), self.fmap_min, self.fmap_max)
|
| 540 |
+
num_layers = resolution_log2 * 2 - 2
|
| 541 |
+
|
| 542 |
+
fmaps = self.fmap_const if self.fmap_const is not None else nf(1)
|
| 543 |
+
|
| 544 |
+
if self.param_dict_ is None:
|
| 545 |
+
const = random.normal(self.rng, (1, 4, 4, fmaps), dtype=self.dtype)
|
| 546 |
+
else:
|
| 547 |
+
const = jnp.array(self.param_dict_['const'], dtype=self.dtype)
|
| 548 |
+
x = self.param(name='const', init_fn=lambda *_ : const)
|
| 549 |
+
x = jnp.repeat(x, repeats=dlatents_in.shape[0], axis=0)
|
| 550 |
+
|
| 551 |
+
y = None
|
| 552 |
+
|
| 553 |
+
dlatents_in = dlatents_in.astype(jnp.float32)
|
| 554 |
+
|
| 555 |
+
init_rng = self.rng
|
| 556 |
+
for res in range(2, resolution_log2 + 1):
|
| 557 |
+
init_rng, init_key = random.split(init_rng)
|
| 558 |
+
x, y = SynthesisBlock(fmaps=nf(res - 1),
|
| 559 |
+
res=res,
|
| 560 |
+
num_layers=1 if res == 2 else 2,
|
| 561 |
+
num_channels=self.num_channels,
|
| 562 |
+
activation=self.activation,
|
| 563 |
+
use_noise=self.use_noise,
|
| 564 |
+
resample_kernel=self.resample_kernel,
|
| 565 |
+
fused_modconv=self.fused_modconv,
|
| 566 |
+
param_dict=self.param_dict_[f'block_{2 ** res}x{2 ** res}'] if self.param_dict_ is not None else None,
|
| 567 |
+
clip_conv=self.clip_conv,
|
| 568 |
+
dtype=self.dtype if res > resolution_log2 - self.num_fp16_res else 'float32',
|
| 569 |
+
rng=init_key)(x, y, dlatents_in, noise_mode, rng)
|
| 570 |
+
|
| 571 |
+
return y
|
| 572 |
+
|
| 573 |
+
|
| 574 |
+
class Generator(nn.Module):
|
| 575 |
+
"""
|
| 576 |
+
Generator.
|
| 577 |
+
|
| 578 |
+
Attributes:
|
| 579 |
+
resolution (int): Output resolution.
|
| 580 |
+
num_channels (int): Number of output color channels.
|
| 581 |
+
z_dim (int): Input latent (Z) dimensionality.
|
| 582 |
+
c_dim (int): Conditioning label (C) dimensionality, 0 = no label.
|
| 583 |
+
w_dim (int): Intermediate latent (W) dimensionality.
|
| 584 |
+
mapping_layer_features (int): Number of intermediate features in the mapping layers, None = same as w_dim.
|
| 585 |
+
mapping_embed_features (int): Label embedding dimensionality, None = same as w_dim.
|
| 586 |
+
num_ws (int): Number of intermediate latents to output, None = do not broadcast.
|
| 587 |
+
num_mapping_layers (int): Number of mapping layers.
|
| 588 |
+
fmap_base (int): Overall multiplier for the number of feature maps.
|
| 589 |
+
fmap_decay (int): Log2 feature map reduction when doubling the resolution.
|
| 590 |
+
fmap_min (int): Minimum number of feature maps in any layer.
|
| 591 |
+
fmap_max (int): Maximum number of feature maps in any layer.
|
| 592 |
+
fmap_const (int): Number of feature maps in the constant input layer. None = default.
|
| 593 |
+
pretrained (str): Which pretrained model to use, None for random initialization.
|
| 594 |
+
ckpt_dir (str): Directory to which the pretrained weights are downloaded. If None, a temp directory will be used.
|
| 595 |
+
use_noise (bool): If True, add spatial-specific noise.
|
| 596 |
+
activation (str): Activation function: 'relu', 'lrelu', etc.
|
| 597 |
+
w_avg_beta (float): Decay for tracking the moving average of W during training, None = do not track.
|
| 598 |
+
mapping_lr_multiplier (float): Learning rate multiplier for the mapping network.
|
| 599 |
+
resample_kernel (Tuple): Kernel that is used for FIR filter.
|
| 600 |
+
fused_modconv (bool): If True, Perform modulation, convolution, and demodulation as a single fused operation.
|
| 601 |
+
num_fp16_res (int): Use float16 for the 'num_fp16_res' highest resolutions.
|
| 602 |
+
clip_conv (float): Clip the output of convolution layers to [-clip_conv, +clip_conv], None = disable clipping.
|
| 603 |
+
dtype (str): Data type.
|
| 604 |
+
rng (jax.random.PRNGKey): PRNG for initialization.
|
| 605 |
+
"""
|
| 606 |
+
# Dimensionality
|
| 607 |
+
resolution: int=1024
|
| 608 |
+
num_channels: int=3
|
| 609 |
+
z_dim: int=512
|
| 610 |
+
c_dim: int=0
|
| 611 |
+
w_dim: int=512
|
| 612 |
+
mapping_layer_features: int=512
|
| 613 |
+
mapping_embed_features: int=None
|
| 614 |
+
|
| 615 |
+
# Layers
|
| 616 |
+
num_ws: int=18
|
| 617 |
+
num_mapping_layers: int=8
|
| 618 |
+
|
| 619 |
+
# Capacity
|
| 620 |
+
fmap_base: int=16384
|
| 621 |
+
fmap_decay: int=1
|
| 622 |
+
fmap_min: int=1
|
| 623 |
+
fmap_max: int=512
|
| 624 |
+
fmap_const: int=None
|
| 625 |
+
|
| 626 |
+
# Pretraining
|
| 627 |
+
pretrained: str=None
|
| 628 |
+
ckpt_dir: str=None
|
| 629 |
+
|
| 630 |
+
# Internal details
|
| 631 |
+
use_noise: bool=True
|
| 632 |
+
activation: str='leaky_relu'
|
| 633 |
+
w_avg_beta: float=0.995
|
| 634 |
+
mapping_lr_multiplier: float=0.01
|
| 635 |
+
resample_kernel: Tuple=(1, 3, 3, 1)
|
| 636 |
+
fused_modconv: bool=False
|
| 637 |
+
num_fp16_res: int=0
|
| 638 |
+
clip_conv: float=None
|
| 639 |
+
dtype: str='float32'
|
| 640 |
+
rng: Any=random.PRNGKey(0)
|
| 641 |
+
|
| 642 |
+
def setup(self):
|
| 643 |
+
self.resolution_ = self.resolution
|
| 644 |
+
self.c_dim_ = self.c_dim
|
| 645 |
+
self.num_mapping_layers_ = self.num_mapping_layers
|
| 646 |
+
if self.pretrained is not None:
|
| 647 |
+
assert self.pretrained in URLS.keys(), f'Pretrained model not available: {self.pretrained}'
|
| 648 |
+
ckpt_file = utils.download(self.ckpt_dir, URLS[self.pretrained])
|
| 649 |
+
self.param_dict = h5py.File(ckpt_file, 'r')
|
| 650 |
+
self.resolution_ = RESOLUTION[self.pretrained]
|
| 651 |
+
self.c_dim_ = C_DIM[self.pretrained]
|
| 652 |
+
self.num_mapping_layers_ = NUM_MAPPING_LAYERS[self.pretrained]
|
| 653 |
+
else:
|
| 654 |
+
self.param_dict = None
|
| 655 |
+
self.init_rng_mapping, self.init_rng_synthesis = random.split(self.rng)
|
| 656 |
+
|
| 657 |
+
@nn.compact
|
| 658 |
+
def __call__(self, z, c=None, truncation_psi=1, truncation_cutoff=None, skip_w_avg_update=False, train=True, noise_mode='random', rng=random.PRNGKey(0)):
|
| 659 |
+
"""
|
| 660 |
+
Run Generator.
|
| 661 |
+
|
| 662 |
+
Args:
|
| 663 |
+
z (tensor): Input noise, shape [N, z_dim].
|
| 664 |
+
c (tensor): Input labels, shape [N, c_dim].
|
| 665 |
+
truncation_psi (float): Controls truncation (trading off variation for quality). If 1, truncation is disabled.
|
| 666 |
+
truncation_cutoff (int): Controls truncation. None = disable.
|
| 667 |
+
skip_w_avg_update (bool): If True, updates the exponential moving average of W.
|
| 668 |
+
train (bool): Training mode.
|
| 669 |
+
noise_mode (str): Noise type.
|
| 670 |
+
- 'const': Constant noise.
|
| 671 |
+
- 'random': Random noise.
|
| 672 |
+
- 'none': No noise.
|
| 673 |
+
rng (jax.random.PRNGKey): PRNG for spatialwise noise.
|
| 674 |
+
|
| 675 |
+
Returns:
|
| 676 |
+
(tensor): Image of shape [N, H, W, num_channels].
|
| 677 |
+
"""
|
| 678 |
+
dlatents_in = MappingNetwork(z_dim=self.z_dim,
|
| 679 |
+
c_dim=self.c_dim_,
|
| 680 |
+
w_dim=self.w_dim,
|
| 681 |
+
num_ws=self.num_ws,
|
| 682 |
+
num_layers=self.num_mapping_layers_,
|
| 683 |
+
embed_features=self.mapping_embed_features,
|
| 684 |
+
layer_features=self.mapping_layer_features,
|
| 685 |
+
activation=self.activation,
|
| 686 |
+
lr_multiplier=self.mapping_lr_multiplier,
|
| 687 |
+
w_avg_beta=self.w_avg_beta,
|
| 688 |
+
param_dict=self.param_dict['mapping_network'] if self.param_dict is not None else None,
|
| 689 |
+
dtype=self.dtype,
|
| 690 |
+
rng=self.init_rng_mapping,
|
| 691 |
+
name='mapping_network')(z, c, truncation_psi, truncation_cutoff, skip_w_avg_update, train)
|
| 692 |
+
|
| 693 |
+
x = SynthesisNetwork(resolution=self.resolution_,
|
| 694 |
+
num_channels=self.num_channels,
|
| 695 |
+
w_dim=self.w_dim,
|
| 696 |
+
fmap_base=self.fmap_base,
|
| 697 |
+
fmap_decay=self.fmap_decay,
|
| 698 |
+
fmap_min=self.fmap_min,
|
| 699 |
+
fmap_max=self.fmap_max,
|
| 700 |
+
fmap_const=self.fmap_const,
|
| 701 |
+
param_dict=self.param_dict['synthesis_network'] if self.param_dict is not None else None,
|
| 702 |
+
activation=self.activation,
|
| 703 |
+
use_noise=self.use_noise,
|
| 704 |
+
resample_kernel=self.resample_kernel,
|
| 705 |
+
fused_modconv=self.fused_modconv,
|
| 706 |
+
num_fp16_res=self.num_fp16_res,
|
| 707 |
+
clip_conv=self.clip_conv,
|
| 708 |
+
dtype=self.dtype,
|
| 709 |
+
rng=self.init_rng_synthesis,
|
| 710 |
+
name='synthesis_network')(dlatents_in, noise_mode, rng)
|
| 711 |
+
|
| 712 |
+
return x
|
| 713 |
+
|
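
# --- Usage sketch (added for illustration; not part of the original file) ---
# A minimal example of sampling from the Generator with random weights,
# assuming the package __init__ re-exports Generator. The exact init/apply
# arguments depend on the variable collections used by the mapping and
# synthesis networks (e.g. 'moving_stats', 'noise_consts'), so treat this as
# a sketch rather than a tested snippet:
#
#   import jax
#   from stylegan2 import Generator
#
#   generator = Generator(resolution=256)
#   key = jax.random.PRNGKey(0)
#   z = jax.random.normal(key, (1, generator.z_dim))
#   variables = generator.init(key, z, train=False)
#   images = generator.apply(variables, z, train=False)   # [1, 256, 256, 3]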
stylegan2/ops.py
ADDED
@@ -0,0 +1,674 @@
import jax
import jax.numpy as jnp
from jax import random
import flax.linen as nn
from jax import jit
import numpy as np
from functools import partial
from typing import Any
import h5py


#------------------------------------------------------
# Other
#------------------------------------------------------
def minibatch_stddev_layer(x, group_size=None, num_new_features=1):
    if group_size is None:
        group_size = x.shape[0]
    else:
        # Minibatch must be divisible by (or smaller than) group_size.
        group_size = min(group_size, x.shape[0])

    G = group_size
    F = num_new_features
    _, H, W, C = x.shape
    c = C // F

    # [NHWC] Cast to FP32.
    y = x.astype(jnp.float32)
    # [GnHWFc] Split minibatch N into n groups of size G, and channels C into F groups of size c.
    y = jnp.reshape(y, newshape=(G, -1, H, W, F, c))
    # [GnHWFc] Subtract mean over group.
    y -= jnp.mean(y, axis=0)
    # [nHWFc] Calc variance over group.
    y = jnp.mean(jnp.square(y), axis=0)
    # [nHWFc] Calc stddev over group.
    y = jnp.sqrt(y + 1e-8)
    # [nF] Take average over channels and pixels.
    y = jnp.mean(y, axis=(1, 2, 4))
    # [nF] Cast back to original data type.
    y = y.astype(x.dtype)
    # [n11F] Add missing dimensions.
    y = jnp.reshape(y, newshape=(-1, 1, 1, F))
    # [NHWC] Replicate over group and pixels.
    y = jnp.tile(y, (G, H, W, 1))
    return jnp.concatenate((x, y), axis=3)
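
# Shape walk-through for minibatch_stddev_layer (illustrative comment, not
# part of the original file): with a batch of 8 feature maps and group_size=4,
# one stddev feature map is appended per example.
#
#   x = jax.random.normal(random.PRNGKey(0), (8, 4, 4, 64))
#   y = minibatch_stddev_layer(x, group_size=4, num_new_features=1)
#   # y.shape == (8, 4, 4, 65): 64 input channels + 1 stddev channel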


#------------------------------------------------------
# Activation
#------------------------------------------------------
def apply_activation(x, activation='linear', alpha=0.2, gain=np.sqrt(2)):
    gain = jnp.array(gain, dtype=x.dtype)
    if activation == 'relu':
        return jax.nn.relu(x) * gain
    if activation == 'leaky_relu':
        return jax.nn.leaky_relu(x, negative_slope=alpha) * gain
    return x


#------------------------------------------------------
# Weights
#------------------------------------------------------
def get_weight(shape, lr_multiplier=1, bias=True, param_dict=None, layer_name='', key=None):
    if param_dict is None:
        w = random.normal(key, shape=shape, dtype=jnp.float32) / lr_multiplier
        if bias: b = jnp.zeros(shape=(shape[-1],), dtype=jnp.float32)
    else:
        w = jnp.array(param_dict[layer_name]['weight']).astype(jnp.float32)
        if bias: b = jnp.array(param_dict[layer_name]['bias']).astype(jnp.float32)

    if bias: return w, b
    return w


def equalize_lr_weight(w, lr_multiplier=1):
    """
    Equalized learning rate, see: https://arxiv.org/pdf/1710.10196.pdf.

    Args:
        w (tensor): Weight parameter. Shape [kernel, kernel, fmaps_in, fmaps_out]
                    for convolutions and shape [in, out] for MLPs.
        lr_multiplier (float): Learning rate multiplier.

    Returns:
        (tensor): Scaled weight parameter.
    """
    in_features = np.prod(w.shape[:-1])
    gain = lr_multiplier / np.sqrt(in_features)
    w *= gain
    return w


def equalize_lr_bias(b, lr_multiplier=1):
    """
    Equalized learning rate, see: https://arxiv.org/pdf/1710.10196.pdf.

    Args:
        b (tensor): Bias parameter.
        lr_multiplier (float): Learning rate multiplier.

    Returns:
        (tensor): Scaled bias parameter.
    """
    gain = lr_multiplier
    b *= gain
    return b
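
# Worked example of the equalized learning rate scaling above (illustrative
# comment, not part of the original file): parameters are stored at unit scale
# and rescaled on every forward pass, so the effective weight is
# w_stored * lr_multiplier / sqrt(fan_in).
#
#   w = jnp.ones((3, 3, 64, 128))                  # fan_in = 3 * 3 * 64 = 576
#   w_eff = equalize_lr_weight(w, lr_multiplier=1)
#   # every entry becomes 1 / sqrt(576) = 1 / 24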
#------------------------------------------------------
# Normalization
#------------------------------------------------------
def normalize_2nd_moment(x, eps=1e-8):
    return x * jax.lax.rsqrt(jnp.mean(jnp.square(x), axis=1, keepdims=True) + eps)


#------------------------------------------------------
# Upsampling
#------------------------------------------------------
def setup_filter(f, normalize=True, flip_filter=False, gain=1, separable=None):
    """
    Convenience function to setup 2D FIR filter for `upfirdn2d()`.

    Args:
        f (tensor): Tensor or python list of the shape [filter_height, filter_width] or [filter_taps].
        normalize (bool): Normalize the filter so that it retains the magnitude
                          for constant input signal (DC)? (default: True).
        flip_filter (bool): Flip the filter? (default: False).
        gain (int): Overall scaling factor for signal magnitude (default: 1).
        separable: Return a separable filter? (default: select automatically).

    Returns:
        (tensor): Output filter of shape [filter_height, filter_width] or [filter_taps].
    """
    # Validate.
    if f is None:
        f = 1
    f = jnp.array(f, dtype=jnp.float32)
    assert f.ndim in [0, 1, 2]
    assert f.size > 0
    if f.ndim == 0:
        f = f[jnp.newaxis]

    # Separable?
    if separable is None:
        separable = (f.ndim == 1 and f.size >= 8)
    if f.ndim == 1 and not separable:
        f = jnp.outer(f, f)
    assert f.ndim == (1 if separable else 2)

    # Apply normalize, flip, and gain.
    if normalize:
        f /= jnp.sum(f)
    if flip_filter:
        for i in range(f.ndim):
            f = jnp.flip(f, axis=i)
    f = f * (gain ** (f.ndim / 2))
    return f


def upfirdn2d(x, f, padding=(2, 1, 2, 1), up=1, down=1, strides=(1, 1), flip_filter=False, gain=1):

    if f is None:
        f = jnp.ones((1, 1), dtype=jnp.float32)

    B, H, W, C = x.shape
    padx0, padx1, pady0, pady1 = padding

    # Upsample by inserting zeros.
    x = jnp.reshape(x, newshape=(B, H, 1, W, 1, C))
    x = jnp.pad(x, pad_width=((0, 0), (0, 0), (0, up - 1), (0, 0), (0, up - 1), (0, 0)))
    x = jnp.reshape(x, newshape=(B, H * up, W * up, C))

    # Padding.
    x = jnp.pad(x, pad_width=((0, 0), (max(pady0, 0), max(pady1, 0)), (max(padx0, 0), max(padx1, 0)), (0, 0)))
    x = x[:, max(-pady0, 0) : x.shape[1] - max(-pady1, 0), max(-padx0, 0) : x.shape[2] - max(-padx1, 0)]

    # Setup filter.
    f = f * (gain ** (f.ndim / 2))
    if not flip_filter:
        for i in range(f.ndim):
            f = jnp.flip(f, axis=i)

    # Convolve with the filter.
    f = jnp.repeat(jnp.expand_dims(f, axis=(-2, -1)), repeats=C, axis=-1)
    if f.ndim == 4:
        x = jax.lax.conv_general_dilated(x,
                                         f.astype(x.dtype),
                                         window_strides=strides or (1,) * (x.ndim - 2),
                                         padding='valid',
                                         dimension_numbers=nn.linear._conv_dimension_numbers(x.shape),
                                         feature_group_count=C)
    else:
        x = jax.lax.conv_general_dilated(x,
                                         jnp.expand_dims(f, axis=0).astype(x.dtype),
                                         window_strides=strides or (1,) * (x.ndim - 2),
                                         padding='valid',
                                         dimension_numbers=nn.linear._conv_dimension_numbers(x.shape),
                                         feature_group_count=C)
        x = jax.lax.conv_general_dilated(x,
                                         jnp.expand_dims(f, axis=1).astype(x.dtype),
                                         window_strides=strides or (1,) * (x.ndim - 2),
                                         padding='valid',
                                         dimension_numbers=nn.linear._conv_dimension_numbers(x.shape),
                                         feature_group_count=C)
    x = x[:, ::down, ::down]
    return x


def upsample2d(x, f, up=2, padding=0, flip_filter=False, gain=1):
    if f.ndim == 1:
        fh, fw = f.shape[0], f.shape[0]
    elif f.ndim == 2:
        fh, fw = f.shape[0], f.shape[1]
    else:
        raise ValueError('Invalid filter shape:', f.shape)
    padx0 = padding + (fw + up - 1) // 2
    padx1 = padding + (fw - up) // 2
    pady0 = padding + (fh + up - 1) // 2
    pady1 = padding + (fh - up) // 2
    return upfirdn2d(x, f=f, up=up, padding=(padx0, padx1, pady0, pady1), flip_filter=flip_filter, gain=gain * up * up)
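
# Usage sketch for the FIR upsampling ops above (illustrative comment, not
# part of the original file): the (1, 3, 3, 1) tap vector becomes a normalized
# 4x4 separable filter, and upsample2d doubles the spatial resolution.
#
#   k = setup_filter((1, 3, 3, 1))                 # shape (4, 4), entries sum to 1
#   x = jax.random.normal(random.PRNGKey(0), (2, 16, 16, 3))
#   y = upsample2d(x, f=k, up=2)                   # y.shape == (2, 32, 32, 3)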
#------------------------------------------------------
# Linear
#------------------------------------------------------
class LinearLayer(nn.Module):
    """
    Linear Layer.

    Attributes:
        in_features (int): Input dimension.
        out_features (int): Output dimension.
        use_bias (bool): If True, use bias.
        bias_init (int): Bias init.
        lr_multiplier (float): Learning rate multiplier.
        activation (str): Activation function: 'relu', 'lrelu', etc.
        param_dict (h5py.Group): Parameter dict with pretrained parameters.
        layer_name (str): Layer name.
        dtype (str): Data type.
        rng (jax.random.PRNGKey): Random seed for initialization.
    """
    in_features: int
    out_features: int
    use_bias: bool=True
    bias_init: int=0
    lr_multiplier: float=1
    activation: str='linear'
    param_dict: h5py.Group=None
    layer_name: str=None
    dtype: str='float32'
    rng: Any=random.PRNGKey(0)

    @nn.compact
    def __call__(self, x):
        """
        Run Linear Layer.

        Args:
            x (tensor): Input tensor of shape [N, in_features].

        Returns:
            (tensor): Output tensor of shape [N, out_features].
        """
        w_shape = [self.in_features, self.out_features]
        params = get_weight(w_shape, self.lr_multiplier, self.use_bias, self.param_dict, self.layer_name, self.rng)

        if self.use_bias:
            w, b = params
        else:
            w = params

        w = self.param(name='weight', init_fn=lambda *_ : w)
        w = equalize_lr_weight(w, self.lr_multiplier)
        x = jnp.matmul(x, w.astype(x.dtype))

        if self.use_bias:
            b = self.param(name='bias', init_fn=lambda *_ : b)
            b = equalize_lr_bias(b, self.lr_multiplier)
            x += b.astype(x.dtype)
            x += self.bias_init

        x = apply_activation(x, activation=self.activation)
        return x


#------------------------------------------------------
# Convolution
#------------------------------------------------------
def conv_downsample_2d(x, w, k=None, factor=2, gain=1, padding=0):
    """
    Fused downsample convolution.

    Padding is performed only once at the beginning, not between the operations.
    The fused op is considerably more efficient than performing the same calculation
    using standard TensorFlow ops. It supports gradients of arbitrary order.

    Args:
        x (tensor): Input tensor of the shape [N, H, W, C].
        w (tensor): Weight tensor of the shape [filterH, filterW, inChannels, outChannels].
                    Grouped convolution can be performed by inChannels = x.shape[0] // numGroups.
        k (tensor): FIR filter of the shape [firH, firW] or [firN].
                    The default is `[1] * factor`, which corresponds to average pooling.
        factor (int): Downsampling factor (default: 2).
        gain (float): Scaling factor for signal magnitude (default: 1.0).
        padding (int): Number of pixels to pad or crop the output on each side (default: 0).

    Returns:
        (tensor): Output of the shape [N, H // factor, W // factor, C].
    """
    assert isinstance(factor, int) and factor >= 1
    assert isinstance(padding, int)

    # Check weight shape.
    ch, cw, _inC, _outC = w.shape
    assert cw == ch

    # Setup filter kernel.
    k = setup_filter(k, gain=gain)
    assert k.shape[0] == k.shape[1]

    # Execute.
    pad0 = (k.shape[0] - factor + cw) // 2 + padding * factor
    pad1 = (k.shape[0] - factor + cw - 1) // 2 + padding * factor
    x = upfirdn2d(x=x, f=k, padding=(pad0, pad0, pad1, pad1))

    x = jax.lax.conv_general_dilated(x,
                                     w,
                                     window_strides=(factor, factor),
                                     padding='VALID',
                                     dimension_numbers=nn.linear._conv_dimension_numbers(x.shape))
    return x


def upsample_conv_2d(x, w, k=None, factor=2, gain=1, padding=0):
    """
    Fused upsample convolution.

    Padding is performed only once at the beginning, not between the operations.
    The fused op is considerably more efficient than performing the same calculation
    using standard TensorFlow ops. It supports gradients of arbitrary order.

    Args:
        x (tensor): Input tensor of the shape [N, H, W, C].
        w (tensor): Weight tensor of the shape [filterH, filterW, inChannels, outChannels].
                    Grouped convolution can be performed by inChannels = x.shape[0] // numGroups.
        k (tensor): FIR filter of the shape [firH, firW] or [firN].
                    The default is [1] * factor, which corresponds to nearest-neighbor upsampling.
        factor (int): Integer upsampling factor (default: 2).
        gain (float): Scaling factor for signal magnitude (default: 1.0).
        padding (int): Number of pixels to pad or crop the output on each side (default: 0).

    Returns:
        (tensor): Output of the shape [N, H * factor, W * factor, C].
    """
    assert isinstance(factor, int) and factor >= 1
    assert isinstance(padding, int)

    # Check weight shape.
    ch, cw, _inC, _outC = w.shape
    inC = w.shape[2]
    outC = w.shape[3]
    assert cw == ch

    # Fast path for 1x1 convolution.
    if cw == 1 and ch == 1:
        x = jax.lax.conv_general_dilated(x,
                                         w,
                                         window_strides=(1, 1),
                                         padding='VALID',
                                         dimension_numbers=nn.linear._conv_dimension_numbers(x.shape))
        k = setup_filter(k, gain=gain * (factor ** 2))
        pad0 = (k.shape[0] + factor - cw) // 2 + padding
        pad1 = (k.shape[0] - factor) // 2 + padding
        x = upfirdn2d(x, f=k, up=factor, padding=(pad0, pad1, pad0, pad1))
        return x

    # Setup filter kernel.
    k = setup_filter(k, gain=gain * (factor ** 2))
    assert k.shape[0] == k.shape[1]

    # Determine data dimensions.
    stride = (factor, factor)
    output_shape = ((x.shape[1] - 1) * factor + ch, (x.shape[2] - 1) * factor + cw)
    num_groups = x.shape[3] // inC

    # Transpose weights.
    w = jnp.reshape(w, (ch, cw, inC, num_groups, -1))
    w = jnp.transpose(w[::-1, ::-1], (0, 1, 4, 3, 2))
    w = jnp.reshape(w, (ch, cw, -1, num_groups * inC))

    # Execute.
    x = gradient_based_conv_transpose(lhs=x,
                                      rhs=w,
                                      strides=stride,
                                      padding='VALID',
                                      output_padding=(0, 0, 0, 0),
                                      output_shape=output_shape)

    pad0 = (k.shape[0] + factor - cw) // 2 + padding
    pad1 = (k.shape[0] - factor - cw + 3) // 2 + padding
    x = upfirdn2d(x=x, f=k, padding=(pad0, pad1, pad0, pad1))
    return x


def conv2d(x, w, up=False, down=False, resample_kernel=None, padding=0):
    assert not (up and down)
    kernel = w.shape[0]
    assert w.shape[1] == kernel
    assert kernel >= 1 and kernel % 2 == 1

    num_groups = x.shape[3] // w.shape[2]

    w = w.astype(x.dtype)
    if up:
        x = upsample_conv_2d(x, w, k=resample_kernel, padding=padding)
    elif down:
        x = conv_downsample_2d(x, w, k=resample_kernel, padding=padding)
    else:
        padding_mode = {0: 'SAME', -(kernel // 2): 'VALID'}[padding]
        x = jax.lax.conv_general_dilated(x,
                                         w,
                                         window_strides=(1, 1),
                                         padding=padding_mode,
                                         dimension_numbers=nn.linear._conv_dimension_numbers(x.shape),
                                         feature_group_count=num_groups)
    return x


def modulated_conv2d_layer(x, w, s, fmaps, kernel, up=False, down=False, demodulate=True, resample_kernel=None, fused_modconv=False):
    assert not (up and down)
    assert kernel >= 1 and kernel % 2 == 1

    # Get weight.
    wshape = (kernel, kernel, x.shape[3], fmaps)
    if x.dtype.name == 'float16' and not fused_modconv and demodulate:
        w *= jnp.sqrt(1 / np.prod(wshape[:-1])) / jnp.max(jnp.abs(w), axis=(0, 1, 2))  # Pre-normalize to avoid float16 overflow.
    ww = w[jnp.newaxis]  # [BkkIO] Introduce minibatch dimension.

    # Modulate.
    if x.dtype.name == 'float16' and not fused_modconv and demodulate:
        s *= 1 / jnp.max(jnp.abs(s))  # Pre-normalize to avoid float16 overflow.
    ww *= s[:, jnp.newaxis, jnp.newaxis, :, jnp.newaxis].astype(w.dtype)  # [BkkIO] Scale input feature maps.

    # Demodulate.
    if demodulate:
        d = jax.lax.rsqrt(jnp.sum(jnp.square(ww), axis=(1, 2, 3)) + 1e-8)  # [BO] Scaling factor.
        ww *= d[:, jnp.newaxis, jnp.newaxis, jnp.newaxis, :]  # [BkkIO] Scale output feature maps.

    # Reshape/scale input.
    if fused_modconv:
        x = jnp.transpose(x, axes=(0, 3, 1, 2))
        x = jnp.reshape(x, (1, -1, x.shape[2], x.shape[3]))  # Fused => reshape minibatch to convolution groups.
        x = jnp.transpose(x, axes=(0, 2, 3, 1))
        w = jnp.reshape(jnp.transpose(ww, (1, 2, 3, 0, 4)), (ww.shape[1], ww.shape[2], ww.shape[3], -1))
    else:
        x *= s[:, jnp.newaxis, jnp.newaxis].astype(x.dtype)  # [BIhw] Not fused => scale input activations.

    # 2D convolution.
    x = conv2d(x, w.astype(x.dtype), up=up, down=down, resample_kernel=resample_kernel)

    # Reshape/scale output.
    if fused_modconv:
        x = jnp.transpose(x, axes=(0, 3, 1, 2))
        x = jnp.reshape(x, (-1, fmaps, x.shape[2], x.shape[3]))  # Fused => reshape convolution groups back to minibatch.
        x = jnp.transpose(x, axes=(0, 2, 3, 1))
    elif demodulate:
        x *= d[:, jnp.newaxis, jnp.newaxis].astype(x.dtype)  # [BOhw] Not fused => scale output activations.

    return x
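
# Shape sketch for modulated_conv2d_layer (illustrative comment, not part of
# the original file): per-sample styles s modulate the input channels of a
# shared kernel, and demodulation renormalizes each output feature map.
#
#   x = jax.random.normal(random.PRNGKey(0), (4, 8, 8, 64))    # [N, H, W, C]
#   w = jax.random.normal(random.PRNGKey(1), (3, 3, 64, 128))  # [k, k, in, out]
#   s = jnp.ones((4, 64))                                      # one style vector per sample
#   y = modulated_conv2d_layer(x, w, s, fmaps=128, kernel=3)   # y.shape == (4, 8, 8, 128)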
def _deconv_output_length(input_length, filter_size, padding, output_padding=None, stride=0, dilation=1):
    """
    Taken from: https://github.com/google/jax/pull/5772/commits

    Determines the output length of a transposed convolution given the input length.
    Function modified from Keras.

    Arguments:
        input_length: Integer.
        filter_size: Integer.
        padding: one of `"SAME"`, `"VALID"`, or a 2-integer tuple.
        output_padding: Integer, amount of padding along the output dimension. Can
            be set to `None` in which case the output length is inferred.
        stride: Integer.
        dilation: Integer.

    Returns:
        The output length (integer).
    """
    if input_length is None:
        return None

    # Get the dilated kernel size.
    filter_size = filter_size + (filter_size - 1) * (dilation - 1)

    # Infer length if output padding is None, else compute the exact length.
    if output_padding is None:
        if padding == 'VALID':
            length = input_length * stride + max(filter_size - stride, 0)
        elif padding == 'SAME':
            length = input_length * stride
        else:
            length = ((input_length - 1) * stride + filter_size - padding[0] - padding[1])

    else:
        if padding == 'SAME':
            pad = filter_size // 2
            total_pad = pad * 2
        elif padding == 'VALID':
            total_pad = 0
        else:
            total_pad = padding[0] + padding[1]

        length = ((input_length - 1) * stride + filter_size - total_pad + output_padding)
    return length


def _compute_adjusted_padding(input_size, output_size, kernel_size, stride, padding, dilation=1):
    """
    Taken from: https://github.com/google/jax/pull/5772/commits

    Computes adjusted padding for desired ConvTranspose `output_size`.
    Ported from DeepMind Haiku.
    """
    kernel_size = (kernel_size - 1) * dilation + 1
    if padding == 'VALID':
        expected_input_size = (output_size - kernel_size + stride) // stride
        if input_size != expected_input_size:
            raise ValueError(f'The expected input size with the current set of input '
                             f'parameters is {expected_input_size} which doesn\'t '
                             f'match the actual input size {input_size}.')
        padding_before = 0
    elif padding == 'SAME':
        expected_input_size = (output_size + stride - 1) // stride
        if input_size != expected_input_size:
            raise ValueError(f'The expected input size with the current set of input '
                             f'parameters is {expected_input_size} which doesn\'t '
                             f'match the actual input size {input_size}.')
        padding_needed = max(0, (input_size - 1) * stride + kernel_size - output_size)
        padding_before = padding_needed // 2
    else:
        padding_before = padding[0]  # type: ignore[assignment]

    expanded_input_size = (input_size - 1) * stride + 1
    padded_out_size = output_size + kernel_size - 1
    pad_before = kernel_size - 1 - padding_before
    pad_after = padded_out_size - expanded_input_size - pad_before
    return (pad_before, pad_after)


def _flip_axes(x, axes):
    """
    Taken from: https://github.com/google/jax/blob/master/jax/_src/lax/lax.py

    Flip ndarray 'x' along each axis specified in axes tuple.
    """
    for axis in axes:
        x = jnp.flip(x, axis)
    return x


def gradient_based_conv_transpose(lhs,
                                  rhs,
                                  strides,
                                  padding,
                                  output_padding,
                                  output_shape=None,
                                  dilation=None,
                                  dimension_numbers=None,
                                  transpose_kernel=True,
                                  feature_group_count=1,
                                  precision=None):
    """
    Taken from: https://github.com/google/jax/pull/5772/commits

    Convenience wrapper for calculating the N-d transposed convolution.
    Much like `conv_transpose`, this function calculates transposed convolutions
    via fractionally strided convolution rather than calculating the gradient
    (transpose) of a forward convolution. However, the latter is more common
    among deep learning frameworks, such as TensorFlow, PyTorch, and Keras.
    This function provides the same set of APIs to help reproduce results in these frameworks.

    Args:
        lhs: a rank `n+2` dimensional input array.
        rhs: a rank `n+2` dimensional array of kernel weights.
        strides: sequence of `n` integers, amounts to strides of the corresponding forward convolution.
        padding: `"SAME"`, `"VALID"`, or a sequence of `n` integer 2-tuples that controls
            the before-and-after padding for each `n` spatial dimension of
            the corresponding forward convolution.
        output_padding: A sequence of integers specifying the amount of padding along
            each spatial dimension of the output tensor, used to disambiguate the output shape of
            transposed convolutions when the stride is larger than 1.
            (see a detailed description at https://pytorch.org/docs/stable/generated/torch.nn.ConvTranspose2d.html)
            The amount of output padding along a given dimension must
            be lower than the stride along that same dimension.
            If set to `None` (default), the output shape is inferred.
            If both `output_padding` and `output_shape` are specified, they have to be mutually compatible.
        output_shape: Output shape of the spatial dimensions of a transpose
            convolution. Can be `None` or an iterable of `n` integers. If a `None` value is given (default),
            the shape is automatically calculated.
            Similar to `output_padding`, `output_shape` is also for disambiguating the output shape
            when stride > 1 (see also
            https://www.tensorflow.org/api_docs/python/tf/nn/conv2d_transpose)
            If both `output_padding` and `output_shape` are specified, they have to be mutually compatible.
        dilation: `None`, or a sequence of `n` integers, giving the
            dilation factor to apply in each spatial dimension of `rhs`. Dilated convolution
            is also known as atrous convolution.
        dimension_numbers: tuple of dimension descriptors as in lax.conv_general_dilated. Defaults to tensorflow convention.
        transpose_kernel: if `True` flips spatial axes and swaps the input/output
            channel axes of the kernel. This makes the output of this function identical
            to the gradient-derived functions like keras.layers.Conv2DTranspose and
            torch.nn.ConvTranspose2d applied to the same kernel.
            Although for typical use in neural nets this is unnecessary
            and makes input/output channel specification confusing, you need to set this to `True`
            in order to match the behavior in many deep learning frameworks, such as TensorFlow, Keras, and PyTorch.
        precision: Optional. Either ``None``, which means the default precision for
            the backend, a ``lax.Precision`` enum value (``Precision.DEFAULT``,
            ``Precision.HIGH`` or ``Precision.HIGHEST``) or a tuple of two
            ``lax.Precision`` enums indicating precision of ``lhs`` and ``rhs``.

    Returns:
        Transposed N-d convolution.
    """
    assert len(lhs.shape) == len(rhs.shape) and len(lhs.shape) >= 2
    ndims = len(lhs.shape)
    one = (1,) * (ndims - 2)
    # Set dimensional layout defaults if not specified.
    if dimension_numbers is None:
        if ndims == 2:
            dimension_numbers = ('NC', 'IO', 'NC')
        elif ndims == 3:
            dimension_numbers = ('NHC', 'HIO', 'NHC')
        elif ndims == 4:
            dimension_numbers = ('NHWC', 'HWIO', 'NHWC')
        elif ndims == 5:
            dimension_numbers = ('NHWDC', 'HWDIO', 'NHWDC')
        else:
            raise ValueError('No 4+ dimensional dimension_number defaults.')
    dn = jax.lax.conv_dimension_numbers(lhs.shape, rhs.shape, dimension_numbers)
    k_shape = np.take(rhs.shape, dn.rhs_spec)
    k_sdims = k_shape[2:]  # type: ignore[index]
    i_shape = np.take(lhs.shape, dn.lhs_spec)
    i_sdims = i_shape[2:]  # type: ignore[index]

    # Calculate correct output shape given padding and strides.
    if dilation is None:
        dilation = (1,) * (rhs.ndim - 2)

    if output_padding is None:
        output_padding = [None] * (rhs.ndim - 2)  # type: ignore[list-item]

    if isinstance(padding, str):
        if padding in {'SAME', 'VALID'}:
            padding = [padding] * (rhs.ndim - 2)  # type: ignore[list-item]
        else:
            raise ValueError(f"`padding` must be 'VALID' or 'SAME'. Passed: {padding}.")

    inferred_output_shape = tuple(map(_deconv_output_length, i_sdims, k_sdims, padding, output_padding, strides, dilation))

    if output_shape is None:
        output_shape = inferred_output_shape  # type: ignore[assignment]
    else:
        if not output_shape == inferred_output_shape:
            raise ValueError(f'`output_padding` and `output_shape` are not compatible. '
                             f'Inferred output shape from `output_padding`: {inferred_output_shape}, '
                             f'but got `output_shape` {output_shape}')

    pads = tuple(map(_compute_adjusted_padding, i_sdims, output_shape, k_sdims, strides, padding, dilation))

    if transpose_kernel:
        # Flip spatial dims and swap input / output channel axes.
        rhs = _flip_axes(rhs, np.array(dn.rhs_spec)[2:])
        rhs = np.swapaxes(rhs, dn.rhs_spec[0], dn.rhs_spec[1])
    return jax.lax.conv_general_dilated(lhs, rhs, one, pads, strides, dilation, dn, feature_group_count, precision=precision)
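
# Quick check for gradient_based_conv_transpose (illustrative comment, not
# part of the original file): a stride-2 transposed convolution with a 4x4
# kernel on a VALID-padded 8x8 input yields (8 - 1) * 2 + 4 = 18.
#
#   lhs = jax.random.normal(random.PRNGKey(0), (1, 8, 8, 16))   # NHWC
#   rhs = jax.random.normal(random.PRNGKey(1), (4, 4, 32, 16))  # HWIO; I/O are swapped when transpose_kernel=True
#   out = gradient_based_conv_transpose(lhs, rhs, strides=(2, 2), padding='VALID', output_padding=None)
#   # out.shape == (1, 18, 18, 32)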
stylegan2/utils.py
ADDED
@@ -0,0 +1,37 @@
from tqdm import tqdm
import requests
import os
import tempfile


def download(ckpt_dir, url):
    name = url[url.rfind('/') + 1 : url.rfind('?')]
    if ckpt_dir is None:
        ckpt_dir = tempfile.gettempdir()
    ckpt_dir = os.path.join(ckpt_dir, 'flaxmodels')
    ckpt_file = os.path.join(ckpt_dir, name)
    if not os.path.exists(ckpt_file):
        print(f'Downloading: \"{url[:url.rfind("?")]}\" to {ckpt_file}')
        if not os.path.exists(ckpt_dir):
            os.makedirs(ckpt_dir)

        response = requests.get(url, stream=True)
        total_size_in_bytes = int(response.headers.get('content-length', 0))
        progress_bar = tqdm(total=total_size_in_bytes, unit='iB', unit_scale=True)

        # first create temp file, in case the download fails
        ckpt_file_temp = os.path.join(ckpt_dir, name + '.temp')
        with open(ckpt_file_temp, 'wb') as file:
            for data in response.iter_content(chunk_size=1024):
                progress_bar.update(len(data))
                file.write(data)
        progress_bar.close()

        if total_size_in_bytes != 0 and progress_bar.n != total_size_in_bytes:
            print('An error occurred while downloading, please try again.')
            if os.path.exists(ckpt_file_temp):
                os.remove(ckpt_file_temp)
        else:
            # if download was successful, rename the temp file
            os.rename(ckpt_file_temp, ckpt_file)
    return ckpt_file
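
# Usage sketch (illustrative comment, not part of the original file; the URL
# is hypothetical): checkpoints are downloaded once and cached under
# <ckpt_dir>/flaxmodels, then the cached file is reused on later calls.
#
#   path = download(ckpt_dir=None, url='https://example.com/stylegan2_ffhq.h5?dl=1')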
training.py
ADDED
@@ -0,0 +1,382 @@
import jax
import jax.numpy as jnp
import flax
from flax.optim import dynamic_scale as dynamic_scale_lib
from flax.core import frozen_dict
import optax
import numpy as np
import functools
import wandb
import time

import stylegan2
import data_pipeline
import checkpoint
import training_utils
import training_steps
from fid import FID

import logging

logger = logging.getLogger(__name__)


def tree_shape(item):
    return jax.tree_map(lambda c: c.shape, item)


def train_and_evaluate(config):
    num_devices = jax.device_count()              # total devices across all hosts
    num_local_devices = jax.local_device_count()  # devices on this host
    num_workers = jax.process_count()

    # --------------------------------------
    # Data
    # --------------------------------------
    ds_train, dataset_info = data_pipeline.get_data(data_dir=config.data_dir,
                                                    img_size=config.resolution,
                                                    img_channels=config.img_channels,
                                                    num_classes=config.c_dim,
                                                    num_local_devices=num_local_devices,
                                                    batch_size=config.batch_size)

    # --------------------------------------
    # Seeding and Precision
    # --------------------------------------
    rng = jax.random.PRNGKey(config.random_seed)

    if config.mixed_precision:
        dtype = jnp.float16
    elif config.bf16:
        dtype = jnp.bfloat16
    else:
        dtype = jnp.float32
    logger.info(f'Running on dtype {dtype}')

    platform = jax.local_devices()[0].platform
    if config.mixed_precision and platform == 'gpu':
        dynamic_scale_G_main = dynamic_scale_lib.DynamicScale()
        dynamic_scale_D_main = dynamic_scale_lib.DynamicScale()
        dynamic_scale_G_reg = dynamic_scale_lib.DynamicScale()
        dynamic_scale_D_reg = dynamic_scale_lib.DynamicScale()
        clip_conv = 256
        num_fp16_res = 4
    else:
        dynamic_scale_G_main = None
        dynamic_scale_D_main = None
        dynamic_scale_G_reg = None
        dynamic_scale_D_reg = None
        clip_conv = None
        num_fp16_res = 0

    # --------------------------------------
    # Initialize Models
    # --------------------------------------
    logger.info('Initialize models...')

    rng, init_rng = jax.random.split(rng)

    # Generator initialization for training
    start_mn = time.time()
    logger.info("Creating MappingNetwork...")
    mapping_net = stylegan2.MappingNetwork(z_dim=config.z_dim,
                                           c_dim=config.c_dim,
                                           w_dim=config.w_dim,
                                           num_ws=int(np.log2(config.resolution)) * 2 - 3,
                                           num_layers=8,
                                           dtype=dtype)

    mapping_net_vars = mapping_net.init(init_rng,
                                        jnp.ones((1, config.z_dim)),
                                        jnp.ones((1, config.c_dim)))

    mapping_net_params, moving_stats = mapping_net_vars['params'], mapping_net_vars['moving_stats']

    logger.info(f"MappingNetwork took {time.time() - start_mn:.2f}s")

    logger.info("Creating SynthesisNetwork...")
    start_sn = time.time()
    synthesis_net = stylegan2.SynthesisNetwork(resolution=config.resolution,
                                               num_channels=config.img_channels,
                                               w_dim=config.w_dim,
                                               fmap_base=config.fmap_base,
                                               num_fp16_res=num_fp16_res,
                                               clip_conv=clip_conv,
                                               dtype=dtype)

    synthesis_net_vars = synthesis_net.init(init_rng,
                                            jnp.ones((1, mapping_net.num_ws, config.w_dim)))
    synthesis_net_params, noise_consts = synthesis_net_vars['params'], synthesis_net_vars['noise_consts']

    logger.info(f"SynthesisNetwork took {time.time() - start_sn:.2f}s")

    params_G = frozen_dict.FrozenDict(
        {'mapping': mapping_net_params,
         'synthesis': synthesis_net_params}
    )

    # Discriminator initialization for training
    logger.info("Creating Discriminator...")
    start_d = time.time()
    discriminator = stylegan2.Discriminator(resolution=config.resolution,
                                            num_channels=config.img_channels,
                                            c_dim=config.c_dim,
                                            mbstd_group_size=config.mbstd_group_size,
                                            num_fp16_res=num_fp16_res,
                                            clip_conv=clip_conv,
                                            dtype=dtype)
    rng, init_rng = jax.random.split(rng)
    params_D = discriminator.init(init_rng,
                                  jnp.ones((1, config.resolution, config.resolution, config.img_channels)),
                                  jnp.ones((1, config.c_dim)))
    logger.info(f"Discriminator took {time.time() - start_d:.2f}s")

    # Exponential moving average (EMA) Generator initialization
    logger.info("Creating Generator EMA...")
    start_g = time.time()
    generator_ema = stylegan2.Generator(resolution=config.resolution,
                                        num_channels=config.img_channels,
                                        z_dim=config.z_dim,
                                        c_dim=config.c_dim,
                                        w_dim=config.w_dim,
                                        num_ws=int(np.log2(config.resolution)) * 2 - 3,
                                        num_mapping_layers=8,
                                        fmap_base=config.fmap_base,
                                        num_fp16_res=num_fp16_res,
                                        clip_conv=clip_conv,
                                        dtype=dtype)

    params_ema_G = generator_ema.init(init_rng,
                                      jnp.ones((1, config.z_dim)),
                                      jnp.ones((1, config.c_dim)))
    logger.info(f"Generator EMA took {time.time() - start_g:.2f}s")

    # --------------------------------------
    # Initialize States and Optimizers
    # --------------------------------------
    logger.info('Initialize states...')
    tx_G = optax.adam(learning_rate=config.learning_rate, b1=0.0, b2=0.99)
    tx_D = optax.adam(learning_rate=config.learning_rate, b1=0.0, b2=0.99)

    state_G = training_utils.TrainStateG.create(apply_fn=None,
                                                apply_mapping=mapping_net.apply,
                                                apply_synthesis=synthesis_net.apply,
                                                params=params_G,
                                                moving_stats=moving_stats,
                                                noise_consts=noise_consts,
                                                tx=tx_G,
                                                dynamic_scale_main=dynamic_scale_G_main,
                                                dynamic_scale_reg=dynamic_scale_G_reg,
                                                epoch=0)

    state_D = training_utils.TrainStateD.create(apply_fn=discriminator.apply,
                                                params=params_D,
                                                tx=tx_D,
                                                dynamic_scale_main=dynamic_scale_D_main,
                                                dynamic_scale_reg=dynamic_scale_D_reg,
                                                epoch=0)

    # Copy over the parameters from the training generator to the ema generator
    params_ema_G = training_utils.update_generator_ema(state_G, params_ema_G, config, ema_beta=0)

    # Running mean of path length for path length regularization
    pl_mean = jnp.zeros((), dtype=dtype)

    step = 0
    epoch_offset = 0
    best_fid_score = np.inf
    ckpt_path = None

    if config.resume_run_id is not None:
        # Resume training from existing checkpoint
        ckpt_path = checkpoint.get_latest_checkpoint(config.ckpt_dir)
        logger.info(f'Resume training from checkpoint: {ckpt_path}')
        ckpt = checkpoint.load_checkpoint(ckpt_path)
        step = ckpt['step']
        epoch_offset = ckpt['epoch']
        best_fid_score = ckpt['fid_score']
        pl_mean = ckpt['pl_mean']
        state_G = ckpt['state_G']
        state_D = ckpt['state_D']
        params_ema_G = ckpt['params_ema_G']
        config = ckpt['config']
    elif config.load_from_pkl is not None:
        # Load checkpoint and start new run
        ckpt_path = config.load_from_pkl
        logger.info(f'Load model state from: {ckpt_path}')
        ckpt = checkpoint.load_checkpoint(ckpt_path)
        pl_mean = ckpt['pl_mean']
        state_G = ckpt['state_G']
        state_D = ckpt['state_D']
        params_ema_G = ckpt['params_ema_G']

    # Replicate states across devices
    pl_mean = flax.jax_utils.replicate(pl_mean)
    state_G = flax.jax_utils.replicate(state_G)
    state_D = flax.jax_utils.replicate(state_D)

    # --------------------------------------
    # Precompile train and eval steps
    # --------------------------------------
    logger.info('Precompile training steps...')
    p_main_step_G = jax.pmap(training_steps.main_step_G, axis_name='batch')
    p_regul_step_G = jax.pmap(functools.partial(training_steps.regul_step_G, config=config), axis_name='batch')

    p_main_step_D = jax.pmap(training_steps.main_step_D, axis_name='batch')
    p_regul_step_D = jax.pmap(functools.partial(training_steps.regul_step_D, config=config), axis_name='batch')

    # --------------------------------------
    # Training
    # --------------------------------------
    logger.info('Start training...')
    fid_metric = FID(generator_ema, ds_train, config)

    # Dict to collect training statistics / losses
    metrics = {}
    num_imgs_processed = 0
    num_steps_per_epoch = dataset_info['num_examples'] // (config.batch_size * num_devices)
    effective_batch_size = config.batch_size * num_devices
    if config.wandb and jax.process_index() == 0:
        # Log run metadata
        wandb.config.effective_batch_size = effective_batch_size
        wandb.config.num_steps_per_epoch = num_steps_per_epoch
        wandb.config.num_workers = num_workers
        wandb.config.device_count = num_devices
        wandb.config.num_examples = dataset_info['num_examples']
        wandb.config.vm_name = training_utils.get_vm_name()

    for epoch in range(epoch_offset, config.num_epochs):
        if config.wandb and jax.process_index() == 0:
            wandb.log({'training/epochs': epoch}, step=step)

        for batch in data_pipeline.prefetch(ds_train, config.num_prefetch):
            assert batch['image'].shape[1] == config.batch_size, f"Mismatched batch (batch size: {config.batch_size}, this batch: {batch['image'].shape[1]})"

            iteration_start_time = time.time()

            if config.c_dim == 0:
                # No labels in the dataset
                batch['label'] = None

            # Create two latent noise vectors and combine them for the style mixing regularization
            rng, key = jax.random.split(rng)
            z_latent1 = jax.random.normal(key, (num_local_devices, config.batch_size, config.z_dim), dtype)
            rng, key = jax.random.split(rng)
            z_latent2 = jax.random.normal(key, (num_local_devices, config.batch_size, config.z_dim), dtype)

            # Split PRNGs across devices
            rkey = jax.random.split(key, num=num_local_devices)
            mixing_prob = flax.jax_utils.replicate(config.mixing_prob)

            # --------------------------------------
            # Update Discriminator
            # --------------------------------------
            time_d_start = time.time()
            state_D, metrics = p_main_step_D(state_G, state_D, batch, z_latent1, z_latent2, metrics, mixing_prob, rkey)
            time_d_end = time.time()
            if step % config.D_reg_interval == 0:
                state_D, metrics = p_regul_step_D(state_D, batch, metrics)

            # --------------------------------------
            # Update Generator
            # --------------------------------------
            time_g_start = time.time()
            state_G, metrics = p_main_step_G(state_G, state_D, batch, z_latent1, z_latent2, metrics, mixing_prob, rkey)
            if step % config.G_reg_interval == 0:
                H, W = batch['image'].shape[-3], batch['image'].shape[-2]
                rng, key = jax.random.split(rng)
                pl_noise = jax.random.normal(key, batch['image'].shape, dtype=dtype) / np.sqrt(H * W)
                state_G, metrics, pl_mean = p_regul_step_G(state_G, batch, z_latent1, pl_noise, pl_mean, metrics,
                                                           rng=rkey)

            params_ema_G = training_utils.update_generator_ema(flax.jax_utils.unreplicate(state_G),
                                                               params_ema_G,
                                                               config)
            time_g_end = time.time()

            # --------------------------------------
            # Logging and Checkpointing
            # --------------------------------------
            if step % config.save_every == 0 and config.disable_fid:
                # If FID evaluation is disabled, a checkpoint will be saved every 'save_every' steps.
                if jax.process_index() == 0:
                    logger.info('Saving checkpoint...')
                    checkpoint.save_checkpoint(config.ckpt_dir, state_G, state_D, params_ema_G, pl_mean, config, step,
                                               epoch)

            num_imgs_processed += num_devices * config.batch_size
            if step % config.eval_fid_every == 0 and not config.disable_fid:
                # If FID evaluation is enabled, only save a checkpoint if the FID score improved.
                if jax.process_index() == 0:
                    logger.info('Computing FID...')
                    fid_score = fid_metric.compute_fid(params_ema_G).item()
                    if config.wandb:
                        wandb.log({'training/gen/fid': fid_score}, step=step)
                    logger.info(f'Computed FID: {fid_score:.2f}')
                    if fid_score < best_fid_score:
                        best_fid_score = fid_score
                        logger.info(f'New best FID score ({best_fid_score:.3f}). Saving checkpoint...')
                        ts = time.time()
                        checkpoint.save_checkpoint(config.ckpt_dir, state_G, state_D, params_ema_G, pl_mean, config, step, epoch, fid_score=fid_score)
                        te = time.time()
                        logger.info(f'... successfully saved checkpoint in {(te - ts) / 60:.1f}min')

            sec_per_kimg = (time.time() - iteration_start_time) / (num_devices * config.batch_size / 1000.0)
            time_taken_g = time_g_end - time_g_start
            time_taken_d = time_d_end - time_d_start
            time_taken_per_step = time.time() - iteration_start_time
            g_loss = jnp.mean(metrics['G_loss']).item()
            d_loss = jnp.mean(metrics['D_loss']).item()

            if config.wandb and jax.process_index() == 0:
                # wandb logging - happens every step
                wandb.log({'training/gen/loss': g_loss}, step=step, commit=False)
                wandb.log({'training/dis/loss': d_loss}, step=step, commit=False)
                wandb.log({'training/dis/fake_logits': jnp.mean(metrics['fake_logits']).item()}, step=step, commit=False)
                wandb.log({'training/dis/real_logits': jnp.mean(metrics['real_logits']).item()}, step=step, commit=False)
                wandb.log({'training/time_taken_g': time_taken_g, 'training/time_taken_d': time_taken_d}, step=step, commit=False)
                wandb.log({'training/time_taken_per_step': time_taken_per_step}, step=step, commit=False)
                wandb.log({'training/num_imgs_trained': num_imgs_processed}, step=step, commit=False)
                wandb.log({'training/sec_per_kimg': sec_per_kimg}, step=step)

            if step % config.log_every == 0:
                # Console logging - happens every log_every steps
                logger.info(f'Total steps: {step:>6,} - epoch {epoch:>3,}/{config.num_epochs} @ {step % num_steps_per_epoch:>6,}/{num_steps_per_epoch:,} - G loss: {g_loss:.5f} - D loss: {d_loss:.5f} - sec/kimg: {sec_per_kimg:.2f}s - time per step: {time_taken_per_step:.3f}s')

            if step % config.generate_samples_every == 0 and config.wandb and jax.process_index() == 0:
                # Generate training images
                train_snapshot = training_utils.get_training_snapshot(
                    image_real=flax.jax_utils.unreplicate(batch['image']),
                    image_gen=flax.jax_utils.unreplicate(metrics['image_gen']),
                    max_num=10
                )
                wandb.log({'training/snapshot': wandb.Image(train_snapshot)}, commit=False, step=step)

                # Generate evaluation images
                labels = None if config.c_dim == 0 else batch['label'][0]
                image_gen_eval = training_steps.eval_step_G(
                    generator_ema, params=params_ema_G,
                    z_latent=z_latent1[0],
                    labels=labels,
                    truncation=1
                )
                image_gen_eval_trunc = training_steps.eval_step_G(
                    generator_ema,
                    params=params_ema_G,
                    z_latent=z_latent1[0],
                    labels=labels,
                    truncation=0.5
                )
                eval_snapshot = training_utils.get_eval_snapshot(image=image_gen_eval, max_num=10)
                eval_snapshot_trunc = training_utils.get_eval_snapshot(image=image_gen_eval_trunc, max_num=10)
                wandb.log({'eval/snapshot': wandb.Image(eval_snapshot)}, commit=False, step=step)
                wandb.log({'eval/snapshot_trunc': wandb.Image(eval_snapshot_trunc)}, step=step)

            step += 1

        # Sync moving stats across devices
        state_G = training_utils.sync_moving_stats(state_G)

        # Sync moving average of path length mean (Generator regularization)
        pl_mean = jax.pmap(lambda x: jax.lax.pmean(x, axis_name='batch'), axis_name='batch')(pl_mean)

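The loop above relies on the standard flax replicate/pmap pattern: states are replicated so every pytree leaf gains a leading device axis, latents are sampled with that axis already in place, and the pmapped steps average across devices with `pmean`. A self-contained sketch of the pattern (toy state and step, not the trainer above):

import functools
import jax
import jax.numpy as jnp
import flax

params = {'w': jnp.arange(3.0)}
params = flax.jax_utils.replicate(params)        # leaves gain a leading device axis

@functools.partial(jax.pmap, axis_name='batch')
def step(p, x):
    y = x * p['w']                               # runs once per device
    return jax.lax.pmean(y, axis_name='batch')   # cross-device average, like the gradients above

x = jnp.ones((jax.local_device_count(), 3))      # leading axis = local device count
y = step(params, x)
params = flax.jax_utils.unreplicate(params)      # drop the device axis, e.g. before checkpointing
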
training_steps.py
ADDED
@@ -0,0 +1,219 @@
import jax
import jax.numpy as jnp
import functools


def main_step_G(state_G, state_D, batch, z_latent1, z_latent2, metrics, mixing_prob, rng):

    def loss_fn(params):
        w_latent1, new_state_G = state_G.apply_mapping({'params': params['mapping'], 'moving_stats': state_G.moving_stats},
                                                       z_latent1,
                                                       batch['label'],
                                                       mutable=['moving_stats'])
        w_latent2 = state_G.apply_mapping({'params': params['mapping'], 'moving_stats': state_G.moving_stats},
                                          z_latent2,
                                          batch['label'],
                                          skip_w_avg_update=True)

        # style mixing
        cutoff_rng, layer_select_rng, synth_rng = jax.random.split(rng, num=3)
        num_layers = w_latent1.shape[1]
        layer_idx = jnp.arange(num_layers)[jnp.newaxis, :, jnp.newaxis]
        mixing_cutoff = jax.lax.cond(jax.random.uniform(cutoff_rng, (), minval=0.0, maxval=1.0) < mixing_prob,
                                     lambda _: jax.random.randint(layer_select_rng, (), 1, num_layers, dtype=jnp.int32),
                                     lambda _: num_layers,
                                     operand=None)
        mixing_cond = jnp.broadcast_to(layer_idx < mixing_cutoff, w_latent1.shape)
        w_latent = jnp.where(mixing_cond, w_latent1, w_latent2)

        image_gen = state_G.apply_synthesis({'params': params['synthesis'], 'noise_consts': state_G.noise_consts},
                                            w_latent,
                                            rng=synth_rng)

        fake_logits = state_D.apply_fn(state_D.params, image_gen, batch['label'])
        loss = jnp.mean(jax.nn.softplus(-fake_logits))
        return loss, (fake_logits, image_gen, new_state_G)

    dynamic_scale = state_G.dynamic_scale_main

    if dynamic_scale:
        grad_fn = dynamic_scale.value_and_grad(loss_fn, has_aux=True, axis_name='batch')
        dynamic_scale, is_fin, aux, grads = grad_fn(state_G.params)
    else:
        grad_fn = jax.value_and_grad(loss_fn, has_aux=True)
        aux, grads = grad_fn(state_G.params)
        grads = jax.lax.pmean(grads, axis_name='batch')

    loss = aux[0]
    _, image_gen, new_state = aux[1]
    metrics['G_loss'] = loss
    metrics['image_gen'] = image_gen

    new_state_G = state_G.apply_gradients(grads=grads, moving_stats=new_state['moving_stats'])

    if dynamic_scale:
        new_state_G = new_state_G.replace(opt_state=jax.tree_multimap(functools.partial(jnp.where, is_fin),
                                                                      new_state_G.opt_state,
                                                                      state_G.opt_state),
                                          params=jax.tree_multimap(functools.partial(jnp.where, is_fin),
                                                                   new_state_G.params,
                                                                   state_G.params))
        metrics['G_scale'] = dynamic_scale.scale

    return new_state_G, metrics


def regul_step_G(state_G, batch, z_latent, pl_noise, pl_mean, metrics, config, rng):

    def loss_fn(params):
        w_latent, new_state_G = state_G.apply_mapping({'params': params['mapping'], 'moving_stats': state_G.moving_stats},
                                                      z_latent,
                                                      batch['label'],
                                                      mutable=['moving_stats'])

        pl_grads = jax.grad(lambda *args: jnp.sum(state_G.apply_synthesis(*args) * pl_noise), argnums=1)({'params': params['synthesis'],
                                                                                                          'noise_consts': state_G.noise_consts},
                                                                                                         w_latent,
                                                                                                         'random',
                                                                                                         rng)
        pl_lengths = jnp.sqrt(jnp.mean(jnp.sum(jnp.square(pl_grads), axis=2), axis=1))
        pl_mean_new = pl_mean + config.pl_decay * (jnp.mean(pl_lengths) - pl_mean)
        pl_penalty = jnp.square(pl_lengths - pl_mean_new) * config.pl_weight
        loss = jnp.mean(pl_penalty) * config.G_reg_interval

        return loss, pl_mean_new

    dynamic_scale = state_G.dynamic_scale_reg

    if dynamic_scale:
        grad_fn = dynamic_scale.value_and_grad(loss_fn, has_aux=True)
        dynamic_scale, is_fin, aux, grads = grad_fn(state_G.params)
    else:
        grad_fn = jax.value_and_grad(loss_fn, has_aux=True)
        aux, grads = grad_fn(state_G.params)
        grads = jax.lax.pmean(grads, axis_name='batch')

    loss = aux[0]
    pl_mean_new = aux[1]

    metrics['G_regul_loss'] = loss
    new_state_G = state_G.apply_gradients(grads=grads)

    if dynamic_scale:
        new_state_G = new_state_G.replace(opt_state=jax.tree_multimap(functools.partial(jnp.where, is_fin),
                                                                      new_state_G.opt_state,
                                                                      state_G.opt_state),
                                          params=jax.tree_multimap(functools.partial(jnp.where, is_fin),
                                                                   new_state_G.params,
                                                                   state_G.params))
        metrics['G_regul_scale'] = dynamic_scale.scale

    return new_state_G, metrics, pl_mean_new


def main_step_D(state_G, state_D, batch, z_latent1, z_latent2, metrics, mixing_prob, rng):

    def loss_fn(params):
        w_latent1 = state_G.apply_mapping({'params': state_G.params['mapping'], 'moving_stats': state_G.moving_stats},
                                          z_latent1,
                                          batch['label'],
                                          train=False)

        w_latent2 = state_G.apply_mapping({'params': state_G.params['mapping'], 'moving_stats': state_G.moving_stats},
                                          z_latent2,
                                          batch['label'],
                                          train=False)

        # style mixing
        cutoff_rng, layer_select_rng, synth_rng = jax.random.split(rng, num=3)
        num_layers = w_latent1.shape[1]
        layer_idx = jnp.arange(num_layers)[jnp.newaxis, :, jnp.newaxis]
        mixing_cutoff = jax.lax.cond(jax.random.uniform(cutoff_rng, (), minval=0.0, maxval=1.0) < mixing_prob,
                                     lambda _: jax.random.randint(layer_select_rng, (), 1, num_layers, dtype=jnp.int32),
                                     lambda _: num_layers,
                                     operand=None)
        mixing_cond = jnp.broadcast_to(layer_idx < mixing_cutoff, w_latent1.shape)
        w_latent = jnp.where(mixing_cond, w_latent1, w_latent2)

        image_gen = state_G.apply_synthesis({'params': state_G.params['synthesis'], 'noise_consts': state_G.noise_consts},
                                            w_latent,
                                            rng=synth_rng)

        fake_logits = state_D.apply_fn(params, image_gen, batch['label'])
        real_logits = state_D.apply_fn(params, batch['image'], batch['label'])

        loss_fake = jax.nn.softplus(fake_logits)
        loss_real = jax.nn.softplus(-real_logits)
        loss = jnp.mean(loss_fake + loss_real)

        return loss, (fake_logits, real_logits)

    dynamic_scale = state_D.dynamic_scale_main

    if dynamic_scale:
        grad_fn = dynamic_scale.value_and_grad(loss_fn, has_aux=True)
        dynamic_scale, is_fin, aux, grads = grad_fn(state_D.params)
    else:
        grad_fn = jax.value_and_grad(loss_fn, has_aux=True)
        aux, grads = grad_fn(state_D.params)
        grads = jax.lax.pmean(grads, axis_name='batch')

    loss = aux[0]
    fake_logits, real_logits = aux[1]
    metrics['D_loss'] = loss
    metrics['fake_logits'] = jnp.mean(fake_logits)
    metrics['real_logits'] = jnp.mean(real_logits)

    new_state_D = state_D.apply_gradients(grads=grads)

    if dynamic_scale:
        new_state_D = new_state_D.replace(opt_state=jax.tree_multimap(functools.partial(jnp.where, is_fin),
                                                                      new_state_D.opt_state,
                                                                      state_D.opt_state),
                                          params=jax.tree_multimap(functools.partial(jnp.where, is_fin),
                                                                   new_state_D.params,
                                                                   state_D.params))
        metrics['D_scale'] = dynamic_scale.scale

    return new_state_D, metrics


def regul_step_D(state_D, batch, metrics, config):

    def loss_fn(params):
        r1_grads = jax.grad(lambda *args: jnp.sum(state_D.apply_fn(*args)), argnums=1)(params, batch['image'], batch['label'])
        r1_penalty = jnp.sum(jnp.square(r1_grads), axis=(1, 2, 3)) * (config.r1_gamma / 2) * config.D_reg_interval
        loss = jnp.mean(r1_penalty)
        return loss, None

    dynamic_scale = state_D.dynamic_scale_reg

    if dynamic_scale:
        grad_fn = dynamic_scale.value_and_grad(loss_fn, has_aux=True)
        dynamic_scale, is_fin, aux, grads = grad_fn(state_D.params)
    else:
        grad_fn = jax.value_and_grad(loss_fn, has_aux=True)
        aux, grads = grad_fn(state_D.params)
        grads = jax.lax.pmean(grads, axis_name='batch')

    loss = aux[0]
    metrics['D_regul_loss'] = loss

    new_state_D = state_D.apply_gradients(grads=grads)

    if dynamic_scale:
        new_state_D = new_state_D.replace(opt_state=jax.tree_multimap(functools.partial(jnp.where, is_fin),
                                                                      new_state_D.opt_state,
                                                                      state_D.opt_state),
                                          params=jax.tree_multimap(functools.partial(jnp.where, is_fin),
                                                                   new_state_D.params,
                                                                   state_D.params))
        metrics['D_regul_scale'] = dynamic_scale.scale

    return new_state_D, metrics


def eval_step_G(generator, params, z_latent, labels, truncation):
    image_gen = generator.apply(params, z_latent, labels, truncation_psi=truncation, train=False, noise_mode='const')
    return image_gen

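The steps above use the non-saturating logistic GAN losses with lazy regularization: the R1 and path-length penalties are scaled by `D_reg_interval` / `G_reg_interval` because they run only every N steps. Stripped of the train-state and mixed-precision plumbing, the objectives reduce to the following (same formulas as in the code above):

import jax
import jax.numpy as jnp

def g_loss(fake_logits):
    # Generator: push D(G(z)) up via softplus(-logits).
    return jnp.mean(jax.nn.softplus(-fake_logits))

def d_loss(fake_logits, real_logits):
    # Discriminator: push fake logits down, real logits up.
    return jnp.mean(jax.nn.softplus(fake_logits) + jax.nn.softplus(-real_logits))

def r1_penalty(grad_real, r1_gamma, d_reg_interval):
    # grad_real: dD(x)/dx on real images, shape [B, H, W, C].
    penalty = jnp.sum(jnp.square(grad_real), axis=(1, 2, 3)) * (r1_gamma / 2) * d_reg_interval
    return jnp.mean(penalty)
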
training_utils.py
ADDED
@@ -0,0 +1,174 @@
import jax
import jax.numpy as jnp
from jaxlib.xla_extension import DeviceArray
import flax
from flax.optim import dynamic_scale as dynamic_scale_lib
from flax.core import frozen_dict
from flax.training import train_state
from flax import struct
import numpy as np
from PIL import Image
from urllib.request import Request, urlopen
import urllib.error
from typing import Any, Callable


def sync_moving_stats(state):
    """
    Sync moving statistics across devices.

    Args:
        state (train_state.TrainState): Training state.

    Returns:
        (train_state.TrainState): Updated training state.
    """
    cross_replica_mean = jax.pmap(lambda x: jax.lax.pmean(x, 'x'), 'x')
    return state.replace(moving_stats=cross_replica_mean(state.moving_stats))


def update_generator_ema(state_G, params_ema_G, config, ema_beta=None):
    """
    Update the exponential moving average of the generator weights.
    Moving stats and noise constants are copied over unchanged.

    Args:
        state_G (train_state.TrainState): Generator state.
        params_ema_G (frozen_dict.FrozenDict): Parameters of the ema generator.
        config (Any): Config object.
        ema_beta (float): Beta parameter of the ema. If None, it is computed
                          from 'ema_kimg' and 'batch_size'.

    Returns:
        (frozen_dict.FrozenDict): Updated parameters of the ema generator.
    """
    def _update_ema(src, trg, beta):
        for name, src_child in src.items():
            if isinstance(src_child, DeviceArray):
                trg[name] = src[name] + beta * (trg[name] - src[name])
            else:
                _update_ema(src_child, trg[name], beta)

    if ema_beta is None:
        ema_nimg = config.ema_kimg * 1000
        ema_beta = 0.5 ** (config.batch_size / max(ema_nimg, 1e-8))

    params_ema_G = params_ema_G.unfreeze()

    # Copy over moving stats
    params_ema_G['moving_stats']['mapping_network'] = state_G.moving_stats
    params_ema_G['noise_consts']['synthesis_network'] = state_G.noise_consts

    # Update exponentially moving average of the trainable parameters
    _update_ema(state_G.params['mapping'], params_ema_G['params']['mapping_network'], ema_beta)
    _update_ema(state_G.params['synthesis'], params_ema_G['params']['synthesis_network'], ema_beta)

    params_ema_G = frozen_dict.freeze(params_ema_G)
    return params_ema_G


class TrainStateG(train_state.TrainState):
    """
    Generator train state for a single Optax optimizer.

    Attributes:
        apply_mapping (Callable): Apply function of the Mapping Network.
        apply_synthesis (Callable): Apply function of the Synthesis Network.
        dynamic_scale_main (dynamic_scale_lib.DynamicScale): Dynamic loss scaling for the main loss (mixed precision).
        dynamic_scale_reg (dynamic_scale_lib.DynamicScale): Dynamic loss scaling for the regularization loss (mixed precision).
        epoch (int): Current epoch.
        moving_stats (Any): Moving average of the latent W.
        noise_consts (Any): Noise constants from synthesis layers.
    """
    apply_mapping: Callable = struct.field(pytree_node=False)
    apply_synthesis: Callable = struct.field(pytree_node=False)
    dynamic_scale_main: dynamic_scale_lib.DynamicScale
    dynamic_scale_reg: dynamic_scale_lib.DynamicScale
    epoch: int
    moving_stats: Any = None
    noise_consts: Any = None


class TrainStateD(train_state.TrainState):
    """
    Discriminator train state for a single Optax optimizer.

    Attributes:
        dynamic_scale_main (dynamic_scale_lib.DynamicScale): Dynamic loss scaling for the main loss (mixed precision).
        dynamic_scale_reg (dynamic_scale_lib.DynamicScale): Dynamic loss scaling for the regularization loss (mixed precision).
        epoch (int): Current epoch.
    """
    dynamic_scale_main: dynamic_scale_lib.DynamicScale
    dynamic_scale_reg: dynamic_scale_lib.DynamicScale
    epoch: int


def get_training_snapshot(image_real, image_gen, max_num=10):
    """
    Creates a snapshot of generated images and real images.

    Args:
        image_real (DeviceArray): Batch of real images, shape [B, H, W, C].
        image_gen (DeviceArray): Batch of generated images, shape [B, H, W, C].
        max_num (int): Maximum number of images used for the snapshot.

    Returns:
        (PIL.Image): Training snapshot. Top row: generated images, bottom row: real images.
    """
    if image_real.shape[0] > max_num:
        image_real = image_real[:max_num]
    if image_gen.shape[0] > max_num:
        image_gen = image_gen[:max_num]

    image_real = jnp.split(image_real, image_real.shape[0], axis=0)
    image_gen = jnp.split(image_gen, image_gen.shape[0], axis=0)

    image_real = [jnp.squeeze(x, axis=0) for x in image_real]
    image_gen = [jnp.squeeze(x, axis=0) for x in image_gen]

    image_real = jnp.concatenate(image_real, axis=1)
    image_gen = jnp.concatenate(image_gen, axis=1)

    image_gen = (image_gen - np.min(image_gen)) / (np.max(image_gen) - np.min(image_gen))
    image_real = (image_real - np.min(image_real)) / (np.max(image_real) - np.min(image_real))
    image = jnp.concatenate((image_gen, image_real), axis=0)

    image = np.uint8(image * 255)
    if image.shape[-1] == 1:
        image = np.repeat(image, 3, axis=-1)
    return Image.fromarray(image)


def get_eval_snapshot(image, max_num=10):
    """
    Creates a snapshot of generated images.

    Args:
        image (DeviceArray): Generated images, shape [B, H, W, C].
        max_num (int): Maximum number of images used for the snapshot.

    Returns:
        (PIL.Image): Eval snapshot.
    """
    if image.shape[0] > max_num:
        image = image[:max_num]

    image = jnp.split(image, image.shape[0], axis=0)
    image = [jnp.squeeze(x, axis=0) for x in image]
    image = jnp.concatenate(image, axis=1)
    image = (image - np.min(image)) / (np.max(image) - np.min(image))
    image = np.uint8(image * 255)
    if image.shape[-1] == 1:
        image = np.repeat(image, 3, axis=-1)
    return Image.fromarray(image)


def get_vm_name():
    gcp_metadata_url = "http://metadata.google.internal/computeMetadata/v1/instance/attributes/instance-id"
    req = Request(gcp_metadata_url)
    req.add_header('Metadata-Flavor', 'Google')
    instance_id = None
    try:
        with urlopen(req) as url:
            instance_id = url.read().decode()
    except urllib.error.URLError:
        # metadata.google.internal not reachable: not running on a GCP VM
        pass
    return instance_id

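The EMA half-life in `update_generator_ema` is expressed in images rather than steps, so the decay rate is independent of batch size. A quick numeric sketch (the config values below are illustrative):

ema_kimg = 10                     # illustrative config value
batch_size = 32
ema_nimg = ema_kimg * 1000
ema_beta = 0.5 ** (batch_size / max(ema_nimg, 1e-8))
# beta ** (ema_nimg / batch_size) == 0.5: after `ema_nimg` images, the
# contribution of the old EMA weights has decayed by exactly one half.
print(ema_beta)                   # ~0.99778
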