diff --git a/.gitattributes b/.gitattributes
index 132cac7ef1b0de9c1feb6bfbc192f630f5553abc..2bf52903f3adde0094eb2260b218c0c03498943f 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -25,7 +25,6 @@
 *.safetensors filter=lfs diff=lfs merge=lfs -text
 saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.tar.* filter=lfs diff=lfs merge=lfs -text
-*.tar filter=lfs diff=lfs merge=lfs -text
 *.tflite filter=lfs diff=lfs merge=lfs -text
 *.tgz filter=lfs diff=lfs merge=lfs -text
 *.wasm filter=lfs diff=lfs merge=lfs -text
@@ -33,7 +32,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
-inpaint/inpaint_v26.fooocus.patch filter=lfs diff=lfs merge=lfs -text
-sapiens/seg/sapiens_1b_goliath_best_goliath_mIoU_7994_epoch_151_torchscript_bf16.pt2 filter=lfs diff=lfs merge=lfs -text
-sapiens/seg/sapiens_1b_goliath_best_goliath_mIoU_7994_epoch_151_torchscript.pt2 filter=lfs diff=lfs merge=lfs -text
-unet/flux1-dev-Q4_K_S.gguf filter=lfs diff=lfs merge=lfs -text
+unet/svdq-int4-flux.1-fill-dev/demo.jpg filter=lfs diff=lfs merge=lfs -text
+BiRefNet/RMBG-2.0/t4.png filter=lfs diff=lfs merge=lfs -text
+unet/svdq-int4-flux.1-fill-dev/example.png filter=lfs diff=lfs merge=lfs -text
+BiRefNet/RMBG-2.0/collage5.png filter=lfs diff=lfs merge=lfs -text
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..d0e300c55f010e9077e7befa1bb4df35ccf0a818
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+.ipynb_checkpoints
+test.ipynb
\ No newline at end of file
diff --git a/BiRefNet/RMBG-2.0/.gitattributes b/BiRefNet/RMBG-2.0/.gitattributes
new file mode 100644
index 0000000000000000000000000000000000000000..b87521d95669729e736ec68afe701b5fdb690083
--- /dev/null
+++ b/BiRefNet/RMBG-2.0/.gitattributes
@@ -0,0 +1,40 @@
+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
+model_not_working.not_safetensors filter=lfs diff=lfs merge=lfs -text
+t4.png filter=lfs diff=lfs merge=lfs -text
+collage.png filter=lfs diff=lfs merge=lfs -text
+collage3.png filter=lfs diff=lfs merge=lfs -text
+collage5.png filter=lfs diff=lfs merge=lfs -text
diff --git a/BiRefNet/RMBG-2.0/BiRefNet_config.py b/BiRefNet/RMBG-2.0/BiRefNet_config.py
new file mode 100644
index 0000000000000000000000000000000000000000..37c8ac58bec2f52dac34204978a7b61b69e3da76
--- /dev/null
+++ b/BiRefNet/RMBG-2.0/BiRefNet_config.py
@@ -0,0 +1,11 @@
+from transformers import PretrainedConfig
+
+class BiRefNetConfig(PretrainedConfig):
+    model_type = "SegformerForSemanticSegmentation"
+    def __init__(
+        self,
+        bb_pretrained=False,
+        **kwargs
+    ):
+        self.bb_pretrained = bb_pretrained
+        super().__init__(**kwargs)
diff --git a/BiRefNet/RMBG-2.0/birefnet.py b/BiRefNet/RMBG-2.0/birefnet.py
new file mode 100644
index 0000000000000000000000000000000000000000..d68d7d39d6b4537d3dac2be8dfd7d63b72d4b83d
--- /dev/null
+++ b/BiRefNet/RMBG-2.0/birefnet.py
@@ -0,0 +1,2244 @@
+### config.py
+
+import os
+import math
+from transformers import PretrainedConfig
+
+class Config(PretrainedConfig):
+    def __init__(self) -> None:
+        # PATH settings
+        self.sys_home_dir = os.path.expanduser('~')  # Set up your file system as: SYS_HOME_DIR/codes/dis/BiRefNet, SYS_HOME_DIR/datasets/dis/xx, SYS_HOME_DIR/weights/xx
+
+        # TASK settings
+        self.task = ['DIS5K', 'COD', 'HRSOD', 'DIS5K+HRSOD+HRS10K', 'P3M-10k'][0]
+        self.training_set = {
+            'DIS5K': ['DIS-TR', 'DIS-TR+DIS-TE1+DIS-TE2+DIS-TE3+DIS-TE4'][0],
+            'COD': 'TR-COD10K+TR-CAMO',
+            'HRSOD': ['TR-DUTS', 'TR-HRSOD', 'TR-UHRSD', 'TR-DUTS+TR-HRSOD', 'TR-DUTS+TR-UHRSD', 'TR-HRSOD+TR-UHRSD', 'TR-DUTS+TR-HRSOD+TR-UHRSD'][5],
+            'DIS5K+HRSOD+HRS10K': 'DIS-TE1+DIS-TE2+DIS-TE3+DIS-TE4+DIS-TR+TE-HRS10K+TE-HRSOD+TE-UHRSD+TR-HRS10K+TR-HRSOD+TR-UHRSD',  # leave DIS-VD for evaluation.
+            'P3M-10k': 'TR-P3M-10k',
+        }[self.task]
+        self.prompt4loc = ['dense', 'sparse'][0]
+
+        # Faster-Training settings
+        self.load_all = True
+        self.compile = True  # 1. Triggers a CPU memory leak to some extent, which is an inherent problem of PyTorch.
+                             #    Machines with > 70GB CPU memory can run the whole training on DIS5K with the default setting.
+                             # 2. A higher PyTorch version may fix it: https://github.com/pytorch/pytorch/issues/119607.
+                             # 3. But compile in PyTorch > 2.0.1 seems to bring no acceleration for training.
+        self.precisionHigh = True
+
+        # MODEL settings
+        self.ms_supervision = True
+        self.out_ref = self.ms_supervision and True
+        self.dec_ipt = True
+        self.dec_ipt_split = True
+        self.cxt_num = [0, 3][1]  # multi-scale skip connections from encoder
+        self.mul_scl_ipt = ['', 'add', 'cat'][2]
+        self.dec_att = ['', 'ASPP', 'ASPPDeformable'][2]
+        self.squeeze_block = ['', 'BasicDecBlk_x1', 'ResBlk_x4', 'ASPP_x3', 'ASPPDeformable_x3'][1]
+        self.dec_blk = ['BasicDecBlk', 'ResBlk', 'HierarAttDecBlk'][0]
+
+        # TRAINING settings
+        self.batch_size = 4
+        self.IoU_finetune_last_epochs = [
+            0,
+            {
+                'DIS5K': -50,
+                'COD': -20,
+                'HRSOD': -20,
+                'DIS5K+HRSOD+HRS10K': -20,
+                'P3M-10k': -20,
+            }[self.task]
+        ][1]  # choose 0 to skip
+        self.lr = (1e-4 if 'DIS5K' in self.task else 1e-5) * math.sqrt(self.batch_size / 4)  # DIS needs a high lr to converge faster. Adapt the lr linearly
+        self.size = 1024
+        self.num_workers = max(4, self.batch_size)  # will be decreased to min(it, batch_size) at the initialization of the data_loader
+
+        # Backbone settings
+        self.bb = [
+            'vgg16', 'vgg16bn', 'resnet50',  # 0, 1, 2
+            'swin_v1_t', 'swin_v1_s',        # 3, 4
+            'swin_v1_b', 'swin_v1_l',        # 5-bs9, 6-bs4
+            'pvt_v2_b0', 'pvt_v2_b1',        # 7, 8
+            'pvt_v2_b2', 'pvt_v2_b5',        # 9-bs10, 10-bs5
+        ][6]
+        self.lateral_channels_in_collection = {
+            'vgg16': [512, 256, 128, 64], 'vgg16bn': [512, 256, 128, 64], 'resnet50': [1024, 512, 256, 64],
+            'pvt_v2_b2': [512, 320, 128, 64], 'pvt_v2_b5': [512, 320, 128, 64],
+            'swin_v1_b': [1024, 512, 256, 128], 'swin_v1_l': [1536, 768, 384, 192],
+            'swin_v1_t': [768, 384, 192, 96], 'swin_v1_s': [768, 384, 192, 96],
+            'pvt_v2_b0': [256, 160, 64, 32], 'pvt_v2_b1': [512, 320, 128, 64],
+        }[self.bb]
+        if self.mul_scl_ipt == 'cat':
+            self.lateral_channels_in_collection = [channel * 2 for channel in self.lateral_channels_in_collection]
+        self.cxt = self.lateral_channels_in_collection[1:][::-1][-self.cxt_num:] if self.cxt_num else []
+
+        # MODEL settings - inactive
+        self.lat_blk = ['BasicLatBlk'][0]
+        self.dec_channels_inter = ['fixed', 'adap'][0]
+        self.refine = ['', 'itself', 'RefUNet', 'Refiner', 'RefinerPVTInChannels4'][0]
+        self.progressive_ref = self.refine and True
+        self.ender = self.progressive_ref and False
+        self.scale = self.progressive_ref and 2
+        self.auxiliary_classification = False  # Only for DIS5K, where class labels are saved in `dataset.py`.
+        self.refine_iteration = 1
+        self.freeze_bb = False
+        self.model = [
+            'BiRefNet',
+        ][0]
+        if self.dec_blk == 'HierarAttDecBlk':
+            self.batch_size = 2 ** [0, 1, 2, 3, 4][2]
+
+        # TRAINING settings - inactive
+        self.preproc_methods = ['flip', 'enhance', 'rotate', 'pepper', 'crop'][:4]
+        self.optimizer = ['Adam', 'AdamW'][1]
+        self.lr_decay_epochs = [1e5]  # Set to negative N to decay the lr in the last N-th epoch.
+        self.lr_decay_rate = 0.5
+        # Loss
+        self.lambdas_pix_last = {
+            # not 0 means opening this loss
+            # original rate -- 1 : 30 : 1.5 : 0.2, bce x 30
+            'bce': 30 * 1,         # high performance
+            'iou': 0.5 * 1,        # 0 / 255
+            'iou_patch': 0.5 * 0,  # 0 / 255, win_size = (64, 64)
+            'mse': 150 * 0,        # can smooth the saliency map
+            'triplet': 3 * 0,
+            'reg': 100 * 0,
+            'ssim': 10 * 1,        # help contours
+            'cnt': 5 * 0,          # help contours
+            'structure': 5 * 0,    # structure loss from codes of MVANet. A little improvement on DIS-TE[1,2,3], a bit more decrease on DIS-TE4.
+        }
+        self.lambdas_cls = {
+            'ce': 5.0
+        }
+        # Adv
+        self.lambda_adv_g = 10. * 0  # turn to 0 to avoid adv training
+        self.lambda_adv_d = 3. * (self.lambda_adv_g > 0)
+
+        # PATH settings - inactive
+        self.data_root_dir = os.path.join(self.sys_home_dir, 'datasets/dis')
+        self.weights_root_dir = os.path.join(self.sys_home_dir, 'weights')
+        self.weights = {
+            'pvt_v2_b2': os.path.join(self.weights_root_dir, 'pvt_v2_b2.pth'),
+            'pvt_v2_b5': os.path.join(self.weights_root_dir, ['pvt_v2_b5.pth', 'pvt_v2_b5_22k.pth'][0]),
+            'swin_v1_b': os.path.join(self.weights_root_dir, ['swin_base_patch4_window12_384_22kto1k.pth', 'swin_base_patch4_window12_384_22k.pth'][0]),
+            'swin_v1_l': os.path.join(self.weights_root_dir, ['swin_large_patch4_window12_384_22kto1k.pth', 'swin_large_patch4_window12_384_22k.pth'][0]),
+            'swin_v1_t': os.path.join(self.weights_root_dir, ['swin_tiny_patch4_window7_224_22kto1k_finetune.pth'][0]),
+            'swin_v1_s': os.path.join(self.weights_root_dir, ['swin_small_patch4_window7_224_22kto1k_finetune.pth'][0]),
+            'pvt_v2_b0': os.path.join(self.weights_root_dir, ['pvt_v2_b0.pth'][0]),
+            'pvt_v2_b1': os.path.join(self.weights_root_dir, ['pvt_v2_b1.pth'][0]),
+        }
+
+        # Callbacks - inactive
+        self.verbose_eval = True
+        self.only_S_MAE = False
+        self.use_fp16 = False  # Bugs. It may cause nan in training.
+        self.SDPA_enabled = False  # Bugs. Slower, and errors occur on multi-GPU setups.
+
+        # others
+        self.device = [0, 'cpu'][0]  # .to(0) == .to('cuda:0')
+
+        self.batch_size_valid = 1
+        self.rand_seed = 7
+        # run_sh_file = [f for f in os.listdir('.') if 'train.sh' == f] + [os.path.join('..', f) for f in os.listdir('..') if 'train.sh' == f]
+        # with open(run_sh_file[0], 'r') as f:
+        #     lines = f.readlines()
+        #     self.save_last = int([l.strip() for l in lines if '"{}")'.format(self.task) in l and 'val_last=' in l][0].split('val_last=')[-1].split()[0])
+        #     self.save_step = int([l.strip() for l in lines if '"{}")'.format(self.task) in l and 'step=' in l][0].split('step=')[-1].split()[0])
+        #     self.val_step = [0, self.save_step][0]
+
+    def print_task(self) -> None:
+        # Return task for choosing settings in shell scripts.
+ print(self.task) + + + +### models/backbones/pvt_v2.py + +import torch +import torch.nn as nn +from functools import partial + +from timm.models.layers import DropPath, to_2tuple, trunc_normal_ +from timm.models.registry import register_model + +import math + +# from config import Config + +# config = Config() + +class Mlp(nn.Module): + def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.GELU, drop=0.): + super().__init__() + out_features = out_features or in_features + hidden_features = hidden_features or in_features + self.fc1 = nn.Linear(in_features, hidden_features) + self.dwconv = DWConv(hidden_features) + self.act = act_layer() + self.fc2 = nn.Linear(hidden_features, out_features) + self.drop = nn.Dropout(drop) + + self.apply(self._init_weights) + + def _init_weights(self, m): + if isinstance(m, nn.Linear): + trunc_normal_(m.weight, std=.02) + if isinstance(m, nn.Linear) and m.bias is not None: + nn.init.constant_(m.bias, 0) + elif isinstance(m, nn.LayerNorm): + nn.init.constant_(m.bias, 0) + nn.init.constant_(m.weight, 1.0) + elif isinstance(m, nn.Conv2d): + fan_out = m.kernel_size[0] * m.kernel_size[1] * m.out_channels + fan_out //= m.groups + m.weight.data.normal_(0, math.sqrt(2.0 / fan_out)) + if m.bias is not None: + m.bias.data.zero_() + + def forward(self, x, H, W): + x = self.fc1(x) + x = self.dwconv(x, H, W) + x = self.act(x) + x = self.drop(x) + x = self.fc2(x) + x = self.drop(x) + return x + + +class Attention(nn.Module): + def __init__(self, dim, num_heads=8, qkv_bias=False, qk_scale=None, attn_drop=0., proj_drop=0., sr_ratio=1): + super().__init__() + assert dim % num_heads == 0, f"dim {dim} should be divided by num_heads {num_heads}." + + self.dim = dim + self.num_heads = num_heads + head_dim = dim // num_heads + self.scale = qk_scale or head_dim ** -0.5 + + self.q = nn.Linear(dim, dim, bias=qkv_bias) + self.kv = nn.Linear(dim, dim * 2, bias=qkv_bias) + self.attn_drop_prob = attn_drop + self.attn_drop = nn.Dropout(attn_drop) + self.proj = nn.Linear(dim, dim) + self.proj_drop = nn.Dropout(proj_drop) + + self.sr_ratio = sr_ratio + if sr_ratio > 1: + self.sr = nn.Conv2d(dim, dim, kernel_size=sr_ratio, stride=sr_ratio) + self.norm = nn.LayerNorm(dim) + + self.apply(self._init_weights) + + def _init_weights(self, m): + if isinstance(m, nn.Linear): + trunc_normal_(m.weight, std=.02) + if isinstance(m, nn.Linear) and m.bias is not None: + nn.init.constant_(m.bias, 0) + elif isinstance(m, nn.LayerNorm): + nn.init.constant_(m.bias, 0) + nn.init.constant_(m.weight, 1.0) + elif isinstance(m, nn.Conv2d): + fan_out = m.kernel_size[0] * m.kernel_size[1] * m.out_channels + fan_out //= m.groups + m.weight.data.normal_(0, math.sqrt(2.0 / fan_out)) + if m.bias is not None: + m.bias.data.zero_() + + def forward(self, x, H, W): + B, N, C = x.shape + q = self.q(x).reshape(B, N, self.num_heads, C // self.num_heads).permute(0, 2, 1, 3) + + if self.sr_ratio > 1: + x_ = x.permute(0, 2, 1).reshape(B, C, H, W) + x_ = self.sr(x_).reshape(B, C, -1).permute(0, 2, 1) + x_ = self.norm(x_) + kv = self.kv(x_).reshape(B, -1, 2, self.num_heads, C // self.num_heads).permute(2, 0, 3, 1, 4) + else: + kv = self.kv(x).reshape(B, -1, 2, self.num_heads, C // self.num_heads).permute(2, 0, 3, 1, 4) + k, v = kv[0], kv[1] + + if config.SDPA_enabled: + x = torch.nn.functional.scaled_dot_product_attention( + q, k, v, + attn_mask=None, dropout_p=self.attn_drop_prob, is_causal=False + ).transpose(1, 2).reshape(B, N, C) + else: + attn = (q @ k.transpose(-2, -1)) * self.scale + attn = 
attn.softmax(dim=-1) + attn = self.attn_drop(attn) + + x = (attn @ v).transpose(1, 2).reshape(B, N, C) + x = self.proj(x) + x = self.proj_drop(x) + + return x + + +class Block(nn.Module): + + def __init__(self, dim, num_heads, mlp_ratio=4., qkv_bias=False, qk_scale=None, drop=0., attn_drop=0., + drop_path=0., act_layer=nn.GELU, norm_layer=nn.LayerNorm, sr_ratio=1): + super().__init__() + self.norm1 = norm_layer(dim) + self.attn = Attention( + dim, + num_heads=num_heads, qkv_bias=qkv_bias, qk_scale=qk_scale, + attn_drop=attn_drop, proj_drop=drop, sr_ratio=sr_ratio) + # NOTE: drop path for stochastic depth, we shall see if this is better than dropout here + self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity() + self.norm2 = norm_layer(dim) + mlp_hidden_dim = int(dim * mlp_ratio) + self.mlp = Mlp(in_features=dim, hidden_features=mlp_hidden_dim, act_layer=act_layer, drop=drop) + + self.apply(self._init_weights) + + def _init_weights(self, m): + if isinstance(m, nn.Linear): + trunc_normal_(m.weight, std=.02) + if isinstance(m, nn.Linear) and m.bias is not None: + nn.init.constant_(m.bias, 0) + elif isinstance(m, nn.LayerNorm): + nn.init.constant_(m.bias, 0) + nn.init.constant_(m.weight, 1.0) + elif isinstance(m, nn.Conv2d): + fan_out = m.kernel_size[0] * m.kernel_size[1] * m.out_channels + fan_out //= m.groups + m.weight.data.normal_(0, math.sqrt(2.0 / fan_out)) + if m.bias is not None: + m.bias.data.zero_() + + def forward(self, x, H, W): + x = x + self.drop_path(self.attn(self.norm1(x), H, W)) + x = x + self.drop_path(self.mlp(self.norm2(x), H, W)) + + return x + + +class OverlapPatchEmbed(nn.Module): + """ Image to Patch Embedding + """ + + def __init__(self, img_size=224, patch_size=7, stride=4, in_channels=3, embed_dim=768): + super().__init__() + img_size = to_2tuple(img_size) + patch_size = to_2tuple(patch_size) + + self.img_size = img_size + self.patch_size = patch_size + self.H, self.W = img_size[0] // patch_size[0], img_size[1] // patch_size[1] + self.num_patches = self.H * self.W + self.proj = nn.Conv2d(in_channels, embed_dim, kernel_size=patch_size, stride=stride, + padding=(patch_size[0] // 2, patch_size[1] // 2)) + self.norm = nn.LayerNorm(embed_dim) + + self.apply(self._init_weights) + + def _init_weights(self, m): + if isinstance(m, nn.Linear): + trunc_normal_(m.weight, std=.02) + if isinstance(m, nn.Linear) and m.bias is not None: + nn.init.constant_(m.bias, 0) + elif isinstance(m, nn.LayerNorm): + nn.init.constant_(m.bias, 0) + nn.init.constant_(m.weight, 1.0) + elif isinstance(m, nn.Conv2d): + fan_out = m.kernel_size[0] * m.kernel_size[1] * m.out_channels + fan_out //= m.groups + m.weight.data.normal_(0, math.sqrt(2.0 / fan_out)) + if m.bias is not None: + m.bias.data.zero_() + + def forward(self, x): + x = self.proj(x) + _, _, H, W = x.shape + x = x.flatten(2).transpose(1, 2) + x = self.norm(x) + + return x, H, W + + +class PyramidVisionTransformerImpr(nn.Module): + def __init__(self, img_size=224, patch_size=16, in_channels=3, num_classes=1000, embed_dims=[64, 128, 256, 512], + num_heads=[1, 2, 4, 8], mlp_ratios=[4, 4, 4, 4], qkv_bias=False, qk_scale=None, drop_rate=0., + attn_drop_rate=0., drop_path_rate=0., norm_layer=nn.LayerNorm, + depths=[3, 4, 6, 3], sr_ratios=[8, 4, 2, 1]): + super().__init__() + self.num_classes = num_classes + self.depths = depths + + # patch_embed + self.patch_embed1 = OverlapPatchEmbed(img_size=img_size, patch_size=7, stride=4, in_channels=in_channels, + embed_dim=embed_dims[0]) + self.patch_embed2 = 
OverlapPatchEmbed(img_size=img_size // 4, patch_size=3, stride=2, in_channels=embed_dims[0], + embed_dim=embed_dims[1]) + self.patch_embed3 = OverlapPatchEmbed(img_size=img_size // 8, patch_size=3, stride=2, in_channels=embed_dims[1], + embed_dim=embed_dims[2]) + self.patch_embed4 = OverlapPatchEmbed(img_size=img_size // 16, patch_size=3, stride=2, in_channels=embed_dims[2], + embed_dim=embed_dims[3]) + + # transformer encoder + dpr = [x.item() for x in torch.linspace(0, drop_path_rate, sum(depths))] # stochastic depth decay rule + cur = 0 + self.block1 = nn.ModuleList([Block( + dim=embed_dims[0], num_heads=num_heads[0], mlp_ratio=mlp_ratios[0], qkv_bias=qkv_bias, qk_scale=qk_scale, + drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[cur + i], norm_layer=norm_layer, + sr_ratio=sr_ratios[0]) + for i in range(depths[0])]) + self.norm1 = norm_layer(embed_dims[0]) + + cur += depths[0] + self.block2 = nn.ModuleList([Block( + dim=embed_dims[1], num_heads=num_heads[1], mlp_ratio=mlp_ratios[1], qkv_bias=qkv_bias, qk_scale=qk_scale, + drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[cur + i], norm_layer=norm_layer, + sr_ratio=sr_ratios[1]) + for i in range(depths[1])]) + self.norm2 = norm_layer(embed_dims[1]) + + cur += depths[1] + self.block3 = nn.ModuleList([Block( + dim=embed_dims[2], num_heads=num_heads[2], mlp_ratio=mlp_ratios[2], qkv_bias=qkv_bias, qk_scale=qk_scale, + drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[cur + i], norm_layer=norm_layer, + sr_ratio=sr_ratios[2]) + for i in range(depths[2])]) + self.norm3 = norm_layer(embed_dims[2]) + + cur += depths[2] + self.block4 = nn.ModuleList([Block( + dim=embed_dims[3], num_heads=num_heads[3], mlp_ratio=mlp_ratios[3], qkv_bias=qkv_bias, qk_scale=qk_scale, + drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[cur + i], norm_layer=norm_layer, + sr_ratio=sr_ratios[3]) + for i in range(depths[3])]) + self.norm4 = norm_layer(embed_dims[3]) + + # classification head + # self.head = nn.Linear(embed_dims[3], num_classes) if num_classes > 0 else nn.Identity() + + self.apply(self._init_weights) + + def _init_weights(self, m): + if isinstance(m, nn.Linear): + trunc_normal_(m.weight, std=.02) + if isinstance(m, nn.Linear) and m.bias is not None: + nn.init.constant_(m.bias, 0) + elif isinstance(m, nn.LayerNorm): + nn.init.constant_(m.bias, 0) + nn.init.constant_(m.weight, 1.0) + elif isinstance(m, nn.Conv2d): + fan_out = m.kernel_size[0] * m.kernel_size[1] * m.out_channels + fan_out //= m.groups + m.weight.data.normal_(0, math.sqrt(2.0 / fan_out)) + if m.bias is not None: + m.bias.data.zero_() + + def init_weights(self, pretrained=None): + if isinstance(pretrained, str): + logger = 1 + #load_checkpoint(self, pretrained, map_location='cpu', strict=False, logger=logger) + + def reset_drop_path(self, drop_path_rate): + dpr = [x.item() for x in torch.linspace(0, drop_path_rate, sum(self.depths))] + cur = 0 + for i in range(self.depths[0]): + self.block1[i].drop_path.drop_prob = dpr[cur + i] + + cur += self.depths[0] + for i in range(self.depths[1]): + self.block2[i].drop_path.drop_prob = dpr[cur + i] + + cur += self.depths[1] + for i in range(self.depths[2]): + self.block3[i].drop_path.drop_prob = dpr[cur + i] + + cur += self.depths[2] + for i in range(self.depths[3]): + self.block4[i].drop_path.drop_prob = dpr[cur + i] + + def freeze_patch_emb(self): + self.patch_embed1.requires_grad = False + + @torch.jit.ignore + def no_weight_decay(self): + return {'pos_embed1', 'pos_embed2', 'pos_embed3', 'pos_embed4', 'cls_token'} # has 
pos_embed may be better + + def get_classifier(self): + return self.head + + def reset_classifier(self, num_classes, global_pool=''): + self.num_classes = num_classes + self.head = nn.Linear(self.embed_dim, num_classes) if num_classes > 0 else nn.Identity() + + def forward_features(self, x): + B = x.shape[0] + outs = [] + + # stage 1 + x, H, W = self.patch_embed1(x) + for i, blk in enumerate(self.block1): + x = blk(x, H, W) + x = self.norm1(x) + x = x.reshape(B, H, W, -1).permute(0, 3, 1, 2).contiguous() + outs.append(x) + + # stage 2 + x, H, W = self.patch_embed2(x) + for i, blk in enumerate(self.block2): + x = blk(x, H, W) + x = self.norm2(x) + x = x.reshape(B, H, W, -1).permute(0, 3, 1, 2).contiguous() + outs.append(x) + + # stage 3 + x, H, W = self.patch_embed3(x) + for i, blk in enumerate(self.block3): + x = blk(x, H, W) + x = self.norm3(x) + x = x.reshape(B, H, W, -1).permute(0, 3, 1, 2).contiguous() + outs.append(x) + + # stage 4 + x, H, W = self.patch_embed4(x) + for i, blk in enumerate(self.block4): + x = blk(x, H, W) + x = self.norm4(x) + x = x.reshape(B, H, W, -1).permute(0, 3, 1, 2).contiguous() + outs.append(x) + + return outs + + # return x.mean(dim=1) + + def forward(self, x): + x = self.forward_features(x) + # x = self.head(x) + + return x + + +class DWConv(nn.Module): + def __init__(self, dim=768): + super(DWConv, self).__init__() + self.dwconv = nn.Conv2d(dim, dim, 3, 1, 1, bias=True, groups=dim) + + def forward(self, x, H, W): + B, N, C = x.shape + x = x.transpose(1, 2).view(B, C, H, W).contiguous() + x = self.dwconv(x) + x = x.flatten(2).transpose(1, 2) + + return x + + +def _conv_filter(state_dict, patch_size=16): + """ convert patch embedding weight from manual patchify + linear proj to conv""" + out_dict = {} + for k, v in state_dict.items(): + if 'patch_embed.proj.weight' in k: + v = v.reshape((v.shape[0], 3, patch_size, patch_size)) + out_dict[k] = v + + return out_dict + + +## @register_model +class pvt_v2_b0(PyramidVisionTransformerImpr): + def __init__(self, **kwargs): + super(pvt_v2_b0, self).__init__( + patch_size=4, embed_dims=[32, 64, 160, 256], num_heads=[1, 2, 5, 8], mlp_ratios=[8, 8, 4, 4], + qkv_bias=True, norm_layer=partial(nn.LayerNorm, eps=1e-6), depths=[2, 2, 2, 2], sr_ratios=[8, 4, 2, 1], + drop_rate=0.0, drop_path_rate=0.1) + + + +## @register_model +class pvt_v2_b1(PyramidVisionTransformerImpr): + def __init__(self, **kwargs): + super(pvt_v2_b1, self).__init__( + patch_size=4, embed_dims=[64, 128, 320, 512], num_heads=[1, 2, 5, 8], mlp_ratios=[8, 8, 4, 4], + qkv_bias=True, norm_layer=partial(nn.LayerNorm, eps=1e-6), depths=[2, 2, 2, 2], sr_ratios=[8, 4, 2, 1], + drop_rate=0.0, drop_path_rate=0.1) + +## @register_model +class pvt_v2_b2(PyramidVisionTransformerImpr): + def __init__(self, in_channels=3, **kwargs): + super(pvt_v2_b2, self).__init__( + patch_size=4, embed_dims=[64, 128, 320, 512], num_heads=[1, 2, 5, 8], mlp_ratios=[8, 8, 4, 4], + qkv_bias=True, norm_layer=partial(nn.LayerNorm, eps=1e-6), depths=[3, 4, 6, 3], sr_ratios=[8, 4, 2, 1], + drop_rate=0.0, drop_path_rate=0.1, in_channels=in_channels) + +## @register_model +class pvt_v2_b3(PyramidVisionTransformerImpr): + def __init__(self, **kwargs): + super(pvt_v2_b3, self).__init__( + patch_size=4, embed_dims=[64, 128, 320, 512], num_heads=[1, 2, 5, 8], mlp_ratios=[8, 8, 4, 4], + qkv_bias=True, norm_layer=partial(nn.LayerNorm, eps=1e-6), depths=[3, 4, 18, 3], sr_ratios=[8, 4, 2, 1], + drop_rate=0.0, drop_path_rate=0.1) + +## @register_model +class pvt_v2_b4(PyramidVisionTransformerImpr): + 
def __init__(self, **kwargs): + super(pvt_v2_b4, self).__init__( + patch_size=4, embed_dims=[64, 128, 320, 512], num_heads=[1, 2, 5, 8], mlp_ratios=[8, 8, 4, 4], + qkv_bias=True, norm_layer=partial(nn.LayerNorm, eps=1e-6), depths=[3, 8, 27, 3], sr_ratios=[8, 4, 2, 1], + drop_rate=0.0, drop_path_rate=0.1) + + +## @register_model +class pvt_v2_b5(PyramidVisionTransformerImpr): + def __init__(self, **kwargs): + super(pvt_v2_b5, self).__init__( + patch_size=4, embed_dims=[64, 128, 320, 512], num_heads=[1, 2, 5, 8], mlp_ratios=[4, 4, 4, 4], + qkv_bias=True, norm_layer=partial(nn.LayerNorm, eps=1e-6), depths=[3, 6, 40, 3], sr_ratios=[8, 4, 2, 1], + drop_rate=0.0, drop_path_rate=0.1) + + + +### models/backbones/swin_v1.py + +# -------------------------------------------------------- +# Swin Transformer +# Copyright (c) 2021 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Written by Ze Liu, Yutong Lin, Yixuan Wei +# -------------------------------------------------------- + +import torch +import torch.nn as nn +import torch.nn.functional as F +import torch.utils.checkpoint as checkpoint +import numpy as np +from timm.models.layers import DropPath, to_2tuple, trunc_normal_ + +# from config import Config + + +# config = Config() + +class Mlp(nn.Module): + """ Multilayer perceptron.""" + + def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.GELU, drop=0.): + super().__init__() + out_features = out_features or in_features + hidden_features = hidden_features or in_features + self.fc1 = nn.Linear(in_features, hidden_features) + self.act = act_layer() + self.fc2 = nn.Linear(hidden_features, out_features) + self.drop = nn.Dropout(drop) + + def forward(self, x): + x = self.fc1(x) + x = self.act(x) + x = self.drop(x) + x = self.fc2(x) + x = self.drop(x) + return x + + +def window_partition(x, window_size): + """ + Args: + x: (B, H, W, C) + window_size (int): window size + + Returns: + windows: (num_windows*B, window_size, window_size, C) + """ + B, H, W, C = x.shape + x = x.view(B, H // window_size, window_size, W // window_size, window_size, C) + windows = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(-1, window_size, window_size, C) + return windows + + +def window_reverse(windows, window_size, H, W): + """ + Args: + windows: (num_windows*B, window_size, window_size, C) + window_size (int): Window size + H (int): Height of image + W (int): Width of image + + Returns: + x: (B, H, W, C) + """ + B = int(windows.shape[0] / (H * W / window_size / window_size)) + x = windows.view(B, H // window_size, W // window_size, window_size, window_size, -1) + x = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(B, H, W, -1) + return x + + +class WindowAttention(nn.Module): + """ Window based multi-head self attention (W-MSA) module with relative position bias. + It supports both of shifted and non-shifted window. + + Args: + dim (int): Number of input channels. + window_size (tuple[int]): The height and width of the window. + num_heads (int): Number of attention heads. + qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True + qk_scale (float | None, optional): Override default qk scale of head_dim ** -0.5 if set + attn_drop (float, optional): Dropout ratio of attention weight. Default: 0.0 + proj_drop (float, optional): Dropout ratio of output. 
Default: 0.0 + """ + + def __init__(self, dim, window_size, num_heads, qkv_bias=True, qk_scale=None, attn_drop=0., proj_drop=0.): + + super().__init__() + self.dim = dim + self.window_size = window_size # Wh, Ww + self.num_heads = num_heads + head_dim = dim // num_heads + self.scale = qk_scale or head_dim ** -0.5 + + # define a parameter table of relative position bias + self.relative_position_bias_table = nn.Parameter( + torch.zeros((2 * window_size[0] - 1) * (2 * window_size[1] - 1), num_heads)) # 2*Wh-1 * 2*Ww-1, nH + + # get pair-wise relative position index for each token inside the window + coords_h = torch.arange(self.window_size[0]) + coords_w = torch.arange(self.window_size[1]) + coords = torch.stack(torch.meshgrid([coords_h, coords_w], indexing='ij')) # 2, Wh, Ww + coords_flatten = torch.flatten(coords, 1) # 2, Wh*Ww + relative_coords = coords_flatten[:, :, None] - coords_flatten[:, None, :] # 2, Wh*Ww, Wh*Ww + relative_coords = relative_coords.permute(1, 2, 0).contiguous() # Wh*Ww, Wh*Ww, 2 + relative_coords[:, :, 0] += self.window_size[0] - 1 # shift to start from 0 + relative_coords[:, :, 1] += self.window_size[1] - 1 + relative_coords[:, :, 0] *= 2 * self.window_size[1] - 1 + relative_position_index = relative_coords.sum(-1) # Wh*Ww, Wh*Ww + self.register_buffer("relative_position_index", relative_position_index) + + self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias) + self.attn_drop_prob = attn_drop + self.attn_drop = nn.Dropout(attn_drop) + self.proj = nn.Linear(dim, dim) + self.proj_drop = nn.Dropout(proj_drop) + + trunc_normal_(self.relative_position_bias_table, std=.02) + self.softmax = nn.Softmax(dim=-1) + + def forward(self, x, mask=None): + """ Forward function. + + Args: + x: input features with shape of (num_windows*B, N, C) + mask: (0/-inf) mask with shape of (num_windows, Wh*Ww, Wh*Ww) or None + """ + B_, N, C = x.shape + qkv = self.qkv(x).reshape(B_, N, 3, self.num_heads, C // self.num_heads).permute(2, 0, 3, 1, 4) + q, k, v = qkv[0], qkv[1], qkv[2] # make torchscript happy (cannot use tensor as tuple) + + q = q * self.scale + + if config.SDPA_enabled: + x = torch.nn.functional.scaled_dot_product_attention( + q, k, v, + attn_mask=None, dropout_p=self.attn_drop_prob, is_causal=False + ).transpose(1, 2).reshape(B_, N, C) + else: + attn = (q @ k.transpose(-2, -1)) + + relative_position_bias = self.relative_position_bias_table[self.relative_position_index.view(-1)].view( + self.window_size[0] * self.window_size[1], self.window_size[0] * self.window_size[1], -1) # Wh*Ww,Wh*Ww,nH + relative_position_bias = relative_position_bias.permute(2, 0, 1).contiguous() # nH, Wh*Ww, Wh*Ww + attn = attn + relative_position_bias.unsqueeze(0) + + if mask is not None: + nW = mask.shape[0] + attn = attn.view(B_ // nW, nW, self.num_heads, N, N) + mask.unsqueeze(1).unsqueeze(0) + attn = attn.view(-1, self.num_heads, N, N) + attn = self.softmax(attn) + else: + attn = self.softmax(attn) + + attn = self.attn_drop(attn) + + x = (attn @ v).transpose(1, 2).reshape(B_, N, C) + x = self.proj(x) + x = self.proj_drop(x) + return x + + +class SwinTransformerBlock(nn.Module): + """ Swin Transformer Block. + + Args: + dim (int): Number of input channels. + num_heads (int): Number of attention heads. + window_size (int): Window size. + shift_size (int): Shift size for SW-MSA. + mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. + qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. 
Default: True + qk_scale (float | None, optional): Override default qk scale of head_dim ** -0.5 if set. + drop (float, optional): Dropout rate. Default: 0.0 + attn_drop (float, optional): Attention dropout rate. Default: 0.0 + drop_path (float, optional): Stochastic depth rate. Default: 0.0 + act_layer (nn.Module, optional): Activation layer. Default: nn.GELU + norm_layer (nn.Module, optional): Normalization layer. Default: nn.LayerNorm + """ + + def __init__(self, dim, num_heads, window_size=7, shift_size=0, + mlp_ratio=4., qkv_bias=True, qk_scale=None, drop=0., attn_drop=0., drop_path=0., + act_layer=nn.GELU, norm_layer=nn.LayerNorm): + super().__init__() + self.dim = dim + self.num_heads = num_heads + self.window_size = window_size + self.shift_size = shift_size + self.mlp_ratio = mlp_ratio + assert 0 <= self.shift_size < self.window_size, "shift_size must in 0-window_size" + + self.norm1 = norm_layer(dim) + self.attn = WindowAttention( + dim, window_size=to_2tuple(self.window_size), num_heads=num_heads, + qkv_bias=qkv_bias, qk_scale=qk_scale, attn_drop=attn_drop, proj_drop=drop) + + self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity() + self.norm2 = norm_layer(dim) + mlp_hidden_dim = int(dim * mlp_ratio) + self.mlp = Mlp(in_features=dim, hidden_features=mlp_hidden_dim, act_layer=act_layer, drop=drop) + + self.H = None + self.W = None + + def forward(self, x, mask_matrix): + """ Forward function. + + Args: + x: Input feature, tensor size (B, H*W, C). + H, W: Spatial resolution of the input feature. + mask_matrix: Attention mask for cyclic shift. + """ + B, L, C = x.shape + H, W = self.H, self.W + assert L == H * W, "input feature has wrong size" + + shortcut = x + x = self.norm1(x) + x = x.view(B, H, W, C) + + # pad feature maps to multiples of window size + pad_l = pad_t = 0 + pad_r = (self.window_size - W % self.window_size) % self.window_size + pad_b = (self.window_size - H % self.window_size) % self.window_size + x = F.pad(x, (0, 0, pad_l, pad_r, pad_t, pad_b)) + _, Hp, Wp, _ = x.shape + + # cyclic shift + if self.shift_size > 0: + shifted_x = torch.roll(x, shifts=(-self.shift_size, -self.shift_size), dims=(1, 2)) + attn_mask = mask_matrix + else: + shifted_x = x + attn_mask = None + + # partition windows + x_windows = window_partition(shifted_x, self.window_size) # nW*B, window_size, window_size, C + x_windows = x_windows.view(-1, self.window_size * self.window_size, C) # nW*B, window_size*window_size, C + + # W-MSA/SW-MSA + attn_windows = self.attn(x_windows, mask=attn_mask) # nW*B, window_size*window_size, C + + # merge windows + attn_windows = attn_windows.view(-1, self.window_size, self.window_size, C) + shifted_x = window_reverse(attn_windows, self.window_size, Hp, Wp) # B H' W' C + + # reverse cyclic shift + if self.shift_size > 0: + x = torch.roll(shifted_x, shifts=(self.shift_size, self.shift_size), dims=(1, 2)) + else: + x = shifted_x + + if pad_r > 0 or pad_b > 0: + x = x[:, :H, :W, :].contiguous() + + x = x.view(B, H * W, C) + + # FFN + x = shortcut + self.drop_path(x) + x = x + self.drop_path(self.mlp(self.norm2(x))) + + return x + + +class PatchMerging(nn.Module): + """ Patch Merging Layer + + Args: + dim (int): Number of input channels. + norm_layer (nn.Module, optional): Normalization layer. 
Default: nn.LayerNorm + """ + def __init__(self, dim, norm_layer=nn.LayerNorm): + super().__init__() + self.dim = dim + self.reduction = nn.Linear(4 * dim, 2 * dim, bias=False) + self.norm = norm_layer(4 * dim) + + def forward(self, x, H, W): + """ Forward function. + + Args: + x: Input feature, tensor size (B, H*W, C). + H, W: Spatial resolution of the input feature. + """ + B, L, C = x.shape + assert L == H * W, "input feature has wrong size" + + x = x.view(B, H, W, C) + + # padding + pad_input = (H % 2 == 1) or (W % 2 == 1) + if pad_input: + x = F.pad(x, (0, 0, 0, W % 2, 0, H % 2)) + + x0 = x[:, 0::2, 0::2, :] # B H/2 W/2 C + x1 = x[:, 1::2, 0::2, :] # B H/2 W/2 C + x2 = x[:, 0::2, 1::2, :] # B H/2 W/2 C + x3 = x[:, 1::2, 1::2, :] # B H/2 W/2 C + x = torch.cat([x0, x1, x2, x3], -1) # B H/2 W/2 4*C + x = x.view(B, -1, 4 * C) # B H/2*W/2 4*C + + x = self.norm(x) + x = self.reduction(x) + + return x + + +class BasicLayer(nn.Module): + """ A basic Swin Transformer layer for one stage. + + Args: + dim (int): Number of feature channels + depth (int): Depths of this stage. + num_heads (int): Number of attention head. + window_size (int): Local window size. Default: 7. + mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. Default: 4. + qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True + qk_scale (float | None, optional): Override default qk scale of head_dim ** -0.5 if set. + drop (float, optional): Dropout rate. Default: 0.0 + attn_drop (float, optional): Attention dropout rate. Default: 0.0 + drop_path (float | tuple[float], optional): Stochastic depth rate. Default: 0.0 + norm_layer (nn.Module, optional): Normalization layer. Default: nn.LayerNorm + downsample (nn.Module | None, optional): Downsample layer at the end of the layer. Default: None + use_checkpoint (bool): Whether to use checkpointing to save memory. Default: False. + """ + + def __init__(self, + dim, + depth, + num_heads, + window_size=7, + mlp_ratio=4., + qkv_bias=True, + qk_scale=None, + drop=0., + attn_drop=0., + drop_path=0., + norm_layer=nn.LayerNorm, + downsample=None, + use_checkpoint=False): + super().__init__() + self.window_size = window_size + self.shift_size = window_size // 2 + self.depth = depth + self.use_checkpoint = use_checkpoint + + # build blocks + self.blocks = nn.ModuleList([ + SwinTransformerBlock( + dim=dim, + num_heads=num_heads, + window_size=window_size, + shift_size=0 if (i % 2 == 0) else window_size // 2, + mlp_ratio=mlp_ratio, + qkv_bias=qkv_bias, + qk_scale=qk_scale, + drop=drop, + attn_drop=attn_drop, + drop_path=drop_path[i] if isinstance(drop_path, list) else drop_path, + norm_layer=norm_layer) + for i in range(depth)]) + + # patch merging layer + if downsample is not None: + self.downsample = downsample(dim=dim, norm_layer=norm_layer) + else: + self.downsample = None + + def forward(self, x, H, W): + """ Forward function. + + Args: + x: Input feature, tensor size (B, H*W, C). + H, W: Spatial resolution of the input feature. 
+ """ + + # calculate attention mask for SW-MSA + Hp = int(np.ceil(H / self.window_size)) * self.window_size + Wp = int(np.ceil(W / self.window_size)) * self.window_size + img_mask = torch.zeros((1, Hp, Wp, 1), device=x.device) # 1 Hp Wp 1 + h_slices = (slice(0, -self.window_size), + slice(-self.window_size, -self.shift_size), + slice(-self.shift_size, None)) + w_slices = (slice(0, -self.window_size), + slice(-self.window_size, -self.shift_size), + slice(-self.shift_size, None)) + cnt = 0 + for h in h_slices: + for w in w_slices: + img_mask[:, h, w, :] = cnt + cnt += 1 + + mask_windows = window_partition(img_mask, self.window_size) # nW, window_size, window_size, 1 + mask_windows = mask_windows.view(-1, self.window_size * self.window_size) + attn_mask = mask_windows.unsqueeze(1) - mask_windows.unsqueeze(2) + attn_mask = attn_mask.masked_fill(attn_mask != 0, float(-100.0)).masked_fill(attn_mask == 0, float(0.0)) + + for blk in self.blocks: + blk.H, blk.W = H, W + if self.use_checkpoint: + x = checkpoint.checkpoint(blk, x, attn_mask) + else: + x = blk(x, attn_mask) + if self.downsample is not None: + x_down = self.downsample(x, H, W) + Wh, Ww = (H + 1) // 2, (W + 1) // 2 + return x, H, W, x_down, Wh, Ww + else: + return x, H, W, x, H, W + + +class PatchEmbed(nn.Module): + """ Image to Patch Embedding + + Args: + patch_size (int): Patch token size. Default: 4. + in_channels (int): Number of input image channels. Default: 3. + embed_dim (int): Number of linear projection output channels. Default: 96. + norm_layer (nn.Module, optional): Normalization layer. Default: None + """ + + def __init__(self, patch_size=4, in_channels=3, embed_dim=96, norm_layer=None): + super().__init__() + patch_size = to_2tuple(patch_size) + self.patch_size = patch_size + + self.in_channels = in_channels + self.embed_dim = embed_dim + + self.proj = nn.Conv2d(in_channels, embed_dim, kernel_size=patch_size, stride=patch_size) + if norm_layer is not None: + self.norm = norm_layer(embed_dim) + else: + self.norm = None + + def forward(self, x): + """Forward function.""" + # padding + _, _, H, W = x.size() + if W % self.patch_size[1] != 0: + x = F.pad(x, (0, self.patch_size[1] - W % self.patch_size[1])) + if H % self.patch_size[0] != 0: + x = F.pad(x, (0, 0, 0, self.patch_size[0] - H % self.patch_size[0])) + + x = self.proj(x) # B C Wh Ww + if self.norm is not None: + Wh, Ww = x.size(2), x.size(3) + x = x.flatten(2).transpose(1, 2) + x = self.norm(x) + x = x.transpose(1, 2).view(-1, self.embed_dim, Wh, Ww) + + return x + + +class SwinTransformer(nn.Module): + """ Swin Transformer backbone. + A PyTorch impl of : `Swin Transformer: Hierarchical Vision Transformer using Shifted Windows` - + https://arxiv.org/pdf/2103.14030 + + Args: + pretrain_img_size (int): Input image size for training the pretrained model, + used in absolute postion embedding. Default 224. + patch_size (int | tuple(int)): Patch size. Default: 4. + in_channels (int): Number of input image channels. Default: 3. + embed_dim (int): Number of linear projection output channels. Default: 96. + depths (tuple[int]): Depths of each Swin Transformer stage. + num_heads (tuple[int]): Number of attention head of each stage. + window_size (int): Window size. Default: 7. + mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. Default: 4. + qkv_bias (bool): If True, add a learnable bias to query, key, value. Default: True + qk_scale (float): Override default qk scale of head_dim ** -0.5 if set. + drop_rate (float): Dropout rate. 
+ attn_drop_rate (float): Attention dropout rate. Default: 0. + drop_path_rate (float): Stochastic depth rate. Default: 0.2. + norm_layer (nn.Module): Normalization layer. Default: nn.LayerNorm. + ape (bool): If True, add absolute position embedding to the patch embedding. Default: False. + patch_norm (bool): If True, add normalization after patch embedding. Default: True. + out_indices (Sequence[int]): Output from which stages. + frozen_stages (int): Stages to be frozen (stop grad and set eval mode). + -1 means not freezing any parameters. + use_checkpoint (bool): Whether to use checkpointing to save memory. Default: False. + """ + + def __init__(self, + pretrain_img_size=224, + patch_size=4, + in_channels=3, + embed_dim=96, + depths=[2, 2, 6, 2], + num_heads=[3, 6, 12, 24], + window_size=7, + mlp_ratio=4., + qkv_bias=True, + qk_scale=None, + drop_rate=0., + attn_drop_rate=0., + drop_path_rate=0.2, + norm_layer=nn.LayerNorm, + ape=False, + patch_norm=True, + out_indices=(0, 1, 2, 3), + frozen_stages=-1, + use_checkpoint=False): + super().__init__() + + self.pretrain_img_size = pretrain_img_size + self.num_layers = len(depths) + self.embed_dim = embed_dim + self.ape = ape + self.patch_norm = patch_norm + self.out_indices = out_indices + self.frozen_stages = frozen_stages + + # split image into non-overlapping patches + self.patch_embed = PatchEmbed( + patch_size=patch_size, in_channels=in_channels, embed_dim=embed_dim, + norm_layer=norm_layer if self.patch_norm else None) + + # absolute position embedding + if self.ape: + pretrain_img_size = to_2tuple(pretrain_img_size) + patch_size = to_2tuple(patch_size) + patches_resolution = [pretrain_img_size[0] // patch_size[0], pretrain_img_size[1] // patch_size[1]] + + self.absolute_pos_embed = nn.Parameter(torch.zeros(1, embed_dim, patches_resolution[0], patches_resolution[1])) + trunc_normal_(self.absolute_pos_embed, std=.02) + + self.pos_drop = nn.Dropout(p=drop_rate) + + # stochastic depth + dpr = [x.item() for x in torch.linspace(0, drop_path_rate, sum(depths))] # stochastic depth decay rule + + # build layers + self.layers = nn.ModuleList() + for i_layer in range(self.num_layers): + layer = BasicLayer( + dim=int(embed_dim * 2 ** i_layer), + depth=depths[i_layer], + num_heads=num_heads[i_layer], + window_size=window_size, + mlp_ratio=mlp_ratio, + qkv_bias=qkv_bias, + qk_scale=qk_scale, + drop=drop_rate, + attn_drop=attn_drop_rate, + drop_path=dpr[sum(depths[:i_layer]):sum(depths[:i_layer + 1])], + norm_layer=norm_layer, + downsample=PatchMerging if (i_layer < self.num_layers - 1) else None, + use_checkpoint=use_checkpoint) + self.layers.append(layer) + + num_features = [int(embed_dim * 2 ** i) for i in range(self.num_layers)] + self.num_features = num_features + + # add a norm layer for each output + for i_layer in out_indices: + layer = norm_layer(num_features[i_layer]) + layer_name = f'norm{i_layer}' + self.add_module(layer_name, layer) + + self._freeze_stages() + + def _freeze_stages(self): + if self.frozen_stages >= 0: + self.patch_embed.eval() + for param in self.patch_embed.parameters(): + param.requires_grad = False + + if self.frozen_stages >= 1 and self.ape: + self.absolute_pos_embed.requires_grad = False + + if self.frozen_stages >= 2: + self.pos_drop.eval() + for i in range(0, self.frozen_stages - 1): + m = self.layers[i] + m.eval() + for param in m.parameters(): + param.requires_grad = False + + + def forward(self, x): + """Forward function.""" + x = self.patch_embed(x) + + Wh, Ww = x.size(2), x.size(3) + if self.ape: + # 
interpolate the position embedding to the corresponding size + absolute_pos_embed = F.interpolate(self.absolute_pos_embed, size=(Wh, Ww), mode='bicubic') + x = (x + absolute_pos_embed) # B Wh*Ww C + + outs = []#x.contiguous()] + x = x.flatten(2).transpose(1, 2) + x = self.pos_drop(x) + for i in range(self.num_layers): + layer = self.layers[i] + x_out, H, W, x, Wh, Ww = layer(x, Wh, Ww) + + if i in self.out_indices: + norm_layer = getattr(self, f'norm{i}') + x_out = norm_layer(x_out) + + out = x_out.view(-1, H, W, self.num_features[i]).permute(0, 3, 1, 2).contiguous() + outs.append(out) + + return tuple(outs) + + def train(self, mode=True): + """Convert the model into training mode while keep layers freezed.""" + super(SwinTransformer, self).train(mode) + self._freeze_stages() + +def swin_v1_t(): + model = SwinTransformer(embed_dim=96, depths=[2, 2, 6, 2], num_heads=[3, 6, 12, 24], window_size=7) + return model + +def swin_v1_s(): + model = SwinTransformer(embed_dim=96, depths=[2, 2, 18, 2], num_heads=[3, 6, 12, 24], window_size=7) + return model + +def swin_v1_b(): + model = SwinTransformer(embed_dim=128, depths=[2, 2, 18, 2], num_heads=[4, 8, 16, 32], window_size=12) + return model + +def swin_v1_l(): + model = SwinTransformer(embed_dim=192, depths=[2, 2, 18, 2], num_heads=[6, 12, 24, 48], window_size=12) + return model + + + +### models/modules/deform_conv.py + +import torch +import torch.nn as nn +from torchvision.ops import deform_conv2d + + +class DeformableConv2d(nn.Module): + def __init__(self, + in_channels, + out_channels, + kernel_size=3, + stride=1, + padding=1, + bias=False): + + super(DeformableConv2d, self).__init__() + + assert type(kernel_size) == tuple or type(kernel_size) == int + + kernel_size = kernel_size if type(kernel_size) == tuple else (kernel_size, kernel_size) + self.stride = stride if type(stride) == tuple else (stride, stride) + self.padding = padding + + self.offset_conv = nn.Conv2d(in_channels, + 2 * kernel_size[0] * kernel_size[1], + kernel_size=kernel_size, + stride=stride, + padding=self.padding, + bias=True) + + nn.init.constant_(self.offset_conv.weight, 0.) + nn.init.constant_(self.offset_conv.bias, 0.) + + self.modulator_conv = nn.Conv2d(in_channels, + 1 * kernel_size[0] * kernel_size[1], + kernel_size=kernel_size, + stride=stride, + padding=self.padding, + bias=True) + + nn.init.constant_(self.modulator_conv.weight, 0.) + nn.init.constant_(self.modulator_conv.bias, 0.) + + self.regular_conv = nn.Conv2d(in_channels, + out_channels=out_channels, + kernel_size=kernel_size, + stride=stride, + padding=self.padding, + bias=bias) + + def forward(self, x): + #h, w = x.shape[2:] + #max_offset = max(h, w)/4. + + offset = self.offset_conv(x)#.clamp(-max_offset, max_offset) + modulator = 2. 
* torch.sigmoid(self.modulator_conv(x)) + + x = deform_conv2d( + input=x, + offset=offset, + weight=self.regular_conv.weight, + bias=self.regular_conv.bias, + padding=self.padding, + mask=modulator, + stride=self.stride, + ) + return x + + + + +### utils.py + +import torch.nn as nn + + +def build_act_layer(act_layer): + if act_layer == 'ReLU': + return nn.ReLU(inplace=True) + elif act_layer == 'SiLU': + return nn.SiLU(inplace=True) + elif act_layer == 'GELU': + return nn.GELU() + + raise NotImplementedError(f'build_act_layer does not support {act_layer}') + + +def build_norm_layer(dim, + norm_layer, + in_format='channels_last', + out_format='channels_last', + eps=1e-6): + layers = [] + if norm_layer == 'BN': + if in_format == 'channels_last': + layers.append(to_channels_first()) + layers.append(nn.BatchNorm2d(dim)) + if out_format == 'channels_last': + layers.append(to_channels_last()) + elif norm_layer == 'LN': + if in_format == 'channels_first': + layers.append(to_channels_last()) + layers.append(nn.LayerNorm(dim, eps=eps)) + if out_format == 'channels_first': + layers.append(to_channels_first()) + else: + raise NotImplementedError( + f'build_norm_layer does not support {norm_layer}') + return nn.Sequential(*layers) + + +class to_channels_first(nn.Module): + + def __init__(self): + super().__init__() + + def forward(self, x): + return x.permute(0, 3, 1, 2) + + +class to_channels_last(nn.Module): + + def __init__(self): + super().__init__() + + def forward(self, x): + return x.permute(0, 2, 3, 1) + + + +### dataset.py + +_class_labels_TR_sorted = ( + 'Airplane, Ant, Antenna, Archery, Axe, BabyCarriage, Bag, BalanceBeam, Balcony, Balloon, Basket, BasketballHoop, Beatle, Bed, Bee, Bench, Bicycle, ' + 'BicycleFrame, BicycleStand, Boat, Bonsai, BoomLift, Bridge, BunkBed, Butterfly, Button, Cable, CableLift, Cage, Camcorder, Cannon, Canoe, Car, ' + 'CarParkDropArm, Carriage, Cart, Caterpillar, CeilingLamp, Centipede, Chair, Clip, Clock, Clothes, CoatHanger, Comb, ConcretePumpTruck, Crack, Crane, ' + 'Cup, DentalChair, Desk, DeskChair, Diagram, DishRack, DoorHandle, Dragonfish, Dragonfly, Drum, Earphone, Easel, ElectricIron, Excavator, Eyeglasses, ' + 'Fan, Fence, Fencing, FerrisWheel, FireExtinguisher, Fishing, Flag, FloorLamp, Forklift, GasStation, Gate, Gear, Goal, Golf, GymEquipment, Hammock, ' + 'Handcart, Handcraft, Handrail, HangGlider, Harp, Harvester, Headset, Helicopter, Helmet, Hook, HorizontalBar, Hydrovalve, IroningTable, Jewelry, Key, ' + 'KidsPlayground, Kitchenware, Kite, Knife, Ladder, LaundryRack, Lightning, Lobster, Locust, Machine, MachineGun, MagazineRack, Mantis, Medal, MemorialArchway, ' + 'Microphone, Missile, MobileHolder, Monitor, Mosquito, Motorcycle, MovingTrolley, Mower, MusicPlayer, MusicStand, ObservationTower, Octopus, OilWell, ' + 'OlympicLogo, OperatingTable, OutdoorFitnessEquipment, Parachute, Pavilion, Piano, Pipe, PlowHarrow, PoleVault, Punchbag, Rack, Racket, Rifle, Ring, Robot, ' + 'RockClimbing, Rope, Sailboat, Satellite, Scaffold, Scale, Scissor, Scooter, Sculpture, Seadragon, Seahorse, Seal, SewingMachine, Ship, Shoe, ShoppingCart, ' + 'ShoppingTrolley, Shower, Shrimp, Signboard, Skateboarding, Skeleton, Skiing, Spade, SpeedBoat, Spider, Spoon, Stair, Stand, Stationary, SteeringWheel, ' + 'Stethoscope, Stool, Stove, StreetLamp, SweetStand, Swing, Sword, TV, Table, TableChair, TableLamp, TableTennis, Tank, Tapeline, Teapot, Telescope, Tent, ' + 'TobaccoPipe, Toy, Tractor, TrafficLight, TrafficSign, Trampoline, TransmissionTower, Tree, Tricycle, 
TrimmerCover, Tripod, Trombone, Truck, Trumpet, Tuba, ' + 'UAV, Umbrella, UnevenBars, UtilityPole, VacuumCleaner, Violin, Wakesurfing, Watch, WaterTower, WateringPot, Well, WellLid, Wheel, Wheelchair, WindTurbine, Windmill, WineGlass, WireWhisk, Yacht' +) +class_labels_TR_sorted = _class_labels_TR_sorted.split(', ') + + +### models/backbones/build_backbones.py + +import torch +import torch.nn as nn +from collections import OrderedDict +from torchvision.models import vgg16, vgg16_bn, VGG16_Weights, VGG16_BN_Weights, resnet50, ResNet50_Weights +# from models.pvt_v2 import pvt_v2_b0, pvt_v2_b1, pvt_v2_b2, pvt_v2_b5 +# from models.swin_v1 import swin_v1_t, swin_v1_s, swin_v1_b, swin_v1_l +# from config import Config + + +config = Config() + +def build_backbone(bb_name, pretrained=True, params_settings=''): + if bb_name == 'vgg16': + bb_net = list(vgg16(pretrained=VGG16_Weights.DEFAULT if pretrained else None).children())[0] + bb = nn.Sequential(OrderedDict({'conv1': bb_net[:4], 'conv2': bb_net[4:9], 'conv3': bb_net[9:16], 'conv4': bb_net[16:23]})) + elif bb_name == 'vgg16bn': + bb_net = list(vgg16_bn(pretrained=VGG16_BN_Weights.DEFAULT if pretrained else None).children())[0] + bb = nn.Sequential(OrderedDict({'conv1': bb_net[:6], 'conv2': bb_net[6:13], 'conv3': bb_net[13:23], 'conv4': bb_net[23:33]})) + elif bb_name == 'resnet50': + bb_net = list(resnet50(pretrained=ResNet50_Weights.DEFAULT if pretrained else None).children()) + bb = nn.Sequential(OrderedDict({'conv1': nn.Sequential(*bb_net[0:3]), 'conv2': bb_net[4], 'conv3': bb_net[5], 'conv4': bb_net[6]})) + else: + bb = eval('{}({})'.format(bb_name, params_settings)) + if pretrained: + bb = load_weights(bb, bb_name) + return bb + +def load_weights(model, model_name): + save_model = torch.load(config.weights[model_name], map_location='cpu') + model_dict = model.state_dict() + state_dict = {k: v if v.size() == model_dict[k].size() else model_dict[k] for k, v in save_model.items() if k in model_dict.keys()} + # to ignore the weights with mismatched size when I modify the backbone itself. + if not state_dict: + save_model_keys = list(save_model.keys()) + sub_item = save_model_keys[0] if len(save_model_keys) == 1 else None + state_dict = {k: v if v.size() == model_dict[k].size() else model_dict[k] for k, v in save_model[sub_item].items() if k in model_dict.keys()} + if not state_dict or not sub_item: + print('Weights are not successully loaded. 
Check the state dict of weights file.') + return None + else: + print('Found correct weights in the "{}" item of loaded state_dict.'.format(sub_item)) + model_dict.update(state_dict) + model.load_state_dict(model_dict) + return model + + + +### models/modules/decoder_blocks.py + +import torch +import torch.nn as nn +# from models.aspp import ASPP, ASPPDeformable +# from config import Config + + +# config = Config() + + +class BasicDecBlk(nn.Module): + def __init__(self, in_channels=64, out_channels=64, inter_channels=64): + super(BasicDecBlk, self).__init__() + inter_channels = in_channels // 4 if config.dec_channels_inter == 'adap' else 64 + self.conv_in = nn.Conv2d(in_channels, inter_channels, 3, 1, padding=1) + self.relu_in = nn.ReLU(inplace=True) + if config.dec_att == 'ASPP': + self.dec_att = ASPP(in_channels=inter_channels) + elif config.dec_att == 'ASPPDeformable': + self.dec_att = ASPPDeformable(in_channels=inter_channels) + self.conv_out = nn.Conv2d(inter_channels, out_channels, 3, 1, padding=1) + self.bn_in = nn.BatchNorm2d(inter_channels) if config.batch_size > 1 else nn.Identity() + self.bn_out = nn.BatchNorm2d(out_channels) if config.batch_size > 1 else nn.Identity() + + def forward(self, x): + x = self.conv_in(x) + x = self.bn_in(x) + x = self.relu_in(x) + if hasattr(self, 'dec_att'): + x = self.dec_att(x) + x = self.conv_out(x) + x = self.bn_out(x) + return x + + +class ResBlk(nn.Module): + def __init__(self, in_channels=64, out_channels=None, inter_channels=64): + super(ResBlk, self).__init__() + if out_channels is None: + out_channels = in_channels + inter_channels = in_channels // 4 if config.dec_channels_inter == 'adap' else 64 + + self.conv_in = nn.Conv2d(in_channels, inter_channels, 3, 1, padding=1) + self.bn_in = nn.BatchNorm2d(inter_channels) if config.batch_size > 1 else nn.Identity() + self.relu_in = nn.ReLU(inplace=True) + + if config.dec_att == 'ASPP': + self.dec_att = ASPP(in_channels=inter_channels) + elif config.dec_att == 'ASPPDeformable': + self.dec_att = ASPPDeformable(in_channels=inter_channels) + + self.conv_out = nn.Conv2d(inter_channels, out_channels, 3, 1, padding=1) + self.bn_out = nn.BatchNorm2d(out_channels) if config.batch_size > 1 else nn.Identity() + + self.conv_resi = nn.Conv2d(in_channels, out_channels, 1, 1, 0) + + def forward(self, x): + _x = self.conv_resi(x) + x = self.conv_in(x) + x = self.bn_in(x) + x = self.relu_in(x) + if hasattr(self, 'dec_att'): + x = self.dec_att(x) + x = self.conv_out(x) + x = self.bn_out(x) + return x + _x + + + +### models/modules/lateral_blocks.py + +import numpy as np +import torch +import torch.nn as nn +import torch.nn.functional as F +from functools import partial + +# from config import Config + + +# config = Config() + + +class BasicLatBlk(nn.Module): + def __init__(self, in_channels=64, out_channels=64, inter_channels=64): + super(BasicLatBlk, self).__init__() + inter_channels = in_channels // 4 if config.dec_channels_inter == 'adap' else 64 + self.conv = nn.Conv2d(in_channels, out_channels, 1, 1, 0) + + def forward(self, x): + x = self.conv(x) + return x + + + +### models/modules/aspp.py + +import torch +import torch.nn as nn +import torch.nn.functional as F +# from models.deform_conv import DeformableConv2d +# from config import Config + + +# config = Config() + + +class _ASPPModule(nn.Module): + def __init__(self, in_channels, planes, kernel_size, padding, dilation): + super(_ASPPModule, self).__init__() + self.atrous_conv = nn.Conv2d(in_channels, planes, kernel_size=kernel_size, + stride=1, 
padding=padding, dilation=dilation, bias=False) + self.bn = nn.BatchNorm2d(planes) if config.batch_size > 1 else nn.Identity() + self.relu = nn.ReLU(inplace=True) + + def forward(self, x): + x = self.atrous_conv(x) + x = self.bn(x) + + return self.relu(x) + + +class ASPP(nn.Module): + def __init__(self, in_channels=64, out_channels=None, output_stride=16): + super(ASPP, self).__init__() + self.down_scale = 1 + if out_channels is None: + out_channels = in_channels + self.in_channelster = 256 // self.down_scale + if output_stride == 16: + dilations = [1, 6, 12, 18] + elif output_stride == 8: + dilations = [1, 12, 24, 36] + else: + raise NotImplementedError + + self.aspp1 = _ASPPModule(in_channels, self.in_channelster, 1, padding=0, dilation=dilations[0]) + self.aspp2 = _ASPPModule(in_channels, self.in_channelster, 3, padding=dilations[1], dilation=dilations[1]) + self.aspp3 = _ASPPModule(in_channels, self.in_channelster, 3, padding=dilations[2], dilation=dilations[2]) + self.aspp4 = _ASPPModule(in_channels, self.in_channelster, 3, padding=dilations[3], dilation=dilations[3]) + + self.global_avg_pool = nn.Sequential(nn.AdaptiveAvgPool2d((1, 1)), + nn.Conv2d(in_channels, self.in_channelster, 1, stride=1, bias=False), + nn.BatchNorm2d(self.in_channelster) if config.batch_size > 1 else nn.Identity(), + nn.ReLU(inplace=True)) + self.conv1 = nn.Conv2d(self.in_channelster * 5, out_channels, 1, bias=False) + self.bn1 = nn.BatchNorm2d(out_channels) if config.batch_size > 1 else nn.Identity() + self.relu = nn.ReLU(inplace=True) + self.dropout = nn.Dropout(0.5) + + def forward(self, x): + x1 = self.aspp1(x) + x2 = self.aspp2(x) + x3 = self.aspp3(x) + x4 = self.aspp4(x) + x5 = self.global_avg_pool(x) + x5 = F.interpolate(x5, size=x1.size()[2:], mode='bilinear', align_corners=True) + x = torch.cat((x1, x2, x3, x4, x5), dim=1) + + x = self.conv1(x) + x = self.bn1(x) + x = self.relu(x) + + return self.dropout(x) + + +##################### Deformable +class _ASPPModuleDeformable(nn.Module): + def __init__(self, in_channels, planes, kernel_size, padding): + super(_ASPPModuleDeformable, self).__init__() + self.atrous_conv = DeformableConv2d(in_channels, planes, kernel_size=kernel_size, + stride=1, padding=padding, bias=False) + self.bn = nn.BatchNorm2d(planes) if config.batch_size > 1 else nn.Identity() + self.relu = nn.ReLU(inplace=True) + + def forward(self, x): + x = self.atrous_conv(x) + x = self.bn(x) + + return self.relu(x) + + +class ASPPDeformable(nn.Module): + def __init__(self, in_channels, out_channels=None, parallel_block_sizes=[1, 3, 7]): + super(ASPPDeformable, self).__init__() + self.down_scale = 1 + if out_channels is None: + out_channels = in_channels + self.in_channelster = 256 // self.down_scale + + self.aspp1 = _ASPPModuleDeformable(in_channels, self.in_channelster, 1, padding=0) + self.aspp_deforms = nn.ModuleList([ + _ASPPModuleDeformable(in_channels, self.in_channelster, conv_size, padding=int(conv_size//2)) for conv_size in parallel_block_sizes + ]) + + self.global_avg_pool = nn.Sequential(nn.AdaptiveAvgPool2d((1, 1)), + nn.Conv2d(in_channels, self.in_channelster, 1, stride=1, bias=False), + nn.BatchNorm2d(self.in_channelster) if config.batch_size > 1 else nn.Identity(), + nn.ReLU(inplace=True)) + self.conv1 = nn.Conv2d(self.in_channelster * (2 + len(self.aspp_deforms)), out_channels, 1, bias=False) + self.bn1 = nn.BatchNorm2d(out_channels) if config.batch_size > 1 else nn.Identity() + self.relu = nn.ReLU(inplace=True) + self.dropout = nn.Dropout(0.5) + + def forward(self, x): + x1 = 
self.aspp1(x) + x_aspp_deforms = [aspp_deform(x) for aspp_deform in self.aspp_deforms] + x5 = self.global_avg_pool(x) + x5 = F.interpolate(x5, size=x1.size()[2:], mode='bilinear', align_corners=True) + x = torch.cat((x1, *x_aspp_deforms, x5), dim=1) + + x = self.conv1(x) + x = self.bn1(x) + x = self.relu(x) + + return self.dropout(x) + + + +### models/refinement/refiner.py + +import torch +import torch.nn as nn +from collections import OrderedDict +import torch +import torch.nn as nn +import torch.nn.functional as F +from torchvision.models import vgg16, vgg16_bn +from torchvision.models import resnet50 + +# from config import Config +# from dataset import class_labels_TR_sorted +# from models.build_backbone import build_backbone +# from models.decoder_blocks import BasicDecBlk +# from models.lateral_blocks import BasicLatBlk +# from models.ing import * +# from models.stem_layer import StemLayer + + +class RefinerPVTInChannels4(nn.Module): + def __init__(self, in_channels=3+1): + super(RefinerPVTInChannels4, self).__init__() + self.config = Config() + self.epoch = 1 + self.bb = build_backbone(self.config.bb, params_settings='in_channels=4') + + lateral_channels_in_collection = { + 'vgg16': [512, 256, 128, 64], 'vgg16bn': [512, 256, 128, 64], 'resnet50': [1024, 512, 256, 64], + 'pvt_v2_b2': [512, 320, 128, 64], 'pvt_v2_b5': [512, 320, 128, 64], + 'swin_v1_b': [1024, 512, 256, 128], 'swin_v1_l': [1536, 768, 384, 192], + } + channels = lateral_channels_in_collection[self.config.bb] + self.squeeze_module = BasicDecBlk(channels[0], channels[0]) + + self.decoder = Decoder(channels) + + if 0: + for key, value in self.named_parameters(): + if 'bb.' in key: + value.requires_grad = False + + def forward(self, x): + if isinstance(x, list): + x = torch.cat(x, dim=1) + ########## Encoder ########## + if self.config.bb in ['vgg16', 'vgg16bn', 'resnet50']: + x1 = self.bb.conv1(x) + x2 = self.bb.conv2(x1) + x3 = self.bb.conv3(x2) + x4 = self.bb.conv4(x3) + else: + x1, x2, x3, x4 = self.bb(x) + + x4 = self.squeeze_module(x4) + + ########## Decoder ########## + + features = [x, x1, x2, x3, x4] + scaled_preds = self.decoder(features) + + return scaled_preds + + +class Refiner(nn.Module): + def __init__(self, in_channels=3+1): + super(Refiner, self).__init__() + self.config = Config() + self.epoch = 1 + self.stem_layer = StemLayer(in_channels=in_channels, inter_channels=48, out_channels=3, norm_layer='BN' if self.config.batch_size > 1 else 'LN') + self.bb = build_backbone(self.config.bb) + + lateral_channels_in_collection = { + 'vgg16': [512, 256, 128, 64], 'vgg16bn': [512, 256, 128, 64], 'resnet50': [1024, 512, 256, 64], + 'pvt_v2_b2': [512, 320, 128, 64], 'pvt_v2_b5': [512, 320, 128, 64], + 'swin_v1_b': [1024, 512, 256, 128], 'swin_v1_l': [1536, 768, 384, 192], + } + channels = lateral_channels_in_collection[self.config.bb] + self.squeeze_module = BasicDecBlk(channels[0], channels[0]) + + self.decoder = Decoder(channels) + + if 0: + for key, value in self.named_parameters(): + if 'bb.' 
in key: + value.requires_grad = False + + def forward(self, x): + if isinstance(x, list): + x = torch.cat(x, dim=1) + x = self.stem_layer(x) + ########## Encoder ########## + if self.config.bb in ['vgg16', 'vgg16bn', 'resnet50']: + x1 = self.bb.conv1(x) + x2 = self.bb.conv2(x1) + x3 = self.bb.conv3(x2) + x4 = self.bb.conv4(x3) + else: + x1, x2, x3, x4 = self.bb(x) + + x4 = self.squeeze_module(x4) + + ########## Decoder ########## + + features = [x, x1, x2, x3, x4] + scaled_preds = self.decoder(features) + + return scaled_preds + + +class Decoder(nn.Module): + def __init__(self, channels): + super(Decoder, self).__init__() + self.config = Config() + DecoderBlock = eval('BasicDecBlk') + LateralBlock = eval('BasicLatBlk') + + self.decoder_block4 = DecoderBlock(channels[0], channels[1]) + self.decoder_block3 = DecoderBlock(channels[1], channels[2]) + self.decoder_block2 = DecoderBlock(channels[2], channels[3]) + self.decoder_block1 = DecoderBlock(channels[3], channels[3]//2) + + self.lateral_block4 = LateralBlock(channels[1], channels[1]) + self.lateral_block3 = LateralBlock(channels[2], channels[2]) + self.lateral_block2 = LateralBlock(channels[3], channels[3]) + + if self.config.ms_supervision: + self.conv_ms_spvn_4 = nn.Conv2d(channels[1], 1, 1, 1, 0) + self.conv_ms_spvn_3 = nn.Conv2d(channels[2], 1, 1, 1, 0) + self.conv_ms_spvn_2 = nn.Conv2d(channels[3], 1, 1, 1, 0) + self.conv_out1 = nn.Sequential(nn.Conv2d(channels[3]//2, 1, 1, 1, 0)) + + def forward(self, features): + x, x1, x2, x3, x4 = features + outs = [] + p4 = self.decoder_block4(x4) + _p4 = F.interpolate(p4, size=x3.shape[2:], mode='bilinear', align_corners=True) + _p3 = _p4 + self.lateral_block4(x3) + + p3 = self.decoder_block3(_p3) + _p3 = F.interpolate(p3, size=x2.shape[2:], mode='bilinear', align_corners=True) + _p2 = _p3 + self.lateral_block3(x2) + + p2 = self.decoder_block2(_p2) + _p2 = F.interpolate(p2, size=x1.shape[2:], mode='bilinear', align_corners=True) + _p1 = _p2 + self.lateral_block2(x1) + + _p1 = self.decoder_block1(_p1) + _p1 = F.interpolate(_p1, size=x.shape[2:], mode='bilinear', align_corners=True) + p1_out = self.conv_out1(_p1) + + if self.config.ms_supervision: + outs.append(self.conv_ms_spvn_4(p4)) + outs.append(self.conv_ms_spvn_3(p3)) + outs.append(self.conv_ms_spvn_2(p2)) + outs.append(p1_out) + return outs + + +class RefUNet(nn.Module): + # Refinement + def __init__(self, in_channels=3+1): + super(RefUNet, self).__init__() + self.encoder_1 = nn.Sequential( + nn.Conv2d(in_channels, 64, 3, 1, 1), + nn.Conv2d(64, 64, 3, 1, 1), + nn.BatchNorm2d(64), + nn.ReLU(inplace=True) + ) + + self.encoder_2 = nn.Sequential( + nn.MaxPool2d(2, 2, ceil_mode=True), + nn.Conv2d(64, 64, 3, 1, 1), + nn.BatchNorm2d(64), + nn.ReLU(inplace=True) + ) + + self.encoder_3 = nn.Sequential( + nn.MaxPool2d(2, 2, ceil_mode=True), + nn.Conv2d(64, 64, 3, 1, 1), + nn.BatchNorm2d(64), + nn.ReLU(inplace=True) + ) + + self.encoder_4 = nn.Sequential( + nn.MaxPool2d(2, 2, ceil_mode=True), + nn.Conv2d(64, 64, 3, 1, 1), + nn.BatchNorm2d(64), + nn.ReLU(inplace=True) + ) + + self.pool4 = nn.MaxPool2d(2, 2, ceil_mode=True) + ##### + self.decoder_5 = nn.Sequential( + nn.Conv2d(64, 64, 3, 1, 1), + nn.BatchNorm2d(64), + nn.ReLU(inplace=True) + ) + ##### + self.decoder_4 = nn.Sequential( + nn.Conv2d(128, 64, 3, 1, 1), + nn.BatchNorm2d(64), + nn.ReLU(inplace=True) + ) + + self.decoder_3 = nn.Sequential( + nn.Conv2d(128, 64, 3, 1, 1), + nn.BatchNorm2d(64), + nn.ReLU(inplace=True) + ) + + self.decoder_2 = nn.Sequential( + nn.Conv2d(128, 64, 3, 1, 1), + 
nn.BatchNorm2d(64), + nn.ReLU(inplace=True) + ) + + self.decoder_1 = nn.Sequential( + nn.Conv2d(128, 64, 3, 1, 1), + nn.BatchNorm2d(64), + nn.ReLU(inplace=True) + ) + + self.conv_d0 = nn.Conv2d(64, 1, 3, 1, 1) + + self.upscore2 = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True) + + def forward(self, x): + outs = [] + if isinstance(x, list): + x = torch.cat(x, dim=1) + hx = x + + hx1 = self.encoder_1(hx) + hx2 = self.encoder_2(hx1) + hx3 = self.encoder_3(hx2) + hx4 = self.encoder_4(hx3) + + hx = self.decoder_5(self.pool4(hx4)) + hx = torch.cat((self.upscore2(hx), hx4), 1) + + d4 = self.decoder_4(hx) + hx = torch.cat((self.upscore2(d4), hx3), 1) + + d3 = self.decoder_3(hx) + hx = torch.cat((self.upscore2(d3), hx2), 1) + + d2 = self.decoder_2(hx) + hx = torch.cat((self.upscore2(d2), hx1), 1) + + d1 = self.decoder_1(hx) + + x = self.conv_d0(d1) + outs.append(x) + return outs + + + +### models/stem_layer.py + +import torch.nn as nn +# from utils import build_act_layer, build_norm_layer + + +class StemLayer(nn.Module): + r""" Stem layer of InternImage + Args: + in_channels (int): number of input channels + out_channels (int): number of output channels + act_layer (str): activation layer + norm_layer (str): normalization layer + """ + + def __init__(self, + in_channels=3+1, + inter_channels=48, + out_channels=96, + act_layer='GELU', + norm_layer='BN'): + super().__init__() + self.conv1 = nn.Conv2d(in_channels, + inter_channels, + kernel_size=3, + stride=1, + padding=1) + self.norm1 = build_norm_layer( + inter_channels, norm_layer, 'channels_first', 'channels_first' + ) + self.act = build_act_layer(act_layer) + self.conv2 = nn.Conv2d(inter_channels, + out_channels, + kernel_size=3, + stride=1, + padding=1) + self.norm2 = build_norm_layer( + out_channels, norm_layer, 'channels_first', 'channels_first' + ) + + def forward(self, x): + x = self.conv1(x) + x = self.norm1(x) + x = self.act(x) + x = self.conv2(x) + x = self.norm2(x) + return x + + +### models/birefnet.py + +import torch +import torch.nn as nn +import torch.nn.functional as F +from kornia.filters import laplacian +from transformers import PreTrainedModel + +# from config import Config +# from dataset import class_labels_TR_sorted +# from models.build_backbone import build_backbone +# from models.decoder_blocks import BasicDecBlk, ResBlk, HierarAttDecBlk +# from models.lateral_blocks import BasicLatBlk +# from models.aspp import ASPP, ASPPDeformable +# from models.ing import * +# from models.refiner import Refiner, RefinerPVTInChannels4, RefUNet +# from models.stem_layer import StemLayer +from .BiRefNet_config import BiRefNetConfig + + +class BiRefNet( + PreTrainedModel +): + config_class = BiRefNetConfig + def __init__(self, bb_pretrained=True, config=BiRefNetConfig()): + super(BiRefNet, self).__init__(config) + bb_pretrained = config.bb_pretrained + self.config = Config() + self.epoch = 1 + self.bb = build_backbone(self.config.bb, pretrained=bb_pretrained) + + channels = self.config.lateral_channels_in_collection + + if self.config.auxiliary_classification: + self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) + self.cls_head = nn.Sequential( + nn.Linear(channels[0], len(class_labels_TR_sorted)) + ) + + if self.config.squeeze_block: + self.squeeze_module = nn.Sequential(*[ + eval(self.config.squeeze_block.split('_x')[0])(channels[0]+sum(self.config.cxt), channels[0]) + for _ in range(eval(self.config.squeeze_block.split('_x')[1])) + ]) + + self.decoder = Decoder(channels) + + if self.config.ender: + self.dec_end = nn.Sequential( 
+ nn.Conv2d(1, 16, 3, 1, 1), + nn.Conv2d(16, 1, 3, 1, 1), + nn.ReLU(inplace=True), + ) + + # refine patch-level segmentation + if self.config.refine: + if self.config.refine == 'itself': + self.stem_layer = StemLayer(in_channels=3+1, inter_channels=48, out_channels=3, norm_layer='BN' if self.config.batch_size > 1 else 'LN') + else: + self.refiner = eval('{}({})'.format(self.config.refine, 'in_channels=3+1')) + + if self.config.freeze_bb: + # Freeze the backbone... + print(self.named_parameters()) + for key, value in self.named_parameters(): + if 'bb.' in key and 'refiner.' not in key: + value.requires_grad = False + + def forward_enc(self, x): + if self.config.bb in ['vgg16', 'vgg16bn', 'resnet50']: + x1 = self.bb.conv1(x); x2 = self.bb.conv2(x1); x3 = self.bb.conv3(x2); x4 = self.bb.conv4(x3) + else: + x1, x2, x3, x4 = self.bb(x) + if self.config.mul_scl_ipt == 'cat': + B, C, H, W = x.shape + x1_, x2_, x3_, x4_ = self.bb(F.interpolate(x, size=(H//2, W//2), mode='bilinear', align_corners=True)) + x1 = torch.cat([x1, F.interpolate(x1_, size=x1.shape[2:], mode='bilinear', align_corners=True)], dim=1) + x2 = torch.cat([x2, F.interpolate(x2_, size=x2.shape[2:], mode='bilinear', align_corners=True)], dim=1) + x3 = torch.cat([x3, F.interpolate(x3_, size=x3.shape[2:], mode='bilinear', align_corners=True)], dim=1) + x4 = torch.cat([x4, F.interpolate(x4_, size=x4.shape[2:], mode='bilinear', align_corners=True)], dim=1) + elif self.config.mul_scl_ipt == 'add': + B, C, H, W = x.shape + x1_, x2_, x3_, x4_ = self.bb(F.interpolate(x, size=(H//2, W//2), mode='bilinear', align_corners=True)) + x1 = x1 + F.interpolate(x1_, size=x1.shape[2:], mode='bilinear', align_corners=True) + x2 = x2 + F.interpolate(x2_, size=x2.shape[2:], mode='bilinear', align_corners=True) + x3 = x3 + F.interpolate(x3_, size=x3.shape[2:], mode='bilinear', align_corners=True) + x4 = x4 + F.interpolate(x4_, size=x4.shape[2:], mode='bilinear', align_corners=True) + class_preds = self.cls_head(self.avgpool(x4).view(x4.shape[0], -1)) if self.training and self.config.auxiliary_classification else None + if self.config.cxt: + x4 = torch.cat( + ( + *[ + F.interpolate(x1, size=x4.shape[2:], mode='bilinear', align_corners=True), + F.interpolate(x2, size=x4.shape[2:], mode='bilinear', align_corners=True), + F.interpolate(x3, size=x4.shape[2:], mode='bilinear', align_corners=True), + ][-len(self.config.cxt):], + x4 + ), + dim=1 + ) + return (x1, x2, x3, x4), class_preds + + def forward_ori(self, x): + ########## Encoder ########## + (x1, x2, x3, x4), class_preds = self.forward_enc(x) + if self.config.squeeze_block: + x4 = self.squeeze_module(x4) + ########## Decoder ########## + features = [x, x1, x2, x3, x4] + if self.training and self.config.out_ref: + features.append(laplacian(torch.mean(x, dim=1).unsqueeze(1), kernel_size=5)) + scaled_preds = self.decoder(features) + return scaled_preds, class_preds + + def forward(self, x): + scaled_preds, class_preds = self.forward_ori(x) + class_preds_lst = [class_preds] + return [scaled_preds, class_preds_lst] if self.training else scaled_preds + + +class Decoder(nn.Module): + def __init__(self, channels): + super(Decoder, self).__init__() + self.config = Config() + DecoderBlock = eval(self.config.dec_blk) + LateralBlock = eval(self.config.lat_blk) + + if self.config.dec_ipt: + self.split = self.config.dec_ipt_split + N_dec_ipt = 64 + DBlock = SimpleConvs + ic = 64 + ipt_cha_opt = 1 + self.ipt_blk5 = DBlock(2**10*3 if self.split else 3, [N_dec_ipt, channels[0]//8][ipt_cha_opt], inter_channels=ic) + 
self.ipt_blk4 = DBlock(2**8*3 if self.split else 3, [N_dec_ipt, channels[0]//8][ipt_cha_opt], inter_channels=ic) + self.ipt_blk3 = DBlock(2**6*3 if self.split else 3, [N_dec_ipt, channels[1]//8][ipt_cha_opt], inter_channels=ic) + self.ipt_blk2 = DBlock(2**4*3 if self.split else 3, [N_dec_ipt, channels[2]//8][ipt_cha_opt], inter_channels=ic) + self.ipt_blk1 = DBlock(2**0*3 if self.split else 3, [N_dec_ipt, channels[3]//8][ipt_cha_opt], inter_channels=ic) + else: + self.split = None + + self.decoder_block4 = DecoderBlock(channels[0]+([N_dec_ipt, channels[0]//8][ipt_cha_opt] if self.config.dec_ipt else 0), channels[1]) + self.decoder_block3 = DecoderBlock(channels[1]+([N_dec_ipt, channels[0]//8][ipt_cha_opt] if self.config.dec_ipt else 0), channels[2]) + self.decoder_block2 = DecoderBlock(channels[2]+([N_dec_ipt, channels[1]//8][ipt_cha_opt] if self.config.dec_ipt else 0), channels[3]) + self.decoder_block1 = DecoderBlock(channels[3]+([N_dec_ipt, channels[2]//8][ipt_cha_opt] if self.config.dec_ipt else 0), channels[3]//2) + self.conv_out1 = nn.Sequential(nn.Conv2d(channels[3]//2+([N_dec_ipt, channels[3]//8][ipt_cha_opt] if self.config.dec_ipt else 0), 1, 1, 1, 0)) + + self.lateral_block4 = LateralBlock(channels[1], channels[1]) + self.lateral_block3 = LateralBlock(channels[2], channels[2]) + self.lateral_block2 = LateralBlock(channels[3], channels[3]) + + if self.config.ms_supervision: + self.conv_ms_spvn_4 = nn.Conv2d(channels[1], 1, 1, 1, 0) + self.conv_ms_spvn_3 = nn.Conv2d(channels[2], 1, 1, 1, 0) + self.conv_ms_spvn_2 = nn.Conv2d(channels[3], 1, 1, 1, 0) + + if self.config.out_ref: + _N = 16 + self.gdt_convs_4 = nn.Sequential(nn.Conv2d(channels[1], _N, 3, 1, 1), nn.BatchNorm2d(_N) if self.config.batch_size > 1 else nn.Identity(), nn.ReLU(inplace=True)) + self.gdt_convs_3 = nn.Sequential(nn.Conv2d(channels[2], _N, 3, 1, 1), nn.BatchNorm2d(_N) if self.config.batch_size > 1 else nn.Identity(), nn.ReLU(inplace=True)) + self.gdt_convs_2 = nn.Sequential(nn.Conv2d(channels[3], _N, 3, 1, 1), nn.BatchNorm2d(_N) if self.config.batch_size > 1 else nn.Identity(), nn.ReLU(inplace=True)) + + self.gdt_convs_pred_4 = nn.Sequential(nn.Conv2d(_N, 1, 1, 1, 0)) + self.gdt_convs_pred_3 = nn.Sequential(nn.Conv2d(_N, 1, 1, 1, 0)) + self.gdt_convs_pred_2 = nn.Sequential(nn.Conv2d(_N, 1, 1, 1, 0)) + + self.gdt_convs_attn_4 = nn.Sequential(nn.Conv2d(_N, 1, 1, 1, 0)) + self.gdt_convs_attn_3 = nn.Sequential(nn.Conv2d(_N, 1, 1, 1, 0)) + self.gdt_convs_attn_2 = nn.Sequential(nn.Conv2d(_N, 1, 1, 1, 0)) + + def get_patches_batch(self, x, p): + _size_h, _size_w = p.shape[2:] + patches_batch = [] + for idx in range(x.shape[0]): + columns_x = torch.split(x[idx], split_size_or_sections=_size_w, dim=-1) + patches_x = [] + for column_x in columns_x: + patches_x += [p.unsqueeze(0) for p in torch.split(column_x, split_size_or_sections=_size_h, dim=-2)] + patch_sample = torch.cat(patches_x, dim=1) + patches_batch.append(patch_sample) + return torch.cat(patches_batch, dim=0) + + def forward(self, features): + if self.training and self.config.out_ref: + outs_gdt_pred = [] + outs_gdt_label = [] + x, x1, x2, x3, x4, gdt_gt = features + else: + x, x1, x2, x3, x4 = features + outs = [] + + if self.config.dec_ipt: + patches_batch = self.get_patches_batch(x, x4) if self.split else x + x4 = torch.cat((x4, self.ipt_blk5(F.interpolate(patches_batch, size=x4.shape[2:], mode='bilinear', align_corners=True))), 1) + p4 = self.decoder_block4(x4) + m4 = self.conv_ms_spvn_4(p4) if self.config.ms_supervision else None + if self.config.out_ref: + 
p4_gdt = self.gdt_convs_4(p4) + if self.training: + # >> GT: + m4_dia = m4 + gdt_label_main_4 = gdt_gt * F.interpolate(m4_dia, size=gdt_gt.shape[2:], mode='bilinear', align_corners=True) + outs_gdt_label.append(gdt_label_main_4) + # >> Pred: + gdt_pred_4 = self.gdt_convs_pred_4(p4_gdt) + outs_gdt_pred.append(gdt_pred_4) + gdt_attn_4 = self.gdt_convs_attn_4(p4_gdt).sigmoid() + # >> Finally: + p4 = p4 * gdt_attn_4 + _p4 = F.interpolate(p4, size=x3.shape[2:], mode='bilinear', align_corners=True) + _p3 = _p4 + self.lateral_block4(x3) + + if self.config.dec_ipt: + patches_batch = self.get_patches_batch(x, _p3) if self.split else x + _p3 = torch.cat((_p3, self.ipt_blk4(F.interpolate(patches_batch, size=x3.shape[2:], mode='bilinear', align_corners=True))), 1) + p3 = self.decoder_block3(_p3) + m3 = self.conv_ms_spvn_3(p3) if self.config.ms_supervision else None + if self.config.out_ref: + p3_gdt = self.gdt_convs_3(p3) + if self.training: + # >> GT: + # m3 --dilation--> m3_dia + # G_3^gt * m3_dia --> G_3^m, which is the label of gradient + m3_dia = m3 + gdt_label_main_3 = gdt_gt * F.interpolate(m3_dia, size=gdt_gt.shape[2:], mode='bilinear', align_corners=True) + outs_gdt_label.append(gdt_label_main_3) + # >> Pred: + # p3 --conv--BN--> F_3^G, where F_3^G predicts the \hat{G_3} with xx + # F_3^G --sigmoid--> A_3^G + gdt_pred_3 = self.gdt_convs_pred_3(p3_gdt) + outs_gdt_pred.append(gdt_pred_3) + gdt_attn_3 = self.gdt_convs_attn_3(p3_gdt).sigmoid() + # >> Finally: + # p3 = p3 * A_3^G + p3 = p3 * gdt_attn_3 + _p3 = F.interpolate(p3, size=x2.shape[2:], mode='bilinear', align_corners=True) + _p2 = _p3 + self.lateral_block3(x2) + + if self.config.dec_ipt: + patches_batch = self.get_patches_batch(x, _p2) if self.split else x + _p2 = torch.cat((_p2, self.ipt_blk3(F.interpolate(patches_batch, size=x2.shape[2:], mode='bilinear', align_corners=True))), 1) + p2 = self.decoder_block2(_p2) + m2 = self.conv_ms_spvn_2(p2) if self.config.ms_supervision else None + if self.config.out_ref: + p2_gdt = self.gdt_convs_2(p2) + if self.training: + # >> GT: + m2_dia = m2 + gdt_label_main_2 = gdt_gt * F.interpolate(m2_dia, size=gdt_gt.shape[2:], mode='bilinear', align_corners=True) + outs_gdt_label.append(gdt_label_main_2) + # >> Pred: + gdt_pred_2 = self.gdt_convs_pred_2(p2_gdt) + outs_gdt_pred.append(gdt_pred_2) + gdt_attn_2 = self.gdt_convs_attn_2(p2_gdt).sigmoid() + # >> Finally: + p2 = p2 * gdt_attn_2 + _p2 = F.interpolate(p2, size=x1.shape[2:], mode='bilinear', align_corners=True) + _p1 = _p2 + self.lateral_block2(x1) + + if self.config.dec_ipt: + patches_batch = self.get_patches_batch(x, _p1) if self.split else x + _p1 = torch.cat((_p1, self.ipt_blk2(F.interpolate(patches_batch, size=x1.shape[2:], mode='bilinear', align_corners=True))), 1) + _p1 = self.decoder_block1(_p1) + _p1 = F.interpolate(_p1, size=x.shape[2:], mode='bilinear', align_corners=True) + + if self.config.dec_ipt: + patches_batch = self.get_patches_batch(x, _p1) if self.split else x + _p1 = torch.cat((_p1, self.ipt_blk1(F.interpolate(patches_batch, size=x.shape[2:], mode='bilinear', align_corners=True))), 1) + p1_out = self.conv_out1(_p1) + + if self.config.ms_supervision: + outs.append(m4) + outs.append(m3) + outs.append(m2) + outs.append(p1_out) + return outs if not (self.config.out_ref and self.training) else ([outs_gdt_pred, outs_gdt_label], outs) + + +class SimpleConvs(nn.Module): + def __init__( + self, in_channels: int, out_channels: int, inter_channels=64 + ) -> None: + super().__init__() + self.conv1 = nn.Conv2d(in_channels, inter_channels, 3, 
1, 1) + self.conv_out = nn.Conv2d(inter_channels, out_channels, 3, 1, 1) + + def forward(self, x): + return self.conv_out(self.conv1(x)) diff --git a/BiRefNet/RMBG-2.0/collage5.png b/BiRefNet/RMBG-2.0/collage5.png new file mode 100644 index 0000000000000000000000000000000000000000..d7da7e162727a536e2d6456a3692f401d892f049 --- /dev/null +++ b/BiRefNet/RMBG-2.0/collage5.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f9f802564aa1e3a7c90762c7e65b77007f081cb179cdd9b42607bad3b1fdaf16 +size 4515604 diff --git a/BiRefNet/RMBG-2.0/config.json b/BiRefNet/RMBG-2.0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..85f92b5194dcf44bc1d59096b26389c6a386b22b --- /dev/null +++ b/BiRefNet/RMBG-2.0/config.json @@ -0,0 +1,21 @@ +{ + "_name_or_path": "ZhengPeng7/BiRefNet", + "architectures": [ + "BiRefNet" + ], + "auto_map": { + "AutoConfig": "BiRefNet_config.BiRefNetConfig", + "AutoModelForImageSegmentation": "birefnet.BiRefNet" + }, + "custom_pipelines": { + "image-segmentation": { + "pt": [ + "AutoModelForImageSegmentation" + ], + "tf": [], + "type": "image" + } + }, + "bb_pretrained": false, + "model_type": "birefnet" +} \ No newline at end of file diff --git a/BiRefNet/RMBG-2.0/diagram1.png b/BiRefNet/RMBG-2.0/diagram1.png new file mode 100644 index 0000000000000000000000000000000000000000..0a120d09a7fcbf76cf4812bbf86adc9ff94dd438 Binary files /dev/null and b/BiRefNet/RMBG-2.0/diagram1.png differ diff --git a/BiRefNet/RMBG-2.0/onnx/model_bnb4.onnx b/BiRefNet/RMBG-2.0/onnx/model_bnb4.onnx new file mode 100644 index 0000000000000000000000000000000000000000..88329fa9206f961528a211269893146901f83a7b --- /dev/null +++ b/BiRefNet/RMBG-2.0/onnx/model_bnb4.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dadc9222fbffa53a348efea52d97475350ecee463a4a46f452e6e6b7b8757d25 +size 355288046 diff --git a/BiRefNet/RMBG-2.0/onnx/model_int8.onnx b/BiRefNet/RMBG-2.0/onnx/model_int8.onnx new file mode 100644 index 0000000000000000000000000000000000000000..81ac9c9b83b6f4a1f4b8a57e253f9680156e88dc --- /dev/null +++ b/BiRefNet/RMBG-2.0/onnx/model_int8.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f8ee7690d8c5e7fc45d7b4938ac2fe4eab63fdeddd537673cda2d4c6e74809af +size 366087445 diff --git a/BiRefNet/RMBG-2.0/onnx/model_q4.onnx b/BiRefNet/RMBG-2.0/onnx/model_q4.onnx new file mode 100644 index 0000000000000000000000000000000000000000..cd4c61fbcc7bb391d515f65a2170cf824d396ac7 --- /dev/null +++ b/BiRefNet/RMBG-2.0/onnx/model_q4.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a813e0eab56c982b71254214f41fa860cc7b565a6f2aab55d1f99f41c646ece1 +size 367451512 diff --git a/BiRefNet/RMBG-2.0/onnx/model_q4f16.onnx b/BiRefNet/RMBG-2.0/onnx/model_q4f16.onnx new file mode 100644 index 0000000000000000000000000000000000000000..9180f8adbb4b2b6c1550ea4658334b091ceb339b --- /dev/null +++ b/BiRefNet/RMBG-2.0/onnx/model_q4f16.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8bfeb5f93220eb19f6747c217b62cf04342840c4e973f55bf64e9762919f446d +size 233815293 diff --git a/BiRefNet/RMBG-2.0/onnx/model_quantized.onnx b/BiRefNet/RMBG-2.0/onnx/model_quantized.onnx new file mode 100644 index 0000000000000000000000000000000000000000..14d566a4bed9042d2373debfdd8f68abdfe23e3d --- /dev/null +++ b/BiRefNet/RMBG-2.0/onnx/model_quantized.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fcea23951a378f92634834888896cc1eec54655366ae6e949282646ce17c5420 +size 366087549 
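For context, a minimal usage sketch of the model these files wire up: the config.json above maps AutoModelForImageSegmentation to birefnet.BiRefNet, and the preprocessor_config.json added further below specifies 1024x1024 inputs rescaled to [0, 1] and normalized with ImageNet mean/std. The sketch assumes the files from this commit, together with the model weights, sit in a local BiRefNet/RMBG-2.0 directory; the input and output image paths are placeholders.

import torch
from PIL import Image
from torchvision import transforms
from transformers import AutoModelForImageSegmentation

# Load the custom BiRefNet class registered via auto_map in config.json (local directory path is an assumption).
model = AutoModelForImageSegmentation.from_pretrained("BiRefNet/RMBG-2.0", trust_remote_code=True)
model.eval()

# Preprocessing mirroring preprocessor_config.json: resize to 1024x1024, scale to [0, 1], ImageNet normalization.
preprocess = transforms.Compose([
    transforms.Resize((1024, 1024)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])

image = Image.open("example.jpg").convert("RGB")  # placeholder input path
inputs = preprocess(image).unsqueeze(0)

with torch.no_grad():
    preds = model(inputs)        # in eval mode, BiRefNet.forward returns the decoder's multi-scale outputs
mask = preds[-1].sigmoid()[0, 0]  # the last output is the full-resolution logits (p1_out)

alpha = transforms.ToPILImage()(mask).resize(image.size)  # matte back at the original resolution
image.putalpha(alpha)                                     # attach the matte as an alpha channel
image.save("output.png")                                  # placeholder output path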
diff --git a/BiRefNet/RMBG-2.0/onnx/model_uint8.onnx b/BiRefNet/RMBG-2.0/onnx/model_uint8.onnx new file mode 100644 index 0000000000000000000000000000000000000000..14d566a4bed9042d2373debfdd8f68abdfe23e3d --- /dev/null +++ b/BiRefNet/RMBG-2.0/onnx/model_uint8.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fcea23951a378f92634834888896cc1eec54655366ae6e949282646ce17c5420 +size 366087549 diff --git a/BiRefNet/RMBG-2.0/preprocessor_config.json b/BiRefNet/RMBG-2.0/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..825398cfd94a348babce456f2bcc8422c9bebb93 --- /dev/null +++ b/BiRefNet/RMBG-2.0/preprocessor_config.json @@ -0,0 +1,23 @@ +{ + "do_normalize": true, + "do_rescale": true, + "do_resize": true, + "feature_extractor_type": "ViTFeatureExtractor", + "image_mean": [ + 0.485, + 0.456, + 0.406 + ], + "image_processor_type": "ViTFeatureExtractor", + "image_std": [ + 0.229, + 0.224, + 0.225 + ], + "resample": 2, + "rescale_factor": 0.00392156862745098, + "size": { + "height": 1024, + "width": 1024 + } +} \ No newline at end of file diff --git a/BiRefNet/RMBG-2.0/t4.png b/BiRefNet/RMBG-2.0/t4.png new file mode 100644 index 0000000000000000000000000000000000000000..b5ce68a4bf42e4c39251d1d23a82d0e5f2dd1c7e --- /dev/null +++ b/BiRefNet/RMBG-2.0/t4.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:43a9453f567d9bff7fe4481205575bbf302499379047ee6073247315452ba8fb +size 2159885 diff --git a/RMBG/RMBG-2.0/BiRefNet_config.py b/RMBG/RMBG-2.0/BiRefNet_config.py new file mode 100644 index 0000000000000000000000000000000000000000..37c8ac58bec2f52dac34204978a7b61b69e3da76 --- /dev/null +++ b/RMBG/RMBG-2.0/BiRefNet_config.py @@ -0,0 +1,11 @@ +from transformers import PretrainedConfig + +class BiRefNetConfig(PretrainedConfig): + model_type = "SegformerForSemanticSegmentation" + def __init__( + self, + bb_pretrained=False, + **kwargs + ): + self.bb_pretrained = bb_pretrained + super().__init__(**kwargs) diff --git a/RMBG/RMBG-2.0/birefnet.py b/RMBG/RMBG-2.0/birefnet.py new file mode 100644 index 0000000000000000000000000000000000000000..1ed28de11ce84e5f6b2dbf2a0dbb66412bc28513 --- /dev/null +++ b/RMBG/RMBG-2.0/birefnet.py @@ -0,0 +1,2244 @@ +### config.py + +import os +import math + + +class Config(): + def __init__(self) -> None: + # PATH settings + self.sys_home_dir = os.path.expanduser('~') # Make up your file system as: SYS_HOME_DIR/codes/dis/BiRefNet, SYS_HOME_DIR/datasets/dis/xx, SYS_HOME_DIR/weights/xx + + # TASK settings + self.task = ['DIS5K', 'COD', 'HRSOD', 'DIS5K+HRSOD+HRS10K', 'P3M-10k'][0] + self.training_set = { + 'DIS5K': ['DIS-TR', 'DIS-TR+DIS-TE1+DIS-TE2+DIS-TE3+DIS-TE4'][0], + 'COD': 'TR-COD10K+TR-CAMO', + 'HRSOD': ['TR-DUTS', 'TR-HRSOD', 'TR-UHRSD', 'TR-DUTS+TR-HRSOD', 'TR-DUTS+TR-UHRSD', 'TR-HRSOD+TR-UHRSD', 'TR-DUTS+TR-HRSOD+TR-UHRSD'][5], + 'DIS5K+HRSOD+HRS10K': 'DIS-TE1+DIS-TE2+DIS-TE3+DIS-TE4+DIS-TR+TE-HRS10K+TE-HRSOD+TE-UHRSD+TR-HRS10K+TR-HRSOD+TR-UHRSD', # leave DIS-VD for evaluation. + 'P3M-10k': 'TR-P3M-10k', + }[self.task] + self.prompt4loc = ['dense', 'sparse'][0] + + # Faster-Training settings + self.load_all = True + self.compile = True # 1. Trigger CPU memory leak in some extend, which is an inherent problem of PyTorch. + # Machines with > 70GB CPU memory can run the whole training on DIS5K with default setting. + # 2. Higher PyTorch version may fix it: https://github.com/pytorch/pytorch/issues/119607. + # 3. 
But compile in Pytorch > 2.0.1 seems to bring no acceleration for training. + self.precisionHigh = True + + # MODEL settings + self.ms_supervision = True + self.out_ref = self.ms_supervision and True + self.dec_ipt = True + self.dec_ipt_split = True + self.cxt_num = [0, 3][1] # multi-scale skip connections from encoder + self.mul_scl_ipt = ['', 'add', 'cat'][2] + self.dec_att = ['', 'ASPP', 'ASPPDeformable'][2] + self.squeeze_block = ['', 'BasicDecBlk_x1', 'ResBlk_x4', 'ASPP_x3', 'ASPPDeformable_x3'][1] + self.dec_blk = ['BasicDecBlk', 'ResBlk', 'HierarAttDecBlk'][0] + + # TRAINING settings + self.batch_size = 4 + self.IoU_finetune_last_epochs = [ + 0, + { + 'DIS5K': -50, + 'COD': -20, + 'HRSOD': -20, + 'DIS5K+HRSOD+HRS10K': -20, + 'P3M-10k': -20, + }[self.task] + ][1] # choose 0 to skip + self.lr = (1e-4 if 'DIS5K' in self.task else 1e-5) * math.sqrt(self.batch_size / 4) # DIS needs high lr to converge faster. Adapt the lr linearly + self.size = 1024 + self.num_workers = max(4, self.batch_size) # will be decrease to min(it, batch_size) at the initialization of the data_loader + + # Backbone settings + self.bb = [ + 'vgg16', 'vgg16bn', 'resnet50', # 0, 1, 2 + 'swin_v1_t', 'swin_v1_s', # 3, 4 + 'swin_v1_b', 'swin_v1_l', # 5-bs9, 6-bs4 + 'pvt_v2_b0', 'pvt_v2_b1', # 7, 8 + 'pvt_v2_b2', 'pvt_v2_b5', # 9-bs10, 10-bs5 + ][6] + self.lateral_channels_in_collection = { + 'vgg16': [512, 256, 128, 64], 'vgg16bn': [512, 256, 128, 64], 'resnet50': [1024, 512, 256, 64], + 'pvt_v2_b2': [512, 320, 128, 64], 'pvt_v2_b5': [512, 320, 128, 64], + 'swin_v1_b': [1024, 512, 256, 128], 'swin_v1_l': [1536, 768, 384, 192], + 'swin_v1_t': [768, 384, 192, 96], 'swin_v1_s': [768, 384, 192, 96], + 'pvt_v2_b0': [256, 160, 64, 32], 'pvt_v2_b1': [512, 320, 128, 64], + }[self.bb] + if self.mul_scl_ipt == 'cat': + self.lateral_channels_in_collection = [channel * 2 for channel in self.lateral_channels_in_collection] + self.cxt = self.lateral_channels_in_collection[1:][::-1][-self.cxt_num:] if self.cxt_num else [] + + # MODEL settings - inactive + self.lat_blk = ['BasicLatBlk'][0] + self.dec_channels_inter = ['fixed', 'adap'][0] + self.refine = ['', 'itself', 'RefUNet', 'Refiner', 'RefinerPVTInChannels4'][0] + self.progressive_ref = self.refine and True + self.ender = self.progressive_ref and False + self.scale = self.progressive_ref and 2 + self.auxiliary_classification = False # Only for DIS5K, where class labels are saved in `dataset.py`. + self.refine_iteration = 1 + self.freeze_bb = False + self.model = [ + 'BiRefNet', + ][0] + if self.dec_blk == 'HierarAttDecBlk': + self.batch_size = 2 ** [0, 1, 2, 3, 4][2] + + # TRAINING settings - inactive + self.preproc_methods = ['flip', 'enhance', 'rotate', 'pepper', 'crop'][:4] + self.optimizer = ['Adam', 'AdamW'][1] + self.lr_decay_epochs = [1e5] # Set to negative N to decay the lr in the last N-th epoch. + self.lr_decay_rate = 0.5 + # Loss + self.lambdas_pix_last = { + # not 0 means opening this loss + # original rate -- 1 : 30 : 1.5 : 0.2, bce x 30 + 'bce': 30 * 1, # high performance + 'iou': 0.5 * 1, # 0 / 255 + 'iou_patch': 0.5 * 0, # 0 / 255, win_size = (64, 64) + 'mse': 150 * 0, # can smooth the saliency map + 'triplet': 3 * 0, + 'reg': 100 * 0, + 'ssim': 10 * 1, # help contours, + 'cnt': 5 * 0, # help contours + 'structure': 5 * 0, # structure loss from codes of MVANet. A little improvement on DIS-TE[1,2,3], a bit more decrease on DIS-TE4. + } + self.lambdas_cls = { + 'ce': 5.0 + } + # Adv + self.lambda_adv_g = 10. 
* 0 # turn to 0 to avoid adv training + self.lambda_adv_d = 3. * (self.lambda_adv_g > 0) + + # PATH settings - inactive + self.data_root_dir = os.path.join(self.sys_home_dir, 'datasets/dis') + self.weights_root_dir = os.path.join(self.sys_home_dir, 'weights') + self.weights = { + 'pvt_v2_b2': os.path.join(self.weights_root_dir, 'pvt_v2_b2.pth'), + 'pvt_v2_b5': os.path.join(self.weights_root_dir, ['pvt_v2_b5.pth', 'pvt_v2_b5_22k.pth'][0]), + 'swin_v1_b': os.path.join(self.weights_root_dir, ['swin_base_patch4_window12_384_22kto1k.pth', 'swin_base_patch4_window12_384_22k.pth'][0]), + 'swin_v1_l': os.path.join(self.weights_root_dir, ['swin_large_patch4_window12_384_22kto1k.pth', 'swin_large_patch4_window12_384_22k.pth'][0]), + 'swin_v1_t': os.path.join(self.weights_root_dir, ['swin_tiny_patch4_window7_224_22kto1k_finetune.pth'][0]), + 'swin_v1_s': os.path.join(self.weights_root_dir, ['swin_small_patch4_window7_224_22kto1k_finetune.pth'][0]), + 'pvt_v2_b0': os.path.join(self.weights_root_dir, ['pvt_v2_b0.pth'][0]), + 'pvt_v2_b1': os.path.join(self.weights_root_dir, ['pvt_v2_b1.pth'][0]), + } + + # Callbacks - inactive + self.verbose_eval = True + self.only_S_MAE = False + self.use_fp16 = False # Bugs. It may cause nan in training. + self.SDPA_enabled = False # Bugs. Slower and errors occur in multi-GPUs + + # others + self.device = [0, 'cpu'][0] # .to(0) == .to('cuda:0') + + self.batch_size_valid = 1 + self.rand_seed = 7 + # run_sh_file = [f for f in os.listdir('.') if 'train.sh' == f] + [os.path.join('..', f) for f in os.listdir('..') if 'train.sh' == f] + # with open(run_sh_file[0], 'r') as f: + # lines = f.readlines() + # self.save_last = int([l.strip() for l in lines if '"{}")'.format(self.task) in l and 'val_last=' in l][0].split('val_last=')[-1].split()[0]) + # self.save_step = int([l.strip() for l in lines if '"{}")'.format(self.task) in l and 'step=' in l][0].split('step=')[-1].split()[0]) + # self.val_step = [0, self.save_step][0] + + def print_task(self) -> None: + # Return task for choosing settings in shell scripts. 
+ print(self.task) + + + +### models/backbones/pvt_v2.py + +import torch +import torch.nn as nn +from functools import partial + +from timm.models.layers import DropPath, to_2tuple, trunc_normal_ +from timm.models.registry import register_model + +import math + +# from config import Config + +# config = Config() + +class Mlp(nn.Module): + def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.GELU, drop=0.): + super().__init__() + out_features = out_features or in_features + hidden_features = hidden_features or in_features + self.fc1 = nn.Linear(in_features, hidden_features) + self.dwconv = DWConv(hidden_features) + self.act = act_layer() + self.fc2 = nn.Linear(hidden_features, out_features) + self.drop = nn.Dropout(drop) + + self.apply(self._init_weights) + + def _init_weights(self, m): + if isinstance(m, nn.Linear): + trunc_normal_(m.weight, std=.02) + if isinstance(m, nn.Linear) and m.bias is not None: + nn.init.constant_(m.bias, 0) + elif isinstance(m, nn.LayerNorm): + nn.init.constant_(m.bias, 0) + nn.init.constant_(m.weight, 1.0) + elif isinstance(m, nn.Conv2d): + fan_out = m.kernel_size[0] * m.kernel_size[1] * m.out_channels + fan_out //= m.groups + m.weight.data.normal_(0, math.sqrt(2.0 / fan_out)) + if m.bias is not None: + m.bias.data.zero_() + + def forward(self, x, H, W): + x = self.fc1(x) + x = self.dwconv(x, H, W) + x = self.act(x) + x = self.drop(x) + x = self.fc2(x) + x = self.drop(x) + return x + + +class Attention(nn.Module): + def __init__(self, dim, num_heads=8, qkv_bias=False, qk_scale=None, attn_drop=0., proj_drop=0., sr_ratio=1): + super().__init__() + assert dim % num_heads == 0, f"dim {dim} should be divided by num_heads {num_heads}." + + self.dim = dim + self.num_heads = num_heads + head_dim = dim // num_heads + self.scale = qk_scale or head_dim ** -0.5 + + self.q = nn.Linear(dim, dim, bias=qkv_bias) + self.kv = nn.Linear(dim, dim * 2, bias=qkv_bias) + self.attn_drop_prob = attn_drop + self.attn_drop = nn.Dropout(attn_drop) + self.proj = nn.Linear(dim, dim) + self.proj_drop = nn.Dropout(proj_drop) + + self.sr_ratio = sr_ratio + if sr_ratio > 1: + self.sr = nn.Conv2d(dim, dim, kernel_size=sr_ratio, stride=sr_ratio) + self.norm = nn.LayerNorm(dim) + + self.apply(self._init_weights) + + def _init_weights(self, m): + if isinstance(m, nn.Linear): + trunc_normal_(m.weight, std=.02) + if isinstance(m, nn.Linear) and m.bias is not None: + nn.init.constant_(m.bias, 0) + elif isinstance(m, nn.LayerNorm): + nn.init.constant_(m.bias, 0) + nn.init.constant_(m.weight, 1.0) + elif isinstance(m, nn.Conv2d): + fan_out = m.kernel_size[0] * m.kernel_size[1] * m.out_channels + fan_out //= m.groups + m.weight.data.normal_(0, math.sqrt(2.0 / fan_out)) + if m.bias is not None: + m.bias.data.zero_() + + def forward(self, x, H, W): + B, N, C = x.shape + q = self.q(x).reshape(B, N, self.num_heads, C // self.num_heads).permute(0, 2, 1, 3) + + if self.sr_ratio > 1: + x_ = x.permute(0, 2, 1).reshape(B, C, H, W) + x_ = self.sr(x_).reshape(B, C, -1).permute(0, 2, 1) + x_ = self.norm(x_) + kv = self.kv(x_).reshape(B, -1, 2, self.num_heads, C // self.num_heads).permute(2, 0, 3, 1, 4) + else: + kv = self.kv(x).reshape(B, -1, 2, self.num_heads, C // self.num_heads).permute(2, 0, 3, 1, 4) + k, v = kv[0], kv[1] + + if config.SDPA_enabled: + x = torch.nn.functional.scaled_dot_product_attention( + q, k, v, + attn_mask=None, dropout_p=self.attn_drop_prob, is_causal=False + ).transpose(1, 2).reshape(B, N, C) + else: + attn = (q @ k.transpose(-2, -1)) * self.scale + attn = 
attn.softmax(dim=-1) + attn = self.attn_drop(attn) + + x = (attn @ v).transpose(1, 2).reshape(B, N, C) + x = self.proj(x) + x = self.proj_drop(x) + + return x + + +class Block(nn.Module): + + def __init__(self, dim, num_heads, mlp_ratio=4., qkv_bias=False, qk_scale=None, drop=0., attn_drop=0., + drop_path=0., act_layer=nn.GELU, norm_layer=nn.LayerNorm, sr_ratio=1): + super().__init__() + self.norm1 = norm_layer(dim) + self.attn = Attention( + dim, + num_heads=num_heads, qkv_bias=qkv_bias, qk_scale=qk_scale, + attn_drop=attn_drop, proj_drop=drop, sr_ratio=sr_ratio) + # NOTE: drop path for stochastic depth, we shall see if this is better than dropout here + self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity() + self.norm2 = norm_layer(dim) + mlp_hidden_dim = int(dim * mlp_ratio) + self.mlp = Mlp(in_features=dim, hidden_features=mlp_hidden_dim, act_layer=act_layer, drop=drop) + + self.apply(self._init_weights) + + def _init_weights(self, m): + if isinstance(m, nn.Linear): + trunc_normal_(m.weight, std=.02) + if isinstance(m, nn.Linear) and m.bias is not None: + nn.init.constant_(m.bias, 0) + elif isinstance(m, nn.LayerNorm): + nn.init.constant_(m.bias, 0) + nn.init.constant_(m.weight, 1.0) + elif isinstance(m, nn.Conv2d): + fan_out = m.kernel_size[0] * m.kernel_size[1] * m.out_channels + fan_out //= m.groups + m.weight.data.normal_(0, math.sqrt(2.0 / fan_out)) + if m.bias is not None: + m.bias.data.zero_() + + def forward(self, x, H, W): + x = x + self.drop_path(self.attn(self.norm1(x), H, W)) + x = x + self.drop_path(self.mlp(self.norm2(x), H, W)) + + return x + + +class OverlapPatchEmbed(nn.Module): + """ Image to Patch Embedding + """ + + def __init__(self, img_size=224, patch_size=7, stride=4, in_channels=3, embed_dim=768): + super().__init__() + img_size = to_2tuple(img_size) + patch_size = to_2tuple(patch_size) + + self.img_size = img_size + self.patch_size = patch_size + self.H, self.W = img_size[0] // patch_size[0], img_size[1] // patch_size[1] + self.num_patches = self.H * self.W + self.proj = nn.Conv2d(in_channels, embed_dim, kernel_size=patch_size, stride=stride, + padding=(patch_size[0] // 2, patch_size[1] // 2)) + self.norm = nn.LayerNorm(embed_dim) + + self.apply(self._init_weights) + + def _init_weights(self, m): + if isinstance(m, nn.Linear): + trunc_normal_(m.weight, std=.02) + if isinstance(m, nn.Linear) and m.bias is not None: + nn.init.constant_(m.bias, 0) + elif isinstance(m, nn.LayerNorm): + nn.init.constant_(m.bias, 0) + nn.init.constant_(m.weight, 1.0) + elif isinstance(m, nn.Conv2d): + fan_out = m.kernel_size[0] * m.kernel_size[1] * m.out_channels + fan_out //= m.groups + m.weight.data.normal_(0, math.sqrt(2.0 / fan_out)) + if m.bias is not None: + m.bias.data.zero_() + + def forward(self, x): + x = self.proj(x) + _, _, H, W = x.shape + x = x.flatten(2).transpose(1, 2) + x = self.norm(x) + + return x, H, W + + +class PyramidVisionTransformerImpr(nn.Module): + def __init__(self, img_size=224, patch_size=16, in_channels=3, num_classes=1000, embed_dims=[64, 128, 256, 512], + num_heads=[1, 2, 4, 8], mlp_ratios=[4, 4, 4, 4], qkv_bias=False, qk_scale=None, drop_rate=0., + attn_drop_rate=0., drop_path_rate=0., norm_layer=nn.LayerNorm, + depths=[3, 4, 6, 3], sr_ratios=[8, 4, 2, 1]): + super().__init__() + self.num_classes = num_classes + self.depths = depths + + # patch_embed + self.patch_embed1 = OverlapPatchEmbed(img_size=img_size, patch_size=7, stride=4, in_channels=in_channels, + embed_dim=embed_dims[0]) + self.patch_embed2 = 
OverlapPatchEmbed(img_size=img_size // 4, patch_size=3, stride=2, in_channels=embed_dims[0], + embed_dim=embed_dims[1]) + self.patch_embed3 = OverlapPatchEmbed(img_size=img_size // 8, patch_size=3, stride=2, in_channels=embed_dims[1], + embed_dim=embed_dims[2]) + self.patch_embed4 = OverlapPatchEmbed(img_size=img_size // 16, patch_size=3, stride=2, in_channels=embed_dims[2], + embed_dim=embed_dims[3]) + + # transformer encoder + dpr = [x.item() for x in torch.linspace(0, drop_path_rate, sum(depths))] # stochastic depth decay rule + cur = 0 + self.block1 = nn.ModuleList([Block( + dim=embed_dims[0], num_heads=num_heads[0], mlp_ratio=mlp_ratios[0], qkv_bias=qkv_bias, qk_scale=qk_scale, + drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[cur + i], norm_layer=norm_layer, + sr_ratio=sr_ratios[0]) + for i in range(depths[0])]) + self.norm1 = norm_layer(embed_dims[0]) + + cur += depths[0] + self.block2 = nn.ModuleList([Block( + dim=embed_dims[1], num_heads=num_heads[1], mlp_ratio=mlp_ratios[1], qkv_bias=qkv_bias, qk_scale=qk_scale, + drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[cur + i], norm_layer=norm_layer, + sr_ratio=sr_ratios[1]) + for i in range(depths[1])]) + self.norm2 = norm_layer(embed_dims[1]) + + cur += depths[1] + self.block3 = nn.ModuleList([Block( + dim=embed_dims[2], num_heads=num_heads[2], mlp_ratio=mlp_ratios[2], qkv_bias=qkv_bias, qk_scale=qk_scale, + drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[cur + i], norm_layer=norm_layer, + sr_ratio=sr_ratios[2]) + for i in range(depths[2])]) + self.norm3 = norm_layer(embed_dims[2]) + + cur += depths[2] + self.block4 = nn.ModuleList([Block( + dim=embed_dims[3], num_heads=num_heads[3], mlp_ratio=mlp_ratios[3], qkv_bias=qkv_bias, qk_scale=qk_scale, + drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[cur + i], norm_layer=norm_layer, + sr_ratio=sr_ratios[3]) + for i in range(depths[3])]) + self.norm4 = norm_layer(embed_dims[3]) + + # classification head + # self.head = nn.Linear(embed_dims[3], num_classes) if num_classes > 0 else nn.Identity() + + self.apply(self._init_weights) + + def _init_weights(self, m): + if isinstance(m, nn.Linear): + trunc_normal_(m.weight, std=.02) + if isinstance(m, nn.Linear) and m.bias is not None: + nn.init.constant_(m.bias, 0) + elif isinstance(m, nn.LayerNorm): + nn.init.constant_(m.bias, 0) + nn.init.constant_(m.weight, 1.0) + elif isinstance(m, nn.Conv2d): + fan_out = m.kernel_size[0] * m.kernel_size[1] * m.out_channels + fan_out //= m.groups + m.weight.data.normal_(0, math.sqrt(2.0 / fan_out)) + if m.bias is not None: + m.bias.data.zero_() + + def init_weights(self, pretrained=None): + if isinstance(pretrained, str): + logger = 1 + #load_checkpoint(self, pretrained, map_location='cpu', strict=False, logger=logger) + + def reset_drop_path(self, drop_path_rate): + dpr = [x.item() for x in torch.linspace(0, drop_path_rate, sum(self.depths))] + cur = 0 + for i in range(self.depths[0]): + self.block1[i].drop_path.drop_prob = dpr[cur + i] + + cur += self.depths[0] + for i in range(self.depths[1]): + self.block2[i].drop_path.drop_prob = dpr[cur + i] + + cur += self.depths[1] + for i in range(self.depths[2]): + self.block3[i].drop_path.drop_prob = dpr[cur + i] + + cur += self.depths[2] + for i in range(self.depths[3]): + self.block4[i].drop_path.drop_prob = dpr[cur + i] + + def freeze_patch_emb(self): + self.patch_embed1.requires_grad = False + + @torch.jit.ignore + def no_weight_decay(self): + return {'pos_embed1', 'pos_embed2', 'pos_embed3', 'pos_embed4', 'cls_token'} # has 
pos_embed may be better + + def get_classifier(self): + return self.head + + def reset_classifier(self, num_classes, global_pool=''): + self.num_classes = num_classes + self.head = nn.Linear(self.embed_dim, num_classes) if num_classes > 0 else nn.Identity() + + def forward_features(self, x): + B = x.shape[0] + outs = [] + + # stage 1 + x, H, W = self.patch_embed1(x) + for i, blk in enumerate(self.block1): + x = blk(x, H, W) + x = self.norm1(x) + x = x.reshape(B, H, W, -1).permute(0, 3, 1, 2).contiguous() + outs.append(x) + + # stage 2 + x, H, W = self.patch_embed2(x) + for i, blk in enumerate(self.block2): + x = blk(x, H, W) + x = self.norm2(x) + x = x.reshape(B, H, W, -1).permute(0, 3, 1, 2).contiguous() + outs.append(x) + + # stage 3 + x, H, W = self.patch_embed3(x) + for i, blk in enumerate(self.block3): + x = blk(x, H, W) + x = self.norm3(x) + x = x.reshape(B, H, W, -1).permute(0, 3, 1, 2).contiguous() + outs.append(x) + + # stage 4 + x, H, W = self.patch_embed4(x) + for i, blk in enumerate(self.block4): + x = blk(x, H, W) + x = self.norm4(x) + x = x.reshape(B, H, W, -1).permute(0, 3, 1, 2).contiguous() + outs.append(x) + + return outs + + # return x.mean(dim=1) + + def forward(self, x): + x = self.forward_features(x) + # x = self.head(x) + + return x + + +class DWConv(nn.Module): + def __init__(self, dim=768): + super(DWConv, self).__init__() + self.dwconv = nn.Conv2d(dim, dim, 3, 1, 1, bias=True, groups=dim) + + def forward(self, x, H, W): + B, N, C = x.shape + x = x.transpose(1, 2).view(B, C, H, W).contiguous() + x = self.dwconv(x) + x = x.flatten(2).transpose(1, 2) + + return x + + +def _conv_filter(state_dict, patch_size=16): + """ convert patch embedding weight from manual patchify + linear proj to conv""" + out_dict = {} + for k, v in state_dict.items(): + if 'patch_embed.proj.weight' in k: + v = v.reshape((v.shape[0], 3, patch_size, patch_size)) + out_dict[k] = v + + return out_dict + + +## @register_model +class pvt_v2_b0(PyramidVisionTransformerImpr): + def __init__(self, **kwargs): + super(pvt_v2_b0, self).__init__( + patch_size=4, embed_dims=[32, 64, 160, 256], num_heads=[1, 2, 5, 8], mlp_ratios=[8, 8, 4, 4], + qkv_bias=True, norm_layer=partial(nn.LayerNorm, eps=1e-6), depths=[2, 2, 2, 2], sr_ratios=[8, 4, 2, 1], + drop_rate=0.0, drop_path_rate=0.1) + + + +## @register_model +class pvt_v2_b1(PyramidVisionTransformerImpr): + def __init__(self, **kwargs): + super(pvt_v2_b1, self).__init__( + patch_size=4, embed_dims=[64, 128, 320, 512], num_heads=[1, 2, 5, 8], mlp_ratios=[8, 8, 4, 4], + qkv_bias=True, norm_layer=partial(nn.LayerNorm, eps=1e-6), depths=[2, 2, 2, 2], sr_ratios=[8, 4, 2, 1], + drop_rate=0.0, drop_path_rate=0.1) + +## @register_model +class pvt_v2_b2(PyramidVisionTransformerImpr): + def __init__(self, in_channels=3, **kwargs): + super(pvt_v2_b2, self).__init__( + patch_size=4, embed_dims=[64, 128, 320, 512], num_heads=[1, 2, 5, 8], mlp_ratios=[8, 8, 4, 4], + qkv_bias=True, norm_layer=partial(nn.LayerNorm, eps=1e-6), depths=[3, 4, 6, 3], sr_ratios=[8, 4, 2, 1], + drop_rate=0.0, drop_path_rate=0.1, in_channels=in_channels) + +## @register_model +class pvt_v2_b3(PyramidVisionTransformerImpr): + def __init__(self, **kwargs): + super(pvt_v2_b3, self).__init__( + patch_size=4, embed_dims=[64, 128, 320, 512], num_heads=[1, 2, 5, 8], mlp_ratios=[8, 8, 4, 4], + qkv_bias=True, norm_layer=partial(nn.LayerNorm, eps=1e-6), depths=[3, 4, 18, 3], sr_ratios=[8, 4, 2, 1], + drop_rate=0.0, drop_path_rate=0.1) + +## @register_model +class pvt_v2_b4(PyramidVisionTransformerImpr): + 
def __init__(self, **kwargs): + super(pvt_v2_b4, self).__init__( + patch_size=4, embed_dims=[64, 128, 320, 512], num_heads=[1, 2, 5, 8], mlp_ratios=[8, 8, 4, 4], + qkv_bias=True, norm_layer=partial(nn.LayerNorm, eps=1e-6), depths=[3, 8, 27, 3], sr_ratios=[8, 4, 2, 1], + drop_rate=0.0, drop_path_rate=0.1) + + +## @register_model +class pvt_v2_b5(PyramidVisionTransformerImpr): + def __init__(self, **kwargs): + super(pvt_v2_b5, self).__init__( + patch_size=4, embed_dims=[64, 128, 320, 512], num_heads=[1, 2, 5, 8], mlp_ratios=[4, 4, 4, 4], + qkv_bias=True, norm_layer=partial(nn.LayerNorm, eps=1e-6), depths=[3, 6, 40, 3], sr_ratios=[8, 4, 2, 1], + drop_rate=0.0, drop_path_rate=0.1) + + + +### models/backbones/swin_v1.py + +# -------------------------------------------------------- +# Swin Transformer +# Copyright (c) 2021 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Written by Ze Liu, Yutong Lin, Yixuan Wei +# -------------------------------------------------------- + +import torch +import torch.nn as nn +import torch.nn.functional as F +import torch.utils.checkpoint as checkpoint +import numpy as np +from timm.models.layers import DropPath, to_2tuple, trunc_normal_ + +# from config import Config + + +# config = Config() + +class Mlp(nn.Module): + """ Multilayer perceptron.""" + + def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.GELU, drop=0.): + super().__init__() + out_features = out_features or in_features + hidden_features = hidden_features or in_features + self.fc1 = nn.Linear(in_features, hidden_features) + self.act = act_layer() + self.fc2 = nn.Linear(hidden_features, out_features) + self.drop = nn.Dropout(drop) + + def forward(self, x): + x = self.fc1(x) + x = self.act(x) + x = self.drop(x) + x = self.fc2(x) + x = self.drop(x) + return x + + +def window_partition(x, window_size): + """ + Args: + x: (B, H, W, C) + window_size (int): window size + + Returns: + windows: (num_windows*B, window_size, window_size, C) + """ + B, H, W, C = x.shape + x = x.view(B, H // window_size, window_size, W // window_size, window_size, C) + windows = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(-1, window_size, window_size, C) + return windows + + +def window_reverse(windows, window_size, H, W): + """ + Args: + windows: (num_windows*B, window_size, window_size, C) + window_size (int): Window size + H (int): Height of image + W (int): Width of image + + Returns: + x: (B, H, W, C) + """ + B = int(windows.shape[0] / (H * W / window_size / window_size)) + x = windows.view(B, H // window_size, W // window_size, window_size, window_size, -1) + x = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(B, H, W, -1) + return x + + +class WindowAttention(nn.Module): + """ Window based multi-head self attention (W-MSA) module with relative position bias. + It supports both of shifted and non-shifted window. + + Args: + dim (int): Number of input channels. + window_size (tuple[int]): The height and width of the window. + num_heads (int): Number of attention heads. + qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True + qk_scale (float | None, optional): Override default qk scale of head_dim ** -0.5 if set + attn_drop (float, optional): Dropout ratio of attention weight. Default: 0.0 + proj_drop (float, optional): Dropout ratio of output. 
Default: 0.0 + """ + + def __init__(self, dim, window_size, num_heads, qkv_bias=True, qk_scale=None, attn_drop=0., proj_drop=0.): + + super().__init__() + self.dim = dim + self.window_size = window_size # Wh, Ww + self.num_heads = num_heads + head_dim = dim // num_heads + self.scale = qk_scale or head_dim ** -0.5 + + # define a parameter table of relative position bias + self.relative_position_bias_table = nn.Parameter( + torch.zeros((2 * window_size[0] - 1) * (2 * window_size[1] - 1), num_heads)) # 2*Wh-1 * 2*Ww-1, nH + + # get pair-wise relative position index for each token inside the window + coords_h = torch.arange(self.window_size[0]) + coords_w = torch.arange(self.window_size[1]) + coords = torch.stack(torch.meshgrid([coords_h, coords_w], indexing='ij')) # 2, Wh, Ww + coords_flatten = torch.flatten(coords, 1) # 2, Wh*Ww + relative_coords = coords_flatten[:, :, None] - coords_flatten[:, None, :] # 2, Wh*Ww, Wh*Ww + relative_coords = relative_coords.permute(1, 2, 0).contiguous() # Wh*Ww, Wh*Ww, 2 + relative_coords[:, :, 0] += self.window_size[0] - 1 # shift to start from 0 + relative_coords[:, :, 1] += self.window_size[1] - 1 + relative_coords[:, :, 0] *= 2 * self.window_size[1] - 1 + relative_position_index = relative_coords.sum(-1) # Wh*Ww, Wh*Ww + self.register_buffer("relative_position_index", relative_position_index) + + self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias) + self.attn_drop_prob = attn_drop + self.attn_drop = nn.Dropout(attn_drop) + self.proj = nn.Linear(dim, dim) + self.proj_drop = nn.Dropout(proj_drop) + + trunc_normal_(self.relative_position_bias_table, std=.02) + self.softmax = nn.Softmax(dim=-1) + + def forward(self, x, mask=None): + """ Forward function. + + Args: + x: input features with shape of (num_windows*B, N, C) + mask: (0/-inf) mask with shape of (num_windows, Wh*Ww, Wh*Ww) or None + """ + B_, N, C = x.shape + qkv = self.qkv(x).reshape(B_, N, 3, self.num_heads, C // self.num_heads).permute(2, 0, 3, 1, 4) + q, k, v = qkv[0], qkv[1], qkv[2] # make torchscript happy (cannot use tensor as tuple) + + q = q * self.scale + + if config.SDPA_enabled: + x = torch.nn.functional.scaled_dot_product_attention( + q, k, v, + attn_mask=None, dropout_p=self.attn_drop_prob, is_causal=False + ).transpose(1, 2).reshape(B_, N, C) + else: + attn = (q @ k.transpose(-2, -1)) + + relative_position_bias = self.relative_position_bias_table[self.relative_position_index.view(-1)].view( + self.window_size[0] * self.window_size[1], self.window_size[0] * self.window_size[1], -1) # Wh*Ww,Wh*Ww,nH + relative_position_bias = relative_position_bias.permute(2, 0, 1).contiguous() # nH, Wh*Ww, Wh*Ww + attn = attn + relative_position_bias.unsqueeze(0) + + if mask is not None: + nW = mask.shape[0] + attn = attn.view(B_ // nW, nW, self.num_heads, N, N) + mask.unsqueeze(1).unsqueeze(0) + attn = attn.view(-1, self.num_heads, N, N) + attn = self.softmax(attn) + else: + attn = self.softmax(attn) + + attn = self.attn_drop(attn) + + x = (attn @ v).transpose(1, 2).reshape(B_, N, C) + x = self.proj(x) + x = self.proj_drop(x) + return x + + +class SwinTransformerBlock(nn.Module): + """ Swin Transformer Block. + + Args: + dim (int): Number of input channels. + num_heads (int): Number of attention heads. + window_size (int): Window size. + shift_size (int): Shift size for SW-MSA. + mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. + qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. 
Default: True + qk_scale (float | None, optional): Override default qk scale of head_dim ** -0.5 if set. + drop (float, optional): Dropout rate. Default: 0.0 + attn_drop (float, optional): Attention dropout rate. Default: 0.0 + drop_path (float, optional): Stochastic depth rate. Default: 0.0 + act_layer (nn.Module, optional): Activation layer. Default: nn.GELU + norm_layer (nn.Module, optional): Normalization layer. Default: nn.LayerNorm + """ + + def __init__(self, dim, num_heads, window_size=7, shift_size=0, + mlp_ratio=4., qkv_bias=True, qk_scale=None, drop=0., attn_drop=0., drop_path=0., + act_layer=nn.GELU, norm_layer=nn.LayerNorm): + super().__init__() + self.dim = dim + self.num_heads = num_heads + self.window_size = window_size + self.shift_size = shift_size + self.mlp_ratio = mlp_ratio + assert 0 <= self.shift_size < self.window_size, "shift_size must in 0-window_size" + + self.norm1 = norm_layer(dim) + self.attn = WindowAttention( + dim, window_size=to_2tuple(self.window_size), num_heads=num_heads, + qkv_bias=qkv_bias, qk_scale=qk_scale, attn_drop=attn_drop, proj_drop=drop) + + self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity() + self.norm2 = norm_layer(dim) + mlp_hidden_dim = int(dim * mlp_ratio) + self.mlp = Mlp(in_features=dim, hidden_features=mlp_hidden_dim, act_layer=act_layer, drop=drop) + + self.H = None + self.W = None + + def forward(self, x, mask_matrix): + """ Forward function. + + Args: + x: Input feature, tensor size (B, H*W, C). + H, W: Spatial resolution of the input feature. + mask_matrix: Attention mask for cyclic shift. + """ + B, L, C = x.shape + H, W = self.H, self.W + assert L == H * W, "input feature has wrong size" + + shortcut = x + x = self.norm1(x) + x = x.view(B, H, W, C) + + # pad feature maps to multiples of window size + pad_l = pad_t = 0 + pad_r = (self.window_size - W % self.window_size) % self.window_size + pad_b = (self.window_size - H % self.window_size) % self.window_size + x = F.pad(x, (0, 0, pad_l, pad_r, pad_t, pad_b)) + _, Hp, Wp, _ = x.shape + + # cyclic shift + if self.shift_size > 0: + shifted_x = torch.roll(x, shifts=(-self.shift_size, -self.shift_size), dims=(1, 2)) + attn_mask = mask_matrix + else: + shifted_x = x + attn_mask = None + + # partition windows + x_windows = window_partition(shifted_x, self.window_size) # nW*B, window_size, window_size, C + x_windows = x_windows.view(-1, self.window_size * self.window_size, C) # nW*B, window_size*window_size, C + + # W-MSA/SW-MSA + attn_windows = self.attn(x_windows, mask=attn_mask) # nW*B, window_size*window_size, C + + # merge windows + attn_windows = attn_windows.view(-1, self.window_size, self.window_size, C) + shifted_x = window_reverse(attn_windows, self.window_size, Hp, Wp) # B H' W' C + + # reverse cyclic shift + if self.shift_size > 0: + x = torch.roll(shifted_x, shifts=(self.shift_size, self.shift_size), dims=(1, 2)) + else: + x = shifted_x + + if pad_r > 0 or pad_b > 0: + x = x[:, :H, :W, :].contiguous() + + x = x.view(B, H * W, C) + + # FFN + x = shortcut + self.drop_path(x) + x = x + self.drop_path(self.mlp(self.norm2(x))) + + return x + + +class PatchMerging(nn.Module): + """ Patch Merging Layer + + Args: + dim (int): Number of input channels. + norm_layer (nn.Module, optional): Normalization layer. 
Default: nn.LayerNorm + """ + def __init__(self, dim, norm_layer=nn.LayerNorm): + super().__init__() + self.dim = dim + self.reduction = nn.Linear(4 * dim, 2 * dim, bias=False) + self.norm = norm_layer(4 * dim) + + def forward(self, x, H, W): + """ Forward function. + + Args: + x: Input feature, tensor size (B, H*W, C). + H, W: Spatial resolution of the input feature. + """ + B, L, C = x.shape + assert L == H * W, "input feature has wrong size" + + x = x.view(B, H, W, C) + + # padding + pad_input = (H % 2 == 1) or (W % 2 == 1) + if pad_input: + x = F.pad(x, (0, 0, 0, W % 2, 0, H % 2)) + + x0 = x[:, 0::2, 0::2, :] # B H/2 W/2 C + x1 = x[:, 1::2, 0::2, :] # B H/2 W/2 C + x2 = x[:, 0::2, 1::2, :] # B H/2 W/2 C + x3 = x[:, 1::2, 1::2, :] # B H/2 W/2 C + x = torch.cat([x0, x1, x2, x3], -1) # B H/2 W/2 4*C + x = x.view(B, -1, 4 * C) # B H/2*W/2 4*C + + x = self.norm(x) + x = self.reduction(x) + + return x + + +class BasicLayer(nn.Module): + """ A basic Swin Transformer layer for one stage. + + Args: + dim (int): Number of feature channels + depth (int): Depths of this stage. + num_heads (int): Number of attention head. + window_size (int): Local window size. Default: 7. + mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. Default: 4. + qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True + qk_scale (float | None, optional): Override default qk scale of head_dim ** -0.5 if set. + drop (float, optional): Dropout rate. Default: 0.0 + attn_drop (float, optional): Attention dropout rate. Default: 0.0 + drop_path (float | tuple[float], optional): Stochastic depth rate. Default: 0.0 + norm_layer (nn.Module, optional): Normalization layer. Default: nn.LayerNorm + downsample (nn.Module | None, optional): Downsample layer at the end of the layer. Default: None + use_checkpoint (bool): Whether to use checkpointing to save memory. Default: False. + """ + + def __init__(self, + dim, + depth, + num_heads, + window_size=7, + mlp_ratio=4., + qkv_bias=True, + qk_scale=None, + drop=0., + attn_drop=0., + drop_path=0., + norm_layer=nn.LayerNorm, + downsample=None, + use_checkpoint=False): + super().__init__() + self.window_size = window_size + self.shift_size = window_size // 2 + self.depth = depth + self.use_checkpoint = use_checkpoint + + # build blocks + self.blocks = nn.ModuleList([ + SwinTransformerBlock( + dim=dim, + num_heads=num_heads, + window_size=window_size, + shift_size=0 if (i % 2 == 0) else window_size // 2, + mlp_ratio=mlp_ratio, + qkv_bias=qkv_bias, + qk_scale=qk_scale, + drop=drop, + attn_drop=attn_drop, + drop_path=drop_path[i] if isinstance(drop_path, list) else drop_path, + norm_layer=norm_layer) + for i in range(depth)]) + + # patch merging layer + if downsample is not None: + self.downsample = downsample(dim=dim, norm_layer=norm_layer) + else: + self.downsample = None + + def forward(self, x, H, W): + """ Forward function. + + Args: + x: Input feature, tensor size (B, H*W, C). + H, W: Spatial resolution of the input feature. 
+ """ + + # calculate attention mask for SW-MSA + Hp = int(np.ceil(H / self.window_size)) * self.window_size + Wp = int(np.ceil(W / self.window_size)) * self.window_size + img_mask = torch.zeros((1, Hp, Wp, 1), device=x.device) # 1 Hp Wp 1 + h_slices = (slice(0, -self.window_size), + slice(-self.window_size, -self.shift_size), + slice(-self.shift_size, None)) + w_slices = (slice(0, -self.window_size), + slice(-self.window_size, -self.shift_size), + slice(-self.shift_size, None)) + cnt = 0 + for h in h_slices: + for w in w_slices: + img_mask[:, h, w, :] = cnt + cnt += 1 + + mask_windows = window_partition(img_mask, self.window_size) # nW, window_size, window_size, 1 + mask_windows = mask_windows.view(-1, self.window_size * self.window_size) + attn_mask = mask_windows.unsqueeze(1) - mask_windows.unsqueeze(2) + attn_mask = attn_mask.masked_fill(attn_mask != 0, float(-100.0)).masked_fill(attn_mask == 0, float(0.0)) + + for blk in self.blocks: + blk.H, blk.W = H, W + if self.use_checkpoint: + x = checkpoint.checkpoint(blk, x, attn_mask) + else: + x = blk(x, attn_mask) + if self.downsample is not None: + x_down = self.downsample(x, H, W) + Wh, Ww = (H + 1) // 2, (W + 1) // 2 + return x, H, W, x_down, Wh, Ww + else: + return x, H, W, x, H, W + + +class PatchEmbed(nn.Module): + """ Image to Patch Embedding + + Args: + patch_size (int): Patch token size. Default: 4. + in_channels (int): Number of input image channels. Default: 3. + embed_dim (int): Number of linear projection output channels. Default: 96. + norm_layer (nn.Module, optional): Normalization layer. Default: None + """ + + def __init__(self, patch_size=4, in_channels=3, embed_dim=96, norm_layer=None): + super().__init__() + patch_size = to_2tuple(patch_size) + self.patch_size = patch_size + + self.in_channels = in_channels + self.embed_dim = embed_dim + + self.proj = nn.Conv2d(in_channels, embed_dim, kernel_size=patch_size, stride=patch_size) + if norm_layer is not None: + self.norm = norm_layer(embed_dim) + else: + self.norm = None + + def forward(self, x): + """Forward function.""" + # padding + _, _, H, W = x.size() + if W % self.patch_size[1] != 0: + x = F.pad(x, (0, self.patch_size[1] - W % self.patch_size[1])) + if H % self.patch_size[0] != 0: + x = F.pad(x, (0, 0, 0, self.patch_size[0] - H % self.patch_size[0])) + + x = self.proj(x) # B C Wh Ww + if self.norm is not None: + Wh, Ww = x.size(2), x.size(3) + x = x.flatten(2).transpose(1, 2) + x = self.norm(x) + x = x.transpose(1, 2).view(-1, self.embed_dim, Wh, Ww) + + return x + + +class SwinTransformer(nn.Module): + """ Swin Transformer backbone. + A PyTorch impl of : `Swin Transformer: Hierarchical Vision Transformer using Shifted Windows` - + https://arxiv.org/pdf/2103.14030 + + Args: + pretrain_img_size (int): Input image size for training the pretrained model, + used in absolute postion embedding. Default 224. + patch_size (int | tuple(int)): Patch size. Default: 4. + in_channels (int): Number of input image channels. Default: 3. + embed_dim (int): Number of linear projection output channels. Default: 96. + depths (tuple[int]): Depths of each Swin Transformer stage. + num_heads (tuple[int]): Number of attention head of each stage. + window_size (int): Window size. Default: 7. + mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. Default: 4. + qkv_bias (bool): If True, add a learnable bias to query, key, value. Default: True + qk_scale (float): Override default qk scale of head_dim ** -0.5 if set. + drop_rate (float): Dropout rate. 
+ attn_drop_rate (float): Attention dropout rate. Default: 0. + drop_path_rate (float): Stochastic depth rate. Default: 0.2. + norm_layer (nn.Module): Normalization layer. Default: nn.LayerNorm. + ape (bool): If True, add absolute position embedding to the patch embedding. Default: False. + patch_norm (bool): If True, add normalization after patch embedding. Default: True. + out_indices (Sequence[int]): Output from which stages. + frozen_stages (int): Stages to be frozen (stop grad and set eval mode). + -1 means not freezing any parameters. + use_checkpoint (bool): Whether to use checkpointing to save memory. Default: False. + """ + + def __init__(self, + pretrain_img_size=224, + patch_size=4, + in_channels=3, + embed_dim=96, + depths=[2, 2, 6, 2], + num_heads=[3, 6, 12, 24], + window_size=7, + mlp_ratio=4., + qkv_bias=True, + qk_scale=None, + drop_rate=0., + attn_drop_rate=0., + drop_path_rate=0.2, + norm_layer=nn.LayerNorm, + ape=False, + patch_norm=True, + out_indices=(0, 1, 2, 3), + frozen_stages=-1, + use_checkpoint=False): + super().__init__() + + self.pretrain_img_size = pretrain_img_size + self.num_layers = len(depths) + self.embed_dim = embed_dim + self.ape = ape + self.patch_norm = patch_norm + self.out_indices = out_indices + self.frozen_stages = frozen_stages + + # split image into non-overlapping patches + self.patch_embed = PatchEmbed( + patch_size=patch_size, in_channels=in_channels, embed_dim=embed_dim, + norm_layer=norm_layer if self.patch_norm else None) + + # absolute position embedding + if self.ape: + pretrain_img_size = to_2tuple(pretrain_img_size) + patch_size = to_2tuple(patch_size) + patches_resolution = [pretrain_img_size[0] // patch_size[0], pretrain_img_size[1] // patch_size[1]] + + self.absolute_pos_embed = nn.Parameter(torch.zeros(1, embed_dim, patches_resolution[0], patches_resolution[1])) + trunc_normal_(self.absolute_pos_embed, std=.02) + + self.pos_drop = nn.Dropout(p=drop_rate) + + # stochastic depth + dpr = [x.item() for x in torch.linspace(0, drop_path_rate, sum(depths))] # stochastic depth decay rule + + # build layers + self.layers = nn.ModuleList() + for i_layer in range(self.num_layers): + layer = BasicLayer( + dim=int(embed_dim * 2 ** i_layer), + depth=depths[i_layer], + num_heads=num_heads[i_layer], + window_size=window_size, + mlp_ratio=mlp_ratio, + qkv_bias=qkv_bias, + qk_scale=qk_scale, + drop=drop_rate, + attn_drop=attn_drop_rate, + drop_path=dpr[sum(depths[:i_layer]):sum(depths[:i_layer + 1])], + norm_layer=norm_layer, + downsample=PatchMerging if (i_layer < self.num_layers - 1) else None, + use_checkpoint=use_checkpoint) + self.layers.append(layer) + + num_features = [int(embed_dim * 2 ** i) for i in range(self.num_layers)] + self.num_features = num_features + + # add a norm layer for each output + for i_layer in out_indices: + layer = norm_layer(num_features[i_layer]) + layer_name = f'norm{i_layer}' + self.add_module(layer_name, layer) + + self._freeze_stages() + + def _freeze_stages(self): + if self.frozen_stages >= 0: + self.patch_embed.eval() + for param in self.patch_embed.parameters(): + param.requires_grad = False + + if self.frozen_stages >= 1 and self.ape: + self.absolute_pos_embed.requires_grad = False + + if self.frozen_stages >= 2: + self.pos_drop.eval() + for i in range(0, self.frozen_stages - 1): + m = self.layers[i] + m.eval() + for param in m.parameters(): + param.requires_grad = False + + + def forward(self, x): + """Forward function.""" + x = self.patch_embed(x) + + Wh, Ww = x.size(2), x.size(3) + if self.ape: + # 
interpolate the position embedding to the corresponding size + absolute_pos_embed = F.interpolate(self.absolute_pos_embed, size=(Wh, Ww), mode='bicubic') + x = (x + absolute_pos_embed) # B Wh*Ww C + + outs = []#x.contiguous()] + x = x.flatten(2).transpose(1, 2) + x = self.pos_drop(x) + for i in range(self.num_layers): + layer = self.layers[i] + x_out, H, W, x, Wh, Ww = layer(x, Wh, Ww) + + if i in self.out_indices: + norm_layer = getattr(self, f'norm{i}') + x_out = norm_layer(x_out) + + out = x_out.view(-1, H, W, self.num_features[i]).permute(0, 3, 1, 2).contiguous() + outs.append(out) + + return tuple(outs) + + def train(self, mode=True): + """Convert the model into training mode while keep layers freezed.""" + super(SwinTransformer, self).train(mode) + self._freeze_stages() + +def swin_v1_t(): + model = SwinTransformer(embed_dim=96, depths=[2, 2, 6, 2], num_heads=[3, 6, 12, 24], window_size=7) + return model + +def swin_v1_s(): + model = SwinTransformer(embed_dim=96, depths=[2, 2, 18, 2], num_heads=[3, 6, 12, 24], window_size=7) + return model + +def swin_v1_b(): + model = SwinTransformer(embed_dim=128, depths=[2, 2, 18, 2], num_heads=[4, 8, 16, 32], window_size=12) + return model + +def swin_v1_l(): + model = SwinTransformer(embed_dim=192, depths=[2, 2, 18, 2], num_heads=[6, 12, 24, 48], window_size=12) + return model + + + +### models/modules/deform_conv.py + +import torch +import torch.nn as nn +from torchvision.ops import deform_conv2d + + +class DeformableConv2d(nn.Module): + def __init__(self, + in_channels, + out_channels, + kernel_size=3, + stride=1, + padding=1, + bias=False): + + super(DeformableConv2d, self).__init__() + + assert type(kernel_size) == tuple or type(kernel_size) == int + + kernel_size = kernel_size if type(kernel_size) == tuple else (kernel_size, kernel_size) + self.stride = stride if type(stride) == tuple else (stride, stride) + self.padding = padding + + self.offset_conv = nn.Conv2d(in_channels, + 2 * kernel_size[0] * kernel_size[1], + kernel_size=kernel_size, + stride=stride, + padding=self.padding, + bias=True) + + nn.init.constant_(self.offset_conv.weight, 0.) + nn.init.constant_(self.offset_conv.bias, 0.) + + self.modulator_conv = nn.Conv2d(in_channels, + 1 * kernel_size[0] * kernel_size[1], + kernel_size=kernel_size, + stride=stride, + padding=self.padding, + bias=True) + + nn.init.constant_(self.modulator_conv.weight, 0.) + nn.init.constant_(self.modulator_conv.bias, 0.) + + self.regular_conv = nn.Conv2d(in_channels, + out_channels=out_channels, + kernel_size=kernel_size, + stride=stride, + padding=self.padding, + bias=bias) + + def forward(self, x): + #h, w = x.shape[2:] + #max_offset = max(h, w)/4. + + offset = self.offset_conv(x)#.clamp(-max_offset, max_offset) + modulator = 2. 
* torch.sigmoid(self.modulator_conv(x)) + + x = deform_conv2d( + input=x, + offset=offset, + weight=self.regular_conv.weight, + bias=self.regular_conv.bias, + padding=self.padding, + mask=modulator, + stride=self.stride, + ) + return x + + + + +### utils.py + +import torch.nn as nn + + +def build_act_layer(act_layer): + if act_layer == 'ReLU': + return nn.ReLU(inplace=True) + elif act_layer == 'SiLU': + return nn.SiLU(inplace=True) + elif act_layer == 'GELU': + return nn.GELU() + + raise NotImplementedError(f'build_act_layer does not support {act_layer}') + + +def build_norm_layer(dim, + norm_layer, + in_format='channels_last', + out_format='channels_last', + eps=1e-6): + layers = [] + if norm_layer == 'BN': + if in_format == 'channels_last': + layers.append(to_channels_first()) + layers.append(nn.BatchNorm2d(dim)) + if out_format == 'channels_last': + layers.append(to_channels_last()) + elif norm_layer == 'LN': + if in_format == 'channels_first': + layers.append(to_channels_last()) + layers.append(nn.LayerNorm(dim, eps=eps)) + if out_format == 'channels_first': + layers.append(to_channels_first()) + else: + raise NotImplementedError( + f'build_norm_layer does not support {norm_layer}') + return nn.Sequential(*layers) + + +class to_channels_first(nn.Module): + + def __init__(self): + super().__init__() + + def forward(self, x): + return x.permute(0, 3, 1, 2) + + +class to_channels_last(nn.Module): + + def __init__(self): + super().__init__() + + def forward(self, x): + return x.permute(0, 2, 3, 1) + + + +### dataset.py + +_class_labels_TR_sorted = ( + 'Airplane, Ant, Antenna, Archery, Axe, BabyCarriage, Bag, BalanceBeam, Balcony, Balloon, Basket, BasketballHoop, Beatle, Bed, Bee, Bench, Bicycle, ' + 'BicycleFrame, BicycleStand, Boat, Bonsai, BoomLift, Bridge, BunkBed, Butterfly, Button, Cable, CableLift, Cage, Camcorder, Cannon, Canoe, Car, ' + 'CarParkDropArm, Carriage, Cart, Caterpillar, CeilingLamp, Centipede, Chair, Clip, Clock, Clothes, CoatHanger, Comb, ConcretePumpTruck, Crack, Crane, ' + 'Cup, DentalChair, Desk, DeskChair, Diagram, DishRack, DoorHandle, Dragonfish, Dragonfly, Drum, Earphone, Easel, ElectricIron, Excavator, Eyeglasses, ' + 'Fan, Fence, Fencing, FerrisWheel, FireExtinguisher, Fishing, Flag, FloorLamp, Forklift, GasStation, Gate, Gear, Goal, Golf, GymEquipment, Hammock, ' + 'Handcart, Handcraft, Handrail, HangGlider, Harp, Harvester, Headset, Helicopter, Helmet, Hook, HorizontalBar, Hydrovalve, IroningTable, Jewelry, Key, ' + 'KidsPlayground, Kitchenware, Kite, Knife, Ladder, LaundryRack, Lightning, Lobster, Locust, Machine, MachineGun, MagazineRack, Mantis, Medal, MemorialArchway, ' + 'Microphone, Missile, MobileHolder, Monitor, Mosquito, Motorcycle, MovingTrolley, Mower, MusicPlayer, MusicStand, ObservationTower, Octopus, OilWell, ' + 'OlympicLogo, OperatingTable, OutdoorFitnessEquipment, Parachute, Pavilion, Piano, Pipe, PlowHarrow, PoleVault, Punchbag, Rack, Racket, Rifle, Ring, Robot, ' + 'RockClimbing, Rope, Sailboat, Satellite, Scaffold, Scale, Scissor, Scooter, Sculpture, Seadragon, Seahorse, Seal, SewingMachine, Ship, Shoe, ShoppingCart, ' + 'ShoppingTrolley, Shower, Shrimp, Signboard, Skateboarding, Skeleton, Skiing, Spade, SpeedBoat, Spider, Spoon, Stair, Stand, Stationary, SteeringWheel, ' + 'Stethoscope, Stool, Stove, StreetLamp, SweetStand, Swing, Sword, TV, Table, TableChair, TableLamp, TableTennis, Tank, Tapeline, Teapot, Telescope, Tent, ' + 'TobaccoPipe, Toy, Tractor, TrafficLight, TrafficSign, Trampoline, TransmissionTower, Tree, Tricycle, 
TrimmerCover, Tripod, Trombone, Truck, Trumpet, Tuba, ' + 'UAV, Umbrella, UnevenBars, UtilityPole, VacuumCleaner, Violin, Wakesurfing, Watch, WaterTower, WateringPot, Well, WellLid, Wheel, Wheelchair, WindTurbine, Windmill, WineGlass, WireWhisk, Yacht' +) +class_labels_TR_sorted = _class_labels_TR_sorted.split(', ') + + +### models/backbones/build_backbones.py + +import torch +import torch.nn as nn +from collections import OrderedDict +from torchvision.models import vgg16, vgg16_bn, VGG16_Weights, VGG16_BN_Weights, resnet50, ResNet50_Weights +# from models.pvt_v2 import pvt_v2_b0, pvt_v2_b1, pvt_v2_b2, pvt_v2_b5 +# from models.swin_v1 import swin_v1_t, swin_v1_s, swin_v1_b, swin_v1_l +# from config import Config + + +config = Config() + +def build_backbone(bb_name, pretrained=True, params_settings=''): + if bb_name == 'vgg16': + bb_net = list(vgg16(pretrained=VGG16_Weights.DEFAULT if pretrained else None).children())[0] + bb = nn.Sequential(OrderedDict({'conv1': bb_net[:4], 'conv2': bb_net[4:9], 'conv3': bb_net[9:16], 'conv4': bb_net[16:23]})) + elif bb_name == 'vgg16bn': + bb_net = list(vgg16_bn(pretrained=VGG16_BN_Weights.DEFAULT if pretrained else None).children())[0] + bb = nn.Sequential(OrderedDict({'conv1': bb_net[:6], 'conv2': bb_net[6:13], 'conv3': bb_net[13:23], 'conv4': bb_net[23:33]})) + elif bb_name == 'resnet50': + bb_net = list(resnet50(pretrained=ResNet50_Weights.DEFAULT if pretrained else None).children()) + bb = nn.Sequential(OrderedDict({'conv1': nn.Sequential(*bb_net[0:3]), 'conv2': bb_net[4], 'conv3': bb_net[5], 'conv4': bb_net[6]})) + else: + bb = eval('{}({})'.format(bb_name, params_settings)) + if pretrained: + bb = load_weights(bb, bb_name) + return bb + +def load_weights(model, model_name): + save_model = torch.load(config.weights[model_name], map_location='cpu') + model_dict = model.state_dict() + state_dict = {k: v if v.size() == model_dict[k].size() else model_dict[k] for k, v in save_model.items() if k in model_dict.keys()} + # Ignore weights with mismatched sizes (e.g., when the backbone itself has been modified). + if not state_dict: + save_model_keys = list(save_model.keys()) + sub_item = save_model_keys[0] if len(save_model_keys) == 1 else None + state_dict = {k: v if v.size() == model_dict[k].size() else model_dict[k] for k, v in save_model[sub_item].items() if k in model_dict.keys()} + if not state_dict or not sub_item: + print('Weights are not successfully loaded. 
Check the state dict of weights file.') + return None + else: + print('Found correct weights in the "{}" item of loaded state_dict.'.format(sub_item)) + model_dict.update(state_dict) + model.load_state_dict(model_dict) + return model + + + +### models/modules/decoder_blocks.py + +import torch +import torch.nn as nn +# from models.aspp import ASPP, ASPPDeformable +# from config import Config + + +# config = Config() + + +class BasicDecBlk(nn.Module): + def __init__(self, in_channels=64, out_channels=64, inter_channels=64): + super(BasicDecBlk, self).__init__() + inter_channels = in_channels // 4 if config.dec_channels_inter == 'adap' else 64 + self.conv_in = nn.Conv2d(in_channels, inter_channels, 3, 1, padding=1) + self.relu_in = nn.ReLU(inplace=True) + if config.dec_att == 'ASPP': + self.dec_att = ASPP(in_channels=inter_channels) + elif config.dec_att == 'ASPPDeformable': + self.dec_att = ASPPDeformable(in_channels=inter_channels) + self.conv_out = nn.Conv2d(inter_channels, out_channels, 3, 1, padding=1) + self.bn_in = nn.BatchNorm2d(inter_channels) if config.batch_size > 1 else nn.Identity() + self.bn_out = nn.BatchNorm2d(out_channels) if config.batch_size > 1 else nn.Identity() + + def forward(self, x): + x = self.conv_in(x) + x = self.bn_in(x) + x = self.relu_in(x) + if hasattr(self, 'dec_att'): + x = self.dec_att(x) + x = self.conv_out(x) + x = self.bn_out(x) + return x + + +class ResBlk(nn.Module): + def __init__(self, in_channels=64, out_channels=None, inter_channels=64): + super(ResBlk, self).__init__() + if out_channels is None: + out_channels = in_channels + inter_channels = in_channels // 4 if config.dec_channels_inter == 'adap' else 64 + + self.conv_in = nn.Conv2d(in_channels, inter_channels, 3, 1, padding=1) + self.bn_in = nn.BatchNorm2d(inter_channels) if config.batch_size > 1 else nn.Identity() + self.relu_in = nn.ReLU(inplace=True) + + if config.dec_att == 'ASPP': + self.dec_att = ASPP(in_channels=inter_channels) + elif config.dec_att == 'ASPPDeformable': + self.dec_att = ASPPDeformable(in_channels=inter_channels) + + self.conv_out = nn.Conv2d(inter_channels, out_channels, 3, 1, padding=1) + self.bn_out = nn.BatchNorm2d(out_channels) if config.batch_size > 1 else nn.Identity() + + self.conv_resi = nn.Conv2d(in_channels, out_channels, 1, 1, 0) + + def forward(self, x): + _x = self.conv_resi(x) + x = self.conv_in(x) + x = self.bn_in(x) + x = self.relu_in(x) + if hasattr(self, 'dec_att'): + x = self.dec_att(x) + x = self.conv_out(x) + x = self.bn_out(x) + return x + _x + + + +### models/modules/lateral_blocks.py + +import numpy as np +import torch +import torch.nn as nn +import torch.nn.functional as F +from functools import partial + +# from config import Config + + +# config = Config() + + +class BasicLatBlk(nn.Module): + def __init__(self, in_channels=64, out_channels=64, inter_channels=64): + super(BasicLatBlk, self).__init__() + inter_channels = in_channels // 4 if config.dec_channels_inter == 'adap' else 64 + self.conv = nn.Conv2d(in_channels, out_channels, 1, 1, 0) + + def forward(self, x): + x = self.conv(x) + return x + + + +### models/modules/aspp.py + +import torch +import torch.nn as nn +import torch.nn.functional as F +# from models.deform_conv import DeformableConv2d +# from config import Config + + +# config = Config() + + +class _ASPPModule(nn.Module): + def __init__(self, in_channels, planes, kernel_size, padding, dilation): + super(_ASPPModule, self).__init__() + self.atrous_conv = nn.Conv2d(in_channels, planes, kernel_size=kernel_size, + stride=1, 
padding=padding, dilation=dilation, bias=False) + self.bn = nn.BatchNorm2d(planes) if config.batch_size > 1 else nn.Identity() + self.relu = nn.ReLU(inplace=True) + + def forward(self, x): + x = self.atrous_conv(x) + x = self.bn(x) + + return self.relu(x) + + +class ASPP(nn.Module): + def __init__(self, in_channels=64, out_channels=None, output_stride=16): + super(ASPP, self).__init__() + self.down_scale = 1 + if out_channels is None: + out_channels = in_channels + self.in_channelster = 256 // self.down_scale + if output_stride == 16: + dilations = [1, 6, 12, 18] + elif output_stride == 8: + dilations = [1, 12, 24, 36] + else: + raise NotImplementedError + + self.aspp1 = _ASPPModule(in_channels, self.in_channelster, 1, padding=0, dilation=dilations[0]) + self.aspp2 = _ASPPModule(in_channels, self.in_channelster, 3, padding=dilations[1], dilation=dilations[1]) + self.aspp3 = _ASPPModule(in_channels, self.in_channelster, 3, padding=dilations[2], dilation=dilations[2]) + self.aspp4 = _ASPPModule(in_channels, self.in_channelster, 3, padding=dilations[3], dilation=dilations[3]) + + self.global_avg_pool = nn.Sequential(nn.AdaptiveAvgPool2d((1, 1)), + nn.Conv2d(in_channels, self.in_channelster, 1, stride=1, bias=False), + nn.BatchNorm2d(self.in_channelster) if config.batch_size > 1 else nn.Identity(), + nn.ReLU(inplace=True)) + self.conv1 = nn.Conv2d(self.in_channelster * 5, out_channels, 1, bias=False) + self.bn1 = nn.BatchNorm2d(out_channels) if config.batch_size > 1 else nn.Identity() + self.relu = nn.ReLU(inplace=True) + self.dropout = nn.Dropout(0.5) + + def forward(self, x): + x1 = self.aspp1(x) + x2 = self.aspp2(x) + x3 = self.aspp3(x) + x4 = self.aspp4(x) + x5 = self.global_avg_pool(x) + x5 = F.interpolate(x5, size=x1.size()[2:], mode='bilinear', align_corners=True) + x = torch.cat((x1, x2, x3, x4, x5), dim=1) + + x = self.conv1(x) + x = self.bn1(x) + x = self.relu(x) + + return self.dropout(x) + + +##################### Deformable +class _ASPPModuleDeformable(nn.Module): + def __init__(self, in_channels, planes, kernel_size, padding): + super(_ASPPModuleDeformable, self).__init__() + self.atrous_conv = DeformableConv2d(in_channels, planes, kernel_size=kernel_size, + stride=1, padding=padding, bias=False) + self.bn = nn.BatchNorm2d(planes) if config.batch_size > 1 else nn.Identity() + self.relu = nn.ReLU(inplace=True) + + def forward(self, x): + x = self.atrous_conv(x) + x = self.bn(x) + + return self.relu(x) + + +class ASPPDeformable(nn.Module): + def __init__(self, in_channels, out_channels=None, parallel_block_sizes=[1, 3, 7]): + super(ASPPDeformable, self).__init__() + self.down_scale = 1 + if out_channels is None: + out_channels = in_channels + self.in_channelster = 256 // self.down_scale + + self.aspp1 = _ASPPModuleDeformable(in_channels, self.in_channelster, 1, padding=0) + self.aspp_deforms = nn.ModuleList([ + _ASPPModuleDeformable(in_channels, self.in_channelster, conv_size, padding=int(conv_size//2)) for conv_size in parallel_block_sizes + ]) + + self.global_avg_pool = nn.Sequential(nn.AdaptiveAvgPool2d((1, 1)), + nn.Conv2d(in_channels, self.in_channelster, 1, stride=1, bias=False), + nn.BatchNorm2d(self.in_channelster) if config.batch_size > 1 else nn.Identity(), + nn.ReLU(inplace=True)) + self.conv1 = nn.Conv2d(self.in_channelster * (2 + len(self.aspp_deforms)), out_channels, 1, bias=False) + self.bn1 = nn.BatchNorm2d(out_channels) if config.batch_size > 1 else nn.Identity() + self.relu = nn.ReLU(inplace=True) + self.dropout = nn.Dropout(0.5) + + def forward(self, x): + x1 = 
self.aspp1(x) + x_aspp_deforms = [aspp_deform(x) for aspp_deform in self.aspp_deforms] + x5 = self.global_avg_pool(x) + x5 = F.interpolate(x5, size=x1.size()[2:], mode='bilinear', align_corners=True) + x = torch.cat((x1, *x_aspp_deforms, x5), dim=1) + + x = self.conv1(x) + x = self.bn1(x) + x = self.relu(x) + + return self.dropout(x) + + + +### models/refinement/refiner.py + +import torch +import torch.nn as nn +from collections import OrderedDict +import torch +import torch.nn as nn +import torch.nn.functional as F +from torchvision.models import vgg16, vgg16_bn +from torchvision.models import resnet50 + +# from config import Config +# from dataset import class_labels_TR_sorted +# from models.build_backbone import build_backbone +# from models.decoder_blocks import BasicDecBlk +# from models.lateral_blocks import BasicLatBlk +# from models.ing import * +# from models.stem_layer import StemLayer + + +class RefinerPVTInChannels4(nn.Module): + def __init__(self, in_channels=3+1): + super(RefinerPVTInChannels4, self).__init__() + self.config = Config() + self.epoch = 1 + self.bb = build_backbone(self.config.bb, params_settings='in_channels=4') + + lateral_channels_in_collection = { + 'vgg16': [512, 256, 128, 64], 'vgg16bn': [512, 256, 128, 64], 'resnet50': [1024, 512, 256, 64], + 'pvt_v2_b2': [512, 320, 128, 64], 'pvt_v2_b5': [512, 320, 128, 64], + 'swin_v1_b': [1024, 512, 256, 128], 'swin_v1_l': [1536, 768, 384, 192], + } + channels = lateral_channels_in_collection[self.config.bb] + self.squeeze_module = BasicDecBlk(channels[0], channels[0]) + + self.decoder = Decoder(channels) + + if 0: + for key, value in self.named_parameters(): + if 'bb.' in key: + value.requires_grad = False + + def forward(self, x): + if isinstance(x, list): + x = torch.cat(x, dim=1) + ########## Encoder ########## + if self.config.bb in ['vgg16', 'vgg16bn', 'resnet50']: + x1 = self.bb.conv1(x) + x2 = self.bb.conv2(x1) + x3 = self.bb.conv3(x2) + x4 = self.bb.conv4(x3) + else: + x1, x2, x3, x4 = self.bb(x) + + x4 = self.squeeze_module(x4) + + ########## Decoder ########## + + features = [x, x1, x2, x3, x4] + scaled_preds = self.decoder(features) + + return scaled_preds + + +class Refiner(nn.Module): + def __init__(self, in_channels=3+1): + super(Refiner, self).__init__() + self.config = Config() + self.epoch = 1 + self.stem_layer = StemLayer(in_channels=in_channels, inter_channels=48, out_channels=3, norm_layer='BN' if self.config.batch_size > 1 else 'LN') + self.bb = build_backbone(self.config.bb) + + lateral_channels_in_collection = { + 'vgg16': [512, 256, 128, 64], 'vgg16bn': [512, 256, 128, 64], 'resnet50': [1024, 512, 256, 64], + 'pvt_v2_b2': [512, 320, 128, 64], 'pvt_v2_b5': [512, 320, 128, 64], + 'swin_v1_b': [1024, 512, 256, 128], 'swin_v1_l': [1536, 768, 384, 192], + } + channels = lateral_channels_in_collection[self.config.bb] + self.squeeze_module = BasicDecBlk(channels[0], channels[0]) + + self.decoder = Decoder(channels) + + if 0: + for key, value in self.named_parameters(): + if 'bb.' 
in key: + value.requires_grad = False + + def forward(self, x): + if isinstance(x, list): + x = torch.cat(x, dim=1) + x = self.stem_layer(x) + ########## Encoder ########## + if self.config.bb in ['vgg16', 'vgg16bn', 'resnet50']: + x1 = self.bb.conv1(x) + x2 = self.bb.conv2(x1) + x3 = self.bb.conv3(x2) + x4 = self.bb.conv4(x3) + else: + x1, x2, x3, x4 = self.bb(x) + + x4 = self.squeeze_module(x4) + + ########## Decoder ########## + + features = [x, x1, x2, x3, x4] + scaled_preds = self.decoder(features) + + return scaled_preds + + +class Decoder(nn.Module): + def __init__(self, channels): + super(Decoder, self).__init__() + self.config = Config() + DecoderBlock = eval('BasicDecBlk') + LateralBlock = eval('BasicLatBlk') + + self.decoder_block4 = DecoderBlock(channels[0], channels[1]) + self.decoder_block3 = DecoderBlock(channels[1], channels[2]) + self.decoder_block2 = DecoderBlock(channels[2], channels[3]) + self.decoder_block1 = DecoderBlock(channels[3], channels[3]//2) + + self.lateral_block4 = LateralBlock(channels[1], channels[1]) + self.lateral_block3 = LateralBlock(channels[2], channels[2]) + self.lateral_block2 = LateralBlock(channels[3], channels[3]) + + if self.config.ms_supervision: + self.conv_ms_spvn_4 = nn.Conv2d(channels[1], 1, 1, 1, 0) + self.conv_ms_spvn_3 = nn.Conv2d(channels[2], 1, 1, 1, 0) + self.conv_ms_spvn_2 = nn.Conv2d(channels[3], 1, 1, 1, 0) + self.conv_out1 = nn.Sequential(nn.Conv2d(channels[3]//2, 1, 1, 1, 0)) + + def forward(self, features): + x, x1, x2, x3, x4 = features + outs = [] + p4 = self.decoder_block4(x4) + _p4 = F.interpolate(p4, size=x3.shape[2:], mode='bilinear', align_corners=True) + _p3 = _p4 + self.lateral_block4(x3) + + p3 = self.decoder_block3(_p3) + _p3 = F.interpolate(p3, size=x2.shape[2:], mode='bilinear', align_corners=True) + _p2 = _p3 + self.lateral_block3(x2) + + p2 = self.decoder_block2(_p2) + _p2 = F.interpolate(p2, size=x1.shape[2:], mode='bilinear', align_corners=True) + _p1 = _p2 + self.lateral_block2(x1) + + _p1 = self.decoder_block1(_p1) + _p1 = F.interpolate(_p1, size=x.shape[2:], mode='bilinear', align_corners=True) + p1_out = self.conv_out1(_p1) + + if self.config.ms_supervision: + outs.append(self.conv_ms_spvn_4(p4)) + outs.append(self.conv_ms_spvn_3(p3)) + outs.append(self.conv_ms_spvn_2(p2)) + outs.append(p1_out) + return outs + + +class RefUNet(nn.Module): + # Refinement + def __init__(self, in_channels=3+1): + super(RefUNet, self).__init__() + self.encoder_1 = nn.Sequential( + nn.Conv2d(in_channels, 64, 3, 1, 1), + nn.Conv2d(64, 64, 3, 1, 1), + nn.BatchNorm2d(64), + nn.ReLU(inplace=True) + ) + + self.encoder_2 = nn.Sequential( + nn.MaxPool2d(2, 2, ceil_mode=True), + nn.Conv2d(64, 64, 3, 1, 1), + nn.BatchNorm2d(64), + nn.ReLU(inplace=True) + ) + + self.encoder_3 = nn.Sequential( + nn.MaxPool2d(2, 2, ceil_mode=True), + nn.Conv2d(64, 64, 3, 1, 1), + nn.BatchNorm2d(64), + nn.ReLU(inplace=True) + ) + + self.encoder_4 = nn.Sequential( + nn.MaxPool2d(2, 2, ceil_mode=True), + nn.Conv2d(64, 64, 3, 1, 1), + nn.BatchNorm2d(64), + nn.ReLU(inplace=True) + ) + + self.pool4 = nn.MaxPool2d(2, 2, ceil_mode=True) + ##### + self.decoder_5 = nn.Sequential( + nn.Conv2d(64, 64, 3, 1, 1), + nn.BatchNorm2d(64), + nn.ReLU(inplace=True) + ) + ##### + self.decoder_4 = nn.Sequential( + nn.Conv2d(128, 64, 3, 1, 1), + nn.BatchNorm2d(64), + nn.ReLU(inplace=True) + ) + + self.decoder_3 = nn.Sequential( + nn.Conv2d(128, 64, 3, 1, 1), + nn.BatchNorm2d(64), + nn.ReLU(inplace=True) + ) + + self.decoder_2 = nn.Sequential( + nn.Conv2d(128, 64, 3, 1, 1), + 
nn.BatchNorm2d(64), + nn.ReLU(inplace=True) + ) + + self.decoder_1 = nn.Sequential( + nn.Conv2d(128, 64, 3, 1, 1), + nn.BatchNorm2d(64), + nn.ReLU(inplace=True) + ) + + self.conv_d0 = nn.Conv2d(64, 1, 3, 1, 1) + + self.upscore2 = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True) + + def forward(self, x): + outs = [] + if isinstance(x, list): + x = torch.cat(x, dim=1) + hx = x + + hx1 = self.encoder_1(hx) + hx2 = self.encoder_2(hx1) + hx3 = self.encoder_3(hx2) + hx4 = self.encoder_4(hx3) + + hx = self.decoder_5(self.pool4(hx4)) + hx = torch.cat((self.upscore2(hx), hx4), 1) + + d4 = self.decoder_4(hx) + hx = torch.cat((self.upscore2(d4), hx3), 1) + + d3 = self.decoder_3(hx) + hx = torch.cat((self.upscore2(d3), hx2), 1) + + d2 = self.decoder_2(hx) + hx = torch.cat((self.upscore2(d2), hx1), 1) + + d1 = self.decoder_1(hx) + + x = self.conv_d0(d1) + outs.append(x) + return outs + + + +### models/stem_layer.py + +import torch.nn as nn +# from utils import build_act_layer, build_norm_layer + + +class StemLayer(nn.Module): + r""" Stem layer of InternImage + Args: + in_channels (int): number of input channels + out_channels (int): number of output channels + act_layer (str): activation layer + norm_layer (str): normalization layer + """ + + def __init__(self, + in_channels=3+1, + inter_channels=48, + out_channels=96, + act_layer='GELU', + norm_layer='BN'): + super().__init__() + self.conv1 = nn.Conv2d(in_channels, + inter_channels, + kernel_size=3, + stride=1, + padding=1) + self.norm1 = build_norm_layer( + inter_channels, norm_layer, 'channels_first', 'channels_first' + ) + self.act = build_act_layer(act_layer) + self.conv2 = nn.Conv2d(inter_channels, + out_channels, + kernel_size=3, + stride=1, + padding=1) + self.norm2 = build_norm_layer( + out_channels, norm_layer, 'channels_first', 'channels_first' + ) + + def forward(self, x): + x = self.conv1(x) + x = self.norm1(x) + x = self.act(x) + x = self.conv2(x) + x = self.norm2(x) + return x + + +### models/birefnet.py + +import torch +import torch.nn as nn +import torch.nn.functional as F +from kornia.filters import laplacian +from transformers import PreTrainedModel + +# from config import Config +# from dataset import class_labels_TR_sorted +# from models.build_backbone import build_backbone +# from models.decoder_blocks import BasicDecBlk, ResBlk, HierarAttDecBlk +# from models.lateral_blocks import BasicLatBlk +# from models.aspp import ASPP, ASPPDeformable +# from models.ing import * +# from models.refiner import Refiner, RefinerPVTInChannels4, RefUNet +# from models.stem_layer import StemLayer +from .BiRefNet_config import BiRefNetConfig + + +class BiRefNet( + PreTrainedModel +): + config_class = BiRefNetConfig + def __init__(self, bb_pretrained=True, config=BiRefNetConfig()): + super(BiRefNet, self).__init__(config) + bb_pretrained = config.bb_pretrained + self.config = Config() + self.epoch = 1 + self.bb = build_backbone(self.config.bb, pretrained=bb_pretrained) + + channels = self.config.lateral_channels_in_collection + + if self.config.auxiliary_classification: + self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) + self.cls_head = nn.Sequential( + nn.Linear(channels[0], len(class_labels_TR_sorted)) + ) + + if self.config.squeeze_block: + self.squeeze_module = nn.Sequential(*[ + eval(self.config.squeeze_block.split('_x')[0])(channels[0]+sum(self.config.cxt), channels[0]) + for _ in range(eval(self.config.squeeze_block.split('_x')[1])) + ]) + + self.decoder = Decoder(channels) + + if self.config.ender: + self.dec_end = nn.Sequential( 
+ nn.Conv2d(1, 16, 3, 1, 1), + nn.Conv2d(16, 1, 3, 1, 1), + nn.ReLU(inplace=True), + ) + + # refine patch-level segmentation + if self.config.refine: + if self.config.refine == 'itself': + self.stem_layer = StemLayer(in_channels=3+1, inter_channels=48, out_channels=3, norm_layer='BN' if self.config.batch_size > 1 else 'LN') + else: + self.refiner = eval('{}({})'.format(self.config.refine, 'in_channels=3+1')) + + if self.config.freeze_bb: + # Freeze the backbone... + print(self.named_parameters()) + for key, value in self.named_parameters(): + if 'bb.' in key and 'refiner.' not in key: + value.requires_grad = False + + def forward_enc(self, x): + if self.config.bb in ['vgg16', 'vgg16bn', 'resnet50']: + x1 = self.bb.conv1(x); x2 = self.bb.conv2(x1); x3 = self.bb.conv3(x2); x4 = self.bb.conv4(x3) + else: + x1, x2, x3, x4 = self.bb(x) + if self.config.mul_scl_ipt == 'cat': + B, C, H, W = x.shape + x1_, x2_, x3_, x4_ = self.bb(F.interpolate(x, size=(H//2, W//2), mode='bilinear', align_corners=True)) + x1 = torch.cat([x1, F.interpolate(x1_, size=x1.shape[2:], mode='bilinear', align_corners=True)], dim=1) + x2 = torch.cat([x2, F.interpolate(x2_, size=x2.shape[2:], mode='bilinear', align_corners=True)], dim=1) + x3 = torch.cat([x3, F.interpolate(x3_, size=x3.shape[2:], mode='bilinear', align_corners=True)], dim=1) + x4 = torch.cat([x4, F.interpolate(x4_, size=x4.shape[2:], mode='bilinear', align_corners=True)], dim=1) + elif self.config.mul_scl_ipt == 'add': + B, C, H, W = x.shape + x1_, x2_, x3_, x4_ = self.bb(F.interpolate(x, size=(H//2, W//2), mode='bilinear', align_corners=True)) + x1 = x1 + F.interpolate(x1_, size=x1.shape[2:], mode='bilinear', align_corners=True) + x2 = x2 + F.interpolate(x2_, size=x2.shape[2:], mode='bilinear', align_corners=True) + x3 = x3 + F.interpolate(x3_, size=x3.shape[2:], mode='bilinear', align_corners=True) + x4 = x4 + F.interpolate(x4_, size=x4.shape[2:], mode='bilinear', align_corners=True) + class_preds = self.cls_head(self.avgpool(x4).view(x4.shape[0], -1)) if self.training and self.config.auxiliary_classification else None + if self.config.cxt: + x4 = torch.cat( + ( + *[ + F.interpolate(x1, size=x4.shape[2:], mode='bilinear', align_corners=True), + F.interpolate(x2, size=x4.shape[2:], mode='bilinear', align_corners=True), + F.interpolate(x3, size=x4.shape[2:], mode='bilinear', align_corners=True), + ][-len(self.config.cxt):], + x4 + ), + dim=1 + ) + return (x1, x2, x3, x4), class_preds + + def forward_ori(self, x): + ########## Encoder ########## + (x1, x2, x3, x4), class_preds = self.forward_enc(x) + if self.config.squeeze_block: + x4 = self.squeeze_module(x4) + ########## Decoder ########## + features = [x, x1, x2, x3, x4] + if self.training and self.config.out_ref: + features.append(laplacian(torch.mean(x, dim=1).unsqueeze(1), kernel_size=5)) + scaled_preds = self.decoder(features) + return scaled_preds, class_preds + + def forward(self, x): + scaled_preds, class_preds = self.forward_ori(x) + class_preds_lst = [class_preds] + return [scaled_preds, class_preds_lst] if self.training else scaled_preds + + +class Decoder(nn.Module): + def __init__(self, channels): + super(Decoder, self).__init__() + self.config = Config() + DecoderBlock = eval(self.config.dec_blk) + LateralBlock = eval(self.config.lat_blk) + + if self.config.dec_ipt: + self.split = self.config.dec_ipt_split + N_dec_ipt = 64 + DBlock = SimpleConvs + ic = 64 + ipt_cha_opt = 1 + self.ipt_blk5 = DBlock(2**10*3 if self.split else 3, [N_dec_ipt, channels[0]//8][ipt_cha_opt], inter_channels=ic) + 
self.ipt_blk4 = DBlock(2**8*3 if self.split else 3, [N_dec_ipt, channels[0]//8][ipt_cha_opt], inter_channels=ic) + self.ipt_blk3 = DBlock(2**6*3 if self.split else 3, [N_dec_ipt, channels[1]//8][ipt_cha_opt], inter_channels=ic) + self.ipt_blk2 = DBlock(2**4*3 if self.split else 3, [N_dec_ipt, channels[2]//8][ipt_cha_opt], inter_channels=ic) + self.ipt_blk1 = DBlock(2**0*3 if self.split else 3, [N_dec_ipt, channels[3]//8][ipt_cha_opt], inter_channels=ic) + else: + self.split = None + + self.decoder_block4 = DecoderBlock(channels[0]+([N_dec_ipt, channels[0]//8][ipt_cha_opt] if self.config.dec_ipt else 0), channels[1]) + self.decoder_block3 = DecoderBlock(channels[1]+([N_dec_ipt, channels[0]//8][ipt_cha_opt] if self.config.dec_ipt else 0), channels[2]) + self.decoder_block2 = DecoderBlock(channels[2]+([N_dec_ipt, channels[1]//8][ipt_cha_opt] if self.config.dec_ipt else 0), channels[3]) + self.decoder_block1 = DecoderBlock(channels[3]+([N_dec_ipt, channels[2]//8][ipt_cha_opt] if self.config.dec_ipt else 0), channels[3]//2) + self.conv_out1 = nn.Sequential(nn.Conv2d(channels[3]//2+([N_dec_ipt, channels[3]//8][ipt_cha_opt] if self.config.dec_ipt else 0), 1, 1, 1, 0)) + + self.lateral_block4 = LateralBlock(channels[1], channels[1]) + self.lateral_block3 = LateralBlock(channels[2], channels[2]) + self.lateral_block2 = LateralBlock(channels[3], channels[3]) + + if self.config.ms_supervision: + self.conv_ms_spvn_4 = nn.Conv2d(channels[1], 1, 1, 1, 0) + self.conv_ms_spvn_3 = nn.Conv2d(channels[2], 1, 1, 1, 0) + self.conv_ms_spvn_2 = nn.Conv2d(channels[3], 1, 1, 1, 0) + + if self.config.out_ref: + _N = 16 + self.gdt_convs_4 = nn.Sequential(nn.Conv2d(channels[1], _N, 3, 1, 1), nn.BatchNorm2d(_N) if self.config.batch_size > 1 else nn.Identity(), nn.ReLU(inplace=True)) + self.gdt_convs_3 = nn.Sequential(nn.Conv2d(channels[2], _N, 3, 1, 1), nn.BatchNorm2d(_N) if self.config.batch_size > 1 else nn.Identity(), nn.ReLU(inplace=True)) + self.gdt_convs_2 = nn.Sequential(nn.Conv2d(channels[3], _N, 3, 1, 1), nn.BatchNorm2d(_N) if self.config.batch_size > 1 else nn.Identity(), nn.ReLU(inplace=True)) + + self.gdt_convs_pred_4 = nn.Sequential(nn.Conv2d(_N, 1, 1, 1, 0)) + self.gdt_convs_pred_3 = nn.Sequential(nn.Conv2d(_N, 1, 1, 1, 0)) + self.gdt_convs_pred_2 = nn.Sequential(nn.Conv2d(_N, 1, 1, 1, 0)) + + self.gdt_convs_attn_4 = nn.Sequential(nn.Conv2d(_N, 1, 1, 1, 0)) + self.gdt_convs_attn_3 = nn.Sequential(nn.Conv2d(_N, 1, 1, 1, 0)) + self.gdt_convs_attn_2 = nn.Sequential(nn.Conv2d(_N, 1, 1, 1, 0)) + + def get_patches_batch(self, x, p): + _size_h, _size_w = p.shape[2:] + patches_batch = [] + for idx in range(x.shape[0]): + columns_x = torch.split(x[idx], split_size_or_sections=_size_w, dim=-1) + patches_x = [] + for column_x in columns_x: + patches_x += [p.unsqueeze(0) for p in torch.split(column_x, split_size_or_sections=_size_h, dim=-2)] + patch_sample = torch.cat(patches_x, dim=1) + patches_batch.append(patch_sample) + return torch.cat(patches_batch, dim=0) + + def forward(self, features): + if self.training and self.config.out_ref: + outs_gdt_pred = [] + outs_gdt_label = [] + x, x1, x2, x3, x4, gdt_gt = features + else: + x, x1, x2, x3, x4 = features + outs = [] + + if self.config.dec_ipt: + patches_batch = self.get_patches_batch(x, x4) if self.split else x + x4 = torch.cat((x4, self.ipt_blk5(F.interpolate(patches_batch, size=x4.shape[2:], mode='bilinear', align_corners=True))), 1) + p4 = self.decoder_block4(x4) + m4 = self.conv_ms_spvn_4(p4) if self.config.ms_supervision else None + if self.config.out_ref: + 
p4_gdt = self.gdt_convs_4(p4) + if self.training: + # >> GT: + m4_dia = m4 + gdt_label_main_4 = gdt_gt * F.interpolate(m4_dia, size=gdt_gt.shape[2:], mode='bilinear', align_corners=True) + outs_gdt_label.append(gdt_label_main_4) + # >> Pred: + gdt_pred_4 = self.gdt_convs_pred_4(p4_gdt) + outs_gdt_pred.append(gdt_pred_4) + gdt_attn_4 = self.gdt_convs_attn_4(p4_gdt).sigmoid() + # >> Finally: + p4 = p4 * gdt_attn_4 + _p4 = F.interpolate(p4, size=x3.shape[2:], mode='bilinear', align_corners=True) + _p3 = _p4 + self.lateral_block4(x3) + + if self.config.dec_ipt: + patches_batch = self.get_patches_batch(x, _p3) if self.split else x + _p3 = torch.cat((_p3, self.ipt_blk4(F.interpolate(patches_batch, size=x3.shape[2:], mode='bilinear', align_corners=True))), 1) + p3 = self.decoder_block3(_p3) + m3 = self.conv_ms_spvn_3(p3) if self.config.ms_supervision else None + if self.config.out_ref: + p3_gdt = self.gdt_convs_3(p3) + if self.training: + # >> GT: + # m3 --dilation--> m3_dia + # G_3^gt * m3_dia --> G_3^m, which is the label of gradient + m3_dia = m3 + gdt_label_main_3 = gdt_gt * F.interpolate(m3_dia, size=gdt_gt.shape[2:], mode='bilinear', align_corners=True) + outs_gdt_label.append(gdt_label_main_3) + # >> Pred: + # p3 --conv--BN--> F_3^G, where F_3^G predicts the \hat{G_3} with xx + # F_3^G --sigmoid--> A_3^G + gdt_pred_3 = self.gdt_convs_pred_3(p3_gdt) + outs_gdt_pred.append(gdt_pred_3) + gdt_attn_3 = self.gdt_convs_attn_3(p3_gdt).sigmoid() + # >> Finally: + # p3 = p3 * A_3^G + p3 = p3 * gdt_attn_3 + _p3 = F.interpolate(p3, size=x2.shape[2:], mode='bilinear', align_corners=True) + _p2 = _p3 + self.lateral_block3(x2) + + if self.config.dec_ipt: + patches_batch = self.get_patches_batch(x, _p2) if self.split else x + _p2 = torch.cat((_p2, self.ipt_blk3(F.interpolate(patches_batch, size=x2.shape[2:], mode='bilinear', align_corners=True))), 1) + p2 = self.decoder_block2(_p2) + m2 = self.conv_ms_spvn_2(p2) if self.config.ms_supervision else None + if self.config.out_ref: + p2_gdt = self.gdt_convs_2(p2) + if self.training: + # >> GT: + m2_dia = m2 + gdt_label_main_2 = gdt_gt * F.interpolate(m2_dia, size=gdt_gt.shape[2:], mode='bilinear', align_corners=True) + outs_gdt_label.append(gdt_label_main_2) + # >> Pred: + gdt_pred_2 = self.gdt_convs_pred_2(p2_gdt) + outs_gdt_pred.append(gdt_pred_2) + gdt_attn_2 = self.gdt_convs_attn_2(p2_gdt).sigmoid() + # >> Finally: + p2 = p2 * gdt_attn_2 + _p2 = F.interpolate(p2, size=x1.shape[2:], mode='bilinear', align_corners=True) + _p1 = _p2 + self.lateral_block2(x1) + + if self.config.dec_ipt: + patches_batch = self.get_patches_batch(x, _p1) if self.split else x + _p1 = torch.cat((_p1, self.ipt_blk2(F.interpolate(patches_batch, size=x1.shape[2:], mode='bilinear', align_corners=True))), 1) + _p1 = self.decoder_block1(_p1) + _p1 = F.interpolate(_p1, size=x.shape[2:], mode='bilinear', align_corners=True) + + if self.config.dec_ipt: + patches_batch = self.get_patches_batch(x, _p1) if self.split else x + _p1 = torch.cat((_p1, self.ipt_blk1(F.interpolate(patches_batch, size=x.shape[2:], mode='bilinear', align_corners=True))), 1) + p1_out = self.conv_out1(_p1) + + if self.config.ms_supervision: + outs.append(m4) + outs.append(m3) + outs.append(m2) + outs.append(p1_out) + return outs if not (self.config.out_ref and self.training) else ([outs_gdt_pred, outs_gdt_label], outs) + + +class SimpleConvs(nn.Module): + def __init__( + self, in_channels: int, out_channels: int, inter_channels=64 + ) -> None: + super().__init__() + self.conv1 = nn.Conv2d(in_channels, inter_channels, 3, 
1, 1) + self.conv_out = nn.Conv2d(inter_channels, out_channels, 3, 1, 1) + + def forward(self, x): + return self.conv_out(self.conv1(x)) diff --git a/RMBG/RMBG-2.0/config.json b/RMBG/RMBG-2.0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..06d8fa9d7f2f4c6f1cf0dc6e7bfd194153176a42 --- /dev/null +++ b/RMBG/RMBG-2.0/config.json @@ -0,0 +1,20 @@ +{ + "_name_or_path": "ZhengPeng7/BiRefNet", + "architectures": [ + "BiRefNet" + ], + "auto_map": { + "AutoConfig": "BiRefNet_config.BiRefNetConfig", + "AutoModelForImageSegmentation": "birefnet.BiRefNet" + }, + "custom_pipelines": { + "image-segmentation": { + "pt": [ + "AutoModelForImageSegmentation" + ], + "tf": [], + "type": "image" + } + }, + "bb_pretrained": false +} \ No newline at end of file diff --git a/ckpts/illustrious/obsessionIllustrious_v31.sha256 b/ckpts/illustrious/obsessionIllustrious_v31.sha256 new file mode 100644 index 0000000000000000000000000000000000000000..fd18ac790a13d43117a73b122b5ec9ce5eed8ff0 --- /dev/null +++ b/ckpts/illustrious/obsessionIllustrious_v31.sha256 @@ -0,0 +1 @@ +d1d7977219f40ef6b710aec7b7315e81a5b4f3093181a2e7d45301ed6cc576f6 \ No newline at end of file diff --git a/ckpts/pony/white_v20.sha256 b/ckpts/pony/white_v20.sha256 new file mode 100644 index 0000000000000000000000000000000000000000..739ce81b589095ea66c6ab120617c36c6860b93c --- /dev/null +++ b/ckpts/pony/white_v20.sha256 @@ -0,0 +1 @@ +a8bb2133a77a946f27638aafc6290e6fe3f27983d1434b74c01bc0a9be300bae \ No newline at end of file diff --git a/ckpts/upscale/SUPIR/.gitattributes b/ckpts/upscale/SUPIR/.gitattributes new file mode 100644 index 0000000000000000000000000000000000000000..a6344aac8c09253b3b630fb776ae94478aa0275b --- /dev/null +++ b/ckpts/upscale/SUPIR/.gitattributes @@ -0,0 +1,35 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text diff --git a/ipadapter/ip-adapter_sd15.safetensors b/ipadapter/ip-adapter_sd15.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..67788447289e5858c26dbc8fbf0a7ce3fd6c50fb --- 
/dev/null +++ b/ipadapter/ip-adapter_sd15.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:289b45f16d043d0bf542e45831f971dcdaabe18b656f11e86d9dfba7e9ee3369 +size 44642768 diff --git a/ipadapter/ip_plus_composition_sd15.safetensors b/ipadapter/ip_plus_composition_sd15.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7db8b186b300faa9a4695dcfbfd995ddae605e62 --- /dev/null +++ b/ipadapter/ip_plus_composition_sd15.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a02c9f4d2ade7c0e14db7471377ce5d326a2bfda7777231c79dc861c93f2c12 +size 98183728 diff --git a/loras/flux/20 (1).safetensors b/loras/flux/20 (1).safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e40adb4e09c14ec5bcf4b3ac4ad11d480d59ac2c --- /dev/null +++ b/loras/flux/20 (1).safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4670b3ce5e1fa557b6f96daf3a077666765f8728e34e31f7cc626ba92ec6999 +size 153265768 diff --git a/loras/flux/5yocrayon1_cap_d6a3e12.safetensors b/loras/flux/5yocrayon1_cap_d6a3e12.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..da57426607be9b2152ba6dc89a03820fab6932dd --- /dev/null +++ b/loras/flux/5yocrayon1_cap_d6a3e12.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a1bd9e7614f7b97be3f2efd09c9f789b89ce63cf33c6825d3b3db7ae6ee503a +size 57554040 diff --git a/loras/flux/Bacun_style_Flux_Test_000003000.safetensors b/loras/flux/Bacun_style_Flux_Test_000003000.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7717eb28b96f510a8c9ce14ce1ed27f6c23dcd3b --- /dev/null +++ b/loras/flux/Bacun_style_Flux_Test_000003000.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c2b5db3e0a47dc3fe95588100c9a470d0f9777178a6c8bcd6e81e3aa63463cd +size 171969428 diff --git a/loras/flux/Belly_Stuffed-000003.safetensors b/loras/flux/Belly_Stuffed-000003.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a18336d9b166d110e31db639b60385a1a7a90b07 --- /dev/null +++ b/loras/flux/Belly_Stuffed-000003.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b862b3edd880ab3671fd938cb934adfd80e75fd51391b94f721d764ec4c521ce +size 19257040 diff --git a/loras/flux/Dakimakura_Body_Pillow_Designer_FLUX-000021.safetensors b/loras/flux/Dakimakura_Body_Pillow_Designer_FLUX-000021.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a8e4a7810bcf4a88fd967a8484433f3c0972a4fb --- /dev/null +++ b/loras/flux/Dakimakura_Body_Pillow_Designer_FLUX-000021.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dbfe12b51348772421be6f6b4d622508bb127b3720e24f187f84207b94b9d4ab +size 57588504 diff --git a/loras/flux/DigitalMatte_FLUX.safetensors b/loras/flux/DigitalMatte_FLUX.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7611538bce1139ae264f75e79a7dc3425bd85c93 --- /dev/null +++ b/loras/flux/DigitalMatte_FLUX.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4551c87aa0bdc2400137fb70d60197dc455edce3ce00281514c18961507d2a29 +size 19599648 diff --git a/loras/flux/Dollification_-_inflatable_sex_doll_transformation_rubber_skin_sdol_-_flux.safetensors b/loras/flux/Dollification_-_inflatable_sex_doll_transformation_rubber_skin_sdol_-_flux.safetensors new file mode 100644 index 
0000000000000000000000000000000000000000..d37f64da329dc71dd1e3dc628aa37e598ef5ca2b --- /dev/null +++ b/loras/flux/Dollification_-_inflatable_sex_doll_transformation_rubber_skin_sdol_-_flux.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc910b0e7caf58f9ea9d67a6213abb39b9e89c760747a0e2a48c04fc188d3c4f +size 19303584 diff --git a/loras/flux/Illumination_Style_Flux.safetensors b/loras/flux/Illumination_Style_Flux.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d4e6369b8d3ffecbd764a7d2f2a27a82eebb5186 --- /dev/null +++ b/loras/flux/Illumination_Style_Flux.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b62f1e76dc7fce6081a840cd6537d3a5a2ce7179ade87d28a4adba276da5af5d +size 19269160 diff --git a/loras/flux/LOL_Emotes_HD_-_FLUX_dev.safetensors b/loras/flux/LOL_Emotes_HD_-_FLUX_dev.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9aa1d57714a820711bdfb5c1b797f45b6eb8df47 --- /dev/null +++ b/loras/flux/LOL_Emotes_HD_-_FLUX_dev.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ca062af78823e1ae01bcbbb3c18188645b245368247ac119d38894bdfda144c +size 19314032 diff --git "a/loras/flux/MLKBC \345\276\256\350\247\202\344\270\226\347\225\214-\347\233\262\347\233\222\346\211\213\345\212\236_V1.safetensors" "b/loras/flux/MLKBC \345\276\256\350\247\202\344\270\226\347\225\214-\347\233\262\347\233\222\346\211\213\345\212\236_V1.safetensors" new file mode 100644 index 0000000000000000000000000000000000000000..639016f87b0e4662e5cc57dcd28a7a016e87c100 --- /dev/null +++ "b/loras/flux/MLKBC \345\276\256\350\247\202\344\270\226\347\225\214-\347\233\262\347\233\222\346\211\213\345\212\236_V1.safetensors" @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef84fb37c5ba95183d77bea8d786c8b2c9a7adea6783b5590f0e1ce13f746de5 +size 306423864 diff --git "a/loras/flux/OB\344\271\220\351\253\230\344\272\272\347\211\251V1.0.safetensors" "b/loras/flux/OB\344\271\220\351\253\230\344\272\272\347\211\251V1.0.safetensors" new file mode 100644 index 0000000000000000000000000000000000000000..92054a70980e204560f69b97bd903be67dbc3214 --- /dev/null +++ "b/loras/flux/OB\344\271\220\351\253\230\344\272\272\347\211\251V1.0.safetensors" @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:81162bf6dfbb86a7a3dfa8186506d47e372b78cd59ee940571e7b22182071f43 +size 38441696 diff --git a/loras/flux/Simpsons_Style_-_FLUX.safetensors b/loras/flux/Simpsons_Style_-_FLUX.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c404254199e3176e5a79193d3503886076ecc38b --- /dev/null +++ b/loras/flux/Simpsons_Style_-_FLUX.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6673370ffca6b31d4e784be8bcd715b449653ee108fa0a9317fb3642f007eb8 +size 19258944 diff --git a/loras/flux/VectorCharm_Flux.safetensors b/loras/flux/VectorCharm_Flux.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..de3bb9673e157feda008c769bc33dd49461d0ab8 --- /dev/null +++ b/loras/flux/VectorCharm_Flux.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58f32c0d528884720a554c01e3ff873779be4c4f49ba446fb2216775a1b45ee4 +size 19584360 diff --git a/loras/flux/arcane-style-2.safetensors b/loras/flux/arcane-style-2.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2c6e5a39d84600d58582b9f923410085c0413528 --- /dev/null +++ 
b/loras/flux/arcane-style-2.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5bd48e61bd50b3f3295df044fe316f8b761020042c4629979aa0538752a8bfab +size 39770448 diff --git a/loras/flux/arcane-style-2.sha256 b/loras/flux/arcane-style-2.sha256 new file mode 100644 index 0000000000000000000000000000000000000000..5255b2e569f825ccdf549b29a4d572faae415857 --- /dev/null +++ b/loras/flux/arcane-style-2.sha256 @@ -0,0 +1 @@ +5bd48e61bd50b3f3295df044fe316f8b761020042c4629979aa0538752a8bfab \ No newline at end of file diff --git a/loras/flux/cartoon_saloon_flux_000007000.safetensors b/loras/flux/cartoon_saloon_flux_000007000.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..739db6eb577f09930fac2b0bd376db5a5edfbfa9 --- /dev/null +++ b/loras/flux/cartoon_saloon_flux_000007000.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2bb7dd6b07e7096fab8f9366e421ee63df6f83a66c4364950211d91d541ec025 +size 171969352 diff --git a/loras/flux/flux-lora-vintage-tarot.safetensors b/loras/flux/flux-lora-vintage-tarot.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d0dbe63bb0dc1bf8b995fb861516097dcf54321f --- /dev/null +++ b/loras/flux/flux-lora-vintage-tarot.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e13351d7b19ef2591c4af6e3994deb89f32ee15246af99ec4ca18cb53c74f9f +size 171969394 diff --git a/loras/flux/hugeboobs.safetensors b/loras/flux/hugeboobs.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fe50caf83a7c460de668f8e2f12b80c16ab29782 --- /dev/null +++ b/loras/flux/hugeboobs.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab39904df33d0ca22ec9be6335759db83e0d159c8fe2b869144e93508aaa3adb +size 19352488 diff --git a/loras/flux/j_dsstyle_flux.safetensors b/loras/flux/j_dsstyle_flux.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ecfc93248c24f84c51d858fedef3ddca0625d925 --- /dev/null +++ b/loras/flux/j_dsstyle_flux.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f15a0d31047672e3442609468dafc3ebe133ab8eebfb3d6842ee1be2f1cc4dd7 +size 153307528 diff --git a/loras/flux/jet_set_radio_flux_000002800.safetensors b/loras/flux/jet_set_radio_flux_000002800.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..20e71c7d89d787b2e0254295a9ab5079b487c664 --- /dev/null +++ b/loras/flux/jet_set_radio_flux_000002800.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b618d1217e6ea0a589c5ac43bd2470f205c9dc954ef6d69471ddb4a7e7e4804d +size 86049797 diff --git a/loras/flux/lowerdecks_style_v1.0.safetensors b/loras/flux/lowerdecks_style_v1.0.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3058fbe747da85d0a299229f65e359dd0ad1d757 --- /dev/null +++ b/loras/flux/lowerdecks_style_v1.0.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:154feddcdb0f002d64ed208d313bac6984b12b612ca887aa35a2d1092cd1b15c +size 171969427 diff --git a/loras/flux/torikun_flux_lora_000003000.safetensors b/loras/flux/torikun_flux_lora_000003000.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b156205b71cd1c3c9cdc0fdf7a6c4379940f8c89 --- /dev/null +++ b/loras/flux/torikun_flux_lora_000003000.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:648977afffdd30f719931fdb30813d9578fe6a6f74f77b0cd57c4fd554d983f1 +size 343805456 diff --git a/loras/flux/wallace-and-gromit_style_v1.safetensors b/loras/flux/wallace-and-gromit_style_v1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4310f5118f62bd0b3c9f05e169728a897af02d35 --- /dev/null +++ b/loras/flux/wallace-and-gromit_style_v1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14511d3b6981ecc6263a41b1d257e22480fc7d8e296ffe31cbf39b3b2de20ae1 +size 19289272 diff --git a/loras/flux/yokawa2-resize.safetensors b/loras/flux/yokawa2-resize.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c96d15c05526c33f47e46855107a08b5ed09d71a --- /dev/null +++ b/loras/flux/yokawa2-resize.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d603a3f8a87e2c40605fe769ae27e2f26debae01cadfd7765ae9382397373375 +size 20132480 diff --git a/loras/illu/3Danime_style-ILXL.safetensors b/loras/illu/3Danime_style-ILXL.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..00c5dcb53e3c6fee2e4ecba76bdaccc591cefbe9 --- /dev/null +++ b/loras/illu/3Danime_style-ILXL.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fdf89269dcb82ba0a407628d0d8b3eb26205e8196b6dfaa155bdd510083b9df0 +size 228463124 diff --git a/loras/illu/748cmSDXL.safetensors b/loras/illu/748cmSDXL.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fca57b79e5b7e9aacbee14c8e8c6e804dd37afe0 --- /dev/null +++ b/loras/illu/748cmSDXL.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85715eb695d5e42cc613ada4b12bb87cbeb1b6ef4f11142bb30774f8f26f261a +size 255025442 diff --git a/loras/illu/ATRex_style-12.safetensors b/loras/illu/ATRex_style-12.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..84a0368e352beaec38a094daf1203ed7d8cc7a78 --- /dev/null +++ b/loras/illu/ATRex_style-12.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a991f67fd19e8c055bdaf24f399db6c0d0975bc8dd83d86627f601ef0bc6b63f +size 114443748 diff --git a/loras/illu/Detective_Conan_Illustrious_SD8.safetensors b/loras/illu/Detective_Conan_Illustrious_SD8.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fede7443ab2277535751e5715ec992e7793342d9 --- /dev/null +++ b/loras/illu/Detective_Conan_Illustrious_SD8.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d31b52414bc1c813f07e9c41cbe711da50b5b2e49732d6ef9a7532193b18293 +size 170609604 diff --git a/loras/illu/DragonBall.safetensors b/loras/illu/DragonBall.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..61d080b93d18279d2f730dc992f992d999a58145 --- /dev/null +++ b/loras/illu/DragonBall.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe90d6014785956a059fcfdd254b67bf6ecbfcb4da2afd41226e2d0497d2def5 +size 202705564 diff --git a/loras/illu/Furry_femboy_style.safetensors b/loras/illu/Furry_femboy_style.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..032e804ca7c5c930bc92e5af633734fee820825d --- /dev/null +++ b/loras/illu/Furry_femboy_style.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d255db3772668da55bc7e4ae7e2f151480abba26c45d67d7100e1e1893ac703 +size 228457972 diff --git a/loras/illu/Grimphantom_-_Illustrious.safetensors 
b/loras/illu/Grimphantom_-_Illustrious.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c524227683e861f6e2dc051466c79612a812d9ce --- /dev/null +++ b/loras/illu/Grimphantom_-_Illustrious.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a3d95735e27ac73e0b9aaad3aeb81735ba3c901ace1d11d2207fe7cb17a1e3f +size 228469252 diff --git a/loras/illu/HerrscherAGGA2025_Chibi-IL_V1.safetensors b/loras/illu/HerrscherAGGA2025_Chibi-IL_V1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..da91f1fe9a4186730177c997ab6826611a3c2821 --- /dev/null +++ b/loras/illu/HerrscherAGGA2025_Chibi-IL_V1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:093a1cd059a79f3ba275991eb747aa20d43899575b753ffea2f00f25c9f61a32 +size 256056360 diff --git a/loras/illu/HerrscherAGGA2025_Chibi-IL_V1.sha256 b/loras/illu/HerrscherAGGA2025_Chibi-IL_V1.sha256 new file mode 100644 index 0000000000000000000000000000000000000000..96cabfc1ebceaaf82fcf96f2f690cf32a7222b70 --- /dev/null +++ b/loras/illu/HerrscherAGGA2025_Chibi-IL_V1.sha256 @@ -0,0 +1 @@ +093a1cd059a79f3ba275991eb747aa20d43899575b753ffea2f00f25c9f61a32 \ No newline at end of file diff --git a/loras/illu/Illustrious_Fujimoto_Manga_Style.safetensors b/loras/illu/Illustrious_Fujimoto_Manga_Style.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a686869993bdc6c6189b8a196b0be87b29e9bbb0 --- /dev/null +++ b/loras/illu/Illustrious_Fujimoto_Manga_Style.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:867292e5857652cf5aefce90889ca0ac7266d248bcdc00dd71a6379e6955cd9e +size 57446812 diff --git a/loras/illu/Illustrious_Fujimoto_Manga_Style.sha256 b/loras/illu/Illustrious_Fujimoto_Manga_Style.sha256 new file mode 100644 index 0000000000000000000000000000000000000000..bc02b4519d0cd9be3f7c03b99f1c819650118f00 --- /dev/null +++ b/loras/illu/Illustrious_Fujimoto_Manga_Style.sha256 @@ -0,0 +1 @@ +867292e5857652cf5aefce90889ca0ac7266d248bcdc00dd71a6379e6955cd9e \ No newline at end of file diff --git a/loras/illu/Old Fashioned Celluloid_illustriousXL.safetensors b/loras/illu/Old Fashioned Celluloid_illustriousXL.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9d52a7d765eba10abe474fafdeef4ceb144d30ab --- /dev/null +++ b/loras/illu/Old Fashioned Celluloid_illustriousXL.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d0afd75faff7e507930a69d6580f53175bfa0bd05174c22deefb8cd20e1a9269 +size 57426148 diff --git a/loras/illu/Pokemon_Sun__Moon IL.safetensors b/loras/illu/Pokemon_Sun__Moon IL.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f16243e025f5f8d180c810cefbf571405f2c827b --- /dev/null +++ b/loras/illu/Pokemon_Sun__Moon IL.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a48c9000c842058b4f3ce7259c587b8a2c870c287a14c315d93a033213d79a65 +size 228469844 diff --git a/loras/illu/RealisticSkin_PornMaster-Pro_v1-000020.safetensors b/loras/illu/RealisticSkin_PornMaster-Pro_v1-000020.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b5858a13b918e25bdfc313494503878f3f75cbce --- /dev/null +++ b/loras/illu/RealisticSkin_PornMaster-Pro_v1-000020.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:729fee8c79c55f19be9a8edb009cbe1d4ecef5bdffc385369d6f174509cf546a +size 85424716 diff --git 
a/loras/illu/RetroToonXL_Style-10-IL.safetensors b/loras/illu/RetroToonXL_Style-10-IL.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3b3d7a562929cc9a5790ce8a471cb10dddea3c0f --- /dev/null +++ b/loras/illu/RetroToonXL_Style-10-IL.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:324965b61f03b1915afa5c616fdfdc486ae6a72e591868b5168be99c34964c60 +size 57432532 diff --git a/loras/illu/WindWaker_Style_IXL.safetensors b/loras/illu/WindWaker_Style_IXL.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..167938235a0210e96572ef8b455869627e42f9fa --- /dev/null +++ b/loras/illu/WindWaker_Style_IXL.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e0655e27e5635e91d6f9edbc74198ab13a8bb1c4c95fd2ed0e361d422199711 +size 228462660 diff --git a/loras/illu/WindWaker_Style_IXL.safetensors.rgthree-info.json b/loras/illu/WindWaker_Style_IXL.safetensors.rgthree-info.json new file mode 100644 index 0000000000000000000000000000000000000000..26f77ac6983f98cce9e773a184547f0590a3b4ac --- /dev/null +++ b/loras/illu/WindWaker_Style_IXL.safetensors.rgthree-info.json @@ -0,0 +1,2763 @@ +{ + "file": "illu/WindWaker_Style_IXL.safetensors", + "path": "/workspace/ComfyUI/models/loras/illu/WindWaker_Style_IXL.safetensors", + "images": [ + { + "url": "https://image.civitai.com/xG1nkqKTMzGDvpLrqFT7WA/b9a71a85-c130-466c-9512-3c9e4b94daf2/width=832/51277231.jpeg", + "civitaiUrl": "https://civitai.com/images/51277231", + "width": 832, + "height": 1216, + "type": "image", + "nsfwLevel": 1, + "seed": 98679726, + "positive": "masterpiece, best quality, solo, 1girl, solo, blue eyes, blonde hair, hat, weapon, pointy ears, sword, chibi, 3d, instrument, shield , smile, looking at viewer,", + "negative": "(blurry), (lowres:1.2), (worst quality:1.4), (low quality:1.4), multiple views, jpeg artifacts, signature, watermark, text, logo, artist name,", + "steps": 30, + "sampler": "Euler a", + "cfg": 7, + "model": null, + "resources": [] + }, + { + "url": "https://image.civitai.com/xG1nkqKTMzGDvpLrqFT7WA/7d335619-6459-4f74-b014-90a41f971889/width=832/51163882.jpeg", + "civitaiUrl": "https://civitai.com/images/51163882", + "width": 832, + "height": 1216, + "type": "image", + "nsfwLevel": 1, + "seed": 252630410834070, + "positive": "masterpiece, best quality, solo, 1girl, solo, pink hair, short twintails, white dress, cowboy shot, 3d, , ,", + "negative": "(blurry), (lowres:1.2), (worst quality:1.4), (low quality:1.4), multiple views, jpeg artifacts, signature, watermark, text, logo, artist name,", + "steps": 30, + "sampler": "DPM++ 2M Karras", + "cfg": 7, + "model": null, + "resources": [ + { + "name": "WindWaker_Style_IXL", + "type": "lora", + "weight": 1 + } + ] + }, + { + "url": "https://image.civitai.com/xG1nkqKTMzGDvpLrqFT7WA/dec826b5-fd01-406b-b881-b9fe5a6abbde/width=832/51163886.jpeg", + "civitaiUrl": "https://civitai.com/images/51163886", + "width": 832, + "height": 1216, + "type": "image", + "nsfwLevel": 1, + "seed": 272033102095518, + "positive": "masterpiece, best quality, solo, 1girl, solo, long hair, smile, blue eyes, gloves, jewelry, one eye closed, green hair, pointy ears, necklace, makeup, tiara, ;) , ,", + "negative": "(blurry), (lowres:1.2), (worst quality:1.4), (low quality:1.4), multiple views, jpeg artifacts, signature, watermark, text, logo, artist name,", + "steps": 30, + "sampler": "DPM++ 2M Karras", + "cfg": 7, + "model": null, + "resources": [ + { + "name": "WindWaker_Style_IXL", + "type": 
"lora", + "weight": 1 + } + ] + }, + { + "url": "https://image.civitai.com/xG1nkqKTMzGDvpLrqFT7WA/9ada0404-1402-482d-96aa-183da60661aa/width=832/51163887.jpeg", + "civitaiUrl": "https://civitai.com/images/51163887", + "width": 832, + "height": 1216, + "type": "image", + "nsfwLevel": 1, + "seed": 991930792475181, + "positive": "masterpiece, best quality, solo, 1girl, solo, blue eyes, blonde hair, 1boy, hat, weapon, male focus, pointy ears, sword, dark skin, chibi, instrument, shield , ,", + "negative": "(blurry), (lowres:1.2), (worst quality:1.4), (low quality:1.4), multiple views, jpeg artifacts, signature, watermark, text, logo, artist name,", + "steps": 30, + "sampler": "DPM++ 2M Karras", + "cfg": 7, + "model": null, + "resources": [ + { + "name": "WindWaker_Style_IXL", + "type": "lora", + "weight": 1 + } + ] + }, + { + "url": "https://image.civitai.com/xG1nkqKTMzGDvpLrqFT7WA/50c31f3d-3c04-4bb7-8609-34956da40d61/width=832/51170537.jpeg", + "civitaiUrl": "https://civitai.com/images/51170537", + "width": 832, + "height": 1216, + "type": "image", + "nsfwLevel": 1, + "seed": 31337, + "positive": "masterpiece, best quality, solo, 1girl, solo, chibi, 3d, \nzzMajora, spikes, yellow sclera, solo, 1boy, horns, glowing eyes,\nlooking at viewer, ,", + "negative": "(blurry), (lowres:1.2), (worst quality:1.4), (low quality:1.4), multiple views, jpeg artifacts, signature, watermark, text, logo, artist name,", + "steps": 30, + "sampler": "Euler a", + "cfg": 7, + "model": null, + "resources": [] + }, + { + "url": "https://image.civitai.com/xG1nkqKTMzGDvpLrqFT7WA/833e0c0d-8b3a-4393-8c44-b80cea0e6975/width=832/51169122.jpeg", + "civitaiUrl": "https://civitai.com/images/51169122", + "width": 832, + "height": 1216, + "type": "image", + "nsfwLevel": 1, + "seed": 1729355716, + "positive": "masterpiece, best quality, solo, 1girl, solo, blue eyes, blonde hair, hat, weapon, pointy ears, sword, chibi, shield , dynamic pose, sword slash, motion lines,", + "negative": "(blurry), (lowres:1.2), (worst quality:1.4), (low quality:1.4), multiple views, jpeg artifacts, signature, watermark, text, logo, artist name,", + "steps": 30, + "sampler": "DPM++ 2M Karras", + "cfg": 7, + "model": null, + "resources": [] + }, + { + "url": "https://image.civitai.com/xG1nkqKTMzGDvpLrqFT7WA/06154f27-96c8-487d-96f8-c627866842ad/width=832/51169120.jpeg", + "civitaiUrl": "https://civitai.com/images/51169120", + "width": 832, + "height": 1216, + "type": "image", + "nsfwLevel": 1, + "seed": 952291488, + "positive": "masterpiece, best quality, solo, 1girl, solo, 3d, chibi,\n Princess Peach, pink dress, blonde hair, blue eyes, long hair, crown, gem, gloves, puffy sleeves, short sleeves, white gloves, solo, smiling, looking at viewer, cowboy shot,", + "negative": "(blurry), (lowres:1.2), (worst quality:1.4), (low quality:1.4), multiple views, jpeg artifacts, signature, watermark, text, logo, artist name,", + "steps": 30, + "sampler": "DPM++ 2M Karras", + "cfg": 7, + "model": null, + "resources": [] + }, + { + "url": "https://image.civitai.com/xG1nkqKTMzGDvpLrqFT7WA/0fe7b1a9-914f-47aa-906b-83e572f767a3/width=832/51163876.jpeg", + "civitaiUrl": "https://civitai.com/images/51163876", + "width": 832, + "height": 1216, + "type": "image", + "nsfwLevel": 1, + "seed": 31337, + "positive": "masterpiece, best quality, solo, 1girl, solo, long hair, looking at viewer, smile, open mouth, blue eyes, dress, jewelry, red hair, pointy ears, white dress, bracelet, neckerchief, empty eyes, yellow neckerchief, triforce, , ,", + "negative": "(blurry), 
(lowres:1.2), (worst quality:1.4), (low quality:1.4), multiple views, jpeg artifacts, signature, watermark, text, logo, artist name,", + "steps": 30, + "sampler": "DPM++ 2M Karras", + "cfg": 7, + "model": null, + "resources": [ + { + "name": "WindWaker_Style_IXL", + "type": "lora", + "weight": 1 + } + ] + }, + { + "url": "https://image.civitai.com/xG1nkqKTMzGDvpLrqFT7WA/3147ea21-bc7a-4a84-9d77-93f62c34e274/width=832/51169254.jpeg", + "civitaiUrl": "https://civitai.com/images/51169254", + "width": 832, + "height": 1216, + "type": "image", + "nsfwLevel": 1, + "seed": 438227750, + "positive": "masterpiece, best quality, solo, 1girl, solo, chibi, 3d, \nzzHilda, red eyes, purple hair, long hair, pointy ears, tiara, white gloves, dress, elbow gloves, jewelry, makeup, earrings, purple tabard, triforce, shoulder armor, tiara, \nlooking at viewer, smile,", + "negative": "(blurry), (lowres:1.2), (worst quality:1.4), (low quality:1.4), multiple views, jpeg artifacts, signature, watermark, text, logo, artist name,", + "steps": 30, + "sampler": "Euler a", + "cfg": 5, + "model": null, + "resources": [] + }, + { + "url": "https://image.civitai.com/xG1nkqKTMzGDvpLrqFT7WA/bb4cbd77-e6dc-4525-96b9-23114ce381bc/width=832/51163885.jpeg", + "civitaiUrl": "https://civitai.com/images/51163885", + "width": 832, + "height": 1248, + "type": "image", + "nsfwLevel": 1, + "seed": 1332683, + "positive": "masterpiece, best quality, BREAK, 1girl, solo, blonde hair, wavy hair, angel, angel wings, halo, smile, sitting, forest, white dress,smile, looking at viewer, ,", + "negative": "(blurry), (lowres:1.2), (worst quality:1.4), (low quality:1.4), multiple views, jpeg artifacts, signature, watermark, text, logo, artist name,", + "steps": 30, + "sampler": "DPM++ 2M Karras", + "cfg": 7, + "model": null, + "resources": [ + { + "name": "WindWaker_Style_IXL", + "type": "lora", + "weight": 1 + } + ] + } + ], + "raw": { + "metadata": { + "ss_output_name": "WindWaker_Style_IXL", + "ss_base_model_version": "sdxl_base_v1-0", + "ss_clip_skip": "1", + "ss_learning_rate": "0.0001", + "ss_network_dim": "32", + "ss_network_dropout": "None", + "ss_multires_noise_iterations": "None", + "ss_loss_type": "l2", + "modelspec.prediction_type": "epsilon", + "ss_datasets": "[{\"is_dreambooth\": true, \"batch_size_per_device\": 1, \"num_train_images\": 462, \"num_reg_images\": 0, \"resolution\": [768, 768], \"enable_bucket\": true, \"min_bucket_reso\": 256, \"max_bucket_reso\": 4096, \"tag_frequency\": {\"WindWaker_Style_IXL\": {\"multiple girls\": 4, \"blonde hair\": 68, \"brown hair\": 14, \"1boy\": 80, \"dress\": 21, \"3girls\": 2, \"bird\": 7, \"penguin\": 1, \"looking at viewer\": 24, \"smile\": 52, \"blue eyes\": 49, \"twintails\": 9, \"pointy ears\": 98, \"black eyes\": 12, \"parody\": 5, \"standing\": 44, \"pokemon (creature)\": 4, \"fire\": 2, \"molten rock\": 2, \"solo\": 109, \"no humans\": 21, \"crown\": 2, \"pillar\": 3, \"throne\": 1, \"stained glass\": 1, \"hat\": 49, \"closed eyes\": 7, \"upper body\": 11, \"male focus\": 65, \"instrument\": 5, \"green headwear\": 4, \"playing instrument\": 2, \"violin\": 1, \"1girl\": 64, \"full body\": 17, \"black skin\": 2, \"cape\": 8, \"from side\": 2, \"night\": 8, \"holding\": 29, \"mask\": 1, \"blue background\": 1, \"music\": 1, \"holding instrument\": 1, \"outdoors\": 38, \"sky\": 39, \"cloud\": 36, \"water\": 15, \"glowing\": 8, \"ocean\": 20, \"night sky\": 3, \"scenery\": 12, \"horizon\": 2, \"multiple boys\": 9, \"2boys\": 7, \"from behind\": 8, \"star (sky)\": 3, \"starry sky\": 2, 
\"barrel\": 1, \"day\": 33, \"blue sky\": 17, \"red eyes\": 28, \"weapon\": 58, \"horns\": 3, \"belt\": 18, \"sword\": 46, \"indoors\": 10, \"holding weapon\": 19, \"armor\": 3, \"holding sword\": 16, \"helmet\": 1, \"shoulder armor\": 1, \"glowing eyes\": 3, \"walking\": 2, \"pauldrons\": 1, \"shield\": 38, \"full armor\": 1, \"tree\": 20, \"windmill\": 1, \"watercraft\": 8, \"ship\": 2, \"boat\": 4, \"open mouth\": 19, \"shirt\": 13, \"teeth\": 10, \"sleeveless\": 8, \"arm up\": 1, \"blush stickers\": 2, \"grass\": 14, \"short hair\": 9, \"one eye closed\": 4, \"green hair\": 4, \"neckerchief\": 10, \"crossed arms\": 1, \"red neckerchief\": 1, \"tunic\": 13, \"holding shield\": 3, \"vest\": 2, \"master sword\": 6, \"desert\": 1, \"fantasy\": 1, \"building\": 6, \"window\": 1, \"shadow\": 8, \"sunlight\": 1, \"plant\": 5, \"stairs\": 3, \"shade\": 1, \"beach\": 2, \"landscape\": 1, \"sign\": 3, \"flag\": 1, \"ruins\": 1, \"dark skin\": 30, \"door\": 1, \"carpet\": 1, \":3\": 2, \"own hands together\": 1, \"beak\": 2, \"chain\": 2, \"triforce\": 12, \"left-handed\": 8, \"child\": 1, \"male child\": 1, \"battle\": 1, \"long hair\": 27, \"gloves\": 15, \"lying\": 4, \"parted lips\": 1, \"floating hair\": 1, \"on side\": 3, \"sleeping\": 4, \"tiara\": 3, \"pink dress\": 1, \"closed mouth\": 12, \"on back\": 1, \"white hair\": 1, \"facial hair\": 1, \"beard\": 1, \"serious\": 1, \"yellow eyes\": 7, \"red hair\": 30, \"colored skin\": 4, \"fangs\": 2, \"thick eyebrows\": 1, \"green skin\": 1, \"tusks\": 1, \"elbow gloves\": 1, \"jewelry\": 13, \"necklace\": 1, \"makeup\": 1, \";)\": 1, \"long sleeves\": 6, \"green eyes\": 5, \"braid\": 4, \"gun\": 1, \"aiming\": 2, \"flower\": 2, \"bracelet\": 6, \"dark-skinned female\": 9, \"blue dress\": 1, \"sandals\": 5, \"weapon on back\": 2, \"earrings\": 5, \"dark-skinned male\": 4, \"fairy\": 2, \"freckles\": 1, \"purple eyes\": 1, \"boots\": 11, \"chibi\": 14, \"cosplay\": 4, \"headband\": 1, \"furry\": 2, \"1other\": 4, \"gameplay mechanics\": 5, \"yellow fur\": 1, \"stadium\": 1, \"lucario\": 1, \"animal ears\": 4, \"furry male\": 1, \"short sleeves\": 7, \"scarf\": 4, \"expressionless\": 1, \"looking up\": 1, \"red shirt\": 3, \"meme\": 1, \"sailor moon redraw challenge (meme)\": 1, \"bandana\": 1, \"palm tree\": 3, \"fake screenshot\": 2, \"turban\": 2, \"health bar\": 2, \"knife\": 1, \"dagger\": 1, \"slit pupils\": 3, \"androgynous\": 2, \"colored sclera\": 2, \"blue scarf\": 1, \"animal hat\": 3, \"yellow sclera\": 2, \"cat hat\": 3, \"cat\": 1, \"cliff\": 1, \"cat ears\": 1, \"witch hat\": 1, \":<\": 1, \":d\": 5, \"white dress\": 2, \"empty eyes\": 3, \"yellow neckerchief\": 3, \"tabard\": 1, \"skirt\": 3, \"shovel\": 1, \"profile\": 3, \"cow\": 2, \"simple background\": 2, \"grey background\": 1, \"black background\": 1, \"brown footwear\": 3, \"staff\": 1, \"sunglasses\": 2, \"hawaiian shirt\": 1, \"polearm\": 2, \"spear\": 1, \"ponytail\": 1, \"pants\": 5, \"pirate\": 1, \"ahoge\": 4, \"blue shirt\": 4, \"t-shirt\": 6, \"waving\": 2, \"pikachu\": 1, \"sheath\": 3, \"facing away\": 1, \"flying\": 1, \"chicken\": 1, \"thighhighs\": 1, \"bow\": 1, \":t\": 1, \"food\": 1, \"fruit\": 1, \"bow (weapon)\": 1, \"arrow (projectile)\": 1, \"holding arrow\": 1, \"wand\": 1, \"hammer\": 1, \"rabbit ears\": 2, \"hair over one eye\": 4, \"grin\": 6, \"sharp teeth\": 5, \"red footwear\": 8, \"forehead jewel\": 1, \"blue hair\": 1, \"bodysuit\": 1, \"outstretched arm\": 1, \"purple hair\": 2, \"fusion\": 1, \"among us\": 1, \"jacket\": 1, \"spiked hair\": 
1, \"pink flower\": 1, \"red gloves\": 2, \"white footwear\": 1, \"skull\": 13, \"evil smile\": 3, \"dark persona\": 1, \"grey footwear\": 1, \"grey skin\": 1, \"blood\": 1, \"arrow (symbol)\": 2, \"pantyhose\": 4, \"english text\": 1, \"torch\": 1, \"cave\": 1, \"dual wielding\": 1, \"blue skin\": 2, \"energy sword\": 1, \"fang\": 3, \"black dress\": 3, \"wristband\": 3, \"tentacle hair\": 3, \"octarian\": 3, \"octoling\": 3, \"octoling girl\": 3, \"twin braids\": 3, \"forehead\": 2, \"bare shoulders\": 2, \"black footwear\": 2, \"sleeveless dress\": 2, \"bangs\": 3, \"female child\": 2, \"surcoat\": 1, \"animal\": 1, \"monster\": 1, \"creature\": 1, \"black gloves\": 1, \"green background\": 1, \"yordle\": 1, \"rain\": 1, \"dark\": 1, \"heads-up display\": 1, \"diluc (genshin impact)\": 1, \"2girls\": 2, \"hair bun\": 1, \"double bun\": 1, \"slime (creature)\": 1, \"grey hair\": 1, \"broom\": 2, \"broom riding\": 1, \"black hair\": 5, \"hairband\": 5, \"red dress\": 5, \"big hair\": 4, \"holding broom\": 1, \"orange hairband\": 4, \"shoes\": 1, \"trident\": 1, \"orange neckerchief\": 1, \"book\": 1, \"fence\": 1, \"wall\": 1, \"stone wall\": 1, \"camera\": 1, \"holding camera\": 1, \"video camera\": 1}}, \"bucket_info\": {\"buckets\": {\"0\": {\"resolution\": [512, 1088], \"count\": 9}, \"1\": {\"resolution\": [576, 960], \"count\": 54}, \"2\": {\"resolution\": [576, 1024], \"count\": 18}, \"3\": {\"resolution\": [640, 896], \"count\": 60}, \"4\": {\"resolution\": [704, 832], \"count\": 30}, \"5\": {\"resolution\": [768, 768], \"count\": 78}, \"6\": {\"resolution\": [832, 704], \"count\": 48}, \"7\": {\"resolution\": [896, 640], \"count\": 54}, \"8\": {\"resolution\": [960, 576], \"count\": 24}, \"9\": {\"resolution\": [1024, 576], \"count\": 78}, \"10\": {\"resolution\": [1088, 512], \"count\": 9}}, \"mean_img_ar_error\": 0.031399245684279216}, \"subsets\": [{\"img_count\": 154, \"num_repeats\": 3, \"color_aug\": false, \"flip_aug\": false, \"random_crop\": false, \"shuffle_caption\": true, \"keep_tokens\": 1, \"keep_tokens_separator\": \"\", \"secondary_separator\": null, \"enable_wildcard\": false, \"caption_prefix\": null, \"caption_suffix\": null, \"image_dir\": \"WindWaker_Style_IXL\", \"class_tokens\": null, \"is_reg\": false}]}]", + "modelspec.date": "2025-01-12T04:43:45", + "ss_seed": "42", + "ss_network_module": "networks.lora", + "modelspec.sai_model_spec": "1.0.0", + "ss_mixed_precision": "fp16", + "sshs_model_hash": "cb3949362934ce652a2d242c2c4ebf26f720f1ec05b8b4a030b268548065080c", + "modelspec.title": "WindWaker_Style_IXL", + "ss_lowram": "False", + "ss_training_comment": "Lora created by https://civitai.com/user/CitronLegacy", + "ss_cache_latents": "True", + "ss_debiased_estimation": "False", + "ss_steps": "4620", + "ss_full_fp16": "False", + "ss_multires_noise_discount": "0.3", + "ss_min_snr_gamma": "None", + "modelspec.architecture": "stable-diffusion-xl-v1-base/lora", + "ss_caption_dropout_rate": "0.0", + "ss_optimizer": "bitsandbytes.optim.adamw.AdamW8bit(weight_decay=0.1,betas=[0.9, 0.99])", + "ss_training_started_at": "1736647818.3101928", + "ss_session_id": "148266488", + "ss_network_alpha": "32", + "ss_tag_frequency": { + "WindWaker_Style_IXL": { + "multiple girls": 4, + "blonde hair": 68, + "brown hair": 14, + "1boy": 80, + "dress": 21, + "3girls": 2, + "bird": 7, + "penguin": 1, + "looking at viewer": 24, + "smile": 52, + "blue eyes": 49, + "twintails": 9, + "pointy ears": 98, + "black eyes": 12, + "parody": 5, + "standing": 44, + "pokemon (creature)": 4, + 
"fire": 2, + "molten rock": 2, + "solo": 109, + "no humans": 21, + "crown": 2, + "pillar": 3, + "throne": 1, + "stained glass": 1, + "hat": 49, + "closed eyes": 7, + "upper body": 11, + "male focus": 65, + "instrument": 5, + "green headwear": 4, + "playing instrument": 2, + "violin": 1, + "1girl": 64, + "full body": 17, + "black skin": 2, + "cape": 8, + "from side": 2, + "night": 8, + "holding": 29, + "mask": 1, + "blue background": 1, + "music": 1, + "holding instrument": 1, + "outdoors": 38, + "sky": 39, + "cloud": 36, + "water": 15, + "glowing": 8, + "ocean": 20, + "night sky": 3, + "scenery": 12, + "horizon": 2, + "multiple boys": 9, + "2boys": 7, + "from behind": 8, + "star (sky)": 3, + "starry sky": 2, + "barrel": 1, + "day": 33, + "blue sky": 17, + "red eyes": 28, + "weapon": 58, + "horns": 3, + "belt": 18, + "sword": 46, + "indoors": 10, + "holding weapon": 19, + "armor": 3, + "holding sword": 16, + "helmet": 1, + "shoulder armor": 1, + "glowing eyes": 3, + "walking": 2, + "pauldrons": 1, + "shield": 38, + "full armor": 1, + "tree": 20, + "windmill": 1, + "watercraft": 8, + "ship": 2, + "boat": 4, + "open mouth": 19, + "shirt": 13, + "teeth": 10, + "sleeveless": 8, + "arm up": 1, + "blush stickers": 2, + "grass": 14, + "short hair": 9, + "one eye closed": 4, + "green hair": 4, + "neckerchief": 10, + "crossed arms": 1, + "red neckerchief": 1, + "tunic": 13, + "holding shield": 3, + "vest": 2, + "master sword": 6, + "desert": 1, + "fantasy": 1, + "building": 6, + "window": 1, + "shadow": 8, + "sunlight": 1, + "plant": 5, + "stairs": 3, + "shade": 1, + "beach": 2, + "landscape": 1, + "sign": 3, + "flag": 1, + "ruins": 1, + "dark skin": 30, + "door": 1, + "carpet": 1, + ":3": 2, + "own hands together": 1, + "beak": 2, + "chain": 2, + "triforce": 12, + "left-handed": 8, + "child": 1, + "male child": 1, + "battle": 1, + "long hair": 27, + "gloves": 15, + "lying": 4, + "parted lips": 1, + "floating hair": 1, + "on side": 3, + "sleeping": 4, + "tiara": 3, + "pink dress": 1, + "closed mouth": 12, + "on back": 1, + "white hair": 1, + "facial hair": 1, + "beard": 1, + "serious": 1, + "yellow eyes": 7, + "red hair": 30, + "colored skin": 4, + "fangs": 2, + "thick eyebrows": 1, + "green skin": 1, + "tusks": 1, + "elbow gloves": 1, + "jewelry": 13, + "necklace": 1, + "makeup": 1, + ";)": 1, + "long sleeves": 6, + "green eyes": 5, + "braid": 4, + "gun": 1, + "aiming": 2, + "flower": 2, + "bracelet": 6, + "dark-skinned female": 9, + "blue dress": 1, + "sandals": 5, + "weapon on back": 2, + "earrings": 5, + "dark-skinned male": 4, + "fairy": 2, + "freckles": 1, + "purple eyes": 1, + "boots": 11, + "chibi": 14, + "cosplay": 4, + "headband": 1, + "furry": 2, + "1other": 4, + "gameplay mechanics": 5, + "yellow fur": 1, + "stadium": 1, + "lucario": 1, + "animal ears": 4, + "furry male": 1, + "short sleeves": 7, + "scarf": 4, + "expressionless": 1, + "looking up": 1, + "red shirt": 3, + "meme": 1, + "sailor moon redraw challenge (meme)": 1, + "bandana": 1, + "palm tree": 3, + "fake screenshot": 2, + "turban": 2, + "health bar": 2, + "knife": 1, + "dagger": 1, + "slit pupils": 3, + "androgynous": 2, + "colored sclera": 2, + "blue scarf": 1, + "animal hat": 3, + "yellow sclera": 2, + "cat hat": 3, + "cat": 1, + "cliff": 1, + "cat ears": 1, + "witch hat": 1, + ":<": 1, + ":d": 5, + "white dress": 2, + "empty eyes": 3, + "yellow neckerchief": 3, + "tabard": 1, + "skirt": 3, + "shovel": 1, + "profile": 3, + "cow": 2, + "simple background": 2, + "grey background": 1, + "black background": 1, + "brown 
footwear": 3, + "staff": 1, + "sunglasses": 2, + "hawaiian shirt": 1, + "polearm": 2, + "spear": 1, + "ponytail": 1, + "pants": 5, + "pirate": 1, + "ahoge": 4, + "blue shirt": 4, + "t-shirt": 6, + "waving": 2, + "pikachu": 1, + "sheath": 3, + "facing away": 1, + "flying": 1, + "chicken": 1, + "thighhighs": 1, + "bow": 1, + ":t": 1, + "food": 1, + "fruit": 1, + "bow (weapon)": 1, + "arrow (projectile)": 1, + "holding arrow": 1, + "wand": 1, + "hammer": 1, + "rabbit ears": 2, + "hair over one eye": 4, + "grin": 6, + "sharp teeth": 5, + "red footwear": 8, + "forehead jewel": 1, + "blue hair": 1, + "bodysuit": 1, + "outstretched arm": 1, + "purple hair": 2, + "fusion": 1, + "among us": 1, + "jacket": 1, + "spiked hair": 1, + "pink flower": 1, + "red gloves": 2, + "white footwear": 1, + "skull": 13, + "evil smile": 3, + "dark persona": 1, + "grey footwear": 1, + "grey skin": 1, + "blood": 1, + "arrow (symbol)": 2, + "pantyhose": 4, + "english text": 1, + "torch": 1, + "cave": 1, + "dual wielding": 1, + "blue skin": 2, + "energy sword": 1, + "fang": 3, + "black dress": 3, + "wristband": 3, + "tentacle hair": 3, + "octarian": 3, + "octoling": 3, + "octoling girl": 3, + "twin braids": 3, + "forehead": 2, + "bare shoulders": 2, + "black footwear": 2, + "sleeveless dress": 2, + "bangs": 3, + "female child": 2, + "surcoat": 1, + "animal": 1, + "monster": 1, + "creature": 1, + "black gloves": 1, + "green background": 1, + "yordle": 1, + "rain": 1, + "dark": 1, + "heads-up display": 1, + "diluc (genshin impact)": 1, + "2girls": 2, + "hair bun": 1, + "double bun": 1, + "slime (creature)": 1, + "grey hair": 1, + "broom": 2, + "broom riding": 1, + "black hair": 5, + "hairband": 5, + "red dress": 5, + "big hair": 4, + "holding broom": 1, + "orange hairband": 4, + "shoes": 1, + "trident": 1, + "orange neckerchief": 1, + "book": 1, + "fence": 1, + "wall": 1, + "stone wall": 1, + "camera": 1, + "holding camera": 1, + "video camera": 1 + } + }, + "modelspec.encoder_layer": "1", + "sshs_legacy_hash": "bf29079a", + "ss_epoch": "10", + "ss_sd_model_name": "OnomaAIResearch/Illustrious-xl-early-release-v0", + "ss_num_train_images": "462", + "ss_num_epochs": "10", + "ss_caption_tag_dropout_rate": "0.0", + "modelspec.implementation": "https://github.com/Stability-AI/generative-models", + "ss_max_train_steps": "4620", + "ss_adaptive_noise_scale": "None", + "ss_huber_schedule": "snr", + "ss_lr_scheduler": "cosine_with_restarts", + "modelspec.resolution": "1024x1024", + "ss_zero_terminal_snr": "False", + "ss_ip_noise_gamma": "None", + "ss_caption_dropout_every_n_epochs": "0", + "ss_sd_scripts_commit_hash": "(unknown)", + "ss_text_encoder_lr": "0.0001", + "ss_gradient_checkpointing": "False", + "ss_lr_warmup_steps": "78", + "ss_max_token_length": "75", + "ss_dataset_dirs": { + "WindWaker_Style_IXL": { + "n_repeats": 3, + "img_count": 154 + } + }, + "ss_max_grad_norm": "1", + "ss_face_crop_aug_range": "None", + "ss_gradient_accumulation_steps": "1", + "ss_num_reg_images": "0", + "ss_v2": "False", + "ss_num_batches_per_epoch": "462", + "ss_noise_offset": "0.03", + "ss_prior_loss_weight": "1", + "ss_scale_weight_norms": "None", + "ss_noise_offset_random_strength": "False", + "ss_ip_noise_gamma_random_strength": "False", + "ss_huber_c": "0.1", + "ss_training_finished_at": "1736657025.676514", + "ss_unet_lr": "0.0001", + "_sha256": "6e0655e27e5635e91d6f9edbc74198ab13a8bb1c4c95fd2ed0e361d422199711" + }, + "civitai": { + "id": 1273708, + "modelId": 208265, + "name": "Illustrious", + "createdAt": "2025-01-12T14:32:52.185Z", + 
"updatedAt": "2025-03-22T22:32:54.409Z", + "status": "Published", + "publishedAt": "2025-01-12T15:25:14.123Z", + "trainedWords": [ + "3D, ", + "Chibi, " + ], + "trainingStatus": null, + "trainingDetails": null, + "baseModel": "Illustrious", + "baseModelType": null, + "earlyAccessEndsAt": null, + "earlyAccessConfig": null, + "description": "

3d,

<lora:WindWaker_Style_IXL:1.0>,

", + "uploadType": "Created", + "usageControl": "Download", + "air": "urn:air:sdxl:lora:civitai:208265@1273708", + "stats": { + "downloadCount": 503, + "ratingCount": 0, + "rating": 0, + "thumbsUpCount": 104 + }, + "model": { + "name": "Style of the Winds (The Legend Of Zelda: Wind Waker) [Illustrious & NoobAI & SD1.5]", + "type": "LORA", + "nsfw": false, + "poi": false + }, + "files": [ + { + "id": 1178599, + "sizeKB": 223108.06640625, + "name": "WindWaker_Style_IXL.safetensors", + "type": "Model", + "pickleScanResult": "Success", + "pickleScanMessage": "No Pickle imports", + "virusScanResult": "Success", + "virusScanMessage": null, + "scannedAt": "2025-01-12T14:41:33.135Z", + "metadata": { + "format": "SafeTensor", + "size": null, + "fp": null + }, + "hashes": { + "AutoV1": "87417B67", + "AutoV2": "6E0655E27E", + "SHA256": "6E0655E27E5635E91D6F9EDBC74198AB13A8BB1C4C95FD2ED0E361D422199711", + "CRC32": "7B77E249", + "BLAKE3": "AC1EABA893D66C855957B5D1FF036126D872662BFE00BD5D37508FABF8953EC9", + "AutoV3": "CB3949362934" + }, + "primary": true, + "downloadUrl": "https://civitai.com/api/download/models/1273708" + } + ], + "images": [ + { + "url": "https://image.civitai.com/xG1nkqKTMzGDvpLrqFT7WA/b9a71a85-c130-466c-9512-3c9e4b94daf2/width=832/51277231.jpeg", + "nsfwLevel": 1, + "width": 832, + "height": 1216, + "hash": "UOJj;f9w~VWm4.M{9cWV9HIq9bs,E1~Bw|t6", + "type": "image", + "metadata": { + "hash": "UOJj;f9w~VWm4.M{9cWV9HIq9bs,E1~Bw|t6", + "size": 147226, + "width": 832, + "height": 1216 + }, + "meta": { + "Size": "832x1216", + "seed": 98679726, + "extra": { + "remixOfId": 51163887 + }, + "steps": 30, + "prompt": "masterpiece, best quality, solo, 1girl, solo, blue eyes, blonde hair, hat, weapon, pointy ears, sword, chibi, 3d, instrument, shield , smile, looking at viewer,", + "sampler": "Euler a", + "cfgScale": 7, + "clipSkip": 2, + "resources": [], + "Created Date": "2025-01-13T0256:00.7778409Z", + "negativePrompt": "(blurry), (lowres:1.2), (worst quality:1.4), (low quality:1.4), multiple views, jpeg artifacts, signature, watermark, text, logo, artist name,", + "civitaiResources": [ + { + "type": "checkpoint", + "modelVersionId": 1183765, + "modelVersionName": "v8.0" + }, + { + "type": "lora", + "weight": 1, + "modelVersionId": 1273708, + "modelVersionName": "Illustrious" + } + ] + }, + "availability": "Public", + "hasMeta": true, + "hasPositivePrompt": true, + "onSite": true, + "remixOfId": 51163887 + }, + { + "url": "https://image.civitai.com/xG1nkqKTMzGDvpLrqFT7WA/7d335619-6459-4f74-b014-90a41f971889/width=832/51163882.jpeg", + "nsfwLevel": 1, + "width": 832, + "height": 1216, + "hash": "UFJQco01B#-V~D01.RDlrY%0%esX59aLRPM|", + "type": "image", + "metadata": { + "hash": "UFJQco01B#-V~D01.RDlrY%0%esX59aLRPM|", + "size": 1259841, + "width": 832, + "height": 1216 + }, + "meta": { + "Size": "832x1216", + "seed": 252630410834070, + "steps": 30, + "hashes": { + "model": "63e5c28bf8", + "LORA:WindWaker_Style_IXL": "6e0655e27e" + }, + "prompt": "masterpiece, best quality, solo, 1girl, solo, pink hair, short twintails, white dress, cowboy shot, 3d, , ,", + "sampler": "DPM++ 2M Karras", + "cfgScale": 7, + "resources": [ + { + "name": "WindWaker_Style_IXL", + "type": "lora", + "weight": 1 + } + ], + "Model hash": "63e5c28bf8", + "negativePrompt": "(blurry), (lowres:1.2), (worst quality:1.4), (low quality:1.4), multiple views, jpeg artifacts, signature, watermark, text, logo, artist name,", + "waiNSFWIllustrious_v80 Version": "ComfyUI" + }, + "availability": "Public", + "hasMeta": true, + 
"hasPositivePrompt": true, + "onSite": false, + "remixOfId": null + }, + { + "url": "https://image.civitai.com/xG1nkqKTMzGDvpLrqFT7WA/dec826b5-fd01-406b-b881-b9fe5a6abbde/width=832/51163886.jpeg", + "nsfwLevel": 1, + "width": 832, + "height": 1216, + "hash": "UdKA[g_J.6tktjM|sptQX7%1wNWEV[t6fSxt", + "type": "image", + "metadata": { + "hash": "UdKA[g_J.6tktjM|sptQX7%1wNWEV[t6fSxt", + "size": 1120952, + "width": 832, + "height": 1216 + }, + "meta": { + "Size": "832x1216", + "seed": 272033102095518, + "steps": 30, + "hashes": { + "model": "63e5c28bf8", + "LORA:WindWaker_Style_IXL": "6e0655e27e" + }, + "prompt": "masterpiece, best quality, solo, 1girl, solo, long hair, smile, blue eyes, gloves, jewelry, one eye closed, green hair, pointy ears, necklace, makeup, tiara, ;) , ,", + "sampler": "DPM++ 2M Karras", + "cfgScale": 7, + "resources": [ + { + "name": "WindWaker_Style_IXL", + "type": "lora", + "weight": 1 + } + ], + "Model hash": "63e5c28bf8", + "negativePrompt": "(blurry), (lowres:1.2), (worst quality:1.4), (low quality:1.4), multiple views, jpeg artifacts, signature, watermark, text, logo, artist name,", + "waiNSFWIllustrious_v80 Version": "ComfyUI" + }, + "availability": "Public", + "hasMeta": true, + "hasPositivePrompt": true, + "onSite": false, + "remixOfId": null + }, + { + "url": "https://image.civitai.com/xG1nkqKTMzGDvpLrqFT7WA/9ada0404-1402-482d-96aa-183da60661aa/width=832/51163887.jpeg", + "nsfwLevel": 1, + "width": 832, + "height": 1216, + "hash": "UNLWkPR._M-n00s.9coz9ZD+0LxWs*?G-:IV", + "type": "image", + "metadata": { + "hash": "UNLWkPR._M-n00s.9coz9ZD+0LxWs*?G-:IV", + "size": 1110897, + "width": 832, + "height": 1216 + }, + "meta": { + "Size": "832x1216", + "seed": 991930792475181, + "steps": 30, + "hashes": { + "model": "63e5c28bf8", + "LORA:WindWaker_Style_IXL": "6e0655e27e" + }, + "prompt": "masterpiece, best quality, solo, 1girl, solo, blue eyes, blonde hair, 1boy, hat, weapon, male focus, pointy ears, sword, dark skin, chibi, instrument, shield , ,", + "sampler": "DPM++ 2M Karras", + "cfgScale": 7, + "resources": [ + { + "name": "WindWaker_Style_IXL", + "type": "lora", + "weight": 1 + } + ], + "Model hash": "63e5c28bf8", + "negativePrompt": "(blurry), (lowres:1.2), (worst quality:1.4), (low quality:1.4), multiple views, jpeg artifacts, signature, watermark, text, logo, artist name,", + "waiNSFWIllustrious_v80 Version": "ComfyUI" + }, + "availability": "Public", + "hasMeta": true, + "hasPositivePrompt": true, + "onSite": false, + "remixOfId": null + }, + { + "url": "https://image.civitai.com/xG1nkqKTMzGDvpLrqFT7WA/50c31f3d-3c04-4bb7-8609-34956da40d61/width=832/51170537.jpeg", + "nsfwLevel": 1, + "width": 832, + "height": 1216, + "hash": "U27^6;Iq03=_jwxYI.EQ04so~SEg0yWF%3xV", + "type": "image", + "metadata": { + "hash": "U27^6;Iq03=_jwxYI.EQ04so~SEg0yWF%3xV", + "size": 139579, + "width": 832, + "height": 1216 + }, + "meta": { + "Size": "832x1216", + "seed": 31337, + "steps": 30, + "prompt": "masterpiece, best quality, solo, 1girl, solo, chibi, 3d, \nzzMajora, spikes, yellow sclera, solo, 1boy, horns, glowing eyes,\nlooking at viewer, ,", + "sampler": "Euler a", + "cfgScale": 7, + "clipSkip": 2, + "resources": [], + "Created Date": "2025-01-12T1544:13.9453863Z", + "negativePrompt": "(blurry), (lowres:1.2), (worst quality:1.4), (low quality:1.4), multiple views, jpeg artifacts, signature, watermark, text, logo, artist name,", + "civitaiResources": [ + { + "type": "checkpoint", + "modelVersionId": 1183765, + "modelVersionName": "v8.0" + }, + { + "type": "lora", + "weight": 
0.8, + "modelVersionId": 1193395, + "modelVersionName": "Illustrious" + }, + { + "type": "lora", + "weight": 1, + "modelVersionId": 1273708, + "modelVersionName": "Illustrious" + } + ] + }, + "availability": "Public", + "hasMeta": true, + "hasPositivePrompt": true, + "onSite": true, + "remixOfId": null + }, + { + "url": "https://image.civitai.com/xG1nkqKTMzGDvpLrqFT7WA/833e0c0d-8b3a-4393-8c44-b80cea0e6975/width=832/51169122.jpeg", + "nsfwLevel": 1, + "width": 832, + "height": 1216, + "hash": "UUMjBQxc%hM_~osEOZWA%fD*R,tQ%MV?nOS4", + "type": "image", + "metadata": { + "hash": "UUMjBQxc%hM_~osEOZWA%fD*R,tQ%MV?nOS4", + "size": 198671, + "width": 832, + "height": 1216 + }, + "meta": { + "Size": "832x1216", + "seed": 1729355716, + "extra": { + "remixOfId": 51163887 + }, + "steps": 30, + "prompt": "masterpiece, best quality, solo, 1girl, solo, blue eyes, blonde hair, hat, weapon, pointy ears, sword, chibi, shield , dynamic pose, sword slash, motion lines,", + "sampler": "DPM++ 2M Karras", + "cfgScale": 7, + "clipSkip": 2, + "resources": [], + "Created Date": "2025-01-12T1529:44.8655317Z", + "negativePrompt": "(blurry), (lowres:1.2), (worst quality:1.4), (low quality:1.4), multiple views, jpeg artifacts, signature, watermark, text, logo, artist name,", + "civitaiResources": [ + { + "type": "checkpoint", + "modelVersionId": 1183765, + "modelVersionName": "v8.0" + }, + { + "type": "lora", + "weight": 1, + "modelVersionId": 1273708, + "modelVersionName": "Illustrious" + } + ] + }, + "availability": "Public", + "hasMeta": true, + "hasPositivePrompt": true, + "onSite": true, + "remixOfId": 51163887 + }, + { + "url": "https://image.civitai.com/xG1nkqKTMzGDvpLrqFT7WA/06154f27-96c8-487d-96f8-c627866842ad/width=832/51169120.jpeg", + "nsfwLevel": 1, + "width": 832, + "height": 1216, + "hash": "UGLf8XtR00~UCVxbnXtOIDWTjKRn9hxZx@R-", + "type": "image", + "metadata": { + "hash": "UGLf8XtR00~UCVxbnXtOIDWTjKRn9hxZx@R-", + "size": 166337, + "width": 832, + "height": 1216 + }, + "meta": { + "Size": "832x1216", + "seed": 952291488, + "extra": { + "remixOfId": 51163887 + }, + "steps": 30, + "prompt": "masterpiece, best quality, solo, 1girl, solo, 3d, chibi,\n Princess Peach, pink dress, blonde hair, blue eyes, long hair, crown, gem, gloves, puffy sleeves, short sleeves, white gloves, solo, smiling, looking at viewer, cowboy shot,", + "sampler": "DPM++ 2M Karras", + "cfgScale": 7, + "clipSkip": 2, + "resources": [], + "Created Date": "2025-01-12T1533:26.5487359Z", + "negativePrompt": "(blurry), (lowres:1.2), (worst quality:1.4), (low quality:1.4), multiple views, jpeg artifacts, signature, watermark, text, logo, artist name,", + "civitaiResources": [ + { + "type": "checkpoint", + "modelVersionId": 1183765, + "modelVersionName": "v8.0" + }, + { + "type": "lora", + "weight": 1, + "modelVersionId": 1273708, + "modelVersionName": "Illustrious" + } + ] + }, + "availability": "Public", + "hasMeta": true, + "hasPositivePrompt": true, + "onSite": true, + "remixOfId": 51163887 + }, + { + "url": "https://image.civitai.com/xG1nkqKTMzGDvpLrqFT7WA/0fe7b1a9-914f-47aa-906b-83e572f767a3/width=832/51163876.jpeg", + "nsfwLevel": 1, + "width": 832, + "height": 1216, + "hash": "UKK,T#}?02OF00RQY5S2E+N^fMWWWFM|s.oc", + "type": "image", + "metadata": { + "hash": "UKK,T#}?02OF00RQY5S2E+N^fMWWWFM|s.oc", + "size": 852319, + "width": 832, + "height": 1216 + }, + "meta": { + "Size": "832x1216", + "seed": 31337, + "steps": 30, + "hashes": { + "model": "63e5c28bf8", + "LORA:WindWaker_Style_IXL": "6e0655e27e" + }, + "prompt": "masterpiece, best 
quality, solo, 1girl, solo, long hair, looking at viewer, smile, open mouth, blue eyes, dress, jewelry, red hair, pointy ears, white dress, bracelet, neckerchief, empty eyes, yellow neckerchief, triforce, , ,", + "sampler": "DPM++ 2M Karras", + "cfgScale": 7, + "resources": [ + { + "name": "WindWaker_Style_IXL", + "type": "lora", + "weight": 1 + } + ], + "Model hash": "63e5c28bf8", + "negativePrompt": "(blurry), (lowres:1.2), (worst quality:1.4), (low quality:1.4), multiple views, jpeg artifacts, signature, watermark, text, logo, artist name,", + "waiNSFWIllustrious_v80 Version": "ComfyUI" + }, + "availability": "Public", + "hasMeta": true, + "hasPositivePrompt": true, + "onSite": false, + "remixOfId": null + }, + { + "url": "https://image.civitai.com/xG1nkqKTMzGDvpLrqFT7WA/3147ea21-bc7a-4a84-9d77-93f62c34e274/width=832/51169254.jpeg", + "nsfwLevel": 1, + "width": 832, + "height": 1216, + "hash": "UBEoAB0L0J~p00?HA1D%b|E1-NbwJB%2?Fsl", + "type": "image", + "metadata": { + "hash": "UBEoAB0L0J~p00?HA1D%b|E1-NbwJB%2?Fsl", + "size": 146909, + "width": 832, + "height": 1216 + }, + "meta": { + "Size": "832x1216", + "seed": 438227750, + "extra": { + "remixOfId": 51163887 + }, + "steps": 30, + "prompt": "masterpiece, best quality, solo, 1girl, solo, chibi, 3d, \nzzHilda, red eyes, purple hair, long hair, pointy ears, tiara, white gloves, dress, elbow gloves, jewelry, makeup, earrings, purple tabard, triforce, shoulder armor, tiara, \nlooking at viewer, smile,", + "sampler": "Euler a", + "cfgScale": 5, + "clipSkip": 2, + "resources": [], + "Created Date": "2025-01-12T1534:34.0959271Z", + "negativePrompt": "(blurry), (lowres:1.2), (worst quality:1.4), (low quality:1.4), multiple views, jpeg artifacts, signature, watermark, text, logo, artist name,", + "civitaiResources": [ + { + "type": "checkpoint", + "modelVersionId": 1183765, + "modelVersionName": "v8.0" + }, + { + "type": "lora", + "weight": 0.7, + "modelVersionId": 1221721, + "modelVersionName": "v1.0" + }, + { + "type": "lora", + "weight": 1, + "modelVersionId": 1273708, + "modelVersionName": "Illustrious" + } + ] + }, + "availability": "Public", + "hasMeta": true, + "hasPositivePrompt": true, + "onSite": true, + "remixOfId": 51163887 + }, + { + "url": "https://image.civitai.com/xG1nkqKTMzGDvpLrqFT7WA/bb4cbd77-e6dc-4525-96b9-23114ce381bc/width=832/51163885.jpeg", + "nsfwLevel": 1, + "width": 832, + "height": 1248, + "hash": "U8HC77?900Iw1bIqm_tQu6Rp*{tK0?xV}NtQ", + "type": "image", + "metadata": { + "hash": "U8HC77?900Iw1bIqm_tQu6Rp*{tK0?xV}NtQ", + "size": 1154943, + "width": 832, + "height": 1248 + }, + "meta": { + "Size": "832x1248", + "seed": 1332683, + "steps": 30, + "hashes": { + "model": "04ba0dfcc1", + "LORA:WindWaker_Style_IXL": "6e0655e27e" + }, + "prompt": "masterpiece, best quality, BREAK, 1girl, solo, blonde hair, wavy hair, angel, angel wings, halo, smile, sitting, forest, white dress,smile, looking at viewer, ,", + "sampler": "DPM++ 2M Karras", + "cfgScale": 7, + "resources": [ + { + "name": "WindWaker_Style_IXL", + "type": "lora", + "weight": 1 + } + ], + "Model hash": "04ba0dfcc1", + "negativePrompt": "(blurry), (lowres:1.2), (worst quality:1.4), (low quality:1.4), multiple views, jpeg artifacts, signature, watermark, text, logo, artist name,", + "waiNSFWIllustrious_v70 Version": "ComfyUI" + }, + "availability": "Public", + "hasMeta": true, + "hasPositivePrompt": true, + "onSite": false, + "remixOfId": null + } + ], + "downloadUrl": "https://civitai.com/api/download/models/1273708", + "_sha256": 
"6e0655e27e5635e91d6f9edbc74198ab13a8bb1c4c95fd2ed0e361d422199711", + "_civitai_api": "https://civitai.com/api/v1/model-versions/by-hash/6e0655e27e5635e91d6f9edbc74198ab13a8bb1c4c95fd2ed0e361d422199711" + } + }, + "baseModelFile": "OnomaAIResearch/Illustrious-xl-early-release-v0", + "trainedWords": [ + { + "word": "3D", + "civitai": true + }, + { + "word": "Chibi", + "civitai": true + }, + { + "word": "solo", + "count": 109, + "metadata": true + }, + { + "word": "pointy ears", + "count": 98, + "metadata": true + }, + { + "word": "1boy", + "count": 80, + "metadata": true + }, + { + "word": "blonde hair", + "count": 68, + "metadata": true + }, + { + "word": "male focus", + "count": 65, + "metadata": true + }, + { + "word": "1girl", + "count": 64, + "metadata": true + }, + { + "word": "weapon", + "count": 58, + "metadata": true + }, + { + "word": "smile", + "count": 52, + "metadata": true + }, + { + "word": "blue eyes", + "count": 49, + "metadata": true + }, + { + "word": "hat", + "count": 49, + "metadata": true + }, + { + "word": "sword", + "count": 46, + "metadata": true + }, + { + "word": "standing", + "count": 44, + "metadata": true + }, + { + "word": "sky", + "count": 39, + "metadata": true + }, + { + "word": "outdoors", + "count": 38, + "metadata": true + }, + { + "word": "shield", + "count": 38, + "metadata": true + }, + { + "word": "cloud", + "count": 36, + "metadata": true + }, + { + "word": "day", + "count": 33, + "metadata": true + }, + { + "word": "dark skin", + "count": 30, + "metadata": true + }, + { + "word": "red hair", + "count": 30, + "metadata": true + }, + { + "word": "holding", + "count": 29, + "metadata": true + }, + { + "word": "red eyes", + "count": 28, + "metadata": true + }, + { + "word": "long hair", + "count": 27, + "metadata": true + }, + { + "word": "looking at viewer", + "count": 24, + "metadata": true + }, + { + "word": "dress", + "count": 21, + "metadata": true + }, + { + "word": "no humans", + "count": 21, + "metadata": true + }, + { + "word": "ocean", + "count": 20, + "metadata": true + }, + { + "word": "tree", + "count": 20, + "metadata": true + }, + { + "word": "holding weapon", + "count": 19, + "metadata": true + }, + { + "word": "open mouth", + "count": 19, + "metadata": true + }, + { + "word": "belt", + "count": 18, + "metadata": true + }, + { + "word": "full body", + "count": 17, + "metadata": true + }, + { + "word": "blue sky", + "count": 17, + "metadata": true + }, + { + "word": "holding sword", + "count": 16, + "metadata": true + }, + { + "word": "water", + "count": 15, + "metadata": true + }, + { + "word": "gloves", + "count": 15, + "metadata": true + }, + { + "word": "brown hair", + "count": 14, + "metadata": true + }, + { + "word": "grass", + "count": 14, + "metadata": true + }, + { + "word": "chibi", + "count": 14, + "metadata": true + }, + { + "word": "shirt", + "count": 13, + "metadata": true + }, + { + "word": "tunic", + "count": 13, + "metadata": true + }, + { + "word": "jewelry", + "count": 13, + "metadata": true + }, + { + "word": "skull", + "count": 13, + "metadata": true + }, + { + "word": "black eyes", + "count": 12, + "metadata": true + }, + { + "word": "scenery", + "count": 12, + "metadata": true + }, + { + "word": "triforce", + "count": 12, + "metadata": true + }, + { + "word": "closed mouth", + "count": 12, + "metadata": true + }, + { + "word": "upper body", + "count": 11, + "metadata": true + }, + { + "word": "boots", + "count": 11, + "metadata": true + }, + { + "word": "indoors", + "count": 10, + "metadata": true + }, + { + 
"word": "teeth", + "count": 10, + "metadata": true + }, + { + "word": "neckerchief", + "count": 10, + "metadata": true + }, + { + "word": "twintails", + "count": 9, + "metadata": true + }, + { + "word": "multiple boys", + "count": 9, + "metadata": true + }, + { + "word": "short hair", + "count": 9, + "metadata": true + }, + { + "word": "dark-skinned female", + "count": 9, + "metadata": true + }, + { + "word": "cape", + "count": 8, + "metadata": true + }, + { + "word": "night", + "count": 8, + "metadata": true + }, + { + "word": "glowing", + "count": 8, + "metadata": true + }, + { + "word": "from behind", + "count": 8, + "metadata": true + }, + { + "word": "watercraft", + "count": 8, + "metadata": true + }, + { + "word": "sleeveless", + "count": 8, + "metadata": true + }, + { + "word": "shadow", + "count": 8, + "metadata": true + }, + { + "word": "left-handed", + "count": 8, + "metadata": true + }, + { + "word": "red footwear", + "count": 8, + "metadata": true + }, + { + "word": "bird", + "count": 7, + "metadata": true + }, + { + "word": "closed eyes", + "count": 7, + "metadata": true + }, + { + "word": "2boys", + "count": 7, + "metadata": true + }, + { + "word": "yellow eyes", + "count": 7, + "metadata": true + }, + { + "word": "short sleeves", + "count": 7, + "metadata": true + }, + { + "word": "master sword", + "count": 6, + "metadata": true + }, + { + "word": "building", + "count": 6, + "metadata": true + }, + { + "word": "long sleeves", + "count": 6, + "metadata": true + }, + { + "word": "bracelet", + "count": 6, + "metadata": true + }, + { + "word": "t-shirt", + "count": 6, + "metadata": true + }, + { + "word": "grin", + "count": 6, + "metadata": true + }, + { + "word": "parody", + "count": 5, + "metadata": true + }, + { + "word": "instrument", + "count": 5, + "metadata": true + }, + { + "word": "plant", + "count": 5, + "metadata": true + }, + { + "word": "green eyes", + "count": 5, + "metadata": true + }, + { + "word": "sandals", + "count": 5, + "metadata": true + }, + { + "word": "earrings", + "count": 5, + "metadata": true + }, + { + "word": "gameplay mechanics", + "count": 5, + "metadata": true + }, + { + "word": ":d", + "count": 5, + "metadata": true + }, + { + "word": "pants", + "count": 5, + "metadata": true + }, + { + "word": "sharp teeth", + "count": 5, + "metadata": true + }, + { + "word": "black hair", + "count": 5, + "metadata": true + }, + { + "word": "hairband", + "count": 5, + "metadata": true + }, + { + "word": "red dress", + "count": 5, + "metadata": true + }, + { + "word": "multiple girls", + "count": 4, + "metadata": true + }, + { + "word": "pokemon (creature)", + "count": 4, + "metadata": true + }, + { + "word": "green headwear", + "count": 4, + "metadata": true + }, + { + "word": "boat", + "count": 4, + "metadata": true + }, + { + "word": "one eye closed", + "count": 4, + "metadata": true + }, + { + "word": "green hair", + "count": 4, + "metadata": true + }, + { + "word": "lying", + "count": 4, + "metadata": true + }, + { + "word": "sleeping", + "count": 4, + "metadata": true + }, + { + "word": "colored skin", + "count": 4, + "metadata": true + }, + { + "word": "braid", + "count": 4, + "metadata": true + }, + { + "word": "dark-skinned male", + "count": 4, + "metadata": true + }, + { + "word": "cosplay", + "count": 4, + "metadata": true + }, + { + "word": "1other", + "count": 4, + "metadata": true + }, + { + "word": "animal ears", + "count": 4, + "metadata": true + }, + { + "word": "scarf", + "count": 4, + "metadata": true + }, + { + "word": "ahoge", + "count": 4, 
+ "metadata": true + }, + { + "word": "blue shirt", + "count": 4, + "metadata": true + }, + { + "word": "hair over one eye", + "count": 4, + "metadata": true + }, + { + "word": "pantyhose", + "count": 4, + "metadata": true + }, + { + "word": "big hair", + "count": 4, + "metadata": true + }, + { + "word": "orange hairband", + "count": 4, + "metadata": true + }, + { + "word": "pillar", + "count": 3, + "metadata": true + }, + { + "word": "night sky", + "count": 3, + "metadata": true + }, + { + "word": "star (sky)", + "count": 3, + "metadata": true + }, + { + "word": "horns", + "count": 3, + "metadata": true + }, + { + "word": "armor", + "count": 3, + "metadata": true + }, + { + "word": "glowing eyes", + "count": 3, + "metadata": true + }, + { + "word": "holding shield", + "count": 3, + "metadata": true + }, + { + "word": "stairs", + "count": 3, + "metadata": true + }, + { + "word": "sign", + "count": 3, + "metadata": true + }, + { + "word": "on side", + "count": 3, + "metadata": true + }, + { + "word": "tiara", + "count": 3, + "metadata": true + }, + { + "word": "red shirt", + "count": 3, + "metadata": true + }, + { + "word": "palm tree", + "count": 3, + "metadata": true + }, + { + "word": "slit pupils", + "count": 3, + "metadata": true + }, + { + "word": "animal hat", + "count": 3, + "metadata": true + }, + { + "word": "cat hat", + "count": 3, + "metadata": true + }, + { + "word": "empty eyes", + "count": 3, + "metadata": true + }, + { + "word": "yellow neckerchief", + "count": 3, + "metadata": true + }, + { + "word": "skirt", + "count": 3, + "metadata": true + }, + { + "word": "profile", + "count": 3, + "metadata": true + }, + { + "word": "brown footwear", + "count": 3, + "metadata": true + }, + { + "word": "sheath", + "count": 3, + "metadata": true + }, + { + "word": "evil smile", + "count": 3, + "metadata": true + }, + { + "word": "fang", + "count": 3, + "metadata": true + }, + { + "word": "black dress", + "count": 3, + "metadata": true + }, + { + "word": "wristband", + "count": 3, + "metadata": true + }, + { + "word": "tentacle hair", + "count": 3, + "metadata": true + }, + { + "word": "octarian", + "count": 3, + "metadata": true + }, + { + "word": "octoling", + "count": 3, + "metadata": true + }, + { + "word": "octoling girl", + "count": 3, + "metadata": true + }, + { + "word": "twin braids", + "count": 3, + "metadata": true + }, + { + "word": "bangs", + "count": 3, + "metadata": true + }, + { + "word": "3girls", + "count": 2, + "metadata": true + }, + { + "word": "fire", + "count": 2, + "metadata": true + }, + { + "word": "molten rock", + "count": 2, + "metadata": true + }, + { + "word": "crown", + "count": 2, + "metadata": true + }, + { + "word": "playing instrument", + "count": 2, + "metadata": true + }, + { + "word": "black skin", + "count": 2, + "metadata": true + }, + { + "word": "from side", + "count": 2, + "metadata": true + }, + { + "word": "horizon", + "count": 2, + "metadata": true + }, + { + "word": "starry sky", + "count": 2, + "metadata": true + }, + { + "word": "walking", + "count": 2, + "metadata": true + }, + { + "word": "ship", + "count": 2, + "metadata": true + }, + { + "word": "blush stickers", + "count": 2, + "metadata": true + }, + { + "word": "vest", + "count": 2, + "metadata": true + }, + { + "word": "beach", + "count": 2, + "metadata": true + }, + { + "word": ":3", + "count": 2, + "metadata": true + }, + { + "word": "beak", + "count": 2, + "metadata": true + }, + { + "word": "chain", + "count": 2, + "metadata": true + }, + { + "word": "fangs", + "count": 2, + 
"metadata": true + }, + { + "word": "aiming", + "count": 2, + "metadata": true + }, + { + "word": "flower", + "count": 2, + "metadata": true + }, + { + "word": "weapon on back", + "count": 2, + "metadata": true + }, + { + "word": "fairy", + "count": 2, + "metadata": true + }, + { + "word": "furry", + "count": 2, + "metadata": true + }, + { + "word": "fake screenshot", + "count": 2, + "metadata": true + }, + { + "word": "turban", + "count": 2, + "metadata": true + }, + { + "word": "health bar", + "count": 2, + "metadata": true + }, + { + "word": "androgynous", + "count": 2, + "metadata": true + }, + { + "word": "colored sclera", + "count": 2, + "metadata": true + }, + { + "word": "yellow sclera", + "count": 2, + "metadata": true + }, + { + "word": "white dress", + "count": 2, + "metadata": true + }, + { + "word": "cow", + "count": 2, + "metadata": true + }, + { + "word": "simple background", + "count": 2, + "metadata": true + }, + { + "word": "sunglasses", + "count": 2, + "metadata": true + }, + { + "word": "polearm", + "count": 2, + "metadata": true + }, + { + "word": "waving", + "count": 2, + "metadata": true + }, + { + "word": "rabbit ears", + "count": 2, + "metadata": true + }, + { + "word": "purple hair", + "count": 2, + "metadata": true + }, + { + "word": "red gloves", + "count": 2, + "metadata": true + }, + { + "word": "arrow (symbol)", + "count": 2, + "metadata": true + }, + { + "word": "blue skin", + "count": 2, + "metadata": true + }, + { + "word": "forehead", + "count": 2, + "metadata": true + }, + { + "word": "bare shoulders", + "count": 2, + "metadata": true + }, + { + "word": "black footwear", + "count": 2, + "metadata": true + }, + { + "word": "sleeveless dress", + "count": 2, + "metadata": true + }, + { + "word": "female child", + "count": 2, + "metadata": true + }, + { + "word": "2girls", + "count": 2, + "metadata": true + }, + { + "word": "broom", + "count": 2, + "metadata": true + }, + { + "word": "penguin", + "count": 1, + "metadata": true + }, + { + "word": "throne", + "count": 1, + "metadata": true + }, + { + "word": "stained glass", + "count": 1, + "metadata": true + }, + { + "word": "violin", + "count": 1, + "metadata": true + }, + { + "word": "mask", + "count": 1, + "metadata": true + }, + { + "word": "blue background", + "count": 1, + "metadata": true + }, + { + "word": "music", + "count": 1, + "metadata": true + }, + { + "word": "holding instrument", + "count": 1, + "metadata": true + }, + { + "word": "barrel", + "count": 1, + "metadata": true + }, + { + "word": "helmet", + "count": 1, + "metadata": true + }, + { + "word": "shoulder armor", + "count": 1, + "metadata": true + }, + { + "word": "pauldrons", + "count": 1, + "metadata": true + }, + { + "word": "full armor", + "count": 1, + "metadata": true + }, + { + "word": "windmill", + "count": 1, + "metadata": true + }, + { + "word": "arm up", + "count": 1, + "metadata": true + }, + { + "word": "crossed arms", + "count": 1, + "metadata": true + }, + { + "word": "red neckerchief", + "count": 1, + "metadata": true + }, + { + "word": "desert", + "count": 1, + "metadata": true + }, + { + "word": "fantasy", + "count": 1, + "metadata": true + }, + { + "word": "window", + "count": 1, + "metadata": true + }, + { + "word": "sunlight", + "count": 1, + "metadata": true + }, + { + "word": "shade", + "count": 1, + "metadata": true + }, + { + "word": "landscape", + "count": 1, + "metadata": true + }, + { + "word": "flag", + "count": 1, + "metadata": true + }, + { + "word": "ruins", + "count": 1, + "metadata": true + }, + { + 
"word": "door", + "count": 1, + "metadata": true + }, + { + "word": "carpet", + "count": 1, + "metadata": true + }, + { + "word": "own hands together", + "count": 1, + "metadata": true + }, + { + "word": "child", + "count": 1, + "metadata": true + }, + { + "word": "male child", + "count": 1, + "metadata": true + }, + { + "word": "battle", + "count": 1, + "metadata": true + }, + { + "word": "parted lips", + "count": 1, + "metadata": true + }, + { + "word": "floating hair", + "count": 1, + "metadata": true + }, + { + "word": "pink dress", + "count": 1, + "metadata": true + }, + { + "word": "on back", + "count": 1, + "metadata": true + }, + { + "word": "white hair", + "count": 1, + "metadata": true + }, + { + "word": "facial hair", + "count": 1, + "metadata": true + }, + { + "word": "beard", + "count": 1, + "metadata": true + }, + { + "word": "serious", + "count": 1, + "metadata": true + }, + { + "word": "thick eyebrows", + "count": 1, + "metadata": true + }, + { + "word": "green skin", + "count": 1, + "metadata": true + }, + { + "word": "tusks", + "count": 1, + "metadata": true + }, + { + "word": "elbow gloves", + "count": 1, + "metadata": true + }, + { + "word": "necklace", + "count": 1, + "metadata": true + }, + { + "word": "makeup", + "count": 1, + "metadata": true + }, + { + "word": ";)", + "count": 1, + "metadata": true + }, + { + "word": "gun", + "count": 1, + "metadata": true + }, + { + "word": "blue dress", + "count": 1, + "metadata": true + }, + { + "word": "freckles", + "count": 1, + "metadata": true + }, + { + "word": "purple eyes", + "count": 1, + "metadata": true + }, + { + "word": "headband", + "count": 1, + "metadata": true + }, + { + "word": "yellow fur", + "count": 1, + "metadata": true + }, + { + "word": "stadium", + "count": 1, + "metadata": true + }, + { + "word": "lucario", + "count": 1, + "metadata": true + }, + { + "word": "furry male", + "count": 1, + "metadata": true + }, + { + "word": "expressionless", + "count": 1, + "metadata": true + }, + { + "word": "looking up", + "count": 1, + "metadata": true + }, + { + "word": "meme", + "count": 1, + "metadata": true + }, + { + "word": "sailor moon redraw challenge (meme)", + "count": 1, + "metadata": true + }, + { + "word": "bandana", + "count": 1, + "metadata": true + }, + { + "word": "knife", + "count": 1, + "metadata": true + }, + { + "word": "dagger", + "count": 1, + "metadata": true + }, + { + "word": "blue scarf", + "count": 1, + "metadata": true + }, + { + "word": "cat", + "count": 1, + "metadata": true + }, + { + "word": "cliff", + "count": 1, + "metadata": true + }, + { + "word": "cat ears", + "count": 1, + "metadata": true + }, + { + "word": "witch hat", + "count": 1, + "metadata": true + }, + { + "word": ":<", + "count": 1, + "metadata": true + }, + { + "word": "tabard", + "count": 1, + "metadata": true + }, + { + "word": "shovel", + "count": 1, + "metadata": true + }, + { + "word": "grey background", + "count": 1, + "metadata": true + }, + { + "word": "black background", + "count": 1, + "metadata": true + }, + { + "word": "staff", + "count": 1, + "metadata": true + }, + { + "word": "hawaiian shirt", + "count": 1, + "metadata": true + }, + { + "word": "spear", + "count": 1, + "metadata": true + }, + { + "word": "ponytail", + "count": 1, + "metadata": true + }, + { + "word": "pirate", + "count": 1, + "metadata": true + }, + { + "word": "pikachu", + "count": 1, + "metadata": true + }, + { + "word": "facing away", + "count": 1, + "metadata": true + }, + { + "word": "flying", + "count": 1, + "metadata": true + }, + { 
+ "word": "chicken", + "count": 1, + "metadata": true + }, + { + "word": "thighhighs", + "count": 1, + "metadata": true + }, + { + "word": "bow", + "count": 1, + "metadata": true + }, + { + "word": ":t", + "count": 1, + "metadata": true + }, + { + "word": "food", + "count": 1, + "metadata": true + }, + { + "word": "fruit", + "count": 1, + "metadata": true + }, + { + "word": "bow (weapon)", + "count": 1, + "metadata": true + }, + { + "word": "arrow (projectile)", + "count": 1, + "metadata": true + }, + { + "word": "holding arrow", + "count": 1, + "metadata": true + }, + { + "word": "wand", + "count": 1, + "metadata": true + }, + { + "word": "hammer", + "count": 1, + "metadata": true + }, + { + "word": "forehead jewel", + "count": 1, + "metadata": true + }, + { + "word": "blue hair", + "count": 1, + "metadata": true + }, + { + "word": "bodysuit", + "count": 1, + "metadata": true + }, + { + "word": "outstretched arm", + "count": 1, + "metadata": true + }, + { + "word": "fusion", + "count": 1, + "metadata": true + }, + { + "word": "among us", + "count": 1, + "metadata": true + }, + { + "word": "jacket", + "count": 1, + "metadata": true + }, + { + "word": "spiked hair", + "count": 1, + "metadata": true + }, + { + "word": "pink flower", + "count": 1, + "metadata": true + }, + { + "word": "white footwear", + "count": 1, + "metadata": true + }, + { + "word": "dark persona", + "count": 1, + "metadata": true + }, + { + "word": "grey footwear", + "count": 1, + "metadata": true + }, + { + "word": "grey skin", + "count": 1, + "metadata": true + }, + { + "word": "blood", + "count": 1, + "metadata": true + }, + { + "word": "english text", + "count": 1, + "metadata": true + }, + { + "word": "torch", + "count": 1, + "metadata": true + }, + { + "word": "cave", + "count": 1, + "metadata": true + }, + { + "word": "dual wielding", + "count": 1, + "metadata": true + }, + { + "word": "energy sword", + "count": 1, + "metadata": true + }, + { + "word": "surcoat", + "count": 1, + "metadata": true + }, + { + "word": "animal", + "count": 1, + "metadata": true + }, + { + "word": "monster", + "count": 1, + "metadata": true + }, + { + "word": "creature", + "count": 1, + "metadata": true + }, + { + "word": "black gloves", + "count": 1, + "metadata": true + }, + { + "word": "green background", + "count": 1, + "metadata": true + }, + { + "word": "yordle", + "count": 1, + "metadata": true + }, + { + "word": "rain", + "count": 1, + "metadata": true + }, + { + "word": "dark", + "count": 1, + "metadata": true + }, + { + "word": "heads-up display", + "count": 1, + "metadata": true + }, + { + "word": "diluc (genshin impact)", + "count": 1, + "metadata": true + }, + { + "word": "hair bun", + "count": 1, + "metadata": true + }, + { + "word": "double bun", + "count": 1, + "metadata": true + }, + { + "word": "slime (creature)", + "count": 1, + "metadata": true + }, + { + "word": "grey hair", + "count": 1, + "metadata": true + }, + { + "word": "broom riding", + "count": 1, + "metadata": true + }, + { + "word": "holding broom", + "count": 1, + "metadata": true + }, + { + "word": "shoes", + "count": 1, + "metadata": true + }, + { + "word": "trident", + "count": 1, + "metadata": true + }, + { + "word": "orange neckerchief", + "count": 1, + "metadata": true + }, + { + "word": "book", + "count": 1, + "metadata": true + }, + { + "word": "fence", + "count": 1, + "metadata": true + }, + { + "word": "wall", + "count": 1, + "metadata": true + }, + { + "word": "stone wall", + "count": 1, + "metadata": true + }, + { + "word": "camera", + 
"count": 1, + "metadata": true + }, + { + "word": "holding camera", + "count": 1, + "metadata": true + }, + { + "word": "video camera", + "count": 1, + "metadata": true + } + ], + "sha256": "6e0655e27e5635e91d6f9edbc74198ab13a8bb1c4c95fd2ed0e361d422199711", + "name": "Style of the Winds (The Legend Of Zelda: Wind Waker) [Illustrious & NoobAI & SD1.5] - Illustrious", + "type": "LORA", + "baseModel": "Illustrious", + "links": [ + "https://civitai.com/models/208265?modelVersionId=1273708", + "https://civitai.com/api/v1/model-versions/by-hash/6e0655e27e5635e91d6f9edbc74198ab13a8bb1c4c95fd2ed0e361d422199711" + ] +} \ No newline at end of file diff --git a/loras/illu/[Style] Crypine [Illustrious-XL].safetensors b/loras/illu/[Style] Crypine [Illustrious-XL].safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7adaa45fd9344fc5769e94b810bd42410c41fdb3 --- /dev/null +++ b/loras/illu/[Style] Crypine [Illustrious-XL].safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0bd682b31f88169f9140f4ba54373417c904f235dc30ea2ab2b0aecf2d857f6a +size 256056360 diff --git a/loras/illu/birdman_style_ill_v1.safetensors b/loras/illu/birdman_style_ill_v1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ff2fe68ffaea66dc4a82428f61bf755f6a7328bd --- /dev/null +++ b/loras/illu/birdman_style_ill_v1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a822d6541cffe7abc753cf276a2e0e43501697e596a413e2241a96f5709bdd80 +size 228459524 diff --git a/loras/illu/curss_style_ill_v2.safetensors b/loras/illu/curss_style_ill_v2.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3b95db60017836f7c270ce167a5f59ef40948ee3 --- /dev/null +++ b/loras/illu/curss_style_ill_v2.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df7b661d8569de1341730689e79e32968009056ec10bfb50e3a4f2d3f29d52fa +size 228458148 diff --git a/loras/illu/explodingclothes_il.safetensors b/loras/illu/explodingclothes_il.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3d0c7829e539876158f7647bb5fdc4e6316293be --- /dev/null +++ b/loras/illu/explodingclothes_il.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c756ee3a39320ac287a5591a3e60c65ff9d8a65fb1d3495714e6f0288d1cae78 +size 57436188 diff --git a/loras/illu/naked_ribbon-v1.safetensors b/loras/illu/naked_ribbon-v1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2d1130e1f87bf626fac1ebd970b198551531b498 --- /dev/null +++ b/loras/illu/naked_ribbon-v1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:86c9a2bf9b2fb8f7efb1d0e7b9cf555813769c319e393206fcc4961b67d56461 +size 37861368 diff --git "a/loras/illu/niji\347\224\267\346\200\247\351\243\216\346\240\274\342\221\241_v1.0.safetensors" "b/loras/illu/niji\347\224\267\346\200\247\351\243\216\346\240\274\342\221\241_v1.0.safetensors" new file mode 100644 index 0000000000000000000000000000000000000000..776fa82a53c71e983800095114f40090da719b82 --- /dev/null +++ "b/loras/illu/niji\347\224\267\346\200\247\351\243\216\346\240\274\342\221\241_v1.0.safetensors" @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e4988f4f38f34004fb73fb26ea9a867dc6bcec806928203dccc8329e21bb932 +size 90175688 diff --git a/loras/illu/pixel-Illustrius.safetensors b/loras/illu/pixel-Illustrius.safetensors new file mode 100644 index 
0000000000000000000000000000000000000000..9c13e8e2ce36f5ab14a8d74e1db5ca534a47e6e4 --- /dev/null +++ b/loras/illu/pixel-Illustrius.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b568a22e427b2073459b27d88706e6ecc6f7cb5295a27f867f4938871860bbcd +size 57423348 diff --git a/loras/illu/qwq_style_ill_v1.safetensors b/loras/illu/qwq_style_ill_v1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a03f86ce469ff4cc52344f04d67b70ecb3ffe4ac --- /dev/null +++ b/loras/illu/qwq_style_ill_v1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa261320a735fd926ae062761639181d0d6c48f673020dfa1b148bfecf8e71ee +size 228456620 diff --git a/loras/illu/studio_ghibliILL.safetensors b/loras/illu/studio_ghibliILL.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b7335ee082516e85c3407a22c4640b31517b40e0 --- /dev/null +++ b/loras/illu/studio_ghibliILL.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2bb3e90bd7ffa1ffcfe13c0f8cfcd7e505b514edcdc9ee56301221c20beb6290 +size 228468204 diff --git a/loras/ipadapter/ip-adapter-faceid_sdxl_lora.safetensors b/loras/ipadapter/ip-adapter-faceid_sdxl_lora.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4d02749c27bbb928770944f59caf7a4d58e495c2 --- /dev/null +++ b/loras/ipadapter/ip-adapter-faceid_sdxl_lora.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4fcf93d6e8dc8dd18f5f9e51c8306f369486ed0aa0780ade9961308aff7f0d64 +size 371842896 diff --git a/loras/pony/0811 see-through nurse uniform_v1_pony.safetensors b/loras/pony/0811 see-through nurse uniform_v1_pony.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..dd1fd9411ace272c1a5de04de396c0835b33ad4b --- /dev/null +++ b/loras/pony/0811 see-through nurse uniform_v1_pony.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1621313271717ca8f71a0675d5fb503ccb81fb216a9ba9c624ffc1201f285bf +size 85425460 diff --git a/loras/pony/2BPony.safetensors b/loras/pony/2BPony.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e7fea2b1734e55843164c3138b4dba96c4b40a1e --- /dev/null +++ b/loras/pony/2BPony.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d9f7c4888ee0350d4bc86a8414708065b335d40d98ace6de3ac317bd76bbd0c4 +size 228450668 diff --git a/loras/pony/Arcane.safetensors b/loras/pony/Arcane.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e476104dc531cc9014dc7195e654dce2f76da686 --- /dev/null +++ b/loras/pony/Arcane.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28b79c8e80f2f7840904efec19e843efe73db413bed6b480399768a1b7f7e4e4 +size 114457556 diff --git a/loras/pony/DVaPony.safetensors b/loras/pony/DVaPony.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fd41fdf2414c6af8981ed65631b27eb991737eb7 --- /dev/null +++ b/loras/pony/DVaPony.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f5b2f112a0ef490279b2c86252a7f629ff174e0966288e24fa385fecafeebc1a +size 57424268 diff --git a/loras/pony/Five.safetensors b/loras/pony/Five.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bd6e7a898f0b662ca298fa76880c2272b3cae812 --- /dev/null +++ b/loras/pony/Five.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:aca8aeeea6d962e41c186562e129ff22ee5bb1b236b558eb7927d88ba771fa60 +size 57476372 diff --git a/loras/pony/Minecraft_style_3d.safetensors b/loras/pony/Minecraft_style_3d.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..dd1a91ef12ac83e27133a0b221a1a43c54d26dea --- /dev/null +++ b/loras/pony/Minecraft_style_3d.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82cf4dbcf44e06c35f7146641ee208f98ae3b98dc7ea2bbf794b15ebdeb24d1a +size 228465484 diff --git a/loras/pony/POCHI_SCIENCE.safetensors b/loras/pony/POCHI_SCIENCE.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2ca277e83602c9886ce2bfcf243c63a31c086fbf --- /dev/null +++ b/loras/pony/POCHI_SCIENCE.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:777e48e5a7013ceb2e677369c7ba57456160ba1450ae78ca587623b47869ae5d +size 228457828 diff --git a/loras/pony/PaperMario.safetensors b/loras/pony/PaperMario.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..963d77c874a9b8b8eb2f429f7a96a58e7f369ba9 --- /dev/null +++ b/loras/pony/PaperMario.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:185217bfdc1b32e6429129ee9ccd9536a6021829f79336167ebe053606d977aa +size 114441492 diff --git a/loras/pony/QiandaiyiyuPDv1 Style.safetensors b/loras/pony/QiandaiyiyuPDv1 Style.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9016604b76450b1783cc09367cbab8c0e63f66cd --- /dev/null +++ b/loras/pony/QiandaiyiyuPDv1 Style.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c867156605f1efea7d2585d4d366bcca7ed9cf300e850407ea3e109ff869d68a +size 114431844 diff --git a/loras/pony/R3DStyle.sha256 b/loras/pony/R3DStyle.sha256 new file mode 100644 index 0000000000000000000000000000000000000000..9d0dd8c2545188e9d145c67fae0f481b731ebd81 --- /dev/null +++ b/loras/pony/R3DStyle.sha256 @@ -0,0 +1 @@ +91ded154761569f71ff3dcc28f0715a55d88388dbae2b2325eb9883d4851c2b0 \ No newline at end of file diff --git a/loras/pony/Rem.safetensors b/loras/pony/Rem.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8c2d0dcc6da1d8233cf083a53a4c33ef0d54239f --- /dev/null +++ b/loras/pony/Rem.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5fc59bd21d4eb3507fa56e078b3d511b9c63096152ad48c79905fb64ed48fb56 +size 57426532 diff --git a/loras/pony/Slingshot_Swimsuit_By_Stable_Yogi_PONY_v1.safetensors b/loras/pony/Slingshot_Swimsuit_By_Stable_Yogi_PONY_v1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a612f7efedc7f258876e999f92e3408f16414dbb --- /dev/null +++ b/loras/pony/Slingshot_Swimsuit_By_Stable_Yogi_PONY_v1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e74c89dbe21e072c472c9a29ff1063dfc63818907e07715d2799e91978a96572 +size 228448022 diff --git a/loras/pony/Snow_White_Pony.safetensors b/loras/pony/Snow_White_Pony.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8679c015a025b9425adf2cac1e4a37f3ea8a77b4 --- /dev/null +++ b/loras/pony/Snow_White_Pony.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82fd681b89c43d0db7c23003f8963eafaceb28400cc792af64423eb2309daf40 +size 228467444 diff --git a/loras/pony/Style_RizuNM-PONY.safetensors b/loras/pony/Style_RizuNM-PONY.safetensors new file mode 100644 index 
0000000000000000000000000000000000000000..be3a4f11ec0529b06e806ca5e0555d9a6771d8f4 --- /dev/null +++ b/loras/pony/Style_RizuNM-PONY.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da49605afd1f7c7d3f587bec159c5d1910db3e2220cf6b0624969847492d7119 +size 228450902 diff --git a/loras/pony/Style_SixPlusOne-PONY.safetensors b/loras/pony/Style_SixPlusOne-PONY.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9f47002ac378bd64ab496d3e3291d9d84809034b --- /dev/null +++ b/loras/pony/Style_SixPlusOne-PONY.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e3cbed7ebd80505ec578f88915f958681f95785e16a85affe97b95049b3042b +size 228453942 diff --git a/loras/pony/SuperSaiyanHair_pdxl_Incrs_v1.safetensors b/loras/pony/SuperSaiyanHair_pdxl_Incrs_v1.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3bcaaa70c724127050bfac79cc1767451c0d6f3e --- /dev/null +++ b/loras/pony/SuperSaiyanHair_pdxl_Incrs_v1.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36450eabde4420b9c9d35a4cb0f80b28d5c06ff6b8f01f383e7c13818c2965d3 +size 57431044 diff --git a/loras/pony/_SELF_fallen_ai_PONY_gta5_style_b.safetensors b/loras/pony/_SELF_fallen_ai_PONY_gta5_style_b.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ddeeaf8a7a4d66ebcd190016aeadf9c803ae3508 --- /dev/null +++ b/loras/pony/_SELF_fallen_ai_PONY_gta5_style_b.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d2a63627915d56e5a7bc2a4a3cd1fbf228e38993f8ffa03f6b2b8091accc006 +size 228459020 diff --git a/loras/pony/glass-girl-02.safetensors b/loras/pony/glass-girl-02.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..518439dedfd68eb96c733d4c2fcfa03d5b1ed41e --- /dev/null +++ b/loras/pony/glass-girl-02.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5943d89f0afa31152938087a6d88fa1dda11359e7934947016f2c1f77e59aa7a +size 114435356 diff --git a/loras/pony/moga(nur)XLpony-000006.safetensors b/loras/pony/moga(nur)XLpony-000006.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7f65b43f04f8028a901cee007d3a95b7e7abf430 --- /dev/null +++ b/loras/pony/moga(nur)XLpony-000006.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84288cb16f8df0c38d1af8b0413dc124ba133ff7273b911c4d7aa0757f5de4c2 +size 57424100 diff --git a/loras/pony/nai3_check2.safetensors b/loras/pony/nai3_check2.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a49a788a7cd3186c98821198f8f6ec427101ed71 --- /dev/null +++ b/loras/pony/nai3_check2.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b5da12cf412df6107ce364f4740f08566a06614228c85db596619b7b4eb50ec +size 114442140 diff --git a/loras/pony/v6.safetensors b/loras/pony/v6.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5be507e02bca199f695c5cfd00117b529d3cad38 --- /dev/null +++ b/loras/pony/v6.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4abd2cac101168be3e00695b7f61e3115b483cb65c2e644396f82b3824ffe988 +size 228463868 diff --git a/loras/sd1.5/JOJOS_BIZARRE_ADVENTURE_Part5_ComicStyle-000040.safetensors b/loras/sd1.5/JOJOS_BIZARRE_ADVENTURE_Part5_ComicStyle-000040.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d569e6708382530ddb9339643d4634a795fe6636 --- 
/dev/null +++ b/loras/sd1.5/JOJOS_BIZARRE_ADVENTURE_Part5_ComicStyle-000040.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90c60772b319036cbf882aec7d86521419c1f1c6487af53685b47d8e9889fa70 +size 75624990 diff --git a/loras/sd1.5/VOXEL.safetensors b/loras/sd1.5/VOXEL.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ec7413c13e0f6778dd55996d6f62f6d42933d918 --- /dev/null +++ b/loras/sd1.5/VOXEL.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25e6dc19d8cdecfeda93c868189e033bc0e3b46e647c7d0879f1d295a09e78bc +size 151126648 diff --git a/loras/sd1.5/jianbihua.safetensors b/loras/sd1.5/jianbihua.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..88a2413cd3dc38629d7e2ccbb6de675f81d5aadd --- /dev/null +++ b/loras/sd1.5/jianbihua.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5cf4a87629f457f48036c1fb05ca453a794be0c8d05af02fece4a579315f5a2a +size 91168804 diff --git a/loras/sd1.5/pixai style.safetensors b/loras/sd1.5/pixai style.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0c5cebfb21c3fb201c4d5e28de4a617176d8a137 --- /dev/null +++ b/loras/sd1.5/pixai style.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd6fb5f138f301fb7e2ad1b08b2288241f9cc74f0f5fcebbd8d69344ee54a89b +size 151114048 diff --git a/loras/sd1.5/shilhps.safetensors b/loras/sd1.5/shilhps.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d9b0063055956114d0207eddaf529e099fd04763 --- /dev/null +++ b/loras/sd1.5/shilhps.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:03ca864c7e1e3698a6bdf63e45053ecf6eeef7a0e28d67a9184de0e565fbe2e8 +size 151108831 diff --git a/loras/sd1.5/shinkai_makoto_offset.safetensors b/loras/sd1.5/shinkai_makoto_offset.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1f142559a5db2c2e78bed0ff27247919b6473635 --- /dev/null +++ b/loras/sd1.5/shinkai_makoto_offset.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef6ba90e343502f6a8bf6da0d9f8f4e2571d0248d11d14aa577b7ddc490bbd48 +size 151108831 diff --git a/onnx/parsing_lip.onnx b/onnx/parsing_lip.onnx new file mode 100644 index 0000000000000000000000000000000000000000..7d1a879fa30fc002188b0c9fec3cc05064dd1093 --- /dev/null +++ b/onnx/parsing_lip.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8436e1dae96e2601c373d1ace29c8f0978b16357d9038c17a8ba756cca376dbc +size 266863411 diff --git a/rembg/RMBG-1.4.pth b/rembg/RMBG-1.4.pth new file mode 100644 index 0000000000000000000000000000000000000000..5a35fd28a4f04bc3a38135dec168c918632d6e8c --- /dev/null +++ b/rembg/RMBG-1.4.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:893c16c340b1ddafc93e78457a4d94190da9b7179149f8574284c83caebf5e8c +size 176718373 diff --git a/segformer_b2_clothes/.gitattributes b/segformer_b2_clothes/.gitattributes new file mode 100644 index 0000000000000000000000000000000000000000..c7d9f3332a950355d5a77d85000f05e6f45435ea --- /dev/null +++ b/segformer_b2_clothes/.gitattributes @@ -0,0 +1,34 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text 
+*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text diff --git a/segformer_b2_clothes/.gitignore b/segformer_b2_clothes/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..d0e300c55f010e9077e7befa1bb4df35ccf0a818 --- /dev/null +++ b/segformer_b2_clothes/.gitignore @@ -0,0 +1,2 @@ +.ipynb_checkpoints +test.ipynb \ No newline at end of file diff --git a/segformer_b2_clothes/README.md b/segformer_b2_clothes/README.md new file mode 100644 index 0000000000000000000000000000000000000000..afd23f68db798bb276b79fecffd64ecd8c8e3cf1 --- /dev/null +++ b/segformer_b2_clothes/README.md @@ -0,0 +1,105 @@ +--- +license: mit +tags: +- vision +- image-segmentation +widget: +- src: https://images.unsplash.com/photo-1643310325061-2beef64926a5?ixlib=rb-4.0.3&ixid=MnwxMjA3fDB8MHxzZWFyY2h8Nnx8cmFjb29uc3xlbnwwfHwwfHw%3D&w=1000&q=80 + example_title: Person +- src: https://freerangestock.com/sample/139043/young-man-standing-and-leaning-on-car.jpg + example_title: Person +datasets: +- mattmdjaga/human_parsing_dataset +--- +# Segformer B2 fine-tuned for clothes segmentation + +SegFormer model fine-tuned on [ATR dataset](https://github.com/lemondan/HumanParsing-Dataset) for clothes segmentation but can also be used for human segmentation. +The dataset on hugging face is called "mattmdjaga/human_parsing_dataset". + +**[Training code](https://github.com/mattmdjaga/segformer_b2_clothes)**. 
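+
+The example below runs full inference and produces a per-pixel label map `pred_seg`. As a follow-up — not part of the original model card, and assuming the `model` and `pred_seg` variables from that example — here is a minimal sketch of turning that map into a single-class binary mask using the label indices documented after the example (e.g. 4 = "Upper-clothes"):
+
+```python
+import numpy as np
+from PIL import Image
+
+# Continues from the inference example below: `model` and `pred_seg` (H x W label map) are assumed.
+UPPER_CLOTHES = 4  # label index for "Upper-clothes" in the label list further down
+
+# Binary 0/255 mask for the chosen class, saved as a grayscale image.
+mask = (pred_seg == UPPER_CLOTHES).numpy().astype(np.uint8) * 255
+Image.fromarray(mask, mode="L").save("upper_clothes_mask.png")
+
+# Names of the classes actually present in the prediction.
+present = sorted(model.config.id2label[int(i)] for i in np.unique(pred_seg.numpy()))
+print(present)
+```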
+```python +from transformers import SegformerImageProcessor, AutoModelForSemanticSegmentation +from PIL import Image +import requests +import matplotlib.pyplot as plt +import torch.nn as nn + +processor = SegformerImageProcessor.from_pretrained("mattmdjaga/segformer_b2_clothes") +model = AutoModelForSemanticSegmentation.from_pretrained("mattmdjaga/segformer_b2_clothes") + +url = "https://plus.unsplash.com/premium_photo-1673210886161-bfcc40f54d1f?ixlib=rb-4.0.3&ixid=MnwxMjA3fDB8MHxzZWFyY2h8MXx8cGVyc29uJTIwc3RhbmRpbmd8ZW58MHx8MHx8&w=1000&q=80" + +image = Image.open(requests.get(url, stream=True).raw) +inputs = processor(images=image, return_tensors="pt") + +outputs = model(**inputs) +logits = outputs.logits.cpu() + +upsampled_logits = nn.functional.interpolate( + logits, + size=image.size[::-1], + mode="bilinear", + align_corners=False, +) + +pred_seg = upsampled_logits.argmax(dim=1)[0] +plt.imshow(pred_seg) +``` + +Labels: 0: "Background", 1: "Hat", 2: "Hair", 3: "Sunglasses", 4: "Upper-clothes", 5: "Skirt", 6: "Pants", 7: "Dress", 8: "Belt", 9: "Left-shoe", 10: "Right-shoe", 11: "Face", 12: "Left-leg", 13: "Right-leg", 14: "Left-arm", 15: "Right-arm", 16: "Bag", 17: "Scarf" + +### Evaluation + +| Label Index | Label Name | Category Accuracy | Category IoU | +|:-------------:|:----------------:|:-----------------:|:------------:| +| 0 | Background | 0.99 | 0.99 | +| 1 | Hat | 0.73 | 0.68 | +| 2 | Hair | 0.91 | 0.82 | +| 3 | Sunglasses | 0.73 | 0.63 | +| 4 | Upper-clothes | 0.87 | 0.78 | +| 5 | Skirt | 0.76 | 0.65 | +| 6 | Pants | 0.90 | 0.84 | +| 7 | Dress | 0.74 | 0.55 | +| 8 | Belt | 0.35 | 0.30 | +| 9 | Left-shoe | 0.74 | 0.58 | +| 10 | Right-shoe | 0.75 | 0.60 | +| 11 | Face | 0.92 | 0.85 | +| 12 | Left-leg | 0.90 | 0.82 | +| 13 | Right-leg | 0.90 | 0.81 | +| 14 | Left-arm | 0.86 | 0.74 | +| 15 | Right-arm | 0.82 | 0.73 | +| 16 | Bag | 0.91 | 0.84 | +| 17 | Scarf | 0.63 | 0.29 | + +Overall Evaluation Metrics: +- Evaluation Loss: 0.15 +- Mean Accuracy: 0.80 +- Mean IoU: 0.69 + +### License + +The license for this model can be found [here](https://github.com/NVlabs/SegFormer/blob/master/LICENSE). + +### BibTeX entry and citation info + +```bibtex +@article{DBLP:journals/corr/abs-2105-15203, + author = {Enze Xie and + Wenhai Wang and + Zhiding Yu and + Anima Anandkumar and + Jose M. 
Alvarez and + Ping Luo}, + title = {SegFormer: Simple and Efficient Design for Semantic Segmentation with + Transformers}, + journal = {CoRR}, + volume = {abs/2105.15203}, + year = {2021}, + url = {https://arxiv.org/abs/2105.15203}, + eprinttype = {arXiv}, + eprint = {2105.15203}, + timestamp = {Wed, 02 Jun 2021 11:46:42 +0200}, + biburl = {https://dblp.org/rec/journals/corr/abs-2105-15203.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} +``` \ No newline at end of file diff --git a/segformer_b2_clothes/config.json b/segformer_b2_clothes/config.json new file mode 100644 index 0000000000000000000000000000000000000000..7bf506a3672235c681b35e124ef9333ca6f5e8b2 --- /dev/null +++ b/segformer_b2_clothes/config.json @@ -0,0 +1,110 @@ +{ + "_name_or_path": "nvidia/mit-b2", + "architectures": [ + "SegformerForSemanticSegmentation" + ], + "attention_probs_dropout_prob": 0.0, + "classifier_dropout_prob": 0.1, + "decoder_hidden_size": 768, + "depths": [ + 3, + 4, + 6, + 3 + ], + "downsampling_rates": [ + 1, + 4, + 8, + 16 + ], + "drop_path_rate": 0.1, + "hidden_act": "gelu", + "hidden_dropout_prob": 0.0, + "hidden_sizes": [ + 64, + 128, + 320, + 512 + ], + "id2label": { + "0": "Background", + "1": "Hat", + "2": "Hair", + "3": "Sunglasses", + "4": "Upper-clothes", + "5": "Skirt", + "6": "Pants", + "7": "Dress", + "8": "Belt", + "9": "Left-shoe", + "10": "Right-shoe", + "11": "Face", + "12": "Left-leg", + "13": "Right-leg", + "14": "Left-arm", + "15": "Right-arm", + "16": "Bag", + "17": "Scarf" + }, + "image_size": 224, + "initializer_range": 0.02, + "label2id": { + "Background": 0, + "Bag": 16, + "Belt": 8, + "Dress": 7, + "Face": 11, + "Hair": 2, + "Hat": 1, + "Left-arm": 14, + "Left-leg": 12, + "Left-shoe": 9, + "Pants": 6, + "Right-arm": 15, + "Right-leg": 13, + "Right-shoe": 10, + "Scarf": 17, + "Skirt": 5, + "Sunglasses": 3, + "Upper-clothes": 4 + }, + "layer_norm_eps": 1e-06, + "mlp_ratios": [ + 4, + 4, + 4, + 4 + ], + "model_type": "segformer", + "num_attention_heads": [ + 1, + 2, + 5, + 8 + ], + "num_channels": 3, + "num_encoder_blocks": 4, + "patch_sizes": [ + 7, + 3, + 3, + 3 + ], + "reshape_last_stage": true, + "semantic_loss_ignore_index": 255, + "sr_ratios": [ + 8, + 4, + 2, + 1 + ], + "strides": [ + 4, + 2, + 2, + 2 + ], + "torch_dtype": "float32", + "transformers_version": "4.24.0" +} diff --git a/segformer_b2_clothes/handler.py b/segformer_b2_clothes/handler.py new file mode 100644 index 0000000000000000000000000000000000000000..677fdf4567de92cdd30b8957c5b8d7c0563bcdfe --- /dev/null +++ b/segformer_b2_clothes/handler.py @@ -0,0 +1,39 @@ +from typing import Dict, List, Any +from PIL import Image +from io import BytesIO +from transformers import AutoModelForSemanticSegmentation, AutoFeatureExtractor +import base64 +import torch +from torch import nn + +class EndpointHandler(): + def __init__(self, path="."): + self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + self.model = AutoModelForSemanticSegmentation.from_pretrained(path).to(self.device).eval() + self.feature_extractor = AutoFeatureExtractor.from_pretrained(path) + + def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]: + """ + data args: + images (:obj:`PIL.Image`) + candiates (:obj:`list`) + Return: + A :obj:`list`:. 
The list contains items that are dicts should be liked {"label": "XXX", "score": 0.82} + """ + inputs = data.pop("inputs", data) + + # decode base64 image to PIL + image = Image.open(BytesIO(base64.b64decode(inputs['image']))) + + # preprocess image + encoding = self.feature_extractor(images=image, return_tensors="pt") + pixel_values = encoding["pixel_values"].to(self.device) + with torch.no_grad(): + outputs = self.model(pixel_values=pixel_values) + logits = outputs.logits + upsampled_logits = nn.functional.interpolate(logits, + size=image.size[::-1], + mode="bilinear", + align_corners=False,) + pred_seg = upsampled_logits.argmax(dim=1)[0] + return pred_seg.tolist() diff --git a/segformer_b2_clothes/model.safetensors b/segformer_b2_clothes/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9313aacc3c6e9d4e23b754ecee24f4397d13cfb0 --- /dev/null +++ b/segformer_b2_clothes/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f86fd90c567afd4370b3cc3a7e81ed767a632b2832a738331af660acc0c4c68 +size 109493236 diff --git a/segformer_b2_clothes/onnx/config.json b/segformer_b2_clothes/onnx/config.json new file mode 100644 index 0000000000000000000000000000000000000000..db6261b5b36fac4aea5274b43f0e8d2d5a2bfdc2 --- /dev/null +++ b/segformer_b2_clothes/onnx/config.json @@ -0,0 +1,109 @@ +{ + "_name_or_path": "mattmdjaga/segformer_b2_clothes", + "architectures": [ + "SegformerForSemanticSegmentation" + ], + "attention_probs_dropout_prob": 0.0, + "classifier_dropout_prob": 0.1, + "decoder_hidden_size": 768, + "depths": [ + 3, + 4, + 6, + 3 + ], + "downsampling_rates": [ + 1, + 4, + 8, + 16 + ], + "drop_path_rate": 0.1, + "hidden_act": "gelu", + "hidden_dropout_prob": 0.0, + "hidden_sizes": [ + 64, + 128, + 320, + 512 + ], + "id2label": { + "0": "Background", + "1": "Hat", + "2": "Hair", + "3": "Sunglasses", + "4": "Upper-clothes", + "5": "Skirt", + "6": "Pants", + "7": "Dress", + "8": "Belt", + "9": "Left-shoe", + "10": "Right-shoe", + "11": "Face", + "12": "Left-leg", + "13": "Right-leg", + "14": "Left-arm", + "15": "Right-arm", + "16": "Bag", + "17": "Scarf" + }, + "image_size": 224, + "initializer_range": 0.02, + "label2id": { + "Background": 0, + "Bag": 16, + "Belt": 8, + "Dress": 7, + "Face": 11, + "Hair": 2, + "Hat": 1, + "Left-arm": 14, + "Left-leg": 12, + "Left-shoe": 9, + "Pants": 6, + "Right-arm": 15, + "Right-leg": 13, + "Right-shoe": 10, + "Scarf": 17, + "Skirt": 5, + "Sunglasses": 3, + "Upper-clothes": 4 + }, + "layer_norm_eps": 1e-06, + "mlp_ratios": [ + 4, + 4, + 4, + 4 + ], + "model_type": "segformer", + "num_attention_heads": [ + 1, + 2, + 5, + 8 + ], + "num_channels": 3, + "num_encoder_blocks": 4, + "patch_sizes": [ + 7, + 3, + 3, + 3 + ], + "reshape_last_stage": true, + "semantic_loss_ignore_index": 255, + "sr_ratios": [ + 8, + 4, + 2, + 1 + ], + "strides": [ + 4, + 2, + 2, + 2 + ], + "transformers_version": "4.34.0" +} diff --git a/segformer_b2_clothes/onnx/model.onnx b/segformer_b2_clothes/onnx/model.onnx new file mode 100644 index 0000000000000000000000000000000000000000..fd685ae8a7ef9da57e1da399c06d8c7b7c925cc3 --- /dev/null +++ b/segformer_b2_clothes/onnx/model.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a93a8dac171b5c1fcc53632a8bfc180bfd9759ea69a3e207451bb07f76add54f +size 110039290 diff --git a/segformer_b2_clothes/onnx/preprocessor_config.json b/segformer_b2_clothes/onnx/preprocessor_config.json new file mode 100644 index 
0000000000000000000000000000000000000000..9063ab0ff9e34e2ad7d3e6622f3755ff3c650367 --- /dev/null +++ b/segformer_b2_clothes/onnx/preprocessor_config.json @@ -0,0 +1,24 @@ +{ + "do_normalize": true, + "do_reduce_labels": false, + "do_rescale": true, + "do_resize": true, + "feature_extractor_type": "SegformerFeatureExtractor", + "image_mean": [ + 0.485, + 0.456, + 0.406 + ], + "image_processor_type": "SegformerFeatureExtractor", + "image_std": [ + 0.229, + 0.224, + 0.225 + ], + "resample": 2, + "rescale_factor": 0.00392156862745098, + "size": { + "height": 512, + "width": 512 + } +} diff --git a/segformer_b2_clothes/optimizer.pt b/segformer_b2_clothes/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..924b784b7a4051188aac1caac738415b2e602b58 --- /dev/null +++ b/segformer_b2_clothes/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f642f5c29cb7c9ac0ff242ccf94220c88913f4a65db4727b2530a987ce14d9a +size 219104837 diff --git a/segformer_b2_clothes/preprocessor_config.json b/segformer_b2_clothes/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..d230721d08201d422c6e3d11994083b55e7e66ca --- /dev/null +++ b/segformer_b2_clothes/preprocessor_config.json @@ -0,0 +1,18 @@ +{ + "do_normalize": true, + "do_resize": true, + "feature_extractor_type": "SegformerFeatureExtractor", + "image_mean": [ + 0.485, + 0.456, + 0.406 + ], + "image_std": [ + 0.229, + 0.224, + 0.225 + ], + "reduce_labels": false, + "resample": 2, + "size": 512 +} \ No newline at end of file diff --git a/segformer_b2_clothes/pytorch_model.bin b/segformer_b2_clothes/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..3b799f7a56b1851657bdd3990740f6c374bef283 --- /dev/null +++ b/segformer_b2_clothes/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:934543143c97acf3197b030bb0ba046f6c713757467a7dcf47f27ce8c0d6264d +size 109579005 diff --git a/segformer_b2_clothes/rng_state.pth b/segformer_b2_clothes/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..c958a22f2b13e60ae38c7444aa0af1ba79406a37 --- /dev/null +++ b/segformer_b2_clothes/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a7c38376dfee2c075efd2b37186139541f47970794c545ba17f510796313aaa8 +size 14575 diff --git a/segformer_b2_clothes/scheduler.pt b/segformer_b2_clothes/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..c4d6b1344f61853d19202ef8956512f6c215b635 --- /dev/null +++ b/segformer_b2_clothes/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a9a297dec0fe2336eab64ac3bbd47e4936655c43239740a40cfe5f4623a0657 +size 627 diff --git a/segformer_b2_clothes/trainer_state.json b/segformer_b2_clothes/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..205f1598461cab0a4a5845aa80a06ada96501f15 --- /dev/null +++ b/segformer_b2_clothes/trainer_state.json @@ -0,0 +1,12226 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 4.993191103041307, + "global_step": 11000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 7.992737176577395e-05, + "loss": 2.2448, + "step": 10 + }, + { + "epoch": 0.01, + "learning_rate": 7.985474353154789e-05, + "loss": 1.2019, + "step": 20 + }, + { + "epoch": 0.01, + "learning_rate": 
7.978211529732184e-05, + "loss": 0.8748, + "step": 30 + }, + { + "epoch": 0.02, + "learning_rate": 7.970948706309578e-05, + "loss": 0.6175, + "step": 40 + }, + { + "epoch": 0.02, + "learning_rate": 7.963685882886972e-05, + "loss": 0.5248, + "step": 50 + }, + { + "epoch": 0.03, + "learning_rate": 7.956423059464368e-05, + "loss": 0.4516, + "step": 60 + }, + { + "epoch": 0.03, + "learning_rate": 7.949160236041762e-05, + "loss": 0.4111, + "step": 70 + }, + { + "epoch": 0.04, + "learning_rate": 7.941897412619157e-05, + "loss": 0.3828, + "step": 80 + }, + { + "epoch": 0.04, + "learning_rate": 7.934634589196551e-05, + "loss": 0.3985, + "step": 90 + }, + { + "epoch": 0.05, + "learning_rate": 7.927371765773945e-05, + "loss": 0.3005, + "step": 100 + }, + { + "epoch": 0.05, + "eval_loss": 0.30581027269363403, + "eval_mean_accuracy": 0.5292847927678034, + "eval_mean_iou": 0.41129172532009384, + "eval_overall_accuracy": 0.9130313316088045, + "eval_per_category_accuracy": [ + 0.9867883273464839, + 0.5434349649443806, + 0.780135770881479, + 0.0, + 0.8113983778550663, + 0.5378490454382858, + 0.7404555846704737, + 0.2147733081885067, + 0.0, + 0.40405372405372403, + 0.25176572429125105, + 0.9192750649381457, + 0.5542416776073233, + 0.7225067314291748, + 0.6504408087601311, + 0.6555691161356629, + 0.754438043280372, + 0.0 + ], + "eval_per_category_iou": [ + 0.9692330591898085, + 0.41746377262267553, + 0.6905409848042081, + 0.0, + 0.6326664141511412, + 0.3040235250652781, + 0.50160063365291, + 0.19516779272624604, + 0.0, + 0.2624766014150195, + 0.18683524714023958, + 0.7177784569221793, + 0.4419055676444967, + 0.49617640216212294, + 0.5149447942797659, + 0.4906163771139591, + 0.5818214268716386, + 0.0 + ], + "eval_runtime": 80.836, + "eval_samples_per_second": 1.101, + "eval_steps_per_second": 0.148, + "step": 100 + }, + { + "epoch": 0.05, + "learning_rate": 7.92010894235134e-05, + "loss": 0.3054, + "step": 110 + }, + { + "epoch": 0.05, + "learning_rate": 7.912846118928734e-05, + "loss": 0.3008, + "step": 120 + }, + { + "epoch": 0.06, + "learning_rate": 7.905583295506128e-05, + "loss": 0.28, + "step": 130 + }, + { + "epoch": 0.06, + "learning_rate": 7.898320472083523e-05, + "loss": 0.2834, + "step": 140 + }, + { + "epoch": 0.07, + "learning_rate": 7.891057648660917e-05, + "loss": 0.2652, + "step": 150 + }, + { + "epoch": 0.07, + "learning_rate": 7.883794825238313e-05, + "loss": 0.2602, + "step": 160 + }, + { + "epoch": 0.08, + "learning_rate": 7.876532001815707e-05, + "loss": 0.2308, + "step": 170 + }, + { + "epoch": 0.08, + "learning_rate": 7.869269178393101e-05, + "loss": 0.2486, + "step": 180 + }, + { + "epoch": 0.09, + "learning_rate": 7.862006354970496e-05, + "loss": 0.2408, + "step": 190 + }, + { + "epoch": 0.09, + "learning_rate": 7.85474353154789e-05, + "loss": 0.2458, + "step": 200 + }, + { + "epoch": 0.09, + "eval_loss": 0.23244497179985046, + "eval_mean_accuracy": 0.5973882303923332, + "eval_mean_iou": 0.4794550446888198, + "eval_overall_accuracy": 0.926452679580517, + "eval_per_category_accuracy": [ + 0.9907792090081089, + 0.5952492754092521, + 0.8835458622207549, + 0.3454964731416169, + 0.7917318774601994, + 0.3479833420921308, + 0.7821879338963764, + 0.5380378725887068, + 0.015571650284531816, + 0.5074578754578755, + 0.43118101870979164, + 0.9046111025449427, + 0.6666366922648058, + 0.7831467891522799, + 0.6957018451457148, + 0.7076567317574511, + 0.7660125959274607, + 0.0 + ], + "eval_per_category_iou": [ + 0.9718178183715939, + 0.4710299054403813, + 0.7456040814786654, + 0.30835351089588375, + 
0.6790629628823184, + 0.2858956985209414, + 0.5603084033173548, + 0.39362178720525015, + 0.015510666804081212, + 0.3084005627682499, + 0.29075652548837516, + 0.7718823584975677, + 0.5398866830767438, + 0.5739773379508709, + 0.5774116215829398, + 0.5246370047954892, + 0.6120338753220498, + 0.0 + ], + "eval_runtime": 79.6125, + "eval_samples_per_second": 1.118, + "eval_steps_per_second": 0.151, + "step": 200 + }, + { + "epoch": 0.1, + "learning_rate": 7.847480708125284e-05, + "loss": 0.2293, + "step": 210 + }, + { + "epoch": 0.1, + "learning_rate": 7.840217884702679e-05, + "loss": 0.2194, + "step": 220 + }, + { + "epoch": 0.1, + "learning_rate": 7.832955061280073e-05, + "loss": 0.2144, + "step": 230 + }, + { + "epoch": 0.11, + "learning_rate": 7.825692237857467e-05, + "loss": 0.219, + "step": 240 + }, + { + "epoch": 0.11, + "learning_rate": 7.818429414434862e-05, + "loss": 0.2142, + "step": 250 + }, + { + "epoch": 0.12, + "learning_rate": 7.811166591012257e-05, + "loss": 0.1925, + "step": 260 + }, + { + "epoch": 0.12, + "learning_rate": 7.803903767589652e-05, + "loss": 0.2189, + "step": 270 + }, + { + "epoch": 0.13, + "learning_rate": 7.796640944167046e-05, + "loss": 0.2039, + "step": 280 + }, + { + "epoch": 0.13, + "learning_rate": 7.78937812074444e-05, + "loss": 0.2164, + "step": 290 + }, + { + "epoch": 0.14, + "learning_rate": 7.782115297321835e-05, + "loss": 0.1981, + "step": 300 + }, + { + "epoch": 0.14, + "eval_loss": 0.18461237847805023, + "eval_mean_accuracy": 0.6624681867698782, + "eval_mean_iou": 0.5471385687290596, + "eval_overall_accuracy": 0.9407998417200667, + "eval_per_category_accuracy": [ + 0.9879307267213177, + 0.60820285302547, + 0.8681323594145482, + 0.5095632121540966, + 0.8110457078204237, + 0.792409876390551, + 0.8195600051502087, + 0.6689390942443275, + 0.08535954474909467, + 0.4324884004884005, + 0.5378032225697928, + 0.9206627842155828, + 0.7629039800010791, + 0.7839079246079932, + 0.7553457492671151, + 0.712108170606372, + 0.8469511498952397, + 0.02111260053619303 + ], + "eval_per_category_iou": [ + 0.9761332103456535, + 0.506465313827277, + 0.7501968813986455, + 0.4387153284671533, + 0.7108649071563323, + 0.5753079215555509, + 0.6404749829013227, + 0.5486831870207801, + 0.08104125736738703, + 0.31538757425954156, + 0.3560139873801012, + 0.7866483347577722, + 0.6376416739319966, + 0.665543581794072, + 0.61706426945715, + 0.5785792942920218, + 0.6433084891951437, + 0.020424042015172145 + ], + "eval_runtime": 80.127, + "eval_samples_per_second": 1.111, + "eval_steps_per_second": 0.15, + "step": 300 + }, + { + "epoch": 0.14, + "learning_rate": 7.774852473899229e-05, + "loss": 0.1953, + "step": 310 + }, + { + "epoch": 0.15, + "learning_rate": 7.767589650476623e-05, + "loss": 0.216, + "step": 320 + }, + { + "epoch": 0.15, + "learning_rate": 7.760326827054018e-05, + "loss": 0.2082, + "step": 330 + }, + { + "epoch": 0.15, + "learning_rate": 7.753064003631412e-05, + "loss": 0.1968, + "step": 340 + }, + { + "epoch": 0.16, + "learning_rate": 7.745801180208806e-05, + "loss": 0.2073, + "step": 350 + }, + { + "epoch": 0.16, + "learning_rate": 7.738538356786201e-05, + "loss": 0.1935, + "step": 360 + }, + { + "epoch": 0.17, + "learning_rate": 7.731275533363596e-05, + "loss": 0.2165, + "step": 370 + }, + { + "epoch": 0.17, + "learning_rate": 7.724012709940991e-05, + "loss": 0.1938, + "step": 380 + }, + { + "epoch": 0.18, + "learning_rate": 7.716749886518385e-05, + "loss": 0.1906, + "step": 390 + }, + { + "epoch": 0.18, + "learning_rate": 7.70948706309578e-05, + "loss": 0.213, + 
"step": 400 + }, + { + "epoch": 0.18, + "eval_loss": 0.19584733247756958, + "eval_mean_accuracy": 0.6836090641053443, + "eval_mean_iou": 0.5613970854128544, + "eval_overall_accuracy": 0.937813962443491, + "eval_per_category_accuracy": [ + 0.9872100638090652, + 0.6182904515940997, + 0.8384488142401233, + 0.6422951709169832, + 0.7890436261066395, + 0.5009693559854972, + 0.8500088736702545, + 0.8491183956444536, + 0.09529229177444387, + 0.4957167277167277, + 0.40987428436522455, + 0.9088928688625051, + 0.7835323633616891, + 0.7806346021857347, + 0.7904002845318159, + 0.7346993833504625, + 0.8303345233944204, + 0.40020107238605895 + ], + "eval_per_category_iou": [ + 0.9775908413156985, + 0.5344072327264076, + 0.7521034650296529, + 0.5101820924469346, + 0.7105627872001641, + 0.40911795785441996, + 0.6171770972037284, + 0.5161374621460837, + 0.09092704116892092, + 0.3422119125808344, + 0.3093433312298155, + 0.7932841949798822, + 0.6599897744702607, + 0.6609991431658161, + 0.6341190305315129, + 0.5955388476397755, + 0.6818525050425194, + 0.3096028206989526 + ], + "eval_runtime": 80.4083, + "eval_samples_per_second": 1.107, + "eval_steps_per_second": 0.149, + "step": 400 + }, + { + "epoch": 0.19, + "learning_rate": 7.702224239673174e-05, + "loss": 0.2075, + "step": 410 + }, + { + "epoch": 0.19, + "learning_rate": 7.694961416250568e-05, + "loss": 0.184, + "step": 420 + }, + { + "epoch": 0.2, + "learning_rate": 7.687698592827962e-05, + "loss": 0.2022, + "step": 430 + }, + { + "epoch": 0.2, + "learning_rate": 7.680435769405357e-05, + "loss": 0.2174, + "step": 440 + }, + { + "epoch": 0.2, + "learning_rate": 7.673172945982751e-05, + "loss": 0.1752, + "step": 450 + }, + { + "epoch": 0.21, + "learning_rate": 7.665910122560145e-05, + "loss": 0.1899, + "step": 460 + }, + { + "epoch": 0.21, + "learning_rate": 7.658647299137541e-05, + "loss": 0.172, + "step": 470 + }, + { + "epoch": 0.22, + "learning_rate": 7.651384475714935e-05, + "loss": 0.1834, + "step": 480 + }, + { + "epoch": 0.22, + "learning_rate": 7.64412165229233e-05, + "loss": 0.1858, + "step": 490 + }, + { + "epoch": 0.23, + "learning_rate": 7.636858828869724e-05, + "loss": 0.2084, + "step": 500 + }, + { + "epoch": 0.23, + "eval_loss": 0.1708747297525406, + "eval_mean_accuracy": 0.6622155280724535, + "eval_mean_iou": 0.5659747611486945, + "eval_overall_accuracy": 0.9442667586080143, + "eval_per_category_accuracy": [ + 0.9866500967447802, + 0.5469648148448, + 0.8757806481787168, + 0.4290558871405317, + 0.9149488868594733, + 0.7030833911119551, + 0.8079024383453911, + 0.6823532812011712, + 0.04759441282979824, + 0.4875702075702076, + 0.5283204469889237, + 0.8952884756042114, + 0.7860142438357642, + 0.8228842194181933, + 0.7062640110363856, + 0.7089606885919836, + 0.8114900037954411, + 0.17875335120643432 + ], + "eval_per_category_iou": [ + 0.9778258858246885, + 0.5132802528414271, + 0.7644472861480458, + 0.391654284299158, + 0.7185696664929735, + 0.5427059364933272, + 0.6772939108618439, + 0.5690865593212168, + 0.0467979042677654, + 0.3500497917163415, + 0.38005066587412784, + 0.7938747177854749, + 0.6864153225173156, + 0.7103305267235585, + 0.6054474564369792, + 0.5835536333643516, + 0.7060943062886968, + 0.17006759341920674 + ], + "eval_runtime": 82.1375, + "eval_samples_per_second": 1.084, + "eval_steps_per_second": 0.146, + "step": 500 + }, + { + "epoch": 0.23, + "learning_rate": 7.629596005447118e-05, + "loss": 0.189, + "step": 510 + }, + { + "epoch": 0.24, + "learning_rate": 7.622333182024513e-05, + "loss": 0.1882, + "step": 520 + }, + { + 
"epoch": 0.24, + "learning_rate": 7.615070358601907e-05, + "loss": 0.1683, + "step": 530 + }, + { + "epoch": 0.25, + "learning_rate": 7.607807535179301e-05, + "loss": 0.2009, + "step": 540 + }, + { + "epoch": 0.25, + "learning_rate": 7.600544711756696e-05, + "loss": 0.1955, + "step": 550 + }, + { + "epoch": 0.25, + "learning_rate": 7.59328188833409e-05, + "loss": 0.159, + "step": 560 + }, + { + "epoch": 0.26, + "learning_rate": 7.586019064911484e-05, + "loss": 0.1591, + "step": 570 + }, + { + "epoch": 0.26, + "learning_rate": 7.57875624148888e-05, + "loss": 0.1813, + "step": 580 + }, + { + "epoch": 0.27, + "learning_rate": 7.571493418066274e-05, + "loss": 0.1742, + "step": 590 + }, + { + "epoch": 0.27, + "learning_rate": 7.564230594643669e-05, + "loss": 0.1768, + "step": 600 + }, + { + "epoch": 0.27, + "eval_loss": 0.16358888149261475, + "eval_mean_accuracy": 0.7168573384583677, + "eval_mean_iou": 0.5947241030305628, + "eval_overall_accuracy": 0.9456647380014483, + "eval_per_category_accuracy": [ + 0.9883615047521485, + 0.6133680920999368, + 0.8897205485858919, + 0.606348345089528, + 0.8547445440779222, + 0.6468916573558442, + 0.8031315356321359, + 0.7755932607529358, + 0.18939472322814277, + 0.6884297924297924, + 0.44591670109583115, + 0.912668572309663, + 0.8122943006672302, + 0.8121667282614434, + 0.7158238489394724, + 0.739613309352518, + 0.8623828581831872, + 0.5465817694369973 + ], + "eval_per_category_iou": [ + 0.9789898289726563, + 0.5284958912900931, + 0.7700365647308839, + 0.5212220149253731, + 0.7458776898298387, + 0.5271602805638629, + 0.6717823107866594, + 0.5744670602921285, + 0.1729007273070747, + 0.40336528831020174, + 0.34301128968794453, + 0.8029663743656279, + 0.7079338085180584, + 0.7184545748212611, + 0.6150804308165325, + 0.5833481100196065, + 0.6760090852137903, + 0.3639325240985362 + ], + "eval_runtime": 81.4207, + "eval_samples_per_second": 1.093, + "eval_steps_per_second": 0.147, + "step": 600 + }, + { + "epoch": 0.28, + "learning_rate": 7.556967771221063e-05, + "loss": 0.192, + "step": 610 + }, + { + "epoch": 0.28, + "learning_rate": 7.549704947798457e-05, + "loss": 0.1611, + "step": 620 + }, + { + "epoch": 0.29, + "learning_rate": 7.542442124375852e-05, + "loss": 0.1801, + "step": 630 + }, + { + "epoch": 0.29, + "learning_rate": 7.535179300953246e-05, + "loss": 0.17, + "step": 640 + }, + { + "epoch": 0.3, + "learning_rate": 7.52791647753064e-05, + "loss": 0.1827, + "step": 650 + }, + { + "epoch": 0.3, + "learning_rate": 7.520653654108035e-05, + "loss": 0.191, + "step": 660 + }, + { + "epoch": 0.3, + "learning_rate": 7.513390830685429e-05, + "loss": 0.1678, + "step": 670 + }, + { + "epoch": 0.31, + "learning_rate": 7.506128007262825e-05, + "loss": 0.2309, + "step": 680 + }, + { + "epoch": 0.31, + "learning_rate": 7.498865183840219e-05, + "loss": 0.1681, + "step": 690 + }, + { + "epoch": 0.32, + "learning_rate": 7.491602360417613e-05, + "loss": 0.1672, + "step": 700 + }, + { + "epoch": 0.32, + "eval_loss": 0.1567693054676056, + "eval_mean_accuracy": 0.7141943900361636, + "eval_mean_iou": 0.596780275848511, + "eval_overall_accuracy": 0.9485598360554556, + "eval_per_category_accuracy": [ + 0.9900870575442013, + 0.6031185738111045, + 0.8562610047320348, + 0.5413049376017363, + 0.8634782733988677, + 0.7461214720079138, + 0.822484836463477, + 0.7418608978075951, + 0.21919296430419039, + 0.47965811965811966, + 0.5514470085974542, + 0.927099271348539, + 0.7968634785892847, + 0.868974710944512, + 0.7993457923779962, + 0.7131680369989722, + 0.8430754799073048, + 
0.49195710455764075 + ], + "eval_per_category_iou": [ + 0.9792376762603316, + 0.5245458386142797, + 0.7704556601277661, + 0.48261474269819193, + 0.7415891427416451, + 0.6085604990922975, + 0.6909641574493773, + 0.6132853682648054, + 0.19095047095407636, + 0.3546614086785694, + 0.40903154254193635, + 0.7993550454399804, + 0.6968145754208833, + 0.7112413080349587, + 0.6509687753713822, + 0.6050298356992998, + 0.6898486321344258, + 0.2228902857489903 + ], + "eval_runtime": 86.481, + "eval_samples_per_second": 1.029, + "eval_steps_per_second": 0.139, + "step": 700 + }, + { + "epoch": 0.32, + "learning_rate": 7.484339536995008e-05, + "loss": 0.194, + "step": 710 + }, + { + "epoch": 0.33, + "learning_rate": 7.477076713572402e-05, + "loss": 0.1855, + "step": 720 + }, + { + "epoch": 0.33, + "learning_rate": 7.469813890149796e-05, + "loss": 0.1533, + "step": 730 + }, + { + "epoch": 0.34, + "learning_rate": 7.462551066727191e-05, + "loss": 0.1814, + "step": 740 + }, + { + "epoch": 0.34, + "learning_rate": 7.455288243304585e-05, + "loss": 0.1775, + "step": 750 + }, + { + "epoch": 0.34, + "learning_rate": 7.44802541988198e-05, + "loss": 0.1746, + "step": 760 + }, + { + "epoch": 0.35, + "learning_rate": 7.440762596459374e-05, + "loss": 0.1722, + "step": 770 + }, + { + "epoch": 0.35, + "learning_rate": 7.43349977303677e-05, + "loss": 0.1586, + "step": 780 + }, + { + "epoch": 0.36, + "learning_rate": 7.426236949614164e-05, + "loss": 0.1738, + "step": 790 + }, + { + "epoch": 0.36, + "learning_rate": 7.418974126191558e-05, + "loss": 0.1745, + "step": 800 + }, + { + "epoch": 0.36, + "eval_loss": 0.15606097877025604, + "eval_mean_accuracy": 0.7226644915074127, + "eval_mean_iou": 0.6052765335352305, + "eval_overall_accuracy": 0.9491321263688334, + "eval_per_category_accuracy": [ + 0.9890046209624822, + 0.6175132369371266, + 0.8928362633432376, + 0.6507053716766142, + 0.8532096746859076, + 0.5752365979358679, + 0.8398441713905911, + 0.8413509563885382, + 0.23833419555095706, + 0.6422954822954823, + 0.5084695744555273, + 0.9224774940399236, + 0.8665719475567865, + 0.8431225032117277, + 0.8245656578720469, + 0.7727517985611511, + 0.8572852412882283, + 0.27238605898123325 + ], + "eval_per_category_iou": [ + 0.9807662339083965, + 0.5198184445110814, + 0.7770744008201076, + 0.554374205477869, + 0.7651092891972128, + 0.5236022173180742, + 0.7019472398347972, + 0.5826306354990312, + 0.2023276240667545, + 0.4164344521849272, + 0.37874236140623396, + 0.8109587612741775, + 0.710578901186412, + 0.707150659050318, + 0.6494664640616643, + 0.6240312892282475, + 0.7346238867852686, + 0.25534053782357374 + ], + "eval_runtime": 83.6193, + "eval_samples_per_second": 1.064, + "eval_steps_per_second": 0.144, + "step": 800 + }, + { + "epoch": 0.37, + "learning_rate": 7.411711302768953e-05, + "loss": 0.1652, + "step": 810 + }, + { + "epoch": 0.37, + "learning_rate": 7.404448479346347e-05, + "loss": 0.1851, + "step": 820 + }, + { + "epoch": 0.38, + "learning_rate": 7.397185655923741e-05, + "loss": 0.1703, + "step": 830 + }, + { + "epoch": 0.38, + "learning_rate": 7.389922832501136e-05, + "loss": 0.1454, + "step": 840 + }, + { + "epoch": 0.39, + "learning_rate": 7.38266000907853e-05, + "loss": 0.1894, + "step": 850 + }, + { + "epoch": 0.39, + "learning_rate": 7.375397185655924e-05, + "loss": 0.1734, + "step": 860 + }, + { + "epoch": 0.39, + "learning_rate": 7.368134362233319e-05, + "loss": 0.1696, + "step": 870 + }, + { + "epoch": 0.4, + "learning_rate": 7.360871538810713e-05, + "loss": 0.1804, + "step": 880 + }, + { + "epoch": 0.4, 
+ "learning_rate": 7.353608715388109e-05, + "loss": 0.1661, + "step": 890 + }, + { + "epoch": 0.41, + "learning_rate": 7.346345891965503e-05, + "loss": 0.1339, + "step": 900 + }, + { + "epoch": 0.41, + "eval_loss": 0.15481343865394592, + "eval_mean_accuracy": 0.719634431340832, + "eval_mean_iou": 0.6087190770313099, + "eval_overall_accuracy": 0.9506247874056355, + "eval_per_category_accuracy": [ + 0.9919838454355556, + 0.5911527064881232, + 0.8761297045229449, + 0.668407487791644, + 0.8954285318291567, + 0.6038513683596907, + 0.8717180469573751, + 0.737148049198599, + 0.25064666321779616, + 0.3186715506715507, + 0.7722658324971965, + 0.9177924588725077, + 0.7597341870042983, + 0.823957728384633, + 0.7929276599413692, + 0.7448484069886948, + 0.8559244367781333, + 0.48083109919571043 + ], + "eval_per_category_iou": [ + 0.9811278355168047, + 0.5256497012454107, + 0.77428604860021, + 0.5607396870554765, + 0.7664850267467621, + 0.5611307522001325, + 0.7133819452453877, + 0.5870733942446437, + 0.19933349790175264, + 0.28642920482181583, + 0.44613601104727485, + 0.8111479557072823, + 0.6859910442065434, + 0.7038314824210309, + 0.6581561679496527, + 0.6242086904095431, + 0.7302158936248069, + 0.3416190476190476 + ], + "eval_runtime": 76.1804, + "eval_samples_per_second": 1.168, + "eval_steps_per_second": 0.158, + "step": 900 + }, + { + "epoch": 0.41, + "learning_rate": 7.339083068542897e-05, + "loss": 0.1726, + "step": 910 + }, + { + "epoch": 0.42, + "learning_rate": 7.331820245120292e-05, + "loss": 0.1685, + "step": 920 + }, + { + "epoch": 0.42, + "learning_rate": 7.324557421697686e-05, + "loss": 0.1719, + "step": 930 + }, + { + "epoch": 0.43, + "learning_rate": 7.31729459827508e-05, + "loss": 0.1705, + "step": 940 + }, + { + "epoch": 0.43, + "learning_rate": 7.310031774852475e-05, + "loss": 0.1648, + "step": 950 + }, + { + "epoch": 0.44, + "learning_rate": 7.302768951429869e-05, + "loss": 0.1671, + "step": 960 + }, + { + "epoch": 0.44, + "learning_rate": 7.295506128007263e-05, + "loss": 0.1721, + "step": 970 + }, + { + "epoch": 0.44, + "learning_rate": 7.288243304584658e-05, + "loss": 0.1781, + "step": 980 + }, + { + "epoch": 0.45, + "learning_rate": 7.280980481162053e-05, + "loss": 0.157, + "step": 990 + }, + { + "epoch": 0.45, + "learning_rate": 7.273717657739448e-05, + "loss": 0.1525, + "step": 1000 + }, + { + "epoch": 0.45, + "eval_loss": 0.1561342477798462, + "eval_mean_accuracy": 0.709750323770174, + "eval_mean_iou": 0.6044507479852305, + "eval_overall_accuracy": 0.9485302614362052, + "eval_per_category_accuracy": [ + 0.9892216740702133, + 0.615634968182775, + 0.8966913585341697, + 0.5729788388497016, + 0.9087696138400346, + 0.5404590198684855, + 0.9021390765119168, + 0.643297512793695, + 0.20713916192446974, + 0.5912087912087912, + 0.6094158846327884, + 0.9404664476363306, + 0.8413754653525889, + 0.8410942752054625, + 0.7898613985169857, + 0.7273830935251798, + 0.8567606681210714, + 0.30160857908847183 + ], + "eval_per_category_iou": [ + 0.9809378800002442, + 0.5447368798085878, + 0.7765388584370854, + 0.5160659743433109, + 0.7594886697914242, + 0.49892168893273703, + 0.6637198728091062, + 0.5417948413676431, + 0.17752948479205463, + 0.4290757768026145, + 0.45001343822412054, + 0.7856182729113205, + 0.742127889084535, + 0.749399262258774, + 0.6515890409740961, + 0.6199102206164121, + 0.7204954276995257, + 0.2721499848805564 + ], + "eval_runtime": 76.0709, + "eval_samples_per_second": 1.17, + "eval_steps_per_second": 0.158, + "step": 1000 + }, + { + "epoch": 0.46, + "learning_rate": 
7.266454834316842e-05, + "loss": 0.1442, + "step": 1010 + }, + { + "epoch": 0.46, + "learning_rate": 7.259192010894236e-05, + "loss": 0.17, + "step": 1020 + }, + { + "epoch": 0.47, + "learning_rate": 7.25192918747163e-05, + "loss": 0.1451, + "step": 1030 + }, + { + "epoch": 0.47, + "learning_rate": 7.244666364049025e-05, + "loss": 0.1649, + "step": 1040 + }, + { + "epoch": 0.48, + "learning_rate": 7.237403540626419e-05, + "loss": 0.1765, + "step": 1050 + }, + { + "epoch": 0.48, + "learning_rate": 7.230140717203814e-05, + "loss": 0.1611, + "step": 1060 + }, + { + "epoch": 0.49, + "learning_rate": 7.222877893781208e-05, + "loss": 0.1544, + "step": 1070 + }, + { + "epoch": 0.49, + "learning_rate": 7.215615070358602e-05, + "loss": 0.1671, + "step": 1080 + }, + { + "epoch": 0.49, + "learning_rate": 7.208352246935997e-05, + "loss": 0.1831, + "step": 1090 + }, + { + "epoch": 0.5, + "learning_rate": 7.201089423513392e-05, + "loss": 0.1444, + "step": 1100 + }, + { + "epoch": 0.5, + "eval_loss": 0.1541578769683838, + "eval_mean_accuracy": 0.7261158297983399, + "eval_mean_iou": 0.6067205446661403, + "eval_overall_accuracy": 0.9484547818644663, + "eval_per_category_accuracy": [ + 0.9907627899639259, + 0.6337861688175003, + 0.8650957411687026, + 0.5448996201844818, + 0.8902592399206012, + 0.6789908098869895, + 0.7145583870103387, + 0.7030056377157948, + 0.2124159337816865, + 0.5363516483516484, + 0.5988805603100592, + 0.9369991262508253, + 0.8605920543855547, + 0.8890370096615807, + 0.8072943610967408, + 0.7692381808838643, + 0.8745128412425596, + 0.5634048257372655 + ], + "eval_per_category_iou": [ + 0.981575441938742, + 0.5338588906013448, + 0.775538517031844, + 0.4985417313062364, + 0.7595387897122733, + 0.5114676877169579, + 0.6549842187273229, + 0.5485690202468412, + 0.1797644586489208, + 0.4145013965426134, + 0.44945222064729506, + 0.8091983064736411, + 0.7258117726467789, + 0.7419751635810855, + 0.6691561400060748, + 0.6295969170754275, + 0.7225487021678552, + 0.31489042891927327 + ], + "eval_runtime": 72.9643, + "eval_samples_per_second": 1.22, + "eval_steps_per_second": 0.164, + "step": 1100 + }, + { + "epoch": 0.5, + "learning_rate": 7.193826600090787e-05, + "loss": 0.144, + "step": 1110 + }, + { + "epoch": 0.51, + "learning_rate": 7.186563776668181e-05, + "loss": 0.1601, + "step": 1120 + }, + { + "epoch": 0.51, + "learning_rate": 7.179300953245575e-05, + "loss": 0.1434, + "step": 1130 + }, + { + "epoch": 0.52, + "learning_rate": 7.17203812982297e-05, + "loss": 0.1475, + "step": 1140 + }, + { + "epoch": 0.52, + "learning_rate": 7.164775306400364e-05, + "loss": 0.1512, + "step": 1150 + }, + { + "epoch": 0.53, + "learning_rate": 7.157512482977758e-05, + "loss": 0.1462, + "step": 1160 + }, + { + "epoch": 0.53, + "learning_rate": 7.150249659555153e-05, + "loss": 0.1585, + "step": 1170 + }, + { + "epoch": 0.54, + "learning_rate": 7.142986836132547e-05, + "loss": 0.1654, + "step": 1180 + }, + { + "epoch": 0.54, + "learning_rate": 7.135724012709941e-05, + "loss": 0.1471, + "step": 1190 + }, + { + "epoch": 0.54, + "learning_rate": 7.128461189287337e-05, + "loss": 0.1752, + "step": 1200 + }, + { + "epoch": 0.54, + "eval_loss": 0.15197259187698364, + "eval_mean_accuracy": 0.7405501900221605, + "eval_mean_iou": 0.6187261085826632, + "eval_overall_accuracy": 0.9496975587994865, + "eval_per_category_accuracy": [ + 0.9902535777152744, + 0.6046406191810101, + 0.8918200451194014, + 0.7129001627780792, + 0.8910963166565234, + 0.6483997897888615, + 0.88835182884604, + 0.6013659854803077, + 0.3084324883600621, 
+ 0.6496898656898656, + 0.5727439060378917, + 0.9107629293132964, + 0.8787745265543226, + 0.8375877725568872, + 0.7801830056906364, + 0.7421184480986639, + 0.8873371123172865, + 0.5334450402144773 + ], + "eval_per_category_iou": [ + 0.9817709454707259, + 0.5506045414332056, + 0.7855775347611402, + 0.5654115115653577, + 0.7486948450993278, + 0.559655111294076, + 0.716737769654396, + 0.4881858588110045, + 0.24148406172789502, + 0.4524287298229384, + 0.4322109388918582, + 0.817335769660629, + 0.7500805919285265, + 0.7571950171821306, + 0.6612331582553095, + 0.6318837884696372, + 0.7427932302859976, + 0.2537865501737827 + ], + "eval_runtime": 75.6612, + "eval_samples_per_second": 1.176, + "eval_steps_per_second": 0.159, + "step": 1200 + }, + { + "epoch": 0.55, + "learning_rate": 7.121198365864731e-05, + "loss": 0.1617, + "step": 1210 + }, + { + "epoch": 0.55, + "learning_rate": 7.113935542442126e-05, + "loss": 0.1982, + "step": 1220 + }, + { + "epoch": 0.56, + "learning_rate": 7.10667271901952e-05, + "loss": 0.1574, + "step": 1230 + }, + { + "epoch": 0.56, + "learning_rate": 7.099409895596914e-05, + "loss": 0.1615, + "step": 1240 + }, + { + "epoch": 0.57, + "learning_rate": 7.092147072174309e-05, + "loss": 0.1692, + "step": 1250 + }, + { + "epoch": 0.57, + "learning_rate": 7.084884248751703e-05, + "loss": 0.1685, + "step": 1260 + }, + { + "epoch": 0.58, + "learning_rate": 7.077621425329097e-05, + "loss": 0.1412, + "step": 1270 + }, + { + "epoch": 0.58, + "learning_rate": 7.070358601906492e-05, + "loss": 0.144, + "step": 1280 + }, + { + "epoch": 0.59, + "learning_rate": 7.063095778483886e-05, + "loss": 0.1503, + "step": 1290 + }, + { + "epoch": 0.59, + "learning_rate": 7.055832955061282e-05, + "loss": 0.1718, + "step": 1300 + }, + { + "epoch": 0.59, + "eval_loss": 0.15203571319580078, + "eval_mean_accuracy": 0.7414586755906867, + "eval_mean_iou": 0.6240856832008199, + "eval_overall_accuracy": 0.9509381497843882, + "eval_per_category_accuracy": [ + 0.9905460696746577, + 0.6256416068913033, + 0.9004415648728954, + 0.6867878459034183, + 0.8565435729316829, + 0.6157419879084434, + 0.8422365824885947, + 0.819614771140531, + 0.2132436627004656, + 0.759931623931624, + 0.5529815656416612, + 0.9297837767313873, + 0.7906048234807475, + 0.8327965788501135, + 0.7729942662528022, + 0.7829522096608428, + 0.8637621770403582, + 0.5096514745308312 + ], + "eval_per_category_iou": [ + 0.9817290567284135, + 0.5610588371957949, + 0.7816651963835514, + 0.5702861004730795, + 0.765167781080122, + 0.5597764555681353, + 0.7079144644669532, + 0.5854913989220103, + 0.1928872250818905, + 0.43874104026031885, + 0.38286033413017867, + 0.8100921792328042, + 0.7233445636032301, + 0.7350904649654567, + 0.6550626552681572, + 0.6321275768183586, + 0.7493401292436516, + 0.4009068381926504 + ], + "eval_runtime": 73.6261, + "eval_samples_per_second": 1.209, + "eval_steps_per_second": 0.163, + "step": 1300 + }, + { + "epoch": 0.59, + "learning_rate": 7.048570131638676e-05, + "loss": 0.1693, + "step": 1310 + }, + { + "epoch": 0.6, + "learning_rate": 7.04130730821607e-05, + "loss": 0.1236, + "step": 1320 + }, + { + "epoch": 0.6, + "learning_rate": 7.034044484793465e-05, + "loss": 0.1621, + "step": 1330 + }, + { + "epoch": 0.61, + "learning_rate": 7.026781661370859e-05, + "loss": 0.1611, + "step": 1340 + }, + { + "epoch": 0.61, + "learning_rate": 7.019518837948253e-05, + "loss": 0.1957, + "step": 1350 + }, + { + "epoch": 0.62, + "learning_rate": 7.012256014525648e-05, + "loss": 0.168, + "step": 1360 + }, + { + "epoch": 0.62, + 
"learning_rate": 7.004993191103042e-05, + "loss": 0.1436, + "step": 1370 + }, + { + "epoch": 0.63, + "learning_rate": 6.997730367680436e-05, + "loss": 0.1626, + "step": 1380 + }, + { + "epoch": 0.63, + "learning_rate": 6.99046754425783e-05, + "loss": 0.1659, + "step": 1390 + }, + { + "epoch": 0.64, + "learning_rate": 6.983204720835225e-05, + "loss": 0.1539, + "step": 1400 + }, + { + "epoch": 0.64, + "eval_loss": 0.14264430105686188, + "eval_mean_accuracy": 0.7458751803493104, + "eval_mean_iou": 0.6292022953035405, + "eval_overall_accuracy": 0.9526165308577291, + "eval_per_category_accuracy": [ + 0.9905214411083831, + 0.6409592124224809, + 0.8682527236711786, + 0.665491047205643, + 0.9027220774872329, + 0.6931469402355601, + 0.900312144400714, + 0.6993621667904037, + 0.22519399896533884, + 0.5980268620268621, + 0.7019024572586515, + 0.9121901847524838, + 0.8336016042299876, + 0.8247628601094628, + 0.7937198223831695, + 0.7335495889003083, + 0.8290354333745792, + 0.6130026809651474 + ], + "eval_per_category_iou": [ + 0.9813920975948569, + 0.5516996278797508, + 0.7827110010897079, + 0.5675941458899751, + 0.7729552750191524, + 0.597299971458472, + 0.7236993928611889, + 0.5609767053003881, + 0.20242745535714285, + 0.4380612339813536, + 0.48269890341827726, + 0.8207891170014835, + 0.7432858798013126, + 0.7517122463710001, + 0.6698090469628962, + 0.6459694434545526, + 0.7570778443113773, + 0.2754819277108434 + ], + "eval_runtime": 74.8707, + "eval_samples_per_second": 1.189, + "eval_steps_per_second": 0.16, + "step": 1400 + }, + { + "epoch": 0.64, + "learning_rate": 6.97594189741262e-05, + "loss": 0.1324, + "step": 1410 + }, + { + "epoch": 0.64, + "learning_rate": 6.968679073990015e-05, + "loss": 0.1607, + "step": 1420 + }, + { + "epoch": 0.65, + "learning_rate": 6.96141625056741e-05, + "loss": 0.1534, + "step": 1430 + }, + { + "epoch": 0.65, + "learning_rate": 6.954153427144804e-05, + "loss": 0.1372, + "step": 1440 + }, + { + "epoch": 0.66, + "learning_rate": 6.946890603722198e-05, + "loss": 0.1451, + "step": 1450 + }, + { + "epoch": 0.66, + "learning_rate": 6.939627780299592e-05, + "loss": 0.1243, + "step": 1460 + }, + { + "epoch": 0.67, + "learning_rate": 6.932364956876987e-05, + "loss": 0.1668, + "step": 1470 + }, + { + "epoch": 0.67, + "learning_rate": 6.925102133454381e-05, + "loss": 0.158, + "step": 1480 + }, + { + "epoch": 0.68, + "learning_rate": 6.917839310031775e-05, + "loss": 0.1576, + "step": 1490 + }, + { + "epoch": 0.68, + "learning_rate": 6.91057648660917e-05, + "loss": 0.1409, + "step": 1500 + }, + { + "epoch": 0.68, + "eval_loss": 0.1411726325750351, + "eval_mean_accuracy": 0.743708398966615, + "eval_mean_iou": 0.6416885840188883, + "eval_overall_accuracy": 0.9534817384869865, + "eval_per_category_accuracy": [ + 0.9921596290538537, + 0.6701695299470523, + 0.8661704220314735, + 0.6604720564297342, + 0.8799426122145962, + 0.760381383065638, + 0.8176756551726538, + 0.7018658111645197, + 0.2421107087428867, + 0.5839120879120879, + 0.5768852426764248, + 0.9228965773544773, + 0.8362993003974606, + 0.8946421343470073, + 0.8340877737540955, + 0.7522160842754368, + 0.9064593471224076, + 0.4884048257372654 + ], + "eval_per_category_iou": [ + 0.9811417721668967, + 0.5928042509918503, + 0.7835484031532512, + 0.5689413414349147, + 0.7742403844412381, + 0.6280709561867798, + 0.7250641832679158, + 0.5495596286935109, + 0.20547043069763357, + 0.46297902661172896, + 0.47025475306513564, + 0.8225947408853524, + 0.746202851572216, + 0.7547453437358493, + 0.6814659575217499, + 0.6435401439797769, + 
0.7531297189341907, + 0.406640625 + ], + "eval_runtime": 75.8092, + "eval_samples_per_second": 1.174, + "eval_steps_per_second": 0.158, + "step": 1500 + }, + { + "epoch": 0.69, + "learning_rate": 6.903313663186565e-05, + "loss": 0.149, + "step": 1510 + }, + { + "epoch": 0.69, + "learning_rate": 6.89605083976396e-05, + "loss": 0.1326, + "step": 1520 + }, + { + "epoch": 0.69, + "learning_rate": 6.888788016341354e-05, + "loss": 0.1549, + "step": 1530 + }, + { + "epoch": 0.7, + "learning_rate": 6.881525192918748e-05, + "loss": 0.1538, + "step": 1540 + }, + { + "epoch": 0.7, + "learning_rate": 6.874262369496143e-05, + "loss": 0.1435, + "step": 1550 + }, + { + "epoch": 0.71, + "learning_rate": 6.866999546073537e-05, + "loss": 0.1432, + "step": 1560 + }, + { + "epoch": 0.71, + "learning_rate": 6.859736722650931e-05, + "loss": 0.1504, + "step": 1570 + }, + { + "epoch": 0.72, + "learning_rate": 6.852473899228326e-05, + "loss": 0.1373, + "step": 1580 + }, + { + "epoch": 0.72, + "learning_rate": 6.84521107580572e-05, + "loss": 0.1476, + "step": 1590 + }, + { + "epoch": 0.73, + "learning_rate": 6.837948252383114e-05, + "loss": 0.137, + "step": 1600 + }, + { + "epoch": 0.73, + "eval_loss": 0.13676507771015167, + "eval_mean_accuracy": 0.7591539253549602, + "eval_mean_iou": 0.6450359568354115, + "eval_overall_accuracy": 0.9548248548186227, + "eval_per_category_accuracy": [ + 0.9906033144503231, + 0.7076863291180233, + 0.8855043606250688, + 0.7053038524145415, + 0.9115360838394102, + 0.713271565410552, + 0.8991724867503924, + 0.6526473420003505, + 0.26078634247284016, + 0.5975970695970696, + 0.674821460190049, + 0.9255099176461751, + 0.8615002787619374, + 0.835172377382398, + 0.7989739610277634, + 0.742587358684481, + 0.9155684058838595, + 0.5865281501340482 + ], + "eval_per_category_iou": [ + 0.9825673729388702, + 0.6056818181818182, + 0.788577002408694, + 0.5720336652181088, + 0.7769033245790549, + 0.6014769492024065, + 0.7574049918658674, + 0.5589727801276676, + 0.21814955859442617, + 0.45864069809283914, + 0.5034824921286137, + 0.8186695203922474, + 0.7627312280813493, + 0.7667370819011305, + 0.6650399210549923, + 0.6483318658762962, + 0.7662021076878184, + 0.3590448447052066 + ], + "eval_runtime": 77.081, + "eval_samples_per_second": 1.155, + "eval_steps_per_second": 0.156, + "step": 1600 + }, + { + "epoch": 0.73, + "learning_rate": 6.830685428960509e-05, + "loss": 0.1419, + "step": 1610 + }, + { + "epoch": 0.74, + "learning_rate": 6.823422605537904e-05, + "loss": 0.1592, + "step": 1620 + }, + { + "epoch": 0.74, + "learning_rate": 6.816159782115299e-05, + "loss": 0.1275, + "step": 1630 + }, + { + "epoch": 0.74, + "learning_rate": 6.808896958692693e-05, + "loss": 0.146, + "step": 1640 + }, + { + "epoch": 0.75, + "learning_rate": 6.801634135270087e-05, + "loss": 0.1578, + "step": 1650 + }, + { + "epoch": 0.75, + "learning_rate": 6.794371311847482e-05, + "loss": 0.1545, + "step": 1660 + }, + { + "epoch": 0.76, + "learning_rate": 6.787108488424876e-05, + "loss": 0.1419, + "step": 1670 + }, + { + "epoch": 0.76, + "learning_rate": 6.77984566500227e-05, + "loss": 0.139, + "step": 1680 + }, + { + "epoch": 0.77, + "learning_rate": 6.772582841579665e-05, + "loss": 0.1429, + "step": 1690 + }, + { + "epoch": 0.77, + "learning_rate": 6.765320018157059e-05, + "loss": 0.1465, + "step": 1700 + }, + { + "epoch": 0.77, + "eval_loss": 0.14493517577648163, + "eval_mean_accuracy": 0.7614594499739448, + "eval_mean_iou": 0.6397058212331452, + "eval_overall_accuracy": 0.9530878817097524, + "eval_per_category_accuracy": [ + 
0.9910301541293397, + 0.6895027445392574, + 0.8786986216573126, + 0.7249728703201302, + 0.8644518897007314, + 0.7707660782815681, + 0.7903360511123406, + 0.7853730086905001, + 0.21132953957578893, + 0.6756825396825397, + 0.7357905919848905, + 0.8946479897838558, + 0.8488390914159308, + 0.8385732889850941, + 0.7876196326952923, + 0.7315904419321686, + 0.8796258867600818, + 0.6074396782841823 + ], + "eval_per_category_iou": [ + 0.9826954864563329, + 0.6211237200618455, + 0.7859897103043074, + 0.5849614184862912, + 0.7605278248954189, + 0.6214059754580039, + 0.7406254585772285, + 0.5496833590615948, + 0.19048729307530893, + 0.48219974486417155, + 0.5046995715394218, + 0.819995651543702, + 0.7667894610720073, + 0.7709211367184656, + 0.6724932822910148, + 0.6410459900151405, + 0.7675053511570615, + 0.2515543466192961 + ], + "eval_runtime": 75.6776, + "eval_samples_per_second": 1.176, + "eval_steps_per_second": 0.159, + "step": 1700 + }, + { + "epoch": 0.78, + "learning_rate": 6.758057194734453e-05, + "loss": 0.1868, + "step": 1710 + }, + { + "epoch": 0.78, + "learning_rate": 6.750794371311849e-05, + "loss": 0.1528, + "step": 1720 + }, + { + "epoch": 0.79, + "learning_rate": 6.743531547889243e-05, + "loss": 0.1388, + "step": 1730 + }, + { + "epoch": 0.79, + "learning_rate": 6.736268724466638e-05, + "loss": 0.1397, + "step": 1740 + }, + { + "epoch": 0.79, + "learning_rate": 6.729005901044032e-05, + "loss": 0.1365, + "step": 1750 + }, + { + "epoch": 0.8, + "learning_rate": 6.721743077621426e-05, + "loss": 0.1386, + "step": 1760 + }, + { + "epoch": 0.8, + "learning_rate": 6.714480254198821e-05, + "loss": 0.1486, + "step": 1770 + }, + { + "epoch": 0.81, + "learning_rate": 6.707217430776215e-05, + "loss": 0.1505, + "step": 1780 + }, + { + "epoch": 0.81, + "learning_rate": 6.69995460735361e-05, + "loss": 0.129, + "step": 1790 + }, + { + "epoch": 0.82, + "learning_rate": 6.692691783931004e-05, + "loss": 0.1407, + "step": 1800 + }, + { + "epoch": 0.82, + "eval_loss": 0.13847893476486206, + "eval_mean_accuracy": 0.7597777775507725, + "eval_mean_iou": 0.6430529149418526, + "eval_overall_accuracy": 0.9541733988215414, + "eval_per_category_accuracy": [ + 0.989219455280459, + 0.7124629608640036, + 0.9102512517882689, + 0.6443298969072165, + 0.8693117376683845, + 0.7514032476737664, + 0.8841255258954577, + 0.737017989750593, + 0.1948784273150543, + 0.6101587301587301, + 0.6794448051309292, + 0.9216195593299411, + 0.8343704476377174, + 0.8683631628037942, + 0.8187241334712881, + 0.7679085303186023, + 0.895680911399592, + 0.5867292225201073 + ], + "eval_per_category_iou": [ + 0.982356198961782, + 0.5846609707809033, + 0.7888197647128946, + 0.5730831875490137, + 0.7745111703668928, + 0.6120406691378941, + 0.7480644634158191, + 0.5691702692485286, + 0.172056271124509, + 0.4593757813763991, + 0.5119479980432559, + 0.8253918794990458, + 0.758100887298397, + 0.758420079849063, + 0.6812195942158951, + 0.6619563893287855, + 0.7492243600002065, + 0.36455253404405946 + ], + "eval_runtime": 75.7862, + "eval_samples_per_second": 1.174, + "eval_steps_per_second": 0.158, + "step": 1800 + }, + { + "epoch": 0.82, + "learning_rate": 6.685428960508398e-05, + "loss": 0.1381, + "step": 1810 + }, + { + "epoch": 0.83, + "learning_rate": 6.678166137085794e-05, + "loss": 0.1347, + "step": 1820 + }, + { + "epoch": 0.83, + "learning_rate": 6.670903313663188e-05, + "loss": 0.1607, + "step": 1830 + }, + { + "epoch": 0.84, + "learning_rate": 6.663640490240581e-05, + "loss": 0.1324, + "step": 1840 + }, + { + "epoch": 0.84, + 
"learning_rate": 6.656377666817975e-05, + "loss": 0.1427, + "step": 1850 + }, + { + "epoch": 0.84, + "learning_rate": 6.64911484339537e-05, + "loss": 0.1455, + "step": 1860 + }, + { + "epoch": 0.85, + "learning_rate": 6.641852019972764e-05, + "loss": 0.1442, + "step": 1870 + }, + { + "epoch": 0.85, + "learning_rate": 6.634589196550158e-05, + "loss": 0.1352, + "step": 1880 + }, + { + "epoch": 0.86, + "learning_rate": 6.627326373127554e-05, + "loss": 0.1609, + "step": 1890 + }, + { + "epoch": 0.86, + "learning_rate": 6.620063549704948e-05, + "loss": 0.1539, + "step": 1900 + }, + { + "epoch": 0.86, + "eval_loss": 0.14046506583690643, + "eval_mean_accuracy": 0.7531720155067204, + "eval_mean_iou": 0.6426226055599306, + "eval_overall_accuracy": 0.9543066131934691, + "eval_per_category_accuracy": [ + 0.9924802996431188, + 0.6779578684888032, + 0.9062396830637174, + 0.6718665219750407, + 0.8569332939038171, + 0.7683724976704543, + 0.8697345206652121, + 0.7159754548919514, + 0.20801862390067252, + 0.5993455433455434, + 0.6479568750122962, + 0.9047059893331436, + 0.8610956243368164, + 0.871962057617514, + 0.8265810915675117, + 0.7592433196300102, + 0.8344447084453194, + 0.5841823056300268 + ], + "eval_per_category_iou": [ + 0.9821772119045775, + 0.6070843422407168, + 0.7870250768672508, + 0.5831518219815153, + 0.7685410986173595, + 0.6149549094212183, + 0.7837605189258348, + 0.5437053152555021, + 0.17449984811005512, + 0.47517192243355433, + 0.5019584533552802, + 0.8281520299369558, + 0.763911657831652, + 0.7773681113943911, + 0.6905063091695666, + 0.659743579725272, + 0.7300270499370994, + 0.2954676429709482 + ], + "eval_runtime": 79.4887, + "eval_samples_per_second": 1.12, + "eval_steps_per_second": 0.151, + "step": 1900 + }, + { + "epoch": 0.87, + "learning_rate": 6.612800726282343e-05, + "loss": 0.1351, + "step": 1910 + }, + { + "epoch": 0.87, + "learning_rate": 6.605537902859737e-05, + "loss": 0.1178, + "step": 1920 + }, + { + "epoch": 0.88, + "learning_rate": 6.598275079437131e-05, + "loss": 0.1608, + "step": 1930 + }, + { + "epoch": 0.88, + "learning_rate": 6.591012256014526e-05, + "loss": 0.1409, + "step": 1940 + }, + { + "epoch": 0.89, + "learning_rate": 6.58374943259192e-05, + "loss": 0.1614, + "step": 1950 + }, + { + "epoch": 0.89, + "learning_rate": 6.576486609169314e-05, + "loss": 0.1299, + "step": 1960 + }, + { + "epoch": 0.89, + "learning_rate": 6.569223785746709e-05, + "loss": 0.1488, + "step": 1970 + }, + { + "epoch": 0.9, + "learning_rate": 6.561960962324103e-05, + "loss": 0.1324, + "step": 1980 + }, + { + "epoch": 0.9, + "learning_rate": 6.554698138901497e-05, + "loss": 0.1705, + "step": 1990 + }, + { + "epoch": 0.91, + "learning_rate": 6.547435315478893e-05, + "loss": 0.1715, + "step": 2000 + }, + { + "epoch": 0.91, + "eval_loss": 0.15044108033180237, + "eval_mean_accuracy": 0.7669366594749936, + "eval_mean_iou": 0.638605446391735, + "eval_overall_accuracy": 0.9518973532687413, + "eval_per_category_accuracy": [ + 0.9927349057674444, + 0.6858757428067165, + 0.9015540745020358, + 0.7554937601736299, + 0.7849426362290928, + 0.6700126744951179, + 0.8660354181238625, + 0.8764435243942752, + 0.2963269529229177, + 0.7319365079365079, + 0.6655550965000295, + 0.8966643340331234, + 0.8368163588295595, + 0.8751077908593351, + 0.7769119675806173, + 0.7467818602261048, + 0.845331144526079, + 0.6003351206434316 + ], + "eval_per_category_iou": [ + 0.9816996750938647, + 0.6128061571401704, + 0.7911633072486774, + 0.5907403478998727, + 0.7348832458745334, + 0.5780840395049686, + 
0.7868250201946592, + 0.5639012019683365, + 0.2508759635599159, + 0.5183775968343353, + 0.5151988182081233, + 0.8226874204231764, + 0.7582076905214873, + 0.7750139296387645, + 0.6662846209659903, + 0.6491362270935466, + 0.7512367549305662, + 0.14777601795024087 + ], + "eval_runtime": 88.822, + "eval_samples_per_second": 1.002, + "eval_steps_per_second": 0.135, + "step": 2000 + }, + { + "epoch": 0.91, + "learning_rate": 6.540172492056287e-05, + "loss": 0.1778, + "step": 2010 + }, + { + "epoch": 0.92, + "learning_rate": 6.532909668633682e-05, + "loss": 0.1385, + "step": 2020 + }, + { + "epoch": 0.92, + "learning_rate": 6.525646845211076e-05, + "loss": 0.1401, + "step": 2030 + }, + { + "epoch": 0.93, + "learning_rate": 6.51838402178847e-05, + "loss": 0.1419, + "step": 2040 + }, + { + "epoch": 0.93, + "learning_rate": 6.511121198365865e-05, + "loss": 0.1546, + "step": 2050 + }, + { + "epoch": 0.94, + "learning_rate": 6.503858374943259e-05, + "loss": 0.1494, + "step": 2060 + }, + { + "epoch": 0.94, + "learning_rate": 6.496595551520653e-05, + "loss": 0.1306, + "step": 2070 + }, + { + "epoch": 0.94, + "learning_rate": 6.489332728098048e-05, + "loss": 0.1382, + "step": 2080 + }, + { + "epoch": 0.95, + "learning_rate": 6.482069904675442e-05, + "loss": 0.1182, + "step": 2090 + }, + { + "epoch": 0.95, + "learning_rate": 6.474807081252838e-05, + "loss": 0.132, + "step": 2100 + }, + { + "epoch": 0.95, + "eval_loss": 0.13885533809661865, + "eval_mean_accuracy": 0.7716184264734858, + "eval_mean_iou": 0.6614885882734279, + "eval_overall_accuracy": 0.957003561298499, + "eval_per_category_accuracy": [ + 0.9907188024570434, + 0.7016467235544617, + 0.8972966187960824, + 0.7398941942485079, + 0.8977901860300126, + 0.7837386669257504, + 0.8968322737127088, + 0.7030237015280179, + 0.26797723745473356, + 0.7476336996336996, + 0.6871471010643531, + 0.9032036151866305, + 0.8693505746092837, + 0.8497351424599194, + 0.7875549663735126, + 0.7708954265159301, + 0.8718375180900599, + 0.5228552278820375 + ], + "eval_per_category_iou": [ + 0.9831048553381242, + 0.5972434704706774, + 0.7928731831980577, + 0.5866946326772077, + 0.7869940498207969, + 0.64052506965799, + 0.7954271887210573, + 0.5656907305204008, + 0.2305706400783406, + 0.5128138127876827, + 0.5270090835018257, + 0.8293610888245908, + 0.7775557065818417, + 0.785420326631531, + 0.6790745869189451, + 0.6645274890780126, + 0.7540405657859621, + 0.39786810832865815 + ], + "eval_runtime": 84.5809, + "eval_samples_per_second": 1.052, + "eval_steps_per_second": 0.142, + "step": 2100 + }, + { + "epoch": 0.96, + "learning_rate": 6.467544257830232e-05, + "loss": 0.1406, + "step": 2110 + }, + { + "epoch": 0.96, + "learning_rate": 6.460281434407626e-05, + "loss": 0.1566, + "step": 2120 + }, + { + "epoch": 0.97, + "learning_rate": 6.453018610985021e-05, + "loss": 0.1213, + "step": 2130 + }, + { + "epoch": 0.97, + "learning_rate": 6.445755787562415e-05, + "loss": 0.1348, + "step": 2140 + }, + { + "epoch": 0.98, + "learning_rate": 6.43849296413981e-05, + "loss": 0.1395, + "step": 2150 + }, + { + "epoch": 0.98, + "learning_rate": 6.431230140717204e-05, + "loss": 0.1235, + "step": 2160 + }, + { + "epoch": 0.99, + "learning_rate": 6.423967317294598e-05, + "loss": 0.1551, + "step": 2170 + }, + { + "epoch": 0.99, + "learning_rate": 6.416704493871992e-05, + "loss": 0.146, + "step": 2180 + }, + { + "epoch": 0.99, + "learning_rate": 6.409441670449387e-05, + "loss": 0.1482, + "step": 2190 + }, + { + "epoch": 1.0, + "learning_rate": 6.402178847026782e-05, + "loss": 0.1399, + "step": 
2200 + }, + { + "epoch": 1.0, + "eval_loss": 0.13540491461753845, + "eval_mean_accuracy": 0.7651852022497144, + "eval_mean_iou": 0.6567450630698053, + "eval_overall_accuracy": 0.9556758752029934, + "eval_per_category_accuracy": [ + 0.9901550079804321, + 0.6457844200845221, + 0.8885581737647188, + 0.7129679869777537, + 0.9021107370186209, + 0.748916926854473, + 0.8618665330396322, + 0.7203270272564862, + 0.24423176409725814, + 0.8005079365079365, + 0.6490487713706742, + 0.9108973522632475, + 0.8443114580148553, + 0.886845995459585, + 0.8161913692015865, + 0.7391957862281603, + 0.8769659922301457, + 0.5344504021447721 + ], + "eval_per_category_iou": [ + 0.9832376527547144, + 0.556791846991484, + 0.7927362260590767, + 0.5874923154306153, + 0.7778940044397533, + 0.6219762445741573, + 0.7687129198409016, + 0.5668561551047446, + 0.2103457494207806, + 0.5276944276313248, + 0.5096671533072247, + 0.8256855746211959, + 0.7678766392286271, + 0.7780968814053941, + 0.6865619546247819, + 0.6525027783447869, + 0.7450485380898826, + 0.46223407338705 + ], + "eval_runtime": 84.7624, + "eval_samples_per_second": 1.05, + "eval_steps_per_second": 0.142, + "step": 2200 + }, + { + "epoch": 1.0, + "learning_rate": 6.394916023604177e-05, + "loss": 0.1206, + "step": 2210 + }, + { + "epoch": 1.01, + "learning_rate": 6.387653200181571e-05, + "loss": 0.1331, + "step": 2220 + }, + { + "epoch": 1.01, + "learning_rate": 6.380390376758965e-05, + "loss": 0.138, + "step": 2230 + }, + { + "epoch": 1.02, + "learning_rate": 6.37312755333636e-05, + "loss": 0.136, + "step": 2240 + }, + { + "epoch": 1.02, + "learning_rate": 6.365864729913754e-05, + "loss": 0.1374, + "step": 2250 + }, + { + "epoch": 1.03, + "learning_rate": 6.358601906491148e-05, + "loss": 0.1356, + "step": 2260 + }, + { + "epoch": 1.03, + "learning_rate": 6.351339083068543e-05, + "loss": 0.1176, + "step": 2270 + }, + { + "epoch": 1.03, + "learning_rate": 6.344076259645937e-05, + "loss": 0.1337, + "step": 2280 + }, + { + "epoch": 1.04, + "learning_rate": 6.336813436223331e-05, + "loss": 0.1173, + "step": 2290 + }, + { + "epoch": 1.04, + "learning_rate": 6.329550612800726e-05, + "loss": 0.1175, + "step": 2300 + }, + { + "epoch": 1.04, + "eval_loss": 0.13777972757816315, + "eval_mean_accuracy": 0.7627759347632277, + "eval_mean_iou": 0.6573236880408356, + "eval_overall_accuracy": 0.9549025632022472, + "eval_per_category_accuracy": [ + 0.9917725057114422, + 0.6548195404718341, + 0.903699997248817, + 0.6839392295170917, + 0.8730676049244813, + 0.5990509585362945, + 0.8252635132078492, + 0.8450377804632645, + 0.3003621314019659, + 0.7047032967032967, + 0.6207381612858801, + 0.9128029952596142, + 0.885446828408539, + 0.8925215141755979, + 0.8265110363855838, + 0.7685508735868448, + 0.8763303329805322, + 0.5653485254691689 + ], + "eval_per_category_iou": [ + 0.9829498718447592, + 0.601791640005357, + 0.7953290799803878, + 0.587679934728131, + 0.7885745909492605, + 0.5241967894765317, + 0.7877833104680789, + 0.5643395347827241, + 0.2465602174282317, + 0.500346769495381, + 0.48634671558162296, + 0.8297144417850802, + 0.7721932933906334, + 0.7827742814256885, + 0.6904011667686989, + 0.6651360591489007, + 0.7551893590882257, + 0.47051932838734867 + ], + "eval_runtime": 87.5329, + "eval_samples_per_second": 1.017, + "eval_steps_per_second": 0.137, + "step": 2300 + }, + { + "epoch": 1.05, + "learning_rate": 6.322287789378122e-05, + "loss": 0.112, + "step": 2310 + }, + { + "epoch": 1.05, + "learning_rate": 6.315024965955516e-05, + "loss": 0.1096, + "step": 2320 + }, + { + 
"epoch": 1.06, + "learning_rate": 6.30776214253291e-05, + "loss": 0.1316, + "step": 2330 + }, + { + "epoch": 1.06, + "learning_rate": 6.300499319110305e-05, + "loss": 0.122, + "step": 2340 + }, + { + "epoch": 1.07, + "learning_rate": 6.293236495687699e-05, + "loss": 0.1171, + "step": 2350 + }, + { + "epoch": 1.07, + "learning_rate": 6.285973672265093e-05, + "loss": 0.1172, + "step": 2360 + }, + { + "epoch": 1.08, + "learning_rate": 6.278710848842488e-05, + "loss": 0.1295, + "step": 2370 + }, + { + "epoch": 1.08, + "learning_rate": 6.271448025419882e-05, + "loss": 0.1036, + "step": 2380 + }, + { + "epoch": 1.08, + "learning_rate": 6.264185201997276e-05, + "loss": 0.1333, + "step": 2390 + }, + { + "epoch": 1.09, + "learning_rate": 6.25692237857467e-05, + "loss": 0.1166, + "step": 2400 + }, + { + "epoch": 1.09, + "eval_loss": 0.13342882692813873, + "eval_mean_accuracy": 0.7821907896480783, + "eval_mean_iou": 0.6697036848077302, + "eval_overall_accuracy": 0.958539555581768, + "eval_per_category_accuracy": [ + 0.9915974986695582, + 0.7106008840816723, + 0.9216514663805436, + 0.6840070537167662, + 0.862522496437278, + 0.8110374096334996, + 0.8533460696600514, + 0.7866519265958927, + 0.29689601655457837, + 0.7033455433455433, + 0.6781364968817014, + 0.9052950781432237, + 0.8650432530618851, + 0.901985111662531, + 0.8428231160544921, + 0.7853995375128469, + 0.8831837271232099, + 0.595911528150134 + ], + "eval_per_category_iou": [ + 0.9838216503792198, + 0.6230886090326977, + 0.7986503480665618, + 0.5904566744730679, + 0.7901460381632656, + 0.6592556695294761, + 0.7894455577822311, + 0.5933676785376083, + 0.2402863841902529, + 0.5234176801122362, + 0.5514907642216587, + 0.8321425456448425, + 0.7751464509318873, + 0.7992249996101608, + 0.7001190731986822, + 0.6766631432729929, + 0.7727151237975934, + 0.3552279355947101 + ], + "eval_runtime": 82.9803, + "eval_samples_per_second": 1.073, + "eval_steps_per_second": 0.145, + "step": 2400 + }, + { + "epoch": 1.09, + "learning_rate": 6.249659555152066e-05, + "loss": 0.1159, + "step": 2410 + }, + { + "epoch": 1.1, + "learning_rate": 6.24239673172946e-05, + "loss": 0.1178, + "step": 2420 + }, + { + "epoch": 1.1, + "learning_rate": 6.235133908306855e-05, + "loss": 0.1391, + "step": 2430 + }, + { + "epoch": 1.11, + "learning_rate": 6.227871084884249e-05, + "loss": 0.1283, + "step": 2440 + }, + { + "epoch": 1.11, + "learning_rate": 6.220608261461644e-05, + "loss": 0.103, + "step": 2450 + }, + { + "epoch": 1.12, + "learning_rate": 6.213345438039038e-05, + "loss": 0.1168, + "step": 2460 + }, + { + "epoch": 1.12, + "learning_rate": 6.206082614616432e-05, + "loss": 0.114, + "step": 2470 + }, + { + "epoch": 1.13, + "learning_rate": 6.198819791193827e-05, + "loss": 0.1299, + "step": 2480 + }, + { + "epoch": 1.13, + "learning_rate": 6.191556967771221e-05, + "loss": 0.1257, + "step": 2490 + }, + { + "epoch": 1.13, + "learning_rate": 6.184294144348615e-05, + "loss": 0.1227, + "step": 2500 + }, + { + "epoch": 1.13, + "eval_loss": 0.13308031857013702, + "eval_mean_accuracy": 0.7715319784801719, + "eval_mean_iou": 0.6651590960897119, + "eval_overall_accuracy": 0.9574322218305609, + "eval_per_category_accuracy": [ + 0.9910145671313145, + 0.7075729853138814, + 0.8978554528447232, + 0.653893109061313, + 0.8888540488098072, + 0.690135091569915, + 0.8967296175274126, + 0.7883589568509718, + 0.26006207966890843, + 0.7302075702075702, + 0.6023431505636546, + 0.9364337591377954, + 0.8841834073701059, + 0.8806689192756454, + 0.8377575875150888, + 0.7643820657759507, + 
0.858155415600806, + 0.6189678284182306 + ], + "eval_per_category_iou": [ + 0.9829147742408483, + 0.6474789231156747, + 0.7973113698759822, + 0.5718607272080195, + 0.7868248176069152, + 0.6189919770898483, + 0.7855232213431205, + 0.5875494919877167, + 0.22661497543163683, + 0.5332215842219765, + 0.521154091663475, + 0.8224189056868451, + 0.7817246574416746, + 0.7985454806576056, + 0.7001360090792815, + 0.6653973685829153, + 0.766913291545905, + 0.3782820628353746 + ], + "eval_runtime": 84.6288, + "eval_samples_per_second": 1.052, + "eval_steps_per_second": 0.142, + "step": 2500 + }, + { + "epoch": 1.14, + "learning_rate": 6.17703132092601e-05, + "loss": 0.1234, + "step": 2510 + }, + { + "epoch": 1.14, + "learning_rate": 6.169768497503405e-05, + "loss": 0.1139, + "step": 2520 + }, + { + "epoch": 1.15, + "learning_rate": 6.1625056740808e-05, + "loss": 0.1306, + "step": 2530 + }, + { + "epoch": 1.15, + "learning_rate": 6.155242850658194e-05, + "loss": 0.1122, + "step": 2540 + }, + { + "epoch": 1.16, + "learning_rate": 6.147980027235588e-05, + "loss": 0.1281, + "step": 2550 + }, + { + "epoch": 1.16, + "learning_rate": 6.140717203812983e-05, + "loss": 0.1223, + "step": 2560 + }, + { + "epoch": 1.17, + "learning_rate": 6.133454380390377e-05, + "loss": 0.1115, + "step": 2570 + }, + { + "epoch": 1.17, + "learning_rate": 6.126191556967771e-05, + "loss": 0.1315, + "step": 2580 + }, + { + "epoch": 1.18, + "learning_rate": 6.118928733545166e-05, + "loss": 0.1266, + "step": 2590 + }, + { + "epoch": 1.18, + "learning_rate": 6.11166591012256e-05, + "loss": 0.1085, + "step": 2600 + }, + { + "epoch": 1.18, + "eval_loss": 0.15071263909339905, + "eval_mean_accuracy": 0.7684598270876742, + "eval_mean_iou": 0.6511462088637711, + "eval_overall_accuracy": 0.9541516679056575, + "eval_per_category_accuracy": [ + 0.9908049469692608, + 0.7208180184264642, + 0.9032684054143282, + 0.7053038524145415, + 0.8603680530349864, + 0.5475006513895575, + 0.9093806874136557, + 0.8241234986714067, + 0.22684945680289706, + 0.7364590964590965, + 0.6194888744614295, + 0.9100670928664903, + 0.898566624103016, + 0.8912676205057811, + 0.8460779875840663, + 0.7418293936279547, + 0.8662554424466092, + 0.6338471849865952 + ], + "eval_per_category_iou": [ + 0.9835189991166429, + 0.642371682948298, + 0.7972390934173503, + 0.5932793245093565, + 0.7729924294028407, + 0.5067484436887132, + 0.7685437207284704, + 0.5522156820737395, + 0.1984432275874553, + 0.5278800778569728, + 0.5263835371408989, + 0.8333098023031449, + 0.7733761585047307, + 0.7850704159852425, + 0.705621421444815, + 0.6715746135864066, + 0.7473020673059005, + 0.33476106194690264 + ], + "eval_runtime": 79.5578, + "eval_samples_per_second": 1.119, + "eval_steps_per_second": 0.151, + "step": 2600 + }, + { + "epoch": 1.18, + "learning_rate": 6.104403086699954e-05, + "loss": 0.149, + "step": 2610 + }, + { + "epoch": 1.19, + "learning_rate": 6.097140263277349e-05, + "loss": 0.133, + "step": 2620 + }, + { + "epoch": 1.19, + "learning_rate": 6.0898774398547436e-05, + "loss": 0.1239, + "step": 2630 + }, + { + "epoch": 1.2, + "learning_rate": 6.082614616432138e-05, + "loss": 0.1032, + "step": 2640 + }, + { + "epoch": 1.2, + "learning_rate": 6.075351793009533e-05, + "loss": 0.1371, + "step": 2650 + }, + { + "epoch": 1.21, + "learning_rate": 6.068088969586927e-05, + "loss": 0.1185, + "step": 2660 + }, + { + "epoch": 1.21, + "learning_rate": 6.0608261461643216e-05, + "loss": 0.1206, + "step": 2670 + }, + { + "epoch": 1.22, + "learning_rate": 6.053563322741716e-05, + "loss": 0.1166, + 
"step": 2680 + }, + { + "epoch": 1.22, + "learning_rate": 6.04630049931911e-05, + "loss": 0.1285, + "step": 2690 + }, + { + "epoch": 1.23, + "learning_rate": 6.039037675896505e-05, + "loss": 0.1229, + "step": 2700 + }, + { + "epoch": 1.23, + "eval_loss": 0.1449834704399109, + "eval_mean_accuracy": 0.7712138167360404, + "eval_mean_iou": 0.6459629394431743, + "eval_overall_accuracy": 0.9520613852511631, + "eval_per_category_accuracy": [ + 0.9912854258905917, + 0.7057271005035703, + 0.9015093677781446, + 0.7000813890396094, + 0.8255847701435313, + 0.6761776902388723, + 0.8554566112323266, + 0.7448269757746214, + 0.2387997930677703, + 0.7486007326007326, + 0.7296130161915442, + 0.9225881952928245, + 0.8640990594032696, + 0.8880866902486669, + 0.8219628384204173, + 0.7897738951695786, + 0.8721121475716891, + 0.6055630026809652 + ], + "eval_per_category_iou": [ + 0.9831245174321325, + 0.6157204006385353, + 0.8005328854448983, + 0.590976754837971, + 0.7490012287780567, + 0.5485306287683649, + 0.7679823554312188, + 0.49237726422180267, + 0.19991338241663056, + 0.5389678889404617, + 0.5608011553089015, + 0.8298057706942044, + 0.7745812442567186, + 0.7898227491489611, + 0.6985253709470599, + 0.6807784945045818, + 0.77171425918948, + 0.23417655901715825 + ], + "eval_runtime": 85.4956, + "eval_samples_per_second": 1.041, + "eval_steps_per_second": 0.14, + "step": 2700 + }, + { + "epoch": 1.23, + "learning_rate": 6.0317748524738996e-05, + "loss": 0.1234, + "step": 2710 + }, + { + "epoch": 1.23, + "learning_rate": 6.024512029051294e-05, + "loss": 0.1113, + "step": 2720 + }, + { + "epoch": 1.24, + "learning_rate": 6.017249205628688e-05, + "loss": 0.1159, + "step": 2730 + }, + { + "epoch": 1.24, + "learning_rate": 6.0099863822060826e-05, + "loss": 0.1205, + "step": 2740 + }, + { + "epoch": 1.25, + "learning_rate": 6.0027235587834776e-05, + "loss": 0.1389, + "step": 2750 + }, + { + "epoch": 1.25, + "learning_rate": 5.995460735360872e-05, + "loss": 0.1215, + "step": 2760 + }, + { + "epoch": 1.26, + "learning_rate": 5.988197911938266e-05, + "loss": 0.1106, + "step": 2770 + }, + { + "epoch": 1.26, + "learning_rate": 5.9809350885156606e-05, + "loss": 0.1076, + "step": 2780 + }, + { + "epoch": 1.27, + "learning_rate": 5.973672265093055e-05, + "loss": 0.1136, + "step": 2790 + }, + { + "epoch": 1.27, + "learning_rate": 5.96640944167045e-05, + "loss": 0.1403, + "step": 2800 + }, + { + "epoch": 1.27, + "eval_loss": 0.14680607616901398, + "eval_mean_accuracy": 0.770428266757619, + "eval_mean_iou": 0.6534443453914569, + "eval_overall_accuracy": 0.9538569503955627, + "eval_per_category_accuracy": [ + 0.9909573223556492, + 0.700416133680921, + 0.9043791955540883, + 0.652197504069452, + 0.8206913018807468, + 0.6879667371786662, + 0.8194730083830085, + 0.8910932761071763, + 0.24345576823590273, + 0.713025641025641, + 0.7126345196639714, + 0.932839922034689, + 0.8290424977069583, + 0.900564032170072, + 0.8147417658216934, + 0.8066546762589928, + 0.857495070555091, + 0.5900804289544236 + ], + "eval_per_category_iou": [ + 0.9831513783900067, + 0.6420715144498375, + 0.7962268549706841, + 0.5800808348917175, + 0.7652580537636267, + 0.5741292193840133, + 0.749765986169305, + 0.5667862735838759, + 0.20660286241109843, + 0.544457787291808, + 0.5596153103395002, + 0.8304156548058987, + 0.7615320758769086, + 0.7683942219619503, + 0.693968227739451, + 0.6743998711132593, + 0.7352683591527868, + 0.3298737307504965 + ], + "eval_runtime": 81.9751, + "eval_samples_per_second": 1.086, + "eval_steps_per_second": 0.146, + "step": 2800 + 
}, + { + "epoch": 1.28, + "learning_rate": 5.959146618247844e-05, + "loss": 0.1679, + "step": 2810 + }, + { + "epoch": 1.28, + "learning_rate": 5.9518837948252386e-05, + "loss": 0.0953, + "step": 2820 + }, + { + "epoch": 1.28, + "learning_rate": 5.944620971402633e-05, + "loss": 0.1176, + "step": 2830 + }, + { + "epoch": 1.29, + "learning_rate": 5.937358147980027e-05, + "loss": 0.1342, + "step": 2840 + }, + { + "epoch": 1.29, + "learning_rate": 5.930095324557422e-05, + "loss": 0.1101, + "step": 2850 + }, + { + "epoch": 1.3, + "learning_rate": 5.9228325011348166e-05, + "loss": 0.1171, + "step": 2860 + }, + { + "epoch": 1.3, + "learning_rate": 5.915569677712211e-05, + "loss": 0.1246, + "step": 2870 + }, + { + "epoch": 1.31, + "learning_rate": 5.908306854289605e-05, + "loss": 0.1304, + "step": 2880 + }, + { + "epoch": 1.31, + "learning_rate": 5.9010440308669996e-05, + "loss": 0.1311, + "step": 2890 + }, + { + "epoch": 1.32, + "learning_rate": 5.893781207444394e-05, + "loss": 0.1391, + "step": 2900 + }, + { + "epoch": 1.32, + "eval_loss": 0.13498260080814362, + "eval_mean_accuracy": 0.7738483377686847, + "eval_mean_iou": 0.6692940000228469, + "eval_overall_accuracy": 0.9572275140312281, + "eval_per_category_accuracy": [ + 0.9919934417012437, + 0.7063909713564015, + 0.90625, + 0.7253119913185024, + 0.8756254918683484, + 0.7003188496681225, + 0.865045394913125, + 0.8060849757854597, + 0.26539058458354886, + 0.7178608058608058, + 0.700210509748372, + 0.9006733008346084, + 0.843268348830099, + 0.8895033701142143, + 0.8045298758406622, + 0.782277749229188, + 0.8773547935187442, + 0.5711796246648794 + ], + "eval_per_category_iou": [ + 0.9824737521334264, + 0.6113851673300073, + 0.805191273527255, + 0.5949043168669337, + 0.7938930015552099, + 0.5960402924207401, + 0.7727594187490577, + 0.5946204163540835, + 0.22388059701492538, + 0.5397400117508813, + 0.5652370705057451, + 0.8345147169258385, + 0.7745163221903326, + 0.792605428122268, + 0.688049294184337, + 0.6662818752290968, + 0.7580698915127164, + 0.45312915403838994 + ], + "eval_runtime": 83.7881, + "eval_samples_per_second": 1.062, + "eval_steps_per_second": 0.143, + "step": 2900 + }, + { + "epoch": 1.32, + "learning_rate": 5.886518384021789e-05, + "loss": 0.1169, + "step": 2910 + }, + { + "epoch": 1.33, + "learning_rate": 5.879255560599183e-05, + "loss": 0.1069, + "step": 2920 + }, + { + "epoch": 1.33, + "learning_rate": 5.8719927371765777e-05, + "loss": 0.1102, + "step": 2930 + }, + { + "epoch": 1.33, + "learning_rate": 5.864729913753972e-05, + "loss": 0.1272, + "step": 2940 + }, + { + "epoch": 1.34, + "learning_rate": 5.857467090331366e-05, + "loss": 0.1301, + "step": 2950 + }, + { + "epoch": 1.34, + "learning_rate": 5.850204266908761e-05, + "loss": 0.1067, + "step": 2960 + }, + { + "epoch": 1.35, + "learning_rate": 5.842941443486156e-05, + "loss": 0.1238, + "step": 2970 + }, + { + "epoch": 1.35, + "learning_rate": 5.83567862006355e-05, + "loss": 0.1291, + "step": 2980 + }, + { + "epoch": 1.36, + "learning_rate": 5.828415796640944e-05, + "loss": 0.1353, + "step": 2990 + }, + { + "epoch": 1.36, + "learning_rate": 5.821152973218339e-05, + "loss": 0.1166, + "step": 3000 + }, + { + "epoch": 1.36, + "eval_loss": 0.13106530904769897, + "eval_mean_accuracy": 0.7774974792072407, + "eval_mean_iou": 0.6582082239520443, + "eval_overall_accuracy": 0.9579072587945489, + "eval_per_category_accuracy": [ + 0.9907893599712356, + 0.7040917113295229, + 0.8964609469571916, + 0.7219886055344547, + 0.89360754686429, + 0.6154107728792302, + 0.8968757720963089, + 
0.8536018338382169, + 0.2566994309363683, + 0.6869352869352869, + 0.7524051230596707, + 0.9118383129129057, + 0.8560014747405715, + 0.8767092550551713, + 0.8214940075875151, + 0.7728545734840699, + 0.8906758662400138, + 0.596514745308311 + ], + "eval_per_category_iou": [ + 0.9834721187016149, + 0.6089343229239602, + 0.7993526714186493, + 0.6008692707157371, + 0.7995465685070772, + 0.5476689690407648, + 0.7929279255720083, + 0.6310686613088036, + 0.21930522407849376, + 0.533351029540025, + 0.5590533340155097, + 0.8322021238593187, + 0.7819552886767732, + 0.7960570469798658, + 0.6906342166185685, + 0.6753671022497643, + 0.7782930854104425, + 0.2176890715194208 + ], + "eval_runtime": 83.8227, + "eval_samples_per_second": 1.062, + "eval_steps_per_second": 0.143, + "step": 3000 + }, + { + "epoch": 1.37, + "learning_rate": 5.813890149795734e-05, + "loss": 0.1305, + "step": 3010 + }, + { + "epoch": 1.37, + "learning_rate": 5.806627326373128e-05, + "loss": 0.1173, + "step": 3020 + }, + { + "epoch": 1.38, + "learning_rate": 5.7993645029505223e-05, + "loss": 0.114, + "step": 3030 + }, + { + "epoch": 1.38, + "learning_rate": 5.792101679527917e-05, + "loss": 0.1251, + "step": 3040 + }, + { + "epoch": 1.38, + "learning_rate": 5.784838856105311e-05, + "loss": 0.1228, + "step": 3050 + }, + { + "epoch": 1.39, + "learning_rate": 5.777576032682706e-05, + "loss": 0.1148, + "step": 3060 + }, + { + "epoch": 1.39, + "learning_rate": 5.7703132092601004e-05, + "loss": 0.1097, + "step": 3070 + }, + { + "epoch": 1.4, + "learning_rate": 5.763050385837495e-05, + "loss": 0.1262, + "step": 3080 + }, + { + "epoch": 1.4, + "learning_rate": 5.755787562414889e-05, + "loss": 0.1187, + "step": 3090 + }, + { + "epoch": 1.41, + "learning_rate": 5.7485247389922834e-05, + "loss": 0.1166, + "step": 3100 + }, + { + "epoch": 1.41, + "eval_loss": 0.13692022860050201, + "eval_mean_accuracy": 0.7756804972624743, + "eval_mean_iou": 0.6519453203573877, + "eval_overall_accuracy": 0.9548195399595111, + "eval_per_category_accuracy": [ + 0.9909551035658947, + 0.6856328632264124, + 0.9032151012435348, + 0.6671188279978296, + 0.8419276642197038, + 0.6501706861450545, + 0.9188302762669339, + 0.7870113964591315, + 0.3085876875323332, + 0.6792380952380952, + 0.694593637490409, + 0.9245373280671166, + 0.8602863154865744, + 0.8926403041022122, + 0.8217041731332988, + 0.7894784172661871, + 0.8945114218092837, + 0.6518096514745308 + ], + "eval_per_category_iou": [ + 0.9840541655459478, + 0.6288930804532831, + 0.8037017826514703, + 0.5912478961288772, + 0.7658389171268511, + 0.5715827582525308, + 0.7699181361578668, + 0.546816874256367, + 0.25362472894255705, + 0.5236574767868304, + 0.5561234937386784, + 0.8321595951759896, + 0.7793014967925873, + 0.7799771646490314, + 0.7007831314226888, + 0.686008673762705, + 0.7744652770727694, + 0.18686111751594806 + ], + "eval_runtime": 78.5808, + "eval_samples_per_second": 1.133, + "eval_steps_per_second": 0.153, + "step": 3100 + }, + { + "epoch": 1.41, + "learning_rate": 5.7412619155696784e-05, + "loss": 0.1106, + "step": 3110 + }, + { + "epoch": 1.42, + "learning_rate": 5.733999092147073e-05, + "loss": 0.1186, + "step": 3120 + }, + { + "epoch": 1.42, + "learning_rate": 5.726736268724467e-05, + "loss": 0.113, + "step": 3130 + }, + { + "epoch": 1.43, + "learning_rate": 5.7194734453018614e-05, + "loss": 0.1327, + "step": 3140 + }, + { + "epoch": 1.43, + "learning_rate": 5.712210621879256e-05, + "loss": 0.1111, + "step": 3150 + }, + { + "epoch": 1.43, + "learning_rate": 5.70494779845665e-05, + "loss": 0.1182, + 
"step": 3160 + }, + { + "epoch": 1.44, + "learning_rate": 5.697684975034045e-05, + "loss": 0.1322, + "step": 3170 + }, + { + "epoch": 1.44, + "learning_rate": 5.6904221516114394e-05, + "loss": 0.115, + "step": 3180 + }, + { + "epoch": 1.45, + "learning_rate": 5.683159328188834e-05, + "loss": 0.1225, + "step": 3190 + }, + { + "epoch": 1.45, + "learning_rate": 5.675896504766228e-05, + "loss": 0.114, + "step": 3200 + }, + { + "epoch": 1.45, + "eval_loss": 0.13359396159648895, + "eval_mean_accuracy": 0.771436184706532, + "eval_mean_iou": 0.6682175047820916, + "eval_overall_accuracy": 0.9564487157243021, + "eval_per_category_accuracy": [ + 0.9918683019590915, + 0.7046098544341716, + 0.9019409596126334, + 0.729381443298969, + 0.8858042077514678, + 0.6629136323689824, + 0.9221465930326029, + 0.70561043943836, + 0.3356958096223487, + 0.7674627594627594, + 0.6211119636428023, + 0.9178043197210328, + 0.893944571336079, + 0.8782271263396864, + 0.8123329453354027, + 0.7683260534429599, + 0.8830510409691643, + 0.5036193029490617 + ], + "eval_per_category_iou": [ + 0.9831915677087678, + 0.6390765434999707, + 0.8028579256467909, + 0.6069191263615328, + 0.7893015970448374, + 0.556611521982168, + 0.7495947861064673, + 0.5759256556220503, + 0.2610112223965247, + 0.542952310530935, + 0.5284296331012319, + 0.8291532131325542, + 0.7835274181789521, + 0.7963091505301707, + 0.7004167847634271, + 0.673731820793295, + 0.7578031697273823, + 0.45110163895059135 + ], + "eval_runtime": 76.057, + "eval_samples_per_second": 1.17, + "eval_steps_per_second": 0.158, + "step": 3200 + }, + { + "epoch": 1.46, + "learning_rate": 5.6686336813436224e-05, + "loss": 0.1078, + "step": 3210 + }, + { + "epoch": 1.46, + "learning_rate": 5.6613708579210174e-05, + "loss": 0.1245, + "step": 3220 + }, + { + "epoch": 1.47, + "learning_rate": 5.654108034498412e-05, + "loss": 0.1169, + "step": 3230 + }, + { + "epoch": 1.47, + "learning_rate": 5.646845211075806e-05, + "loss": 0.1193, + "step": 3240 + }, + { + "epoch": 1.48, + "learning_rate": 5.6395823876532004e-05, + "loss": 0.1108, + "step": 3250 + }, + { + "epoch": 1.48, + "learning_rate": 5.632319564230595e-05, + "loss": 0.1122, + "step": 3260 + }, + { + "epoch": 1.48, + "learning_rate": 5.62505674080799e-05, + "loss": 0.1085, + "step": 3270 + }, + { + "epoch": 1.49, + "learning_rate": 5.617793917385384e-05, + "loss": 0.1168, + "step": 3280 + }, + { + "epoch": 1.49, + "learning_rate": 5.6105310939627784e-05, + "loss": 0.1183, + "step": 3290 + }, + { + "epoch": 1.5, + "learning_rate": 5.603268270540173e-05, + "loss": 0.1472, + "step": 3300 + }, + { + "epoch": 1.5, + "eval_loss": 0.14049187302589417, + "eval_mean_accuracy": 0.7717139026549295, + "eval_mean_iou": 0.6523727128294649, + "eval_overall_accuracy": 0.9561611561293013, + "eval_per_category_accuracy": [ + 0.9934544592848197, + 0.7343383150633915, + 0.8856195664135579, + 0.726465002712968, + 0.8905686838615036, + 0.690963129142948, + 0.8245953780357522, + 0.7321515987377007, + 0.25716502845318157, + 0.623912087912088, + 0.7878081410218576, + 0.9305982216634445, + 0.8350988256029351, + 0.9062571493937314, + 0.7898344542162442, + 0.8042844295991778, + 0.8639288061640433, + 0.6138069705093834 + ], + "eval_per_category_iou": [ + 0.9834976876071128, + 0.6155098938682446, + 0.7997279625606921, + 0.5913758833922261, + 0.7902125234696338, + 0.5654944340031806, + 0.7642206021553233, + 0.5704052509808999, + 0.22546262699564587, + 0.5149594872415044, + 0.5752302012555126, + 0.8332914408306752, + 0.7620157296824934, + 0.7798415216346063, + 
0.685743159785904, + 0.6639006569494006, + 0.7615679852895975, + 0.2602517832277132 + ], + "eval_runtime": 76.2338, + "eval_samples_per_second": 1.167, + "eval_steps_per_second": 0.157, + "step": 3300 + }, + { + "epoch": 1.5, + "learning_rate": 5.596005447117567e-05, + "loss": 0.1187, + "step": 3310 + }, + { + "epoch": 1.51, + "learning_rate": 5.588742623694962e-05, + "loss": 0.1279, + "step": 3320 + }, + { + "epoch": 1.51, + "learning_rate": 5.5814798002723564e-05, + "loss": 0.1204, + "step": 3330 + }, + { + "epoch": 1.52, + "learning_rate": 5.574216976849751e-05, + "loss": 0.1206, + "step": 3340 + }, + { + "epoch": 1.52, + "learning_rate": 5.566954153427145e-05, + "loss": 0.1258, + "step": 3350 + }, + { + "epoch": 1.53, + "learning_rate": 5.5596913300045394e-05, + "loss": 0.1214, + "step": 3360 + }, + { + "epoch": 1.53, + "learning_rate": 5.5524285065819344e-05, + "loss": 0.1229, + "step": 3370 + }, + { + "epoch": 1.53, + "learning_rate": 5.545165683159329e-05, + "loss": 0.1376, + "step": 3380 + }, + { + "epoch": 1.54, + "learning_rate": 5.537902859736723e-05, + "loss": 0.1528, + "step": 3390 + }, + { + "epoch": 1.54, + "learning_rate": 5.5306400363141174e-05, + "loss": 0.1192, + "step": 3400 + }, + { + "epoch": 1.54, + "eval_loss": 0.13206158578395844, + "eval_mean_accuracy": 0.7787210569092442, + "eval_mean_iou": 0.6670240956145178, + "eval_overall_accuracy": 0.9592076419444566, + "eval_per_category_accuracy": [ + 0.9919085729931352, + 0.6951537427743325, + 0.9162230384065148, + 0.6920103092783505, + 0.8718161065914304, + 0.7556494243482792, + 0.8699885512254365, + 0.8346854096782294, + 0.2686497672012416, + 0.6940659340659341, + 0.6739558126266502, + 0.9222165553723713, + 0.8902397352660828, + 0.8791642468718652, + 0.7976105794102432, + 0.7731500513874615, + 0.8725287203809018, + 0.6179624664879356 + ], + "eval_per_category_iou": [ + 0.9836782455582729, + 0.6310559736594544, + 0.8034299637824291, + 0.59817083895175, + 0.7950283471434801, + 0.6311009231836003, + 0.8056107833249014, + 0.622326255424257, + 0.2306052666637062, + 0.532893848715295, + 0.5422434329763912, + 0.8347761669416341, + 0.7853621193671092, + 0.7871295318827107, + 0.697165385484965, + 0.6722443143737992, + 0.7898672581203839, + 0.26374506550718 + ], + "eval_runtime": 83.3802, + "eval_samples_per_second": 1.067, + "eval_steps_per_second": 0.144, + "step": 3400 + }, + { + "epoch": 1.55, + "learning_rate": 5.523377212891512e-05, + "loss": 0.1606, + "step": 3410 + }, + { + "epoch": 1.55, + "learning_rate": 5.516114389468906e-05, + "loss": 0.1167, + "step": 3420 + }, + { + "epoch": 1.56, + "learning_rate": 5.508851566046301e-05, + "loss": 0.132, + "step": 3430 + }, + { + "epoch": 1.56, + "learning_rate": 5.5015887426236954e-05, + "loss": 0.1212, + "step": 3440 + }, + { + "epoch": 1.57, + "learning_rate": 5.49432591920109e-05, + "loss": 0.124, + "step": 3450 + }, + { + "epoch": 1.57, + "learning_rate": 5.487063095778484e-05, + "loss": 0.1371, + "step": 3460 + }, + { + "epoch": 1.58, + "learning_rate": 5.4798002723558784e-05, + "loss": 0.1138, + "step": 3470 + }, + { + "epoch": 1.58, + "learning_rate": 5.4725374489332735e-05, + "loss": 0.1013, + "step": 3480 + }, + { + "epoch": 1.58, + "learning_rate": 5.465274625510668e-05, + "loss": 0.117, + "step": 3490 + }, + { + "epoch": 1.59, + "learning_rate": 5.458011802088062e-05, + "loss": 0.1087, + "step": 3500 + }, + { + "epoch": 1.59, + "eval_loss": 0.13412128388881683, + "eval_mean_accuracy": 0.7736412261063906, + "eval_mean_iou": 0.6695343192185108, + "eval_overall_accuracy": 
0.9579999687966336, + "eval_per_category_accuracy": [ + 0.9908575877561856, + 0.6992017357794006, + 0.9181712198745461, + 0.6848209441128594, + 0.8999960890677092, + 0.7552696311147815, + 0.8742896713958109, + 0.6774670199948337, + 0.21112260734609414, + 0.7002295482295482, + 0.7326526195675697, + 0.9190694768970439, + 0.903854108591263, + 0.8790894532143674, + 0.8390131919296431, + 0.7836459403905447, + 0.9263622702292384, + 0.5304289544235925 + ], + "eval_per_category_iou": [ + 0.9836469704651415, + 0.633966585430308, + 0.8044954078680937, + 0.6015848427073404, + 0.7850568307979773, + 0.6209403648906235, + 0.7959676026910106, + 0.5669896894747982, + 0.18577866800200302, + 0.5234388690928209, + 0.5514831104595199, + 0.8385657341774435, + 0.7894441280999038, + 0.79647146518701, + 0.700614242321971, + 0.6860835236027849, + 0.7656166318978664, + 0.42147307876657614 + ], + "eval_runtime": 81.1535, + "eval_samples_per_second": 1.097, + "eval_steps_per_second": 0.148, + "step": 3500 + }, + { + "epoch": 1.59, + "learning_rate": 5.4507489786654565e-05, + "loss": 0.1186, + "step": 3510 + }, + { + "epoch": 1.6, + "learning_rate": 5.443486155242851e-05, + "loss": 0.1306, + "step": 3520 + }, + { + "epoch": 1.6, + "learning_rate": 5.436223331820246e-05, + "loss": 0.1179, + "step": 3530 + }, + { + "epoch": 1.61, + "learning_rate": 5.42896050839764e-05, + "loss": 0.1186, + "step": 3540 + }, + { + "epoch": 1.61, + "learning_rate": 5.4216976849750345e-05, + "loss": 0.113, + "step": 3550 + }, + { + "epoch": 1.62, + "learning_rate": 5.414434861552429e-05, + "loss": 0.1037, + "step": 3560 + }, + { + "epoch": 1.62, + "learning_rate": 5.407172038129823e-05, + "loss": 0.1094, + "step": 3570 + }, + { + "epoch": 1.63, + "learning_rate": 5.399909214707218e-05, + "loss": 0.1148, + "step": 3580 + }, + { + "epoch": 1.63, + "learning_rate": 5.3926463912846125e-05, + "loss": 0.1478, + "step": 3590 + }, + { + "epoch": 1.63, + "learning_rate": 5.385383567862007e-05, + "loss": 0.1159, + "step": 3600 + }, + { + "epoch": 1.63, + "eval_loss": 0.13787628710269928, + "eval_mean_accuracy": 0.7727645737217357, + "eval_mean_iou": 0.6571073050454355, + "eval_overall_accuracy": 0.9573489842789896, + "eval_per_category_accuracy": [ + 0.9921308957265332, + 0.7184054145954436, + 0.8962184989545504, + 0.7112723819858926, + 0.8583624995111334, + 0.7488882215519411, + 0.9106491002794336, + 0.8166342421237263, + 0.25333678220382827, + 0.6308473748473749, + 0.7146314112022665, + 0.920907908418435, + 0.834096181860691, + 0.8645090896291995, + 0.7966459734436971, + 0.7785906988694759, + 0.8413166169350733, + 0.6223190348525469 + ], + "eval_per_category_iou": [ + 0.9837198229522633, + 0.6457848160223568, + 0.8032140149296972, + 0.6036030850696443, + 0.7737964315713036, + 0.6443207897016582, + 0.7965703918921593, + 0.6002291604707481, + 0.22283400072806697, + 0.5215962137976707, + 0.5572404905999033, + 0.8371056660461305, + 0.7682493011698933, + 0.7694771756284191, + 0.692395601101598, + 0.6730952909817859, + 0.7677478761119262, + 0.1669513620426144 + ], + "eval_runtime": 76.762, + "eval_samples_per_second": 1.159, + "eval_steps_per_second": 0.156, + "step": 3600 + }, + { + "epoch": 1.64, + "learning_rate": 5.378120744439401e-05, + "loss": 0.1232, + "step": 3610 + }, + { + "epoch": 1.64, + "learning_rate": 5.3708579210167955e-05, + "loss": 0.1095, + "step": 3620 + }, + { + "epoch": 1.65, + "learning_rate": 5.3635950975941905e-05, + "loss": 0.1243, + "step": 3630 + }, + { + "epoch": 1.65, + "learning_rate": 5.356332274171585e-05, + "loss": 
0.1032, + "step": 3640 + }, + { + "epoch": 1.66, + "learning_rate": 5.349069450748979e-05, + "loss": 0.121, + "step": 3650 + }, + { + "epoch": 1.66, + "learning_rate": 5.3418066273263735e-05, + "loss": 0.1096, + "step": 3660 + }, + { + "epoch": 1.67, + "learning_rate": 5.334543803903768e-05, + "loss": 0.1148, + "step": 3670 + }, + { + "epoch": 1.67, + "learning_rate": 5.327280980481162e-05, + "loss": 0.1102, + "step": 3680 + }, + { + "epoch": 1.67, + "learning_rate": 5.320018157058557e-05, + "loss": 0.1158, + "step": 3690 + }, + { + "epoch": 1.68, + "learning_rate": 5.3127553336359515e-05, + "loss": 0.1216, + "step": 3700 + }, + { + "epoch": 1.68, + "eval_loss": 0.13317522406578064, + "eval_mean_accuracy": 0.7891147473677718, + "eval_mean_iou": 0.6805865558672544, + "eval_overall_accuracy": 0.960010357117385, + "eval_per_category_accuracy": [ + 0.9914235455528075, + 0.719619812496964, + 0.9111866540112248, + 0.7301953336950624, + 0.8926250108922895, + 0.6292202314972244, + 0.9373727672279698, + 0.8306318902153749, + 0.35830315571650284, + 0.7416849816849816, + 0.7602549725550375, + 0.916444275756821, + 0.8636989011384277, + 0.8621156925892685, + 0.8228735557854803, + 0.7882130010277493, + 0.8991400085783141, + 0.5490616621983915 + ], + "eval_per_category_iou": [ + 0.983791077686731, + 0.6187506091024266, + 0.8036680300831244, + 0.6082142251850178, + 0.820750824561979, + 0.5742093437981723, + 0.754999908907833, + 0.6557655722789334, + 0.2825900689542617, + 0.5596544633051528, + 0.5896273917421954, + 0.8410978587834871, + 0.7865993481073821, + 0.784696276184641, + 0.6989280287079588, + 0.6807787048954773, + 0.7828143889530667, + 0.4236218843727376 + ], + "eval_runtime": 75.9913, + "eval_samples_per_second": 1.171, + "eval_steps_per_second": 0.158, + "step": 3700 + }, + { + "epoch": 1.68, + "learning_rate": 5.305492510213346e-05, + "loss": 0.1336, + "step": 3710 + }, + { + "epoch": 1.69, + "learning_rate": 5.29822968679074e-05, + "loss": 0.1175, + "step": 3720 + }, + { + "epoch": 1.69, + "learning_rate": 5.2909668633681345e-05, + "loss": 0.1318, + "step": 3730 + }, + { + "epoch": 1.7, + "learning_rate": 5.2837040399455295e-05, + "loss": 0.1106, + "step": 3740 + }, + { + "epoch": 1.7, + "learning_rate": 5.276441216522924e-05, + "loss": 0.099, + "step": 3750 + }, + { + "epoch": 1.71, + "learning_rate": 5.269178393100318e-05, + "loss": 0.1277, + "step": 3760 + }, + { + "epoch": 1.71, + "learning_rate": 5.2619155696777125e-05, + "loss": 0.1208, + "step": 3770 + }, + { + "epoch": 1.72, + "learning_rate": 5.254652746255107e-05, + "loss": 0.1033, + "step": 3780 + }, + { + "epoch": 1.72, + "learning_rate": 5.247389922832502e-05, + "loss": 0.1213, + "step": 3790 + }, + { + "epoch": 1.72, + "learning_rate": 5.240127099409896e-05, + "loss": 0.1368, + "step": 3800 + }, + { + "epoch": 1.72, + "eval_loss": 0.13429704308509827, + "eval_mean_accuracy": 0.775787841116801, + "eval_mean_iou": 0.6742017874273715, + "eval_overall_accuracy": 0.9586353087693118, + "eval_per_category_accuracy": [ + 0.9925796459543752, + 0.7299017147298369, + 0.9100345961263343, + 0.6766820401519262, + 0.8686804994740825, + 0.6663163147690989, + 0.9150859354066403, + 0.8352399687134773, + 0.2617692705638903, + 0.6804200244200245, + 0.7714985539750929, + 0.9155982018953636, + 0.8548459615488373, + 0.8289117082871372, + 0.8381401965856182, + 0.7949640287769785, + 0.8650674385092247, + 0.5584450402144772 + ], + "eval_per_category_iou": [ + 0.9843675219674684, + 0.6595364897290338, + 0.8064101782721317, + 0.6016039556198746, + 
0.7936248541800263, + 0.597209880403655, + 0.7952325150637706, + 0.5868266284910741, + 0.22507895556247498, + 0.5462558520691034, + 0.5834058601682622, + 0.8430500293048027, + 0.7802171647119654, + 0.7659144588941692, + 0.7167505391804457, + 0.6883241842279434, + 0.7807226164351516, + 0.38110048941133423 + ], + "eval_runtime": 87.7077, + "eval_samples_per_second": 1.015, + "eval_steps_per_second": 0.137, + "step": 3800 + }, + { + "epoch": 1.73, + "learning_rate": 5.2328642759872905e-05, + "loss": 0.1086, + "step": 3810 + }, + { + "epoch": 1.73, + "learning_rate": 5.225601452564685e-05, + "loss": 0.124, + "step": 3820 + }, + { + "epoch": 1.74, + "learning_rate": 5.218338629142079e-05, + "loss": 0.0965, + "step": 3830 + }, + { + "epoch": 1.74, + "learning_rate": 5.211075805719474e-05, + "loss": 0.109, + "step": 3840 + }, + { + "epoch": 1.75, + "learning_rate": 5.2038129822968685e-05, + "loss": 0.1345, + "step": 3850 + }, + { + "epoch": 1.75, + "learning_rate": 5.196550158874263e-05, + "loss": 0.1153, + "step": 3860 + }, + { + "epoch": 1.76, + "learning_rate": 5.189287335451657e-05, + "loss": 0.1179, + "step": 3870 + }, + { + "epoch": 1.76, + "learning_rate": 5.1820245120290515e-05, + "loss": 0.1069, + "step": 3880 + }, + { + "epoch": 1.77, + "learning_rate": 5.1747616886064465e-05, + "loss": 0.1188, + "step": 3890 + }, + { + "epoch": 1.77, + "learning_rate": 5.167498865183841e-05, + "loss": 0.1007, + "step": 3900 + }, + { + "epoch": 1.77, + "eval_loss": 0.14351992309093475, + "eval_mean_accuracy": 0.7664950013504883, + "eval_mean_iou": 0.6695093049208686, + "eval_overall_accuracy": 0.9566625102182452, + "eval_per_category_accuracy": [ + 0.9922738412564651, + 0.7236516135300118, + 0.884450313634863, + 0.7062533912099838, + 0.8947739652668045, + 0.6356767164666864, + 0.9453869094224459, + 0.7349442641073858, + 0.2869115364718055, + 0.6710525030525031, + 0.6838910857974778, + 0.927684406542444, + 0.8642294480513641, + 0.8367650423244108, + 0.812623943783411, + 0.7936343782117163, + 0.8520364238921477, + 0.5506702412868633 + ], + "eval_per_category_iou": [ + 0.9842857601121769, + 0.6512020982077809, + 0.8017007587329098, + 0.6066767653227686, + 0.7885149683436959, + 0.5786775909271819, + 0.7782877114577514, + 0.5204883271991803, + 0.24535480445938773, + 0.5381951788917875, + 0.5561617535298589, + 0.8366714566388658, + 0.7948352148203284, + 0.7821566780857128, + 0.7020867476162098, + 0.6825906323547286, + 0.7816882668342591, + 0.4215927750410509 + ], + "eval_runtime": 95.7785, + "eval_samples_per_second": 0.929, + "eval_steps_per_second": 0.125, + "step": 3900 + }, + { + "epoch": 1.77, + "learning_rate": 5.160236041761235e-05, + "loss": 0.104, + "step": 3910 + }, + { + "epoch": 1.78, + "learning_rate": 5.1529732183386295e-05, + "loss": 0.0936, + "step": 3920 + }, + { + "epoch": 1.78, + "learning_rate": 5.145710394916024e-05, + "loss": 0.0966, + "step": 3930 + }, + { + "epoch": 1.79, + "learning_rate": 5.138447571493418e-05, + "loss": 0.1005, + "step": 3940 + }, + { + "epoch": 1.79, + "learning_rate": 5.131184748070813e-05, + "loss": 0.1222, + "step": 3950 + }, + { + "epoch": 1.8, + "learning_rate": 5.1239219246482076e-05, + "loss": 0.1345, + "step": 3960 + }, + { + "epoch": 1.8, + "learning_rate": 5.116659101225602e-05, + "loss": 0.114, + "step": 3970 + }, + { + "epoch": 1.81, + "learning_rate": 5.109396277802996e-05, + "loss": 0.1373, + "step": 3980 + }, + { + "epoch": 1.81, + "learning_rate": 5.1021334543803906e-05, + "loss": 0.1069, + "step": 3990 + }, + { + "epoch": 1.82, + "learning_rate": 
5.0948706309577856e-05, + "loss": 0.137, + "step": 4000 + }, + { + "epoch": 1.82, + "eval_loss": 0.13786786794662476, + "eval_mean_accuracy": 0.7728080994273032, + "eval_mean_iou": 0.6646466356753247, + "eval_overall_accuracy": 0.9552087676659059, + "eval_per_category_accuracy": [ + 0.9923036839786628, + 0.7142764617302741, + 0.9116130873775724, + 0.6671188279978296, + 0.8388544947933141, + 0.676539818670812, + 0.888999084794009, + 0.7545236301759596, + 0.28965338851526123, + 0.662954822954823, + 0.6863011273092132, + 0.930776134391321, + 0.8912154020466522, + 0.8834054872146841, + 0.8102690118986032, + 0.7838514902363823, + 0.9125413101369136, + 0.6153485254691688 + ], + "eval_per_category_iou": [ + 0.9843104882002514, + 0.6397175051118813, + 0.8053449145384836, + 0.5962296175062133, + 0.7682931274146803, + 0.5578943535024309, + 0.8146034185608627, + 0.4992255233095971, + 0.25318802568508636, + 0.5288708797631108, + 0.545629443093214, + 0.8386195890684222, + 0.7933122548627232, + 0.8009565637104141, + 0.6963529003126085, + 0.6757295531314026, + 0.7936759275163174, + 0.37168535686814297 + ], + "eval_runtime": 87.0618, + "eval_samples_per_second": 1.022, + "eval_steps_per_second": 0.138, + "step": 4000 + }, + { + "epoch": 1.82, + "learning_rate": 5.08760780753518e-05, + "loss": 0.1123, + "step": 4010 + }, + { + "epoch": 1.82, + "learning_rate": 5.080344984112574e-05, + "loss": 0.1155, + "step": 4020 + }, + { + "epoch": 1.83, + "learning_rate": 5.0730821606899686e-05, + "loss": 0.1171, + "step": 4030 + }, + { + "epoch": 1.83, + "learning_rate": 5.065819337267363e-05, + "loss": 0.1139, + "step": 4040 + }, + { + "epoch": 1.84, + "learning_rate": 5.058556513844758e-05, + "loss": 0.0967, + "step": 4050 + }, + { + "epoch": 1.84, + "learning_rate": 5.051293690422152e-05, + "loss": 0.1091, + "step": 4060 + }, + { + "epoch": 1.85, + "learning_rate": 5.0440308669995466e-05, + "loss": 0.1095, + "step": 4070 + }, + { + "epoch": 1.85, + "learning_rate": 5.036768043576941e-05, + "loss": 0.1095, + "step": 4080 + }, + { + "epoch": 1.86, + "learning_rate": 5.029505220154335e-05, + "loss": 0.1035, + "step": 4090 + }, + { + "epoch": 1.86, + "learning_rate": 5.02224239673173e-05, + "loss": 0.1135, + "step": 4100 + }, + { + "epoch": 1.86, + "eval_loss": 0.13894569873809814, + "eval_mean_accuracy": 0.7828193365629676, + "eval_mean_iou": 0.6788571097023736, + "eval_overall_accuracy": 0.9589751597200886, + "eval_per_category_accuracy": [ + 0.9924508452091282, + 0.7176120079664502, + 0.8960138797182788, + 0.7147314161692893, + 0.8934531679580748, + 0.6898369980436232, + 0.9350029752894382, + 0.7650439944146693, + 0.3213140196585618, + 0.7172063492063492, + 0.7081488913809046, + 0.9127634590978638, + 0.8908422207434851, + 0.8344860355841824, + 0.8294641317468529, + 0.8032695272353546, + 0.838746208416005, + 0.6303619302949062 + ], + "eval_per_category_iou": [ + 0.9845350675889538, + 0.6610336341263331, + 0.8014127569522836, + 0.610014471780029, + 0.788742171126752, + 0.6212098013755968, + 0.782461315794455, + 0.5891649614037918, + 0.2686186316062624, + 0.541830552501273, + 0.55696192738273, + 0.839407061595349, + 0.7836525150889516, + 0.7900103295458332, + 0.7043775912722746, + 0.69891685855447, + 0.7882008380333764, + 0.4088774889140075 + ], + "eval_runtime": 81.5041, + "eval_samples_per_second": 1.092, + "eval_steps_per_second": 0.147, + "step": 4100 + }, + { + "epoch": 1.87, + "learning_rate": 5.0149795733091246e-05, + "loss": 0.1147, + "step": 4110 + }, + { + "epoch": 1.87, + "learning_rate": 
5.007716749886519e-05, + "loss": 0.1361, + "step": 4120 + }, + { + "epoch": 1.87, + "learning_rate": 5.000453926463913e-05, + "loss": 0.1051, + "step": 4130 + }, + { + "epoch": 1.88, + "learning_rate": 4.9931911030413076e-05, + "loss": 0.127, + "step": 4140 + }, + { + "epoch": 1.88, + "learning_rate": 4.9859282796187026e-05, + "loss": 0.1275, + "step": 4150 + }, + { + "epoch": 1.89, + "learning_rate": 4.978665456196097e-05, + "loss": 0.1328, + "step": 4160 + }, + { + "epoch": 1.89, + "learning_rate": 4.971402632773491e-05, + "loss": 0.1083, + "step": 4170 + }, + { + "epoch": 1.9, + "learning_rate": 4.9641398093508856e-05, + "loss": 0.152, + "step": 4180 + }, + { + "epoch": 1.9, + "learning_rate": 4.95687698592828e-05, + "loss": 0.099, + "step": 4190 + }, + { + "epoch": 1.91, + "learning_rate": 4.949614162505674e-05, + "loss": 0.1281, + "step": 4200 + }, + { + "epoch": 1.91, + "eval_loss": 0.1334676295518875, + "eval_mean_accuracy": 0.7880120308161696, + "eval_mean_iou": 0.6755332328718038, + "eval_overall_accuracy": 0.9584213428283006, + "eval_per_category_accuracy": [ + 0.9914689198032866, + 0.7314561440437831, + 0.9120068504456916, + 0.7297205642973413, + 0.8634865069405325, + 0.680596098728576, + 0.900973319831435, + 0.8149055352939795, + 0.3026901189860321, + 0.7354725274725274, + 0.7353676051073206, + 0.9191090130587942, + 0.8866922648058558, + 0.8776507752142618, + 0.7936497672012416, + 0.7969874100719424, + 0.9242485489380479, + 0.5877345844504022 + ], + "eval_per_category_iou": [ + 0.9841648128511729, + 0.6392155198030309, + 0.8042780067509167, + 0.6069957686882934, + 0.7780489101673204, + 0.6120132319886901, + 0.7829185799149376, + 0.6028501099793939, + 0.2566340628974955, + 0.5437410633043026, + 0.5742686823992134, + 0.8401808496028104, + 0.798286128325838, + 0.7950475674253807, + 0.691800383300139, + 0.6887966113926932, + 0.7933927204435226, + 0.366965182457315 + ], + "eval_runtime": 80.5596, + "eval_samples_per_second": 1.105, + "eval_steps_per_second": 0.149, + "step": 4200 + }, + { + "epoch": 1.91, + "learning_rate": 4.942351339083069e-05, + "loss": 0.1076, + "step": 4210 + }, + { + "epoch": 1.92, + "learning_rate": 4.9350885156604636e-05, + "loss": 0.1024, + "step": 4220 + }, + { + "epoch": 1.92, + "learning_rate": 4.927825692237858e-05, + "loss": 0.1067, + "step": 4230 + }, + { + "epoch": 1.92, + "learning_rate": 4.920562868815252e-05, + "loss": 0.1173, + "step": 4240 + }, + { + "epoch": 1.93, + "learning_rate": 4.9133000453926466e-05, + "loss": 0.1102, + "step": 4250 + }, + { + "epoch": 1.93, + "learning_rate": 4.9060372219700416e-05, + "loss": 0.0959, + "step": 4260 + }, + { + "epoch": 1.94, + "learning_rate": 4.898774398547436e-05, + "loss": 0.1285, + "step": 4270 + }, + { + "epoch": 1.94, + "learning_rate": 4.89151157512483e-05, + "loss": 0.1278, + "step": 4280 + }, + { + "epoch": 1.95, + "learning_rate": 4.8842487517022246e-05, + "loss": 0.1186, + "step": 4290 + }, + { + "epoch": 1.95, + "learning_rate": 4.876985928279619e-05, + "loss": 0.1245, + "step": 4300 + }, + { + "epoch": 1.95, + "eval_loss": 0.13443762063980103, + "eval_mean_accuracy": 0.7825141928725435, + "eval_mean_iou": 0.6684080027410144, + "eval_overall_accuracy": 0.9573094228680215, + "eval_per_category_accuracy": [ + 0.9919866189227488, + 0.7269709677941677, + 0.9073831435017057, + 0.7095767769940314, + 0.8775246954790309, + 0.6467724199453274, + 0.8928182428740948, + 0.7752861759451438, + 0.3290222452146922, + 0.645997557997558, + 0.7973597749316336, + 0.9167328897375985, + 0.8575661385177059, + 
0.886784400682822, + 0.8199204604242111, + 0.7835881294964029, + 0.9045492836490544, + 0.6154155495978553 + ], + "eval_per_category_iou": [ + 0.9841008078941216, + 0.6601237998617911, + 0.8036749084318817, + 0.6052296656253615, + 0.7720933405454484, + 0.5803545423384261, + 0.7963377956006369, + 0.5713431842385517, + 0.2788862091646569, + 0.5162121235774388, + 0.5750017734269702, + 0.8372946231899758, + 0.7948864346738904, + 0.8019631403880126, + 0.7034095374586811, + 0.6923875903874315, + 0.7961282648075675, + 0.26191630772741537 + ], + "eval_runtime": 80.2554, + "eval_samples_per_second": 1.109, + "eval_steps_per_second": 0.15, + "step": 4300 + }, + { + "epoch": 1.96, + "learning_rate": 4.869723104857014e-05, + "loss": 0.1116, + "step": 4310 + }, + { + "epoch": 1.96, + "learning_rate": 4.862460281434408e-05, + "loss": 0.1118, + "step": 4320 + }, + { + "epoch": 1.97, + "learning_rate": 4.8551974580118026e-05, + "loss": 0.1187, + "step": 4330 + }, + { + "epoch": 1.97, + "learning_rate": 4.847934634589197e-05, + "loss": 0.1381, + "step": 4340 + }, + { + "epoch": 1.97, + "learning_rate": 4.840671811166591e-05, + "loss": 0.1106, + "step": 4350 + }, + { + "epoch": 1.98, + "learning_rate": 4.833408987743986e-05, + "loss": 0.0977, + "step": 4360 + }, + { + "epoch": 1.98, + "learning_rate": 4.8261461643213807e-05, + "loss": 0.1118, + "step": 4370 + }, + { + "epoch": 1.99, + "learning_rate": 4.818883340898775e-05, + "loss": 0.143, + "step": 4380 + }, + { + "epoch": 1.99, + "learning_rate": 4.811620517476169e-05, + "loss": 0.1297, + "step": 4390 + }, + { + "epoch": 2.0, + "learning_rate": 4.8043576940535637e-05, + "loss": 0.1279, + "step": 4400 + }, + { + "epoch": 2.0, + "eval_loss": 0.12572704255580902, + "eval_mean_accuracy": 0.7862403936198279, + "eval_mean_iou": 0.6747473672019839, + "eval_overall_accuracy": 0.9576788912998156, + "eval_per_category_accuracy": [ + 0.9915765311063784, + 0.7196683884130248, + 0.9192923269505887, + 0.7331795984807379, + 0.8581648945111781, + 0.7115117095553328, + 0.9112928763567146, + 0.7519079901660606, + 0.29948266942576307, + 0.7491965811965812, + 0.683487772728167, + 0.9157444856938398, + 0.8900823696563135, + 0.8693926755011175, + 0.8411364028280738, + 0.8102389516957862, + 0.9084990110252937, + 0.5884718498659517 + ], + "eval_per_category_iou": [ + 0.9844717060862208, + 0.6535599800017645, + 0.8063888964304353, + 0.6055343939054447, + 0.7820480995831319, + 0.5855220360295534, + 0.8090355387973314, + 0.5450858581824229, + 0.24937537692771602, + 0.5302421724311955, + 0.5408211714341311, + 0.8375502175037155, + 0.8053053786011244, + 0.800377494521874, + 0.7112820466082772, + 0.7012536483669215, + 0.8107974135557, + 0.3868011806687519 + ], + "eval_runtime": 80.6668, + "eval_samples_per_second": 1.103, + "eval_steps_per_second": 0.149, + "step": 4400 + }, + { + "epoch": 2.0, + "learning_rate": 4.797094870630959e-05, + "loss": 0.1082, + "step": 4410 + }, + { + "epoch": 2.01, + "learning_rate": 4.789832047208353e-05, + "loss": 0.0905, + "step": 4420 + }, + { + "epoch": 2.01, + "learning_rate": 4.782569223785747e-05, + "loss": 0.0983, + "step": 4430 + }, + { + "epoch": 2.02, + "learning_rate": 4.775306400363142e-05, + "loss": 0.1054, + "step": 4440 + }, + { + "epoch": 2.02, + "learning_rate": 4.768043576940536e-05, + "loss": 0.0985, + "step": 4450 + }, + { + "epoch": 2.02, + "learning_rate": 4.760780753517931e-05, + "loss": 0.1075, + "step": 4460 + }, + { + "epoch": 2.03, + "learning_rate": 4.7535179300953253e-05, + "loss": 0.0946, + "step": 4470 + }, + { + "epoch": 
2.03, + "learning_rate": 4.74625510667272e-05, + "loss": 0.0959, + "step": 4480 + }, + { + "epoch": 2.04, + "learning_rate": 4.738992283250114e-05, + "loss": 0.1003, + "step": 4490 + }, + { + "epoch": 2.04, + "learning_rate": 4.7317294598275083e-05, + "loss": 0.1002, + "step": 4500 + }, + { + "epoch": 2.04, + "eval_loss": 0.13972726464271545, + "eval_mean_accuracy": 0.7788409927090669, + "eval_mean_iou": 0.6773702141156384, + "eval_overall_accuracy": 0.9584378017468399, + "eval_per_category_accuracy": [ + 0.9922478814163378, + 0.673666995903431, + 0.9096373940794542, + 0.7090341833966359, + 0.8820256982557928, + 0.7314044842098755, + 0.8473119738870504, + 0.7690487415845215, + 0.28660113812726334, + 0.7365860805860805, + 0.7050010820594542, + 0.9094859112887602, + 0.8745706166933439, + 0.9047216796015698, + 0.8286342472840145, + 0.810129753340185, + 0.8998342965936688, + 0.5491957104557641 + ], + "eval_per_category_iou": [ + 0.9850577942021761, + 0.6230905169831666, + 0.8079639313816529, + 0.6100606909430439, + 0.7888574153189829, + 0.6199027954268561, + 0.7932197150475545, + 0.5528114514838938, + 0.25061069392924995, + 0.5259310922025388, + 0.5354586614467373, + 0.841570176882694, + 0.7832103915347324, + 0.7804022770398482, + 0.710008680715882, + 0.6975333222719983, + 0.8216968638169574, + 0.4652773834535234 + ], + "eval_runtime": 80.0929, + "eval_samples_per_second": 1.111, + "eval_steps_per_second": 0.15, + "step": 4500 + }, + { + "epoch": 2.05, + "learning_rate": 4.724466636404903e-05, + "loss": 0.1034, + "step": 4510 + }, + { + "epoch": 2.05, + "learning_rate": 4.717203812982298e-05, + "loss": 0.1153, + "step": 4520 + }, + { + "epoch": 2.06, + "learning_rate": 4.709940989559692e-05, + "loss": 0.0998, + "step": 4530 + }, + { + "epoch": 2.06, + "learning_rate": 4.7026781661370864e-05, + "loss": 0.1161, + "step": 4540 + }, + { + "epoch": 2.07, + "learning_rate": 4.695415342714481e-05, + "loss": 0.1006, + "step": 4550 + }, + { + "epoch": 2.07, + "learning_rate": 4.688152519291875e-05, + "loss": 0.1049, + "step": 4560 + }, + { + "epoch": 2.07, + "learning_rate": 4.68088969586927e-05, + "loss": 0.1089, + "step": 4570 + }, + { + "epoch": 2.08, + "learning_rate": 4.6736268724466644e-05, + "loss": 0.1044, + "step": 4580 + }, + { + "epoch": 2.08, + "learning_rate": 4.666364049024059e-05, + "loss": 0.098, + "step": 4590 + }, + { + "epoch": 2.09, + "learning_rate": 4.659101225601453e-05, + "loss": 0.0907, + "step": 4600 + }, + { + "epoch": 2.09, + "eval_loss": 0.13046495616436005, + "eval_mean_accuracy": 0.7811944164041577, + "eval_mean_iou": 0.6782751969723191, + "eval_overall_accuracy": 0.9586188927125395, + "eval_per_category_accuracy": [ + 0.9928277621186691, + 0.7083987758869152, + 0.9055965940354352, + 0.6987927292457949, + 0.8713186634491815, + 0.7051015946899607, + 0.878982277018586, + 0.8087493880883609, + 0.31200206932229696, + 0.7381587301587301, + 0.6864093332546382, + 0.9234421763866321, + 0.8785721993417621, + 0.8796526054590571, + 0.841713010863942, + 0.7948805241521069, + 0.8484292119368168, + 0.5884718498659517 + ], + "eval_per_category_iou": [ + 0.9844003862902916, + 0.6621263715474839, + 0.8059651700180577, + 0.6015999065747986, + 0.785216312190489, + 0.6208224621761497, + 0.8085146904898324, + 0.5663717485850763, + 0.2662693156732892, + 0.5292836331036028, + 0.5412749387197865, + 0.8419965464907948, + 0.807288546628603, + 0.8064357394082153, + 0.7138417523959252, + 0.7065443292376559, + 0.7819161642589012, + 0.3790855317127931 + ], + "eval_runtime": 80.5469, + 
"eval_samples_per_second": 1.105, + "eval_steps_per_second": 0.149, + "step": 4600 + }, + { + "epoch": 2.09, + "learning_rate": 4.6518384021788474e-05, + "loss": 0.1112, + "step": 4610 + }, + { + "epoch": 2.1, + "learning_rate": 4.6445755787562424e-05, + "loss": 0.0881, + "step": 4620 + }, + { + "epoch": 2.1, + "learning_rate": 4.637312755333637e-05, + "loss": 0.1018, + "step": 4630 + }, + { + "epoch": 2.11, + "learning_rate": 4.630049931911031e-05, + "loss": 0.0866, + "step": 4640 + }, + { + "epoch": 2.11, + "learning_rate": 4.6227871084884254e-05, + "loss": 0.1176, + "step": 4650 + }, + { + "epoch": 2.12, + "learning_rate": 4.61552428506582e-05, + "loss": 0.088, + "step": 4660 + }, + { + "epoch": 2.12, + "learning_rate": 4.608261461643215e-05, + "loss": 0.096, + "step": 4670 + }, + { + "epoch": 2.12, + "learning_rate": 4.600998638220609e-05, + "loss": 0.1117, + "step": 4680 + }, + { + "epoch": 2.13, + "learning_rate": 4.5937358147980034e-05, + "loss": 0.0836, + "step": 4690 + }, + { + "epoch": 2.13, + "learning_rate": 4.586472991375398e-05, + "loss": 0.1096, + "step": 4700 + }, + { + "epoch": 2.13, + "eval_loss": 0.13663238286972046, + "eval_mean_accuracy": 0.7862924352091032, + "eval_mean_iou": 0.6753792995078041, + "eval_overall_accuracy": 0.9587173890531733, + "eval_per_category_accuracy": [ + 0.9912568589725028, + 0.7287844686604381, + 0.9090837184989545, + 0.7161557243624526, + 0.8716486912442459, + 0.733279161275222, + 0.9053092387086896, + 0.7746160085116683, + 0.2463528194516296, + 0.724014652014652, + 0.7265930866237778, + 0.9221967872914961, + 0.8754833372300056, + 0.8802377558383049, + 0.8189827987584066, + 0.7959789311408016, + 0.9121771946444165, + 0.621112600536193 + ], + "eval_per_category_iou": [ + 0.9840687506780187, + 0.6647319450598139, + 0.8056522779326183, + 0.6075723574428908, + 0.779166766332831, + 0.6165828987116383, + 0.818051751624903, + 0.5792731359655048, + 0.21812019054598755, + 0.5545737501309351, + 0.5726912550299665, + 0.8376480896923469, + 0.798322325129454, + 0.790813220866898, + 0.7083919865012259, + 0.6933484031243705, + 0.8084097453722061, + 0.31940854099886257 + ], + "eval_runtime": 81.8556, + "eval_samples_per_second": 1.087, + "eval_steps_per_second": 0.147, + "step": 4700 + }, + { + "epoch": 2.14, + "learning_rate": 4.579210167952792e-05, + "loss": 0.1073, + "step": 4710 + }, + { + "epoch": 2.14, + "learning_rate": 4.571947344530187e-05, + "loss": 0.1135, + "step": 4720 + }, + { + "epoch": 2.15, + "learning_rate": 4.5646845211075814e-05, + "loss": 0.0965, + "step": 4730 + }, + { + "epoch": 2.15, + "learning_rate": 4.557421697684976e-05, + "loss": 0.0962, + "step": 4740 + }, + { + "epoch": 2.16, + "learning_rate": 4.55015887426237e-05, + "loss": 0.1166, + "step": 4750 + }, + { + "epoch": 2.16, + "learning_rate": 4.5428960508397644e-05, + "loss": 0.0883, + "step": 4760 + }, + { + "epoch": 2.17, + "learning_rate": 4.535633227417159e-05, + "loss": 0.1268, + "step": 4770 + }, + { + "epoch": 2.17, + "learning_rate": 4.528370403994554e-05, + "loss": 0.0955, + "step": 4780 + }, + { + "epoch": 2.17, + "learning_rate": 4.521107580571948e-05, + "loss": 0.0928, + "step": 4790 + }, + { + "epoch": 2.18, + "learning_rate": 4.5138447571493424e-05, + "loss": 0.0943, + "step": 4800 + }, + { + "epoch": 2.18, + "eval_loss": 0.1498226672410965, + "eval_mean_accuracy": 0.7786997550272359, + "eval_mean_iou": 0.664842613989284, + "eval_overall_accuracy": 0.9562002031990652, + "eval_per_category_accuracy": [ + 0.9923740750836234, + 0.7165109538690717, + 
0.8956235556289205, + 0.6829896907216495, + 0.8612250274965986, + 0.6496959446031824, + 0.9242658342815981, + 0.7623831948742127, + 0.30144852560786345, + 0.7222466422466423, + 0.7190875287729446, + 0.9306417114413699, + 0.8555203855907055, + 0.8759173222110765, + 0.8311023452319365, + 0.8024922918807811, + 0.846287719125012, + 0.6467828418230563 + ], + "eval_per_category_iou": [ + 0.9839855105743271, + 0.6651285134525777, + 0.8047618311882947, + 0.5993334126889656, + 0.7710710817091653, + 0.5758863388149268, + 0.7987690817885456, + 0.5345619201418574, + 0.2649118021458447, + 0.5450390682588825, + 0.5745082166911609, + 0.8368648587152832, + 0.7960356772315004, + 0.7922615920920681, + 0.7041768637907733, + 0.6976128653991122, + 0.7758146139724083, + 0.2464438031514161 + ], + "eval_runtime": 79.8882, + "eval_samples_per_second": 1.114, + "eval_steps_per_second": 0.15, + "step": 4800 + }, + { + "epoch": 2.18, + "learning_rate": 4.506581933726737e-05, + "loss": 0.1111, + "step": 4810 + }, + { + "epoch": 2.19, + "learning_rate": 4.499319110304131e-05, + "loss": 0.1019, + "step": 4820 + }, + { + "epoch": 2.19, + "learning_rate": 4.492056286881526e-05, + "loss": 0.1044, + "step": 4830 + }, + { + "epoch": 2.2, + "learning_rate": 4.4847934634589204e-05, + "loss": 0.1078, + "step": 4840 + }, + { + "epoch": 2.2, + "learning_rate": 4.477530640036315e-05, + "loss": 0.1105, + "step": 4850 + }, + { + "epoch": 2.21, + "learning_rate": 4.470267816613709e-05, + "loss": 0.1199, + "step": 4860 + }, + { + "epoch": 2.21, + "learning_rate": 4.4630049931911034e-05, + "loss": 0.0992, + "step": 4870 + }, + { + "epoch": 2.22, + "learning_rate": 4.4557421697684984e-05, + "loss": 0.1098, + "step": 4880 + }, + { + "epoch": 2.22, + "learning_rate": 4.448479346345893e-05, + "loss": 0.0953, + "step": 4890 + }, + { + "epoch": 2.22, + "learning_rate": 4.441216522923287e-05, + "loss": 0.0964, + "step": 4900 + }, + { + "epoch": 2.22, + "eval_loss": 0.1363184005022049, + "eval_mean_accuracy": 0.779909349604894, + "eval_mean_iou": 0.6639330164541964, + "eval_overall_accuracy": 0.955620283491156, + "eval_per_category_accuracy": [ + 0.9925093657889024, + 0.7195226606648424, + 0.9051151370089139, + 0.6953336950623983, + 0.8525475607103625, + 0.6841136023388197, + 0.9184005122369653, + 0.6965911779953865, + 0.2818416968442835, + 0.7013137973137973, + 0.7799976391430089, + 0.926304594497357, + 0.8919932377749402, + 0.8673468489872059, + 0.845878599758579, + 0.8095002569373073, + 0.8552455773853422, + 0.6148123324396783 + ], + "eval_per_category_iou": [ + 0.9848753150184794, + 0.6500724139444387, + 0.8076650433149517, + 0.6053020015351007, + 0.7691890553423301, + 0.562433739489086, + 0.8069825680415114, + 0.486605441610325, + 0.23783123062819225, + 0.5461301487087818, + 0.5789838775629418, + 0.8411738729261734, + 0.8066273632852206, + 0.7983356280877946, + 0.7129400009083889, + 0.6961135231277411, + 0.7901193038470858, + 0.26941376879699247 + ], + "eval_runtime": 78.9031, + "eval_samples_per_second": 1.128, + "eval_steps_per_second": 0.152, + "step": 4900 + }, + { + "epoch": 2.23, + "learning_rate": 4.4339536995006814e-05, + "loss": 0.0879, + "step": 4910 + }, + { + "epoch": 2.23, + "learning_rate": 4.426690876078076e-05, + "loss": 0.0961, + "step": 4920 + }, + { + "epoch": 2.24, + "learning_rate": 4.419428052655471e-05, + "loss": 0.1006, + "step": 4930 + }, + { + "epoch": 2.24, + "learning_rate": 4.412165229232865e-05, + "loss": 0.1023, + "step": 4940 + }, + { + "epoch": 2.25, + "learning_rate": 4.4049024058102595e-05, + "loss": 
0.1079, + "step": 4950 + }, + { + "epoch": 2.25, + "learning_rate": 4.397639582387654e-05, + "loss": 0.0982, + "step": 4960 + }, + { + "epoch": 2.26, + "learning_rate": 4.390376758965048e-05, + "loss": 0.1068, + "step": 4970 + }, + { + "epoch": 2.26, + "learning_rate": 4.383113935542443e-05, + "loss": 0.0893, + "step": 4980 + }, + { + "epoch": 2.27, + "learning_rate": 4.3758511121198375e-05, + "loss": 0.1011, + "step": 4990 + }, + { + "epoch": 2.27, + "learning_rate": 4.368588288697232e-05, + "loss": 0.1051, + "step": 5000 + }, + { + "epoch": 2.27, + "eval_loss": 0.13948704302310944, + "eval_mean_accuracy": 0.7862352771041999, + "eval_mean_iou": 0.6728738609815013, + "eval_overall_accuracy": 0.958048488316911, + "eval_per_category_accuracy": [ + 0.9922981370042766, + 0.7038812156932592, + 0.9110095466050402, + 0.7093733043950081, + 0.8633389893190381, + 0.6356612597653231, + 0.922238809605835, + 0.8056749272479963, + 0.34345576823590274, + 0.6931965811965812, + 0.7369119990556572, + 0.9255296857270503, + 0.8810495836555582, + 0.8887510339123242, + 0.8609135195723401, + 0.7904098150051387, + 0.8652834392251129, + 0.6232573726541555 + ], + "eval_per_category_iou": [ + 0.9845730934839846, + 0.6543289782647962, + 0.8066816691078623, + 0.6087892898719441, + 0.776111375309405, + 0.5984657762070579, + 0.8045457788220866, + 0.5666638292141569, + 0.2786217894913547, + 0.5356773526370218, + 0.5755764369626516, + 0.8442773419890001, + 0.8032563783039286, + 0.796446848608625, + 0.7188146788330364, + 0.704896715282472, + 0.7796387273996102, + 0.2743634378780279 + ], + "eval_runtime": 75.4373, + "eval_samples_per_second": 1.18, + "eval_steps_per_second": 0.159, + "step": 5000 + }, + { + "epoch": 2.27, + "learning_rate": 4.361325465274626e-05, + "loss": 0.0912, + "step": 5010 + }, + { + "epoch": 2.28, + "learning_rate": 4.3540626418520205e-05, + "loss": 0.1056, + "step": 5020 + }, + { + "epoch": 2.28, + "learning_rate": 4.346799818429415e-05, + "loss": 0.1234, + "step": 5030 + }, + { + "epoch": 2.29, + "learning_rate": 4.33953699500681e-05, + "loss": 0.1045, + "step": 5040 + }, + { + "epoch": 2.29, + "learning_rate": 4.332274171584204e-05, + "loss": 0.1124, + "step": 5050 + }, + { + "epoch": 2.3, + "learning_rate": 4.3250113481615985e-05, + "loss": 0.0884, + "step": 5060 + }, + { + "epoch": 2.3, + "learning_rate": 4.317748524738993e-05, + "loss": 0.0937, + "step": 5070 + }, + { + "epoch": 2.31, + "learning_rate": 4.310485701316387e-05, + "loss": 0.0971, + "step": 5080 + }, + { + "epoch": 2.31, + "learning_rate": 4.303222877893782e-05, + "loss": 0.0995, + "step": 5090 + }, + { + "epoch": 2.32, + "learning_rate": 4.2959600544711765e-05, + "loss": 0.0963, + "step": 5100 + }, + { + "epoch": 2.32, + "eval_loss": 0.13856534659862518, + "eval_mean_accuracy": 0.7810347220217441, + "eval_mean_iou": 0.6805732655966783, + "eval_overall_accuracy": 0.9593023235878247, + "eval_per_category_accuracy": [ + 0.9915753662417573, + 0.7019867549668874, + 0.9124057719819523, + 0.6568095496473142, + 0.8796743359820179, + 0.7558415290652228, + 0.8941475534769128, + 0.7791572509045454, + 0.29218830832902226, + 0.7648644688644689, + 0.6649747191563871, + 0.9269015905397873, + 0.8923484344369909, + 0.8519305562888267, + 0.7913433350577685, + 0.8191932168550874, + 0.8974243457492602, + 0.585857908847185 + ], + "eval_per_category_iou": [ + 0.9842123272869723, + 0.6475578790141897, + 0.8065977864490307, + 0.5950961715725435, + 0.7896473057668808, + 0.636168832737687, + 0.8243746170060541, + 0.5740744437745472, + 0.25528837461580184, 
+ 0.5439937196490229, + 0.5443798418399395, + 0.8415165992455051, + 0.8064600018691664, + 0.7964307307583155, + 0.6916026939198417, + 0.693300860564613, + 0.7965899286487079, + 0.4230266660213909 + ], + "eval_runtime": 75.6227, + "eval_samples_per_second": 1.177, + "eval_steps_per_second": 0.159, + "step": 5100 + }, + { + "epoch": 2.32, + "learning_rate": 4.288697231048571e-05, + "loss": 0.1163, + "step": 5110 + }, + { + "epoch": 2.32, + "learning_rate": 4.281434407625965e-05, + "loss": 0.101, + "step": 5120 + }, + { + "epoch": 2.33, + "learning_rate": 4.2741715842033595e-05, + "loss": 0.1173, + "step": 5130 + }, + { + "epoch": 2.33, + "learning_rate": 4.2669087607807545e-05, + "loss": 0.1094, + "step": 5140 + }, + { + "epoch": 2.34, + "learning_rate": 4.259645937358149e-05, + "loss": 0.0956, + "step": 5150 + }, + { + "epoch": 2.34, + "learning_rate": 4.252383113935543e-05, + "loss": 0.1393, + "step": 5160 + }, + { + "epoch": 2.35, + "learning_rate": 4.2451202905129375e-05, + "loss": 0.1037, + "step": 5170 + }, + { + "epoch": 2.35, + "learning_rate": 4.237857467090332e-05, + "loss": 0.1083, + "step": 5180 + }, + { + "epoch": 2.36, + "learning_rate": 4.230594643667727e-05, + "loss": 0.0906, + "step": 5190 + }, + { + "epoch": 2.36, + "learning_rate": 4.223331820245121e-05, + "loss": 0.1062, + "step": 5200 + }, + { + "epoch": 2.36, + "eval_loss": 0.14069417119026184, + "eval_mean_accuracy": 0.793414086796973, + "eval_mean_iou": 0.6864809607686351, + "eval_overall_accuracy": 0.9608978957272647, + "eval_per_category_accuracy": [ + 0.9927017903303589, + 0.7209151702585858, + 0.9042227220204688, + 0.7253798155181769, + 0.8823262225265583, + 0.7159411585460101, + 0.9293168665852377, + 0.7939551258776755, + 0.3227625452664252, + 0.7244737484737485, + 0.7663440162112181, + 0.9198048495056003, + 0.872911533550348, + 0.8701362124491843, + 0.8376929211933092, + 0.7936729188078109, + 0.8828905832945046, + 0.6260053619302949 + ], + "eval_per_category_iou": [ + 0.9851730866254734, + 0.6629984811031361, + 0.8096963332727704, + 0.6109333942648235, + 0.7896189845195383, + 0.6560161214939089, + 0.8236946973622523, + 0.5911623165092575, + 0.2780055253542465, + 0.5391958008912928, + 0.5810987207697759, + 0.8444977639679405, + 0.8053277583510663, + 0.8013833511621122, + 0.7164935148738466, + 0.7034907223421033, + 0.8141598208467665, + 0.3437109001251196 + ], + "eval_runtime": 73.8903, + "eval_samples_per_second": 1.204, + "eval_steps_per_second": 0.162, + "step": 5200 + }, + { + "epoch": 2.36, + "learning_rate": 4.2160689968225155e-05, + "loss": 0.0955, + "step": 5210 + }, + { + "epoch": 2.37, + "learning_rate": 4.20880617339991e-05, + "loss": 0.0926, + "step": 5220 + }, + { + "epoch": 2.37, + "learning_rate": 4.201543349977304e-05, + "loss": 0.0952, + "step": 5230 + }, + { + "epoch": 2.38, + "learning_rate": 4.194280526554699e-05, + "loss": 0.0929, + "step": 5240 + }, + { + "epoch": 2.38, + "learning_rate": 4.1870177031320935e-05, + "loss": 0.0853, + "step": 5250 + }, + { + "epoch": 2.39, + "learning_rate": 4.179754879709488e-05, + "loss": 0.1079, + "step": 5260 + }, + { + "epoch": 2.39, + "learning_rate": 4.172492056286882e-05, + "loss": 0.1007, + "step": 5270 + }, + { + "epoch": 2.4, + "learning_rate": 4.1652292328642765e-05, + "loss": 0.093, + "step": 5280 + }, + { + "epoch": 2.4, + "learning_rate": 4.157966409441671e-05, + "loss": 0.0977, + "step": 5290 + }, + { + "epoch": 2.41, + "learning_rate": 4.150703586019066e-05, + "loss": 0.0995, + "step": 5300 + }, + { + "epoch": 2.41, + "eval_loss": 
0.14190153777599335, + "eval_mean_accuracy": 0.7876905051576213, + "eval_mean_iou": 0.6820334465496983, + "eval_overall_accuracy": 0.9599077889088834, + "eval_per_category_accuracy": [ + 0.9924348144531521, + 0.7192312051684775, + 0.9121719214262133, + 0.7101871947911015, + 0.8761002927710191, + 0.7157976320333511, + 0.9125560694164605, + 0.7983265684356557, + 0.30067252974650804, + 0.6966837606837607, + 0.7734167502803517, + 0.9045399374537921, + 0.8795074006798195, + 0.8910300406525526, + 0.8141328246249353, + 0.7826952723535457, + 0.8835293282686308, + 0.6154155495978553 + ], + "eval_per_category_iou": [ + 0.9850695155037767, + 0.6698890028352537, + 0.8080701914728328, + 0.6105183371231998, + 0.7831849613766758, + 0.6462454572095555, + 0.8141802421069952, + 0.5798586920155084, + 0.26502507979936163, + 0.5442675742498703, + 0.5795879252515572, + 0.8418875756193056, + 0.8090034946959201, + 0.8123608620834888, + 0.7139204975049146, + 0.6994793370876172, + 0.7976399076243352, + 0.3164133843344016 + ], + "eval_runtime": 75.1424, + "eval_samples_per_second": 1.184, + "eval_steps_per_second": 0.16, + "step": 5300 + }, + { + "epoch": 2.41, + "learning_rate": 4.14344076259646e-05, + "loss": 0.0978, + "step": 5310 + }, + { + "epoch": 2.41, + "learning_rate": 4.1361779391738545e-05, + "loss": 0.0935, + "step": 5320 + }, + { + "epoch": 2.42, + "learning_rate": 4.128915115751249e-05, + "loss": 0.1003, + "step": 5330 + }, + { + "epoch": 2.42, + "learning_rate": 4.121652292328643e-05, + "loss": 0.0929, + "step": 5340 + }, + { + "epoch": 2.43, + "learning_rate": 4.114389468906038e-05, + "loss": 0.0869, + "step": 5350 + }, + { + "epoch": 2.43, + "learning_rate": 4.1071266454834326e-05, + "loss": 0.1052, + "step": 5360 + }, + { + "epoch": 2.44, + "learning_rate": 4.099863822060827e-05, + "loss": 0.0841, + "step": 5370 + }, + { + "epoch": 2.44, + "learning_rate": 4.092600998638221e-05, + "loss": 0.1085, + "step": 5380 + }, + { + "epoch": 2.45, + "learning_rate": 4.0853381752156156e-05, + "loss": 0.1065, + "step": 5390 + }, + { + "epoch": 2.45, + "learning_rate": 4.0780753517930106e-05, + "loss": 0.1133, + "step": 5400 + }, + { + "epoch": 2.45, + "eval_loss": 0.1448107659816742, + "eval_mean_accuracy": 0.7860606643190547, + "eval_mean_iou": 0.6723266330890474, + "eval_overall_accuracy": 0.9580608753675826, + "eval_per_category_accuracy": [ + 0.9918118337598402, + 0.7352774494405674, + 0.9131021651810278, + 0.7190043407487792, + 0.8650838140235054, + 0.7536422612712474, + 0.8617081989233281, + 0.7797081971773487, + 0.29979306777030523, + 0.7275604395604396, + 0.7188120954573177, + 0.9322508332246089, + 0.8405931334640218, + 0.8651514351582986, + 0.8220221158820487, + 0.8033915724563206, + 0.9154202911072505, + 0.6047587131367292 + ], + "eval_per_category_iou": [ + 0.9847888440066707, + 0.668482261151185, + 0.8071594358421708, + 0.607855504587156, + 0.7777053350472859, + 0.6382738398013226, + 0.7800267121108553, + 0.5656370150411607, + 0.2649142857142857, + 0.5581039869922598, + 0.5782921810699588, + 0.8393263946236865, + 0.7634740564689355, + 0.7677998992624292, + 0.7079388505235019, + 0.6958418177165048, + 0.8128287887422735, + 0.28343018690120936 + ], + "eval_runtime": 76.8852, + "eval_samples_per_second": 1.158, + "eval_steps_per_second": 0.156, + "step": 5400 + }, + { + "epoch": 2.46, + "learning_rate": 4.070812528370405e-05, + "loss": 0.0992, + "step": 5410 + }, + { + "epoch": 2.46, + "learning_rate": 4.063549704947799e-05, + "loss": 0.1028, + "step": 5420 + }, + { + "epoch": 2.46, + "learning_rate": 
4.0562868815251936e-05, + "loss": 0.1002, + "step": 5430 + }, + { + "epoch": 2.47, + "learning_rate": 4.049024058102588e-05, + "loss": 0.0933, + "step": 5440 + }, + { + "epoch": 2.47, + "learning_rate": 4.041761234679983e-05, + "loss": 0.0934, + "step": 5450 + }, + { + "epoch": 2.48, + "learning_rate": 4.034498411257377e-05, + "loss": 0.0845, + "step": 5460 + }, + { + "epoch": 2.48, + "learning_rate": 4.0272355878347716e-05, + "loss": 0.0949, + "step": 5470 + }, + { + "epoch": 2.49, + "learning_rate": 4.019972764412166e-05, + "loss": 0.1047, + "step": 5480 + }, + { + "epoch": 2.49, + "learning_rate": 4.01270994098956e-05, + "loss": 0.083, + "step": 5490 + }, + { + "epoch": 2.5, + "learning_rate": 4.005447117566955e-05, + "loss": 0.0874, + "step": 5500 + }, + { + "epoch": 2.5, + "eval_loss": 0.1436825543642044, + "eval_mean_accuracy": 0.7867991040853224, + "eval_mean_iou": 0.6832179193851488, + "eval_overall_accuracy": 0.9600632056761323, + "eval_per_category_accuracy": [ + 0.9926575809445011, + 0.7220648002720251, + 0.9126533784527346, + 0.7089663591969615, + 0.8688733015747334, + 0.7494203736988769, + 0.8806473951427964, + 0.7973583481004999, + 0.25659596482152097, + 0.6673699633699633, + 0.7635503354384309, + 0.9160489141393175, + 0.884889304533928, + 0.8988437780476216, + 0.8554222710812209, + 0.8011626413155191, + 0.8899476352550197, + 0.595911528150134 + ], + "eval_per_category_iou": [ + 0.984880249300407, + 0.67195057635802, + 0.8092375386688906, + 0.613007271874267, + 0.7855266414530675, + 0.6435047723058371, + 0.8203687735427027, + 0.5757417658383376, + 0.22948089201443508, + 0.5395190902988906, + 0.5825784129033227, + 0.846772602018814, + 0.8051629267494426, + 0.8230038229595105, + 0.7209706868204239, + 0.7078282484336693, + 0.8028281928515756, + 0.3355600845410628 + ], + "eval_runtime": 75.9568, + "eval_samples_per_second": 1.172, + "eval_steps_per_second": 0.158, + "step": 5500 + }, + { + "epoch": 2.5, + "learning_rate": 3.998184294144349e-05, + "loss": 0.1144, + "step": 5510 + }, + { + "epoch": 2.51, + "learning_rate": 3.990921470721743e-05, + "loss": 0.0819, + "step": 5520 + }, + { + "epoch": 2.51, + "learning_rate": 3.9836586472991376e-05, + "loss": 0.0906, + "step": 5530 + }, + { + "epoch": 2.51, + "learning_rate": 3.9763958238765326e-05, + "loss": 0.1255, + "step": 5540 + }, + { + "epoch": 2.52, + "learning_rate": 3.969133000453927e-05, + "loss": 0.1035, + "step": 5550 + }, + { + "epoch": 2.52, + "learning_rate": 3.961870177031321e-05, + "loss": 0.0909, + "step": 5560 + }, + { + "epoch": 2.53, + "learning_rate": 3.9546073536087156e-05, + "loss": 0.0893, + "step": 5570 + }, + { + "epoch": 2.53, + "learning_rate": 3.94734453018611e-05, + "loss": 0.098, + "step": 5580 + }, + { + "epoch": 2.54, + "learning_rate": 3.940081706763505e-05, + "loss": 0.0941, + "step": 5590 + }, + { + "epoch": 2.54, + "learning_rate": 3.932818883340899e-05, + "loss": 0.0825, + "step": 5600 + }, + { + "epoch": 2.54, + "eval_loss": 0.1394958645105362, + "eval_mean_accuracy": 0.7899059096006436, + "eval_mean_iou": 0.6796236093353095, + "eval_overall_accuracy": 0.958950857098183, + "eval_per_category_accuracy": [ + 0.9926132606191554, + 0.7325571981411616, + 0.904631960493012, + 0.7543407487791645, + 0.8680581809499174, + 0.7480116057746237, + 0.8881900148590478, + 0.7584741859091426, + 0.27501293326435594, + 0.7362246642246643, + 0.7294654626296012, + 0.9225763344442994, + 0.8980900311134291, + 0.8857988842546152, + 0.8347667701327816, + 0.8093653648509763, + 0.871482659771101, + 0.6086461126005361 + ], 
+ "eval_per_category_iou": [ + 0.9847737868822146, + 0.6712761695624434, + 0.8086321934304831, + 0.6129174473713215, + 0.7762552482886116, + 0.6416916391685009, + 0.8251962470579106, + 0.5465201941975035, + 0.2496360648039446, + 0.5549248280838156, + 0.5758973642110495, + 0.8395847950060266, + 0.8004792953236646, + 0.8124702388158479, + 0.715316130700604, + 0.7050640702814616, + 0.8061817237235351, + 0.30640753112663227 + ], + "eval_runtime": 75.446, + "eval_samples_per_second": 1.18, + "eval_steps_per_second": 0.159, + "step": 5600 + }, + { + "epoch": 2.55, + "learning_rate": 3.9255560599182936e-05, + "loss": 0.104, + "step": 5610 + }, + { + "epoch": 2.55, + "learning_rate": 3.918293236495688e-05, + "loss": 0.1045, + "step": 5620 + }, + { + "epoch": 2.56, + "learning_rate": 3.911030413073082e-05, + "loss": 0.0895, + "step": 5630 + }, + { + "epoch": 2.56, + "learning_rate": 3.903767589650477e-05, + "loss": 0.1053, + "step": 5640 + }, + { + "epoch": 2.56, + "learning_rate": 3.8965047662278716e-05, + "loss": 0.0967, + "step": 5650 + }, + { + "epoch": 2.57, + "learning_rate": 3.889241942805266e-05, + "loss": 0.0967, + "step": 5660 + }, + { + "epoch": 2.57, + "learning_rate": 3.88197911938266e-05, + "loss": 0.1163, + "step": 5670 + }, + { + "epoch": 2.58, + "learning_rate": 3.8747162959600546e-05, + "loss": 0.1213, + "step": 5680 + }, + { + "epoch": 2.58, + "learning_rate": 3.8674534725374496e-05, + "loss": 0.1102, + "step": 5690 + }, + { + "epoch": 2.59, + "learning_rate": 3.860190649114844e-05, + "loss": 0.1005, + "step": 5700 + }, + { + "epoch": 2.59, + "eval_loss": 0.1268976330757141, + "eval_mean_accuracy": 0.7871117980869821, + "eval_mean_iou": 0.686933111067596, + "eval_overall_accuracy": 0.9599076603235823, + "eval_per_category_accuracy": [ + 0.9926646256019716, + 0.7225667514046535, + 0.8977488445031363, + 0.7453879544221378, + 0.8739060539173475, + 0.7545255013491492, + 0.9012464896804435, + 0.8032218615481048, + 0.2687015002586653, + 0.7288986568986568, + 0.7437191367132936, + 0.9206390625185326, + 0.8672643562397713, + 0.8803741442725657, + 0.8389539144680117, + 0.7926258992805756, + 0.8485032693251212, + 0.5870643431635388 + ], + "eval_per_category_iou": [ + 0.985185230857788, + 0.6716788584846022, + 0.8071765933598732, + 0.6127683300808475, + 0.7851744404675747, + 0.6319301249960702, + 0.8206798319274474, + 0.5758901107725064, + 0.23264355460001793, + 0.556702799889586, + 0.5797618226015475, + 0.8450397552629001, + 0.7983956754429897, + 0.8062322215686116, + 0.7237232351218423, + 0.7104749512036435, + 0.803467177811932, + 0.4178712847669481 + ], + "eval_runtime": 75.3313, + "eval_samples_per_second": 1.181, + "eval_steps_per_second": 0.159, + "step": 5700 + }, + { + "epoch": 2.59, + "learning_rate": 3.852927825692238e-05, + "loss": 0.0995, + "step": 5710 + }, + { + "epoch": 2.6, + "learning_rate": 3.8456650022696326e-05, + "loss": 0.0926, + "step": 5720 + }, + { + "epoch": 2.6, + "learning_rate": 3.838402178847027e-05, + "loss": 0.0974, + "step": 5730 + }, + { + "epoch": 2.61, + "learning_rate": 3.831139355424421e-05, + "loss": 0.1003, + "step": 5740 + }, + { + "epoch": 2.61, + "learning_rate": 3.823876532001816e-05, + "loss": 0.099, + "step": 5750 + }, + { + "epoch": 2.61, + "learning_rate": 3.8166137085792106e-05, + "loss": 0.103, + "step": 5760 + }, + { + "epoch": 2.62, + "learning_rate": 3.809350885156605e-05, + "loss": 0.095, + "step": 5770 + }, + { + "epoch": 2.62, + "learning_rate": 3.802088061733999e-05, + "loss": 0.1116, + "step": 5780 + }, + { + "epoch": 2.63, + 
"learning_rate": 3.7948252383113936e-05, + "loss": 0.1032, + "step": 5790 + }, + { + "epoch": 2.63, + "learning_rate": 3.7875624148887886e-05, + "loss": 0.0992, + "step": 5800 + }, + { + "epoch": 2.63, + "eval_loss": 0.13441899418830872, + "eval_mean_accuracy": 0.7896584094262842, + "eval_mean_iou": 0.6706728560954918, + "eval_overall_accuracy": 0.9575793234150062, + "eval_per_category_accuracy": [ + 0.9917902005597341, + 0.734305931119351, + 0.9237372069990095, + 0.7105941399891481, + 0.8412854479698487, + 0.730797256656318, + 0.9053161984500656, + 0.766902760692422, + 0.29694774961200204, + 0.7168644688644689, + 0.7139428279131992, + 0.9216156057137661, + 0.8879961512868011, + 0.899992080671559, + 0.8425967839282635, + 0.8107464028776978, + 0.8867847676295155, + 0.6316353887399464 + ], + "eval_per_category_iou": [ + 0.9846100446977286, + 0.6570368867897192, + 0.8084709338373836, + 0.6118670793669334, + 0.7667789610342505, + 0.6227294108792594, + 0.8130077235702489, + 0.5434300379522429, + 0.2513024823781796, + 0.5553503999273547, + 0.5764596554490362, + 0.8449028264068605, + 0.7983644727587295, + 0.803406685335229, + 0.7228602072989191, + 0.7092795207669527, + 0.8002823710255025, + 0.20197171024432062 + ], + "eval_runtime": 76.0516, + "eval_samples_per_second": 1.17, + "eval_steps_per_second": 0.158, + "step": 5800 + }, + { + "epoch": 2.64, + "learning_rate": 3.780299591466183e-05, + "loss": 0.1177, + "step": 5810 + }, + { + "epoch": 2.64, + "learning_rate": 3.773036768043577e-05, + "loss": 0.0972, + "step": 5820 + }, + { + "epoch": 2.65, + "learning_rate": 3.7657739446209716e-05, + "loss": 0.0787, + "step": 5830 + }, + { + "epoch": 2.65, + "learning_rate": 3.758511121198366e-05, + "loss": 0.0953, + "step": 5840 + }, + { + "epoch": 2.66, + "learning_rate": 3.751248297775761e-05, + "loss": 0.082, + "step": 5850 + }, + { + "epoch": 2.66, + "learning_rate": 3.743985474353155e-05, + "loss": 0.0967, + "step": 5860 + }, + { + "epoch": 2.66, + "learning_rate": 3.73672265093055e-05, + "loss": 0.0956, + "step": 5870 + }, + { + "epoch": 2.67, + "learning_rate": 3.729459827507944e-05, + "loss": 0.1307, + "step": 5880 + }, + { + "epoch": 2.67, + "learning_rate": 3.722197004085338e-05, + "loss": 0.0974, + "step": 5890 + }, + { + "epoch": 2.68, + "learning_rate": 3.7149341806627333e-05, + "loss": 0.122, + "step": 5900 + }, + { + "epoch": 2.68, + "eval_loss": 0.13111147284507751, + "eval_mean_accuracy": 0.7883410579171871, + "eval_mean_iou": 0.6767765658762446, + "eval_overall_accuracy": 0.9591094884979591, + "eval_per_category_accuracy": [ + 0.9920656078380079, + 0.7211904337829305, + 0.9036587295036865, + 0.6854991861096039, + 0.8738655723374956, + 0.738629388047112, + 0.9014152634088117, + 0.7453923730972032, + 0.2596999482669426, + 0.6926788766788767, + 0.766471895964902, + 0.9355521027307627, + 0.8793590273906084, + 0.9128037942382486, + 0.8493975254354199, + 0.8116007194244604, + 0.8823197242596575, + 0.638538873994638 + ], + "eval_per_category_iou": [ + 0.9849246790162564, + 0.6685579622041098, + 0.8104779074411926, + 0.6064078718425632, + 0.7795262192778223, + 0.6397543562370068, + 0.8151970040281974, + 0.5529108469380098, + 0.22835827685029342, + 0.5441911149651981, + 0.5801441462906156, + 0.837493098517774, + 0.8077946108476929, + 0.825658025644495, + 0.7171273231875156, + 0.7089273791288637, + 0.7899439180042545, + 0.2845834453505392 + ], + "eval_runtime": 76.2941, + "eval_samples_per_second": 1.167, + "eval_steps_per_second": 0.157, + "step": 5900 + }, + { + "epoch": 2.68, + 
"learning_rate": 3.707671357240128e-05, + "loss": 0.1152, + "step": 5910 + }, + { + "epoch": 2.69, + "learning_rate": 3.700408533817522e-05, + "loss": 0.0882, + "step": 5920 + }, + { + "epoch": 2.69, + "learning_rate": 3.6931457103949163e-05, + "loss": 0.0882, + "step": 5930 + }, + { + "epoch": 2.7, + "learning_rate": 3.685882886972311e-05, + "loss": 0.1144, + "step": 5940 + }, + { + "epoch": 2.7, + "learning_rate": 3.678620063549706e-05, + "loss": 0.0885, + "step": 5950 + }, + { + "epoch": 2.71, + "learning_rate": 3.6713572401271e-05, + "loss": 0.098, + "step": 5960 + }, + { + "epoch": 2.71, + "learning_rate": 3.6640944167044944e-05, + "loss": 0.0956, + "step": 5970 + }, + { + "epoch": 2.71, + "learning_rate": 3.656831593281889e-05, + "loss": 0.1106, + "step": 5980 + }, + { + "epoch": 2.72, + "learning_rate": 3.649568769859283e-05, + "loss": 0.1136, + "step": 5990 + }, + { + "epoch": 2.72, + "learning_rate": 3.6423059464366774e-05, + "loss": 0.0938, + "step": 6000 + }, + { + "epoch": 2.72, + "eval_loss": 0.1398986577987671, + "eval_mean_accuracy": 0.7859709892856883, + "eval_mean_iou": 0.687898138786663, + "eval_overall_accuracy": 0.9594852576095066, + "eval_per_category_accuracy": [ + 0.9923034066299434, + 0.7256108421444648, + 0.9027697534940025, + 0.7238876831253391, + 0.8872773255809965, + 0.7376556158612253, + 0.8999154391422814, + 0.7498649730036326, + 0.2488877392653906, + 0.7262026862026862, + 0.7584744929075922, + 0.9152463300557855, + 0.8725293599266226, + 0.8950073033806734, + 0.827357087428867, + 0.8159750770811922, + 0.86911590906987, + 0.5993967828418231 + ], + "eval_per_category_iou": [ + 0.984698799272596, + 0.6722924824099494, + 0.8096993288259389, + 0.6134613173928037, + 0.793041382524653, + 0.6348061385039867, + 0.8116350488899908, + 0.5510817993792497, + 0.22583673660986717, + 0.5578189948828763, + 0.5852149823536109, + 0.8380704136120916, + 0.7979941279514445, + 0.8050082507924322, + 0.710620591339122, + 0.7023525834185719, + 0.7917829784243446, + 0.4967505415764039 + ], + "eval_runtime": 78.0349, + "eval_samples_per_second": 1.141, + "eval_steps_per_second": 0.154, + "step": 6000 + }, + { + "epoch": 2.73, + "learning_rate": 3.6350431230140724e-05, + "loss": 0.0994, + "step": 6010 + }, + { + "epoch": 2.73, + "learning_rate": 3.627780299591467e-05, + "loss": 0.0855, + "step": 6020 + }, + { + "epoch": 2.74, + "learning_rate": 3.620517476168861e-05, + "loss": 0.0998, + "step": 6030 + }, + { + "epoch": 2.74, + "learning_rate": 3.6132546527462554e-05, + "loss": 0.0939, + "step": 6040 + }, + { + "epoch": 2.75, + "learning_rate": 3.60599182932365e-05, + "loss": 0.1038, + "step": 6050 + }, + { + "epoch": 2.75, + "learning_rate": 3.598729005901045e-05, + "loss": 0.0786, + "step": 6060 + }, + { + "epoch": 2.76, + "learning_rate": 3.591466182478439e-05, + "loss": 0.1073, + "step": 6070 + }, + { + "epoch": 2.76, + "learning_rate": 3.5842033590558334e-05, + "loss": 0.1006, + "step": 6080 + }, + { + "epoch": 2.76, + "learning_rate": 3.576940535633228e-05, + "loss": 0.0964, + "step": 6090 + }, + { + "epoch": 2.77, + "learning_rate": 3.569677712210622e-05, + "loss": 0.0984, + "step": 6100 + }, + { + "epoch": 2.77, + "eval_loss": 0.137928768992424, + "eval_mean_accuracy": 0.7898073705363208, + "eval_mean_iou": 0.6917297696873022, + "eval_overall_accuracy": 0.9597389135467872, + "eval_per_category_accuracy": [ + 0.992448127191679, + 0.7315209119318642, + 0.9064425828106085, + 0.7121540965816603, + 0.8710037304805026, + 0.7046511422502307, + 0.8946434350499536, + 0.8119159743710632, + 
0.32762545266425247, + 0.7186910866910867, + 0.7254224950323634, + 0.9250908343316214, + 0.8800739168749887, + 0.8981046407264663, + 0.8404304621486464, + 0.8129946043165468, + 0.8826221252619009, + 0.5806970509383378 + ], + "eval_per_category_iou": [ + 0.9841650313607442, + 0.6620651250036637, + 0.8106955128303709, + 0.6111402130260171, + 0.7907538668824434, + 0.6277769581083341, + 0.816592235908447, + 0.5801459816329033, + 0.27775097583439323, + 0.5573939393939394, + 0.585910188775186, + 0.8438983222009031, + 0.7917123384634053, + 0.8002634457290038, + 0.7176573989710742, + 0.7108468921825769, + 0.7864883071889135, + 0.4958791208791209 + ], + "eval_runtime": 76.5926, + "eval_samples_per_second": 1.162, + "eval_steps_per_second": 0.157, + "step": 6100 + }, + { + "epoch": 2.77, + "learning_rate": 3.562414888788017e-05, + "loss": 0.1072, + "step": 6110 + }, + { + "epoch": 2.78, + "learning_rate": 3.5551520653654114e-05, + "loss": 0.0932, + "step": 6120 + }, + { + "epoch": 2.78, + "learning_rate": 3.547889241942806e-05, + "loss": 0.099, + "step": 6130 + }, + { + "epoch": 2.79, + "learning_rate": 3.5406264185202e-05, + "loss": 0.0938, + "step": 6140 + }, + { + "epoch": 2.79, + "learning_rate": 3.5333635950975944e-05, + "loss": 0.1093, + "step": 6150 + }, + { + "epoch": 2.8, + "learning_rate": 3.5261007716749894e-05, + "loss": 0.0923, + "step": 6160 + }, + { + "epoch": 2.8, + "learning_rate": 3.518837948252384e-05, + "loss": 0.0922, + "step": 6170 + }, + { + "epoch": 2.81, + "learning_rate": 3.511575124829778e-05, + "loss": 0.1006, + "step": 6180 + }, + { + "epoch": 2.81, + "learning_rate": 3.5043123014071724e-05, + "loss": 0.0874, + "step": 6190 + }, + { + "epoch": 2.81, + "learning_rate": 3.497049477984567e-05, + "loss": 0.0839, + "step": 6200 + }, + { + "epoch": 2.81, + "eval_loss": 0.13149453699588776, + "eval_mean_accuracy": 0.7891440579467708, + "eval_mean_iou": 0.6860310300219666, + "eval_overall_accuracy": 0.9601006668605162, + "eval_per_category_accuracy": [ + 0.9920891270094053, + 0.7334477566022766, + 0.913305064927919, + 0.7107976125881714, + 0.8711361532756117, + 0.7494358304002402, + 0.8823664512626711, + 0.8053714552026489, + 0.24852560786342473, + 0.7472625152625153, + 0.6937476637352692, + 0.9306179897443196, + 0.8881715015376869, + 0.9095656688312831, + 0.8478509225728574, + 0.8069373072970195, + 0.8711494015237308, + 0.6028150134048257 + ], + "eval_per_category_iou": [ + 0.9850031413953964, + 0.664549162289839, + 0.810160003660713, + 0.6136909293201382, + 0.789657160520625, + 0.6421957071280444, + 0.8123257017182747, + 0.5809162583014003, + 0.2266144629463654, + 0.5653423786931524, + 0.5706552522130338, + 0.843401208933351, + 0.7949519704780417, + 0.8085170454323247, + 0.7231519485951454, + 0.7109813176675666, + 0.7935351491131911, + 0.4129097419887981 + ], + "eval_runtime": 76.4248, + "eval_samples_per_second": 1.165, + "eval_steps_per_second": 0.157, + "step": 6200 + }, + { + "epoch": 2.82, + "learning_rate": 3.489786654561962e-05, + "loss": 0.09, + "step": 6210 + }, + { + "epoch": 2.82, + "learning_rate": 3.482523831139356e-05, + "loss": 0.0945, + "step": 6220 + }, + { + "epoch": 2.83, + "learning_rate": 3.4752610077167504e-05, + "loss": 0.0961, + "step": 6230 + }, + { + "epoch": 2.83, + "learning_rate": 3.467998184294145e-05, + "loss": 0.0924, + "step": 6240 + }, + { + "epoch": 2.84, + "learning_rate": 3.460735360871539e-05, + "loss": 0.0886, + "step": 6250 + }, + { + "epoch": 2.84, + "learning_rate": 3.4534725374489334e-05, + "loss": 0.0921, + "step": 6260 + }, + { + 
"epoch": 2.85, + "learning_rate": 3.4462097140263284e-05, + "loss": 0.0909, + "step": 6270 + }, + { + "epoch": 2.85, + "learning_rate": 3.438946890603723e-05, + "loss": 0.0939, + "step": 6280 + }, + { + "epoch": 2.86, + "learning_rate": 3.431684067181117e-05, + "loss": 0.1058, + "step": 6290 + }, + { + "epoch": 2.86, + "learning_rate": 3.4244212437585114e-05, + "loss": 0.0846, + "step": 6300 + }, + { + "epoch": 2.86, + "eval_loss": 0.13326804339885712, + "eval_mean_accuracy": 0.7897628723404772, + "eval_mean_iou": 0.6789238793132619, + "eval_overall_accuracy": 0.9593457425578257, + "eval_per_category_accuracy": [ + 0.9920089177597812, + 0.7340792435110672, + 0.9173836937383075, + 0.7316196418882257, + 0.8678104885715011, + 0.7462804552219362, + 0.9081000950004697, + 0.7618846336568562, + 0.25835488877392654, + 0.693997557997558, + 0.7528281099372406, + 0.9127595054816888, + 0.8971728144164883, + 0.8777299684986714, + 0.8506261855492326, + 0.7959982014388489, + 0.8912930111425512, + 0.625804289544236 + ], + "eval_per_category_iou": [ + 0.9847307623055664, + 0.6671081093011964, + 0.8108197239522068, + 0.6178121420389462, + 0.7841557259556485, + 0.6326530230224796, + 0.8226430321008514, + 0.5525983418407454, + 0.23203085071783675, + 0.542537512886106, + 0.5779894115958885, + 0.8460912619153201, + 0.808235379891934, + 0.812661156620813, + 0.7267047861075815, + 0.7150828640014772, + 0.8132121208708603, + 0.2735636225132577 + ], + "eval_runtime": 77.206, + "eval_samples_per_second": 1.153, + "eval_steps_per_second": 0.155, + "step": 6300 + }, + { + "epoch": 2.86, + "learning_rate": 3.417158420335906e-05, + "loss": 0.0909, + "step": 6310 + }, + { + "epoch": 2.87, + "learning_rate": 3.409895596913301e-05, + "loss": 0.0992, + "step": 6320 + }, + { + "epoch": 2.87, + "learning_rate": 3.402632773490695e-05, + "loss": 0.0878, + "step": 6330 + }, + { + "epoch": 2.88, + "learning_rate": 3.3953699500680894e-05, + "loss": 0.1106, + "step": 6340 + }, + { + "epoch": 2.88, + "learning_rate": 3.388107126645484e-05, + "loss": 0.0934, + "step": 6350 + }, + { + "epoch": 2.89, + "learning_rate": 3.380844303222878e-05, + "loss": 0.0987, + "step": 6360 + }, + { + "epoch": 2.89, + "learning_rate": 3.373581479800273e-05, + "loss": 0.0919, + "step": 6370 + }, + { + "epoch": 2.9, + "learning_rate": 3.3663186563776674e-05, + "loss": 0.0949, + "step": 6380 + }, + { + "epoch": 2.9, + "learning_rate": 3.359055832955062e-05, + "loss": 0.1013, + "step": 6390 + }, + { + "epoch": 2.91, + "learning_rate": 3.351793009532456e-05, + "loss": 0.0927, + "step": 6400 + }, + { + "epoch": 2.91, + "eval_loss": 0.1403910517692566, + "eval_mean_accuracy": 0.788558184698478, + "eval_mean_iou": 0.6774119670611869, + "eval_overall_accuracy": 0.9594932298981742, + "eval_per_category_accuracy": [ + 0.9924294893577413, + 0.7339497077349051, + 0.9050033701991856, + 0.7270075963103635, + 0.8760371689515888, + 0.7769487588268805, + 0.866345126615095, + 0.7706600336348184, + 0.25675116399379205, + 0.7258119658119658, + 0.7562120049577997, + 0.9191722709175948, + 0.8761802420732694, + 0.8969739366101754, + 0.7995182359027418, + 0.8099113566289825, + 0.8880437432306918, + 0.6170911528150134 + ], + "eval_per_category_iou": [ + 0.9850749687225412, + 0.6666666666666666, + 0.8110797424620212, + 0.6137066300240467, + 0.7827799603700107, + 0.6567757301541971, + 0.8086142972910207, + 0.5685894548294493, + 0.23417004812682835, + 0.5639591365858101, + 0.5987802408362283, + 0.8434607835667925, + 0.7802691480714792, + 0.7886176031440264, + 0.6986682614878928, + 
0.7000555216256732, + 0.8084970698790307, + 0.2836501432576481 + ], + "eval_runtime": 76.4223, + "eval_samples_per_second": 1.165, + "eval_steps_per_second": 0.157, + "step": 6400 + }, + { + "epoch": 2.91, + "learning_rate": 3.3445301861098504e-05, + "loss": 0.0984, + "step": 6410 + }, + { + "epoch": 2.91, + "learning_rate": 3.3372673626872455e-05, + "loss": 0.0938, + "step": 6420 + }, + { + "epoch": 2.92, + "learning_rate": 3.330004539264639e-05, + "loss": 0.0861, + "step": 6430 + }, + { + "epoch": 2.92, + "learning_rate": 3.3227417158420334e-05, + "loss": 0.0917, + "step": 6440 + }, + { + "epoch": 2.93, + "learning_rate": 3.315478892419428e-05, + "loss": 0.1024, + "step": 6450 + }, + { + "epoch": 2.93, + "learning_rate": 3.308216068996823e-05, + "loss": 0.0953, + "step": 6460 + }, + { + "epoch": 2.94, + "learning_rate": 3.300953245574217e-05, + "loss": 0.1043, + "step": 6470 + }, + { + "epoch": 2.94, + "learning_rate": 3.2936904221516115e-05, + "loss": 0.1355, + "step": 6480 + }, + { + "epoch": 2.95, + "learning_rate": 3.286427598729006e-05, + "loss": 0.098, + "step": 6490 + }, + { + "epoch": 2.95, + "learning_rate": 3.2791647753064e-05, + "loss": 0.0909, + "step": 6500 + }, + { + "epoch": 2.95, + "eval_loss": 0.13613344728946686, + "eval_mean_accuracy": 0.7934710007087689, + "eval_mean_iou": 0.6817604654493301, + "eval_overall_accuracy": 0.9605776754657874, + "eval_per_category_accuracy": [ + 0.9919079628259527, + 0.721287585615052, + 0.9140788351491141, + 0.7199538795442214, + 0.8629835747705071, + 0.7538122849862435, + 0.9116878416798032, + 0.8138650597099313, + 0.27568546301086394, + 0.7141978021978022, + 0.7089653544236558, + 0.9209948879742856, + 0.8852759743179325, + 0.8958828291360893, + 0.8405220727711674, + 0.811157502569373, + 0.9038920243278521, + 0.6363270777479892 + ], + "eval_per_category_iou": [ + 0.9848577133289431, + 0.668126527979842, + 0.8098557003832959, + 0.6173665232057695, + 0.7912109618271054, + 0.6499847682875747, + 0.8221945356288797, + 0.5992894452964657, + 0.2481143495669988, + 0.553552636559791, + 0.5704064834747372, + 0.8454001088731627, + 0.8014164478905916, + 0.799446429272506, + 0.7140162786226344, + 0.7018340465736675, + 0.8094589366640875, + 0.28515648465188925 + ], + "eval_runtime": 82.5196, + "eval_samples_per_second": 1.079, + "eval_steps_per_second": 0.145, + "step": 6500 + }, + { + "epoch": 2.96, + "learning_rate": 3.271901951883795e-05, + "loss": 0.0998, + "step": 6510 + }, + { + "epoch": 2.96, + "learning_rate": 3.2646391284611895e-05, + "loss": 0.0985, + "step": 6520 + }, + { + "epoch": 2.96, + "learning_rate": 3.257376305038584e-05, + "loss": 0.087, + "step": 6530 + }, + { + "epoch": 2.97, + "learning_rate": 3.250113481615978e-05, + "loss": 0.0805, + "step": 6540 + }, + { + "epoch": 2.97, + "learning_rate": 3.2428506581933725e-05, + "loss": 0.1091, + "step": 6550 + }, + { + "epoch": 2.98, + "learning_rate": 3.2355878347707675e-05, + "loss": 0.0962, + "step": 6560 + }, + { + "epoch": 2.98, + "learning_rate": 3.228325011348162e-05, + "loss": 0.1055, + "step": 6570 + }, + { + "epoch": 2.99, + "learning_rate": 3.221062187925556e-05, + "loss": 0.0825, + "step": 6580 + }, + { + "epoch": 2.99, + "learning_rate": 3.2137993645029505e-05, + "loss": 0.1204, + "step": 6590 + }, + { + "epoch": 3.0, + "learning_rate": 3.206536541080345e-05, + "loss": 0.1103, + "step": 6600 + }, + { + "epoch": 3.0, + "eval_loss": 0.13448721170425415, + "eval_mean_accuracy": 0.7842717389115598, + "eval_mean_iou": 0.6855736569488313, + "eval_overall_accuracy": 
0.9587874251804994, + "eval_per_category_accuracy": [ + 0.9925916274190494, + 0.7192150131964572, + 0.9178823456586332, + 0.736096039066739, + 0.871658983171327, + 0.6566337954151008, + 0.9092432325214795, + 0.8086428115962448, + 0.29896533885152615, + 0.697982905982906, + 0.7484310137913396, + 0.9177805980239826, + 0.8860223369242667, + 0.8895209686218609, + 0.829609630970857, + 0.7929663412127441, + 0.8548413474741802, + 0.5888069705093834 + ], + "eval_per_category_iou": [ + 0.9849901246619346, + 0.6681307441223808, + 0.8113800624710255, + 0.6178061137359823, + 0.7902524897517402, + 0.5755218161233199, + 0.8346864812457373, + 0.5466369043621395, + 0.26839123165521084, + 0.5516176595465458, + 0.5830906471291499, + 0.8445767965771167, + 0.7993136988468356, + 0.7957939400618746, + 0.7185719019991318, + 0.7044205673070888, + 0.7959448704651303, + 0.44919977501661806 + ], + "eval_runtime": 82.4042, + "eval_samples_per_second": 1.08, + "eval_steps_per_second": 0.146, + "step": 6600 + }, + { + "epoch": 3.0, + "learning_rate": 3.19927371765774e-05, + "loss": 0.1185, + "step": 6610 + }, + { + "epoch": 3.0, + "learning_rate": 3.192010894235134e-05, + "loss": 0.0955, + "step": 6620 + }, + { + "epoch": 3.01, + "learning_rate": 3.1847480708125285e-05, + "loss": 0.0764, + "step": 6630 + }, + { + "epoch": 3.01, + "learning_rate": 3.177485247389923e-05, + "loss": 0.0909, + "step": 6640 + }, + { + "epoch": 3.02, + "learning_rate": 3.170222423967317e-05, + "loss": 0.0899, + "step": 6650 + }, + { + "epoch": 3.02, + "learning_rate": 3.162959600544712e-05, + "loss": 0.0773, + "step": 6660 + }, + { + "epoch": 3.03, + "learning_rate": 3.1556967771221065e-05, + "loss": 0.076, + "step": 6670 + }, + { + "epoch": 3.03, + "learning_rate": 3.148433953699501e-05, + "loss": 0.1028, + "step": 6680 + }, + { + "epoch": 3.04, + "learning_rate": 3.141171130276895e-05, + "loss": 0.0983, + "step": 6690 + }, + { + "epoch": 3.04, + "learning_rate": 3.1339083068542895e-05, + "loss": 0.099, + "step": 6700 + }, + { + "epoch": 3.04, + "eval_loss": 0.13621346652507782, + "eval_mean_accuracy": 0.798219757662134, + "eval_mean_iou": 0.6859926694874328, + "eval_overall_accuracy": 0.9610216805104459, + "eval_per_category_accuracy": [ + 0.99322392702933, + 0.7230848945093022, + 0.9045305106195665, + 0.7362995116657624, + 0.8652773022526283, + 0.7670388051528226, + 0.9050082298941772, + 0.8048457982669579, + 0.3523538541127781, + 0.7736556776556777, + 0.7125656613350646, + 0.9152147011263853, + 0.8902442314263619, + 0.8796218080706756, + 0.8431195033626487, + 0.7978096094552929, + 0.8702144269963866, + 0.6338471849865952 + ], + "eval_per_category_iou": [ + 0.985045217091938, + 0.6707773188133683, + 0.8138994442450272, + 0.6172741229317109, + 0.7934656634943166, + 0.662450250677468, + 0.8354277692651164, + 0.5877626455037622, + 0.29528310066764935, + 0.5570105420098036, + 0.5721168275230227, + 0.8490454620477177, + 0.8029107394851664, + 0.795529966019147, + 0.7260577204193293, + 0.7072379097695554, + 0.8079467352721251, + 0.26862661553756567 + ], + "eval_runtime": 83.2771, + "eval_samples_per_second": 1.069, + "eval_steps_per_second": 0.144, + "step": 6700 + }, + { + "epoch": 3.05, + "learning_rate": 3.126645483431684e-05, + "loss": 0.0841, + "step": 6710 + }, + { + "epoch": 3.05, + "learning_rate": 3.119382660009079e-05, + "loss": 0.1462, + "step": 6720 + }, + { + "epoch": 3.05, + "learning_rate": 3.112119836586473e-05, + "loss": 0.0919, + "step": 6730 + }, + { + "epoch": 3.06, + "learning_rate": 3.1048570131638675e-05, + "loss": 0.0887, 
+ "step": 6740 + }, + { + "epoch": 3.06, + "learning_rate": 3.097594189741262e-05, + "loss": 0.0831, + "step": 6750 + }, + { + "epoch": 3.07, + "learning_rate": 3.090331366318656e-05, + "loss": 0.0868, + "step": 6760 + }, + { + "epoch": 3.07, + "learning_rate": 3.083068542896051e-05, + "loss": 0.0918, + "step": 6770 + }, + { + "epoch": 3.08, + "learning_rate": 3.0758057194734455e-05, + "loss": 0.087, + "step": 6780 + }, + { + "epoch": 3.08, + "learning_rate": 3.06854289605084e-05, + "loss": 0.1027, + "step": 6790 + }, + { + "epoch": 3.09, + "learning_rate": 3.061280072628234e-05, + "loss": 0.0885, + "step": 6800 + }, + { + "epoch": 3.09, + "eval_loss": 0.12955991923809052, + "eval_mean_accuracy": 0.8001941118044491, + "eval_mean_iou": 0.6919037217109923, + "eval_overall_accuracy": 0.9617451014143698, + "eval_per_category_accuracy": [ + 0.9927245883950861, + 0.729610259233472, + 0.9164878397711016, + 0.7216494845360825, + 0.8643489704299212, + 0.7607081818944617, + 0.9047802983641128, + 0.8274327890706711, + 0.32338334195550955, + 0.7358534798534798, + 0.7726494717582483, + 0.9263401770429324, + 0.886921568980091, + 0.8920419548422294, + 0.8435775564752543, + 0.8017792908530319, + 0.8809527483005373, + 0.6222520107238606 + ], + "eval_per_category_iou": [ + 0.9853156922191287, + 0.6713949399529159, + 0.810264362049832, + 0.6117755289788408, + 0.7942374740717857, + 0.6503930586076437, + 0.8294382237534692, + 0.6112378052279226, + 0.2745761222876219, + 0.5723913654633731, + 0.5958625084396028, + 0.8461885723985251, + 0.8148460251564533, + 0.8138547252817828, + 0.7261893164474752, + 0.7106314902532337, + 0.8129164341279513, + 0.3227533460803059 + ], + "eval_runtime": 80.8852, + "eval_samples_per_second": 1.1, + "eval_steps_per_second": 0.148, + "step": 6800 + }, + { + "epoch": 3.09, + "learning_rate": 3.0540172492056285e-05, + "loss": 0.0952, + "step": 6810 + }, + { + "epoch": 3.1, + "learning_rate": 3.0467544257830232e-05, + "loss": 0.0908, + "step": 6820 + }, + { + "epoch": 3.1, + "learning_rate": 3.039491602360418e-05, + "loss": 0.0908, + "step": 6830 + }, + { + "epoch": 3.1, + "learning_rate": 3.0322287789378122e-05, + "loss": 0.0889, + "step": 6840 + }, + { + "epoch": 3.11, + "learning_rate": 3.0249659555152065e-05, + "loss": 0.0812, + "step": 6850 + }, + { + "epoch": 3.11, + "learning_rate": 3.0177031320926012e-05, + "loss": 0.075, + "step": 6860 + }, + { + "epoch": 3.12, + "learning_rate": 3.0104403086699956e-05, + "loss": 0.0874, + "step": 6870 + }, + { + "epoch": 3.12, + "learning_rate": 3.0031774852473902e-05, + "loss": 0.0806, + "step": 6880 + }, + { + "epoch": 3.13, + "learning_rate": 2.9959146618247846e-05, + "loss": 0.0824, + "step": 6890 + }, + { + "epoch": 3.13, + "learning_rate": 2.988651838402179e-05, + "loss": 0.07, + "step": 6900 + }, + { + "epoch": 3.13, + "eval_loss": 0.14067216217517853, + "eval_mean_accuracy": 0.798203682941023, + "eval_mean_iou": 0.6923460941833122, + "eval_overall_accuracy": 0.9625431446546919, + "eval_per_category_accuracy": [ + 0.9924110734027792, + 0.7320066710924723, + 0.9121994332563003, + 0.7206999457406402, + 0.8750999174587448, + 0.7592420033651447, + 0.907545055625733, + 0.8627854759724202, + 0.2620279358510088, + 0.7625592185592186, + 0.7480473745302878, + 0.9202120719716288, + 0.8997895796989371, + 0.8822791827253049, + 0.8390886359717192, + 0.7952530832476875, + 0.8624384012244155, + 0.6339812332439678 + ], + "eval_per_category_iou": [ + 0.9851986993485564, + 0.6720680274131446, + 0.8130522140671002, + 0.6155004633920297, + 
0.8019457961284027, + 0.6852198381430089, + 0.8350000880469114, + 0.626904146267834, + 0.23910683094934618, + 0.5700235115440221, + 0.5920893837349632, + 0.8446416971799552, + 0.8095238095238095, + 0.8059246220250296, + 0.7220269505782411, + 0.708903305600568, + 0.7915114991518296, + 0.3435888122048674 + ], + "eval_runtime": 81.7478, + "eval_samples_per_second": 1.089, + "eval_steps_per_second": 0.147, + "step": 6900 + }, + { + "epoch": 3.14, + "learning_rate": 2.9813890149795736e-05, + "loss": 0.0953, + "step": 6910 + }, + { + "epoch": 3.14, + "learning_rate": 2.974126191556968e-05, + "loss": 0.0813, + "step": 6920 + }, + { + "epoch": 3.15, + "learning_rate": 2.9668633681343622e-05, + "loss": 0.09, + "step": 6930 + }, + { + "epoch": 3.15, + "learning_rate": 2.959600544711757e-05, + "loss": 0.087, + "step": 6940 + }, + { + "epoch": 3.15, + "learning_rate": 2.9523377212891512e-05, + "loss": 0.089, + "step": 6950 + }, + { + "epoch": 3.16, + "learning_rate": 2.945074897866546e-05, + "loss": 0.0836, + "step": 6960 + }, + { + "epoch": 3.16, + "learning_rate": 2.9378120744439402e-05, + "loss": 0.0816, + "step": 6970 + }, + { + "epoch": 3.17, + "learning_rate": 2.9305492510213346e-05, + "loss": 0.0932, + "step": 6980 + }, + { + "epoch": 3.17, + "learning_rate": 2.9232864275987292e-05, + "loss": 0.0813, + "step": 6990 + }, + { + "epoch": 3.18, + "learning_rate": 2.9160236041761236e-05, + "loss": 0.089, + "step": 7000 + }, + { + "epoch": 3.18, + "eval_loss": 0.13973882794380188, + "eval_mean_accuracy": 0.7966314280330089, + "eval_mean_iou": 0.6919191740005027, + "eval_overall_accuracy": 0.962646570098534, + "eval_per_category_accuracy": [ + 0.9923436776639871, + 0.7139040463738079, + 0.9156005832507979, + 0.7291779706999457, + 0.8699217058800524, + 0.7384792372338688, + 0.9310446223818323, + 0.8831686816849201, + 0.26735644076564924, + 0.7537680097680097, + 0.7258749926223219, + 0.9226593603839752, + 0.8784373145333885, + 0.8745534378684687, + 0.8363079841351957, + 0.7912769784172662, + 0.8772869075794651, + 0.6382037533512064 + ], + "eval_per_category_iou": [ + 0.9851632581820728, + 0.6638860446907185, + 0.8099934133767777, + 0.6114776475941304, + 0.8028141902459472, + 0.6733761121726402, + 0.8356453610308161, + 0.6397615333884661, + 0.24225378521539398, + 0.56523904747255, + 0.5834618212870935, + 0.8470755203229015, + 0.8098479164680475, + 0.8077524797938958, + 0.726401273140022, + 0.7105381553902059, + 0.790298100871731, + 0.34955947136563875 + ], + "eval_runtime": 84.3262, + "eval_samples_per_second": 1.055, + "eval_steps_per_second": 0.142, + "step": 7000 + }, + { + "epoch": 3.18, + "learning_rate": 2.9087607807535183e-05, + "loss": 0.0774, + "step": 7010 + }, + { + "epoch": 3.19, + "learning_rate": 2.9014979573309126e-05, + "loss": 0.085, + "step": 7020 + }, + { + "epoch": 3.19, + "learning_rate": 2.894235133908307e-05, + "loss": 0.1044, + "step": 7030 + }, + { + "epoch": 3.2, + "learning_rate": 2.8869723104857016e-05, + "loss": 0.0782, + "step": 7040 + }, + { + "epoch": 3.2, + "learning_rate": 2.879709487063096e-05, + "loss": 0.0847, + "step": 7050 + }, + { + "epoch": 3.2, + "learning_rate": 2.8724466636404903e-05, + "loss": 0.0926, + "step": 7060 + }, + { + "epoch": 3.21, + "learning_rate": 2.865183840217885e-05, + "loss": 0.0853, + "step": 7070 + }, + { + "epoch": 3.21, + "learning_rate": 2.8579210167952793e-05, + "loss": 0.0882, + "step": 7080 + }, + { + "epoch": 3.22, + "learning_rate": 2.850658193372674e-05, + "loss": 0.076, + "step": 7090 + }, + { + "epoch": 3.22, + "learning_rate": 
2.8433953699500683e-05, + "loss": 0.0812, + "step": 7100 + }, + { + "epoch": 3.22, + "eval_loss": 0.1491415947675705, + "eval_mean_accuracy": 0.7976677477854113, + "eval_mean_iou": 0.6869542568563782, + "eval_overall_accuracy": 0.9612365894103319, + "eval_per_category_accuracy": [ + 0.9923379642803694, + 0.7278777182273029, + 0.9126310250907891, + 0.7221920781334781, + 0.8433726507818777, + 0.7604984123759599, + 0.919574968594167, + 0.8715464249006039, + 0.2744438696326953, + 0.7457094017094017, + 0.6974266658797144, + 0.9302344889753413, + 0.8945470568134812, + 0.8943737571053975, + 0.8326812812553889, + 0.8170092497430627, + 0.8775275940914548, + 0.6440348525469168 + ], + "eval_per_category_iou": [ + 0.9852574724071208, + 0.6644151467675663, + 0.8121287515033556, + 0.6136114792831211, + 0.7844025163654371, + 0.6836819768065079, + 0.8484344022153549, + 0.6007086759264332, + 0.24860583907399597, + 0.5591632546931421, + 0.5687937231243181, + 0.8454285765616859, + 0.8128664288836865, + 0.8062410267393253, + 0.7190475304103419, + 0.7080619481835287, + 0.7912495478701205, + 0.3130783265997654 + ], + "eval_runtime": 82.047, + "eval_samples_per_second": 1.085, + "eval_steps_per_second": 0.146, + "step": 7100 + }, + { + "epoch": 3.23, + "learning_rate": 2.8361325465274626e-05, + "loss": 0.0896, + "step": 7110 + }, + { + "epoch": 3.23, + "learning_rate": 2.8288697231048573e-05, + "loss": 0.0718, + "step": 7120 + }, + { + "epoch": 3.24, + "learning_rate": 2.8216068996822516e-05, + "loss": 0.0919, + "step": 7130 + }, + { + "epoch": 3.24, + "learning_rate": 2.8143440762596463e-05, + "loss": 0.0891, + "step": 7140 + }, + { + "epoch": 3.25, + "learning_rate": 2.8070812528370406e-05, + "loss": 0.0797, + "step": 7150 + }, + { + "epoch": 3.25, + "learning_rate": 2.799818429414435e-05, + "loss": 0.0898, + "step": 7160 + }, + { + "epoch": 3.25, + "learning_rate": 2.7925556059918296e-05, + "loss": 0.0868, + "step": 7170 + }, + { + "epoch": 3.26, + "learning_rate": 2.785292782569224e-05, + "loss": 0.0898, + "step": 7180 + }, + { + "epoch": 3.26, + "learning_rate": 2.7780299591466183e-05, + "loss": 0.1017, + "step": 7190 + }, + { + "epoch": 3.27, + "learning_rate": 2.770767135724013e-05, + "loss": 0.0826, + "step": 7200 + }, + { + "epoch": 3.27, + "eval_loss": 0.1519315540790558, + "eval_mean_accuracy": 0.7974368292032994, + "eval_mean_iou": 0.6892930701953764, + "eval_overall_accuracy": 0.9600244157769707, + "eval_per_category_accuracy": [ + 0.9929770311994012, + 0.7247850515714309, + 0.9172547320347749, + 0.7556972327726533, + 0.8699738516439295, + 0.7603394291619376, + 0.8973420747685016, + 0.7366657454122433, + 0.34811174340403517, + 0.7283223443223443, + 0.7590647071553641, + 0.9218172401386928, + 0.8768636584356959, + 0.8855921017897682, + 0.8276642524573202, + 0.8179342240493319, + 0.9057095160658247, + 0.6277479892761394 + ], + "eval_per_category_iou": [ + 0.9849399335448075, + 0.6687882862692365, + 0.8103227492089629, + 0.6225276567214214, + 0.7786314294329074, + 0.6449094181523288, + 0.8361890895412523, + 0.5534463080553863, + 0.2995059420483376, + 0.5553801004066918, + 0.5828524382137894, + 0.8478996883443704, + 0.8065883618015633, + 0.8069498843423146, + 0.7226446343207739, + 0.7120386058501507, + 0.8248469128239139, + 0.3488138244385684 + ], + "eval_runtime": 83.9979, + "eval_samples_per_second": 1.06, + "eval_steps_per_second": 0.143, + "step": 7200 + }, + { + "epoch": 3.27, + "learning_rate": 2.7635043123014073e-05, + "loss": 0.096, + "step": 7210 + }, + { + "epoch": 3.28, + "learning_rate": 
2.756241488878802e-05, + "loss": 0.0858, + "step": 7220 + }, + { + "epoch": 3.28, + "learning_rate": 2.7489786654561963e-05, + "loss": 0.0909, + "step": 7230 + }, + { + "epoch": 3.29, + "learning_rate": 2.7417158420335906e-05, + "loss": 0.0905, + "step": 7240 + }, + { + "epoch": 3.29, + "learning_rate": 2.7344530186109853e-05, + "loss": 0.0888, + "step": 7250 + }, + { + "epoch": 3.3, + "learning_rate": 2.7271901951883796e-05, + "loss": 0.0942, + "step": 7260 + }, + { + "epoch": 3.3, + "learning_rate": 2.7199273717657743e-05, + "loss": 0.0925, + "step": 7270 + }, + { + "epoch": 3.3, + "learning_rate": 2.7126645483431686e-05, + "loss": 0.0876, + "step": 7280 + }, + { + "epoch": 3.31, + "learning_rate": 2.705401724920563e-05, + "loss": 0.0889, + "step": 7290 + }, + { + "epoch": 3.31, + "learning_rate": 2.6981389014979577e-05, + "loss": 0.0791, + "step": 7300 + }, + { + "epoch": 3.31, + "eval_loss": 0.1503271609544754, + "eval_mean_accuracy": 0.7965757722351132, + "eval_mean_iou": 0.6894053238568253, + "eval_overall_accuracy": 0.9597292696492056, + "eval_per_category_accuracy": [ + 0.9920047575289915, + 0.7369937984747162, + 0.9152773192472763, + 0.7202930005425936, + 0.8684081064706718, + 0.76895101992148, + 0.9000302748749857, + 0.7476033837133056, + 0.3396275219865494, + 0.7115115995115995, + 0.7837159889039721, + 0.9278227831085702, + 0.8741299929859899, + 0.8907572637840311, + 0.8435613898948094, + 0.7977068345323741, + 0.9058267735973068, + 0.614142091152815 + ], + "eval_per_category_iou": [ + 0.9851162074854626, + 0.6692446810074841, + 0.812920074221703, + 0.6180527265320375, + 0.779525915031291, + 0.6397384766023269, + 0.8374950821583133, + 0.5406745694779389, + 0.28814080056179775, + 0.5568287797942116, + 0.5911057032415067, + 0.8447939465499367, + 0.8142915780126991, + 0.8119265319217196, + 0.728554740041236, + 0.7093116900177632, + 0.8260610187806374, + 0.35551330798479086 + ], + "eval_runtime": 83.7008, + "eval_samples_per_second": 1.063, + "eval_steps_per_second": 0.143, + "step": 7300 + }, + { + "epoch": 3.32, + "learning_rate": 2.690876078075352e-05, + "loss": 0.0941, + "step": 7310 + }, + { + "epoch": 3.32, + "learning_rate": 2.6836132546527463e-05, + "loss": 0.0751, + "step": 7320 + }, + { + "epoch": 3.33, + "learning_rate": 2.676350431230141e-05, + "loss": 0.0778, + "step": 7330 + }, + { + "epoch": 3.33, + "learning_rate": 2.6690876078075353e-05, + "loss": 0.0805, + "step": 7340 + }, + { + "epoch": 3.34, + "learning_rate": 2.66182478438493e-05, + "loss": 0.0864, + "step": 7350 + }, + { + "epoch": 3.34, + "learning_rate": 2.6545619609623243e-05, + "loss": 0.0833, + "step": 7360 + }, + { + "epoch": 3.35, + "learning_rate": 2.6472991375397187e-05, + "loss": 0.0852, + "step": 7370 + }, + { + "epoch": 3.35, + "learning_rate": 2.6400363141171133e-05, + "loss": 0.1008, + "step": 7380 + }, + { + "epoch": 3.35, + "learning_rate": 2.6327734906945077e-05, + "loss": 0.0867, + "step": 7390 + }, + { + "epoch": 3.36, + "learning_rate": 2.6255106672719023e-05, + "loss": 0.0871, + "step": 7400 + }, + { + "epoch": 3.36, + "eval_loss": 0.13909876346588135, + "eval_mean_accuracy": 0.7960482843801368, + "eval_mean_iou": 0.6890686903763231, + "eval_overall_accuracy": 0.9619064331054688, + "eval_per_category_accuracy": [ + 0.9923535512783945, + 0.7248012435434512, + 0.9101773137449103, + 0.7408437330439501, + 0.871394137581109, + 0.7620529149130671, + 0.9019198446585724, + 0.8367790055148818, + 0.2545783755819969, + 0.7233601953601954, + 0.7375907454405949, + 0.9220307354121448, + 
0.8765983849792277, + 0.89043169139257, + 0.8468108725642353, + 0.7999357656731757, + 0.9095296430125311, + 0.6276809651474531 + ], + "eval_per_category_iou": [ + 0.9850871328712067, + 0.6697238098087914, + 0.8138968117903024, + 0.6260675187711354, + 0.7880510625834967, + 0.685249495172118, + 0.8415382517387154, + 0.6059089975658216, + 0.23402130492676432, + 0.5607517681089186, + 0.5906652487297649, + 0.8481546675201117, + 0.808253081224945, + 0.8084169825564906, + 0.7309461678365267, + 0.7167424460431655, + 0.8236273761921128, + 0.2661343033334281 + ], + "eval_runtime": 84.5569, + "eval_samples_per_second": 1.053, + "eval_steps_per_second": 0.142, + "step": 7400 + }, + { + "epoch": 3.36, + "learning_rate": 2.6182478438492967e-05, + "loss": 0.0843, + "step": 7410 + }, + { + "epoch": 3.37, + "learning_rate": 2.610985020426691e-05, + "loss": 0.101, + "step": 7420 + }, + { + "epoch": 3.37, + "learning_rate": 2.6037221970040857e-05, + "loss": 0.0947, + "step": 7430 + }, + { + "epoch": 3.38, + "learning_rate": 2.59645937358148e-05, + "loss": 0.0794, + "step": 7440 + }, + { + "epoch": 3.38, + "learning_rate": 2.5891965501588744e-05, + "loss": 0.0911, + "step": 7450 + }, + { + "epoch": 3.39, + "learning_rate": 2.581933726736269e-05, + "loss": 0.0844, + "step": 7460 + }, + { + "epoch": 3.39, + "learning_rate": 2.5746709033136634e-05, + "loss": 0.0918, + "step": 7470 + }, + { + "epoch": 3.4, + "learning_rate": 2.567408079891058e-05, + "loss": 0.083, + "step": 7480 + }, + { + "epoch": 3.4, + "learning_rate": 2.5601452564684524e-05, + "loss": 0.0809, + "step": 7490 + }, + { + "epoch": 3.4, + "learning_rate": 2.5528824330458467e-05, + "loss": 0.0942, + "step": 7500 + }, + { + "epoch": 3.4, + "eval_loss": 0.15069787204265594, + "eval_mean_accuracy": 0.7945067044242303, + "eval_mean_iou": 0.6872757002457379, + "eval_overall_accuracy": 0.9596373311589274, + "eval_per_category_accuracy": [ + 0.9923054035407225, + 0.7146326851147201, + 0.917318353141851, + 0.7225990233315247, + 0.8705529440743544, + 0.7270037405217299, + 0.9133233809031657, + 0.7338261141307784, + 0.31764097258147955, + 0.7381001221001221, + 0.7534871825139192, + 0.9268541471456868, + 0.8925237846878765, + 0.8954164686834556, + 0.8394065787204691, + 0.8224563206577595, + 0.9096654148910893, + 0.6140080428954423 + ], + "eval_per_category_iou": [ + 0.9849579847340046, + 0.6687931870529761, + 0.812854635323659, + 0.6228224014965509, + 0.7807169755593295, + 0.6371301239838145, + 0.8182895233225562, + 0.5428083152727263, + 0.27799157875673475, + 0.5710971038152246, + 0.5941007205404442, + 0.8458027715742268, + 0.8026979268179263, + 0.8048221071904523, + 0.7280090482933965, + 0.7212545909285505, + 0.8197395057059595, + 0.3370741040547502 + ], + "eval_runtime": 81.7189, + "eval_samples_per_second": 1.089, + "eval_steps_per_second": 0.147, + "step": 7500 + }, + { + "epoch": 3.41, + "learning_rate": 2.5456196096232414e-05, + "loss": 0.0807, + "step": 7510 + }, + { + "epoch": 3.41, + "learning_rate": 2.5383567862006357e-05, + "loss": 0.0887, + "step": 7520 + }, + { + "epoch": 3.42, + "learning_rate": 2.5310939627780304e-05, + "loss": 0.0824, + "step": 7530 + }, + { + "epoch": 3.42, + "learning_rate": 2.5238311393554247e-05, + "loss": 0.0711, + "step": 7540 + }, + { + "epoch": 3.43, + "learning_rate": 2.516568315932819e-05, + "loss": 0.0764, + "step": 7550 + }, + { + "epoch": 3.43, + "learning_rate": 2.5093054925102137e-05, + "loss": 0.0866, + "step": 7560 + }, + { + "epoch": 3.44, + "learning_rate": 2.502042669087608e-05, + "loss": 0.0827, + "step": 
7570 + }, + { + "epoch": 3.44, + "learning_rate": 2.4947798456650024e-05, + "loss": 0.0825, + "step": 7580 + }, + { + "epoch": 3.45, + "learning_rate": 2.487517022242397e-05, + "loss": 0.0796, + "step": 7590 + }, + { + "epoch": 3.45, + "learning_rate": 2.4802541988197914e-05, + "loss": 0.0941, + "step": 7600 + }, + { + "epoch": 3.45, + "eval_loss": 0.14226721227169037, + "eval_mean_accuracy": 0.8043973312375484, + "eval_mean_iou": 0.6950231914394323, + "eval_overall_accuracy": 0.9625836918863018, + "eval_per_category_accuracy": [ + 0.992655972321929, + 0.7354879450768309, + 0.9027353637063937, + 0.7259224091155725, + 0.8658872704642963, + 0.7484466015129903, + 0.9037432968990873, + 0.868287713175564, + 0.33507501293326436, + 0.752986568986569, + 0.7652029353321923, + 0.9223391174737974, + 0.8734151035016097, + 0.8883418686095419, + 0.8428770046559751, + 0.8087808324768756, + 0.9187497878564398, + 0.6282171581769437 + ], + "eval_per_category_iou": [ + 0.9850488257103458, + 0.6723854636962475, + 0.8136399633630944, + 0.6180274858528698, + 0.7959631356402571, + 0.682059650916174, + 0.8380410879116093, + 0.6294282638927081, + 0.3037992495309568, + 0.5688573705649684, + 0.5968892913047482, + 0.8476645531675235, + 0.8067695246796935, + 0.8121415183132356, + 0.730002193586327, + 0.7154399940906069, + 0.8157739717957921, + 0.2784859018926226 + ], + "eval_runtime": 82.5733, + "eval_samples_per_second": 1.078, + "eval_steps_per_second": 0.145, + "step": 7600 + }, + { + "epoch": 3.45, + "learning_rate": 2.472991375397186e-05, + "loss": 0.0845, + "step": 7610 + }, + { + "epoch": 3.46, + "learning_rate": 2.4657285519745804e-05, + "loss": 0.0874, + "step": 7620 + }, + { + "epoch": 3.46, + "learning_rate": 2.4584657285519747e-05, + "loss": 0.0838, + "step": 7630 + }, + { + "epoch": 3.47, + "learning_rate": 2.4512029051293694e-05, + "loss": 0.0739, + "step": 7640 + }, + { + "epoch": 3.47, + "learning_rate": 2.4439400817067637e-05, + "loss": 0.0805, + "step": 7650 + }, + { + "epoch": 3.48, + "learning_rate": 2.4366772582841584e-05, + "loss": 0.0998, + "step": 7660 + }, + { + "epoch": 3.48, + "learning_rate": 2.4294144348615527e-05, + "loss": 0.0787, + "step": 7670 + }, + { + "epoch": 3.49, + "learning_rate": 2.422151611438947e-05, + "loss": 0.1046, + "step": 7680 + }, + { + "epoch": 3.49, + "learning_rate": 2.4148887880163417e-05, + "loss": 0.0791, + "step": 7690 + }, + { + "epoch": 3.5, + "learning_rate": 2.407625964593736e-05, + "loss": 0.1024, + "step": 7700 + }, + { + "epoch": 3.5, + "eval_loss": 0.1411096453666687, + "eval_mean_accuracy": 0.793477814813021, + "eval_mean_iou": 0.6876190052060385, + "eval_overall_accuracy": 0.9597613302509437, + "eval_per_category_accuracy": [ + 0.9928391888859046, + 0.7323790864489386, + 0.913808875316386, + 0.7333830710797613, + 0.8687285284671272, + 0.7570427355711693, + 0.9060295719411067, + 0.7227078377074855, + 0.29130884635281945, + 0.7281269841269842, + 0.7375809085364654, + 0.9135146461711204, + 0.8883333633077352, + 0.9011139855340267, + 0.8490149163648905, + 0.8106821685508736, + 0.9079898664807003, + 0.6280160857908847 + ], + "eval_per_category_iou": [ + 0.9846414923408805, + 0.6744553628677512, + 0.8143157468925446, + 0.6213296558064701, + 0.7800849995194287, + 0.6416433351798895, + 0.8314123085029099, + 0.5326802302281645, + 0.2638212143928036, + 0.5724928190713178, + 0.5924401286315906, + 0.8512147948939932, + 0.8182251137827216, + 0.8160195064423851, + 0.7297326089272398, + 0.711005323793696, + 0.8306726852361555, + 0.3109547671987522 + ], + 
"eval_runtime": 79.8015, + "eval_samples_per_second": 1.115, + "eval_steps_per_second": 0.15, + "step": 7700 + }, + { + "epoch": 3.5, + "learning_rate": 2.4003631411711304e-05, + "loss": 0.0825, + "step": 7710 + }, + { + "epoch": 3.5, + "learning_rate": 2.393100317748525e-05, + "loss": 0.0873, + "step": 7720 + }, + { + "epoch": 3.51, + "learning_rate": 2.3858374943259194e-05, + "loss": 0.0785, + "step": 7730 + }, + { + "epoch": 3.51, + "learning_rate": 2.378574670903314e-05, + "loss": 0.0828, + "step": 7740 + }, + { + "epoch": 3.52, + "learning_rate": 2.3713118474807084e-05, + "loss": 0.0728, + "step": 7750 + }, + { + "epoch": 3.52, + "learning_rate": 2.3640490240581028e-05, + "loss": 0.0829, + "step": 7760 + }, + { + "epoch": 3.53, + "learning_rate": 2.3567862006354974e-05, + "loss": 0.0822, + "step": 7770 + }, + { + "epoch": 3.53, + "learning_rate": 2.3495233772128918e-05, + "loss": 0.093, + "step": 7780 + }, + { + "epoch": 3.54, + "learning_rate": 2.3422605537902864e-05, + "loss": 0.087, + "step": 7790 + }, + { + "epoch": 3.54, + "learning_rate": 2.3349977303676808e-05, + "loss": 0.0898, + "step": 7800 + }, + { + "epoch": 3.54, + "eval_loss": 0.14239805936813354, + "eval_mean_accuracy": 0.7949254811371873, + "eval_mean_iou": 0.6902498843796094, + "eval_overall_accuracy": 0.9612459761373112, + "eval_per_category_accuracy": [ + 0.993142109157134, + 0.731925711232371, + 0.9112519946076812, + 0.6812262615301139, + 0.8621513009338895, + 0.7472674760089913, + 0.8922179651804139, + 0.8149019225315349, + 0.3208484221417486, + 0.7321514041514041, + 0.6952527100670877, + 0.9328992262773146, + 0.9065652932395734, + 0.8931110641817573, + 0.8570928177271944, + 0.8205421377183967, + 0.8944620502170807, + 0.6216487935656837 + ], + "eval_per_category_iou": [ + 0.985075545768495, + 0.6730743459551214, + 0.8130401231016586, + 0.6100583090379009, + 0.7897747164235203, + 0.6521364622636056, + 0.8290202684678771, + 0.5880449815488054, + 0.2901520467836257, + 0.5639794735970866, + 0.580207691991955, + 0.8465515261972073, + 0.8064788371851176, + 0.8109337860780985, + 0.735473101075587, + 0.7208550355794571, + 0.8313334212835765, + 0.29830824649427506 + ], + "eval_runtime": 84.1376, + "eval_samples_per_second": 1.058, + "eval_steps_per_second": 0.143, + "step": 7800 + }, + { + "epoch": 3.55, + "learning_rate": 2.327734906945075e-05, + "loss": 0.0771, + "step": 7810 + }, + { + "epoch": 3.55, + "learning_rate": 2.3204720835224698e-05, + "loss": 0.08, + "step": 7820 + }, + { + "epoch": 3.55, + "learning_rate": 2.313209260099864e-05, + "loss": 0.0957, + "step": 7830 + }, + { + "epoch": 3.56, + "learning_rate": 2.3059464366772584e-05, + "loss": 0.0752, + "step": 7840 + }, + { + "epoch": 3.56, + "learning_rate": 2.298683613254653e-05, + "loss": 0.0889, + "step": 7850 + }, + { + "epoch": 3.57, + "learning_rate": 2.2914207898320474e-05, + "loss": 0.0747, + "step": 7860 + }, + { + "epoch": 3.57, + "learning_rate": 2.284157966409442e-05, + "loss": 0.0913, + "step": 7870 + }, + { + "epoch": 3.58, + "learning_rate": 2.2768951429868365e-05, + "loss": 0.0857, + "step": 7880 + }, + { + "epoch": 3.58, + "learning_rate": 2.2696323195642308e-05, + "loss": 0.0987, + "step": 7890 + }, + { + "epoch": 3.59, + "learning_rate": 2.2623694961416255e-05, + "loss": 0.0784, + "step": 7900 + }, + { + "epoch": 3.59, + "eval_loss": 0.1483561098575592, + "eval_mean_accuracy": 0.7962622662878549, + "eval_mean_iou": 0.6917747207606494, + "eval_overall_accuracy": 0.959908646144224, + "eval_per_category_accuracy": [ + 0.9926994606011167, + 
0.7252870027040593, + 0.9169589798613403, + 0.7194791101465002, + 0.864814851662455, + 0.7613750281532775, + 0.9122063424123159, + 0.7299171051657084, + 0.3656492498706674, + 0.7317802197802198, + 0.735928308642704, + 0.9242170851569388, + 0.8911389673219071, + 0.8768940393854601, + 0.8574700379375755, + 0.8056333504624872, + 0.9110169622276463, + 0.610254691689008 + ], + "eval_per_category_iou": [ + 0.9850454586836164, + 0.6737309167481387, + 0.814432328415651, + 0.6164574616457462, + 0.7742703745339059, + 0.6492239851933024, + 0.8477682606776312, + 0.5281189348145727, + 0.32247467834656446, + 0.5707624794295117, + 0.595128431536326, + 0.8513640982310973, + 0.8199196627656362, + 0.8131956457877729, + 0.7408912914926409, + 0.7208103448275862, + 0.8240899689329169, + 0.3042606516290727 + ], + "eval_runtime": 81.0689, + "eval_samples_per_second": 1.098, + "eval_steps_per_second": 0.148, + "step": 7900 + }, + { + "epoch": 3.59, + "learning_rate": 2.2551066727190198e-05, + "loss": 0.0839, + "step": 7910 + }, + { + "epoch": 3.6, + "learning_rate": 2.2478438492964145e-05, + "loss": 0.0772, + "step": 7920 + }, + { + "epoch": 3.6, + "learning_rate": 2.2405810258738088e-05, + "loss": 0.0883, + "step": 7930 + }, + { + "epoch": 3.6, + "learning_rate": 2.233318202451203e-05, + "loss": 0.0827, + "step": 7940 + }, + { + "epoch": 3.61, + "learning_rate": 2.2260553790285978e-05, + "loss": 0.0916, + "step": 7950 + }, + { + "epoch": 3.61, + "learning_rate": 2.218792555605992e-05, + "loss": 0.0903, + "step": 7960 + }, + { + "epoch": 3.62, + "learning_rate": 2.2115297321833865e-05, + "loss": 0.1006, + "step": 7970 + }, + { + "epoch": 3.62, + "learning_rate": 2.204266908760781e-05, + "loss": 0.0864, + "step": 7980 + }, + { + "epoch": 3.63, + "learning_rate": 2.1970040853381755e-05, + "loss": 0.1101, + "step": 7990 + }, + { + "epoch": 3.63, + "learning_rate": 2.18974126191557e-05, + "loss": 0.084, + "step": 8000 + }, + { + "epoch": 3.63, + "eval_loss": 0.14724896848201752, + "eval_mean_accuracy": 0.801441710329252, + "eval_mean_iou": 0.6890652338941418, + "eval_overall_accuracy": 0.9606205372328169, + "eval_per_category_accuracy": [ + 0.9925550728578445, + 0.7332534529380333, + 0.91065189281391, + 0.7396907216494846, + 0.8666385811412101, + 0.7315413864219503, + 0.9267191431166418, + 0.7671664923508787, + 0.3653388515261252, + 0.7739291819291819, + 0.7120836530327176, + 0.9230744900823539, + 0.9066102548423646, + 0.863554370589374, + 0.8586016985687187, + 0.805511305241521, + 0.9154511483523774, + 0.6335790884718498 + ], + "eval_per_category_iou": [ + 0.9853020987467782, + 0.6722732738528229, + 0.8148160684339277, + 0.6205052344105598, + 0.7830637741753895, + 0.6515298115611984, + 0.8499542003249064, + 0.5557586789447995, + 0.31802215617400703, + 0.5738465995509524, + 0.5848103924641709, + 0.8502836998244631, + 0.8176778777138871, + 0.811335105261635, + 0.738910247789006, + 0.7220123903180489, + 0.8094385799294439, + 0.2436340206185567 + ], + "eval_runtime": 81.6433, + "eval_samples_per_second": 1.09, + "eval_steps_per_second": 0.147, + "step": 8000 + }, + { + "epoch": 3.64, + "learning_rate": 2.1824784384929645e-05, + "loss": 0.0765, + "step": 8010 + }, + { + "epoch": 3.64, + "learning_rate": 2.1752156150703588e-05, + "loss": 0.0851, + "step": 8020 + }, + { + "epoch": 3.65, + "learning_rate": 2.1679527916477535e-05, + "loss": 0.0775, + "step": 8030 + }, + { + "epoch": 3.65, + "learning_rate": 2.1606899682251478e-05, + "loss": 0.0853, + "step": 8040 + }, + { + "epoch": 3.65, + "learning_rate": 
2.1534271448025425e-05, + "loss": 0.0858, + "step": 8050 + }, + { + "epoch": 3.66, + "learning_rate": 2.1461643213799368e-05, + "loss": 0.102, + "step": 8060 + }, + { + "epoch": 3.66, + "learning_rate": 2.138901497957331e-05, + "loss": 0.0888, + "step": 8070 + }, + { + "epoch": 3.67, + "learning_rate": 2.131638674534726e-05, + "loss": 0.0821, + "step": 8080 + }, + { + "epoch": 3.67, + "learning_rate": 2.12437585111212e-05, + "loss": 0.0951, + "step": 8090 + }, + { + "epoch": 3.68, + "learning_rate": 2.1171130276895145e-05, + "loss": 0.076, + "step": 8100 + }, + { + "epoch": 3.68, + "eval_loss": 0.14653557538986206, + "eval_mean_accuracy": 0.7983003293456227, + "eval_mean_iou": 0.6930321233038834, + "eval_overall_accuracy": 0.961235389280855, + "eval_per_category_accuracy": [ + 0.9925582901029885, + 0.724315484382843, + 0.920943036755805, + 0.7198182311448725, + 0.8697680131023093, + 0.7394044312154708, + 0.9183552739180212, + 0.7878025914345015, + 0.3520434557682359, + 0.718046398046398, + 0.743561746247221, + 0.9272811376925906, + 0.8918088952034963, + 0.8836518663217359, + 0.8556971029487843, + 0.8165274922918808, + 0.8969645727968698, + 0.6108579088471849 + ], + "eval_per_category_iou": [ + 0.9850240593046129, + 0.672747507256403, + 0.812150004624905, + 0.6193394024276377, + 0.786230670566666, + 0.6648691149374166, + 0.8268529426508924, + 0.577714664946371, + 0.3062418433013816, + 0.5654745878751049, + 0.5948891896996789, + 0.8507597494223438, + 0.8241910753389651, + 0.8191116675706869, + 0.7335596978726353, + 0.7211002825019004, + 0.8257213791849651, + 0.28860037998733373 + ], + "eval_runtime": 79.5832, + "eval_samples_per_second": 1.118, + "eval_steps_per_second": 0.151, + "step": 8100 + }, + { + "epoch": 3.68, + "learning_rate": 2.1098502042669092e-05, + "loss": 0.0831, + "step": 8110 + }, + { + "epoch": 3.69, + "learning_rate": 2.1025873808443035e-05, + "loss": 0.0771, + "step": 8120 + }, + { + "epoch": 3.69, + "learning_rate": 2.0953245574216982e-05, + "loss": 0.1087, + "step": 8130 + }, + { + "epoch": 3.69, + "learning_rate": 2.0880617339990925e-05, + "loss": 0.0749, + "step": 8140 + }, + { + "epoch": 3.7, + "learning_rate": 2.080798910576487e-05, + "loss": 0.0833, + "step": 8150 + }, + { + "epoch": 3.7, + "learning_rate": 2.0735360871538815e-05, + "loss": 0.075, + "step": 8160 + }, + { + "epoch": 3.71, + "learning_rate": 2.066273263731276e-05, + "loss": 0.0843, + "step": 8170 + }, + { + "epoch": 3.71, + "learning_rate": 2.0590104403086705e-05, + "loss": 0.0824, + "step": 8180 + }, + { + "epoch": 3.72, + "learning_rate": 2.051747616886065e-05, + "loss": 0.0899, + "step": 8190 + }, + { + "epoch": 3.72, + "learning_rate": 2.0444847934634592e-05, + "loss": 0.0821, + "step": 8200 + }, + { + "epoch": 3.72, + "eval_loss": 0.14239969849586487, + "eval_mean_accuracy": 0.7946176582333029, + "eval_mean_iou": 0.6883602339194399, + "eval_overall_accuracy": 0.960033716780416, + "eval_per_category_accuracy": [ + 0.9926663451640313, + 0.7392120986414935, + 0.9139017277429294, + 0.6989962018448183, + 0.8748522250803284, + 0.7411885761728324, + 0.92021526480076, + 0.7268932229995683, + 0.34604242110708744, + 0.7233211233211233, + 0.7549233705168309, + 0.9279572060585214, + 0.8988498822006007, + 0.8787154849268782, + 0.8400478530781169, + 0.803365878725591, + 0.898510520777726, + 0.6234584450402145 + ], + "eval_per_category_iou": [ + 0.9851649722468053, + 0.669850632391349, + 0.8142412868632708, + 0.6157614865268567, + 0.7849918610693152, + 0.6346062589447933, + 0.826438002968982, + 
0.5387425863200032, + 0.3039487435815877, + 0.5563360705618957, + 0.5873385171125941, + 0.84827840442949, + 0.8220391950459304, + 0.8170482763134749, + 0.7302443891676152, + 0.719451443297783, + 0.8246380669717703, + 0.3113640167364017 + ], + "eval_runtime": 80.7089, + "eval_samples_per_second": 1.103, + "eval_steps_per_second": 0.149, + "step": 8200 + }, + { + "epoch": 3.73, + "learning_rate": 2.037221970040854e-05, + "loss": 0.0822, + "step": 8210 + }, + { + "epoch": 3.73, + "learning_rate": 2.0299591466182482e-05, + "loss": 0.0858, + "step": 8220 + }, + { + "epoch": 3.74, + "learning_rate": 2.0226963231956425e-05, + "loss": 0.0896, + "step": 8230 + }, + { + "epoch": 3.74, + "learning_rate": 2.0154334997730372e-05, + "loss": 0.0871, + "step": 8240 + }, + { + "epoch": 3.74, + "learning_rate": 2.0081706763504315e-05, + "loss": 0.0911, + "step": 8250 + }, + { + "epoch": 3.75, + "learning_rate": 2.0009078529278262e-05, + "loss": 0.082, + "step": 8260 + }, + { + "epoch": 3.75, + "learning_rate": 1.9936450295052202e-05, + "loss": 0.0837, + "step": 8270 + }, + { + "epoch": 3.76, + "learning_rate": 1.986382206082615e-05, + "loss": 0.0807, + "step": 8280 + }, + { + "epoch": 3.76, + "learning_rate": 1.9791193826600092e-05, + "loss": 0.0798, + "step": 8290 + }, + { + "epoch": 3.77, + "learning_rate": 1.9718565592374035e-05, + "loss": 0.0924, + "step": 8300 + }, + { + "epoch": 3.77, + "eval_loss": 0.14418603479862213, + "eval_mean_accuracy": 0.7959393875842294, + "eval_mean_iou": 0.687513928062221, + "eval_overall_accuracy": 0.960693445098534, + "eval_per_category_accuracy": [ + 0.9926985730852149, + 0.7343545070354118, + 0.9097921481236931, + 0.7383342376559957, + 0.8746937294032809, + 0.7354629723678342, + 0.9263032985694252, + 0.7636097277241584, + 0.3122090015519917, + 0.7296019536019536, + 0.727429223474788, + 0.9245531425318168, + 0.8809506681294175, + 0.8907264663956497, + 0.8506639075702708, + 0.8141957862281604, + 0.8894261478123756, + 0.6319034852546916 + ], + "eval_per_category_iou": [ + 0.9851814454753052, + 0.6659276117759342, + 0.8151579764714583, + 0.6203202461678728, + 0.7890716829217115, + 0.6438546028137649, + 0.8253026803292666, + 0.5633787612648031, + 0.2740316941379467, + 0.5648205562529303, + 0.5850580714579575, + 0.8508184374920412, + 0.8190878307763053, + 0.8183537933571281, + 0.735997463621147, + 0.7236056402352001, + 0.813958545125946, + 0.2813236654432608 + ], + "eval_runtime": 82.9377, + "eval_samples_per_second": 1.073, + "eval_steps_per_second": 0.145, + "step": 8300 + }, + { + "epoch": 3.77, + "learning_rate": 1.9645937358147982e-05, + "loss": 0.0788, + "step": 8310 + }, + { + "epoch": 3.78, + "learning_rate": 1.9573309123921926e-05, + "loss": 0.0843, + "step": 8320 + }, + { + "epoch": 3.78, + "learning_rate": 1.9500680889695872e-05, + "loss": 0.0696, + "step": 8330 + }, + { + "epoch": 3.79, + "learning_rate": 1.9428052655469816e-05, + "loss": 0.0742, + "step": 8340 + }, + { + "epoch": 3.79, + "learning_rate": 1.935542442124376e-05, + "loss": 0.0831, + "step": 8350 + }, + { + "epoch": 3.79, + "learning_rate": 1.9282796187017706e-05, + "loss": 0.0905, + "step": 8360 + }, + { + "epoch": 3.8, + "learning_rate": 1.921016795279165e-05, + "loss": 0.0997, + "step": 8370 + }, + { + "epoch": 3.8, + "learning_rate": 1.9137539718565596e-05, + "loss": 0.0804, + "step": 8380 + }, + { + "epoch": 3.81, + "learning_rate": 1.906491148433954e-05, + "loss": 0.0944, + "step": 8390 + }, + { + "epoch": 3.81, + "learning_rate": 1.8992283250113482e-05, + "loss": 0.0804, + "step": 8400 + }, + 
{ + "epoch": 3.81, + "eval_loss": 0.14236502349376678, + "eval_mean_accuracy": 0.7955755743850114, + "eval_mean_iou": 0.6886094268621761, + "eval_overall_accuracy": 0.9601393281743767, + "eval_per_category_accuracy": [ + 0.9924674306625428, + 0.7279586780874042, + 0.9179425277869484, + 0.7224633749321758, + 0.8754855216600467, + 0.7505155913954752, + 0.9079417608841656, + 0.7424678418982898, + 0.32928091050181063, + 0.7566495726495727, + 0.7350823348875641, + 0.9229637888294528, + 0.8719223782889413, + 0.8943077627017229, + 0.8335111657182273, + 0.8231564748201439, + 0.8926507299281334, + 0.6235924932975871 + ], + "eval_per_category_iou": [ + 0.9852852475973827, + 0.673134797646319, + 0.8135391236235183, + 0.6244577324422559, + 0.7840482873547397, + 0.648832112886846, + 0.8229458092964135, + 0.5457687568382923, + 0.28741081910954575, + 0.5683365616012209, + 0.5921737683353012, + 0.8525880990902484, + 0.8006655491606318, + 0.7989317129538648, + 0.7314250855921464, + 0.7214093910615469, + 0.8279734850653142, + 0.31604334386358235 + ], + "eval_runtime": 86.7054, + "eval_samples_per_second": 1.026, + "eval_steps_per_second": 0.138, + "step": 8400 + }, + { + "epoch": 3.82, + "learning_rate": 1.891965501588743e-05, + "loss": 0.1009, + "step": 8410 + }, + { + "epoch": 3.82, + "learning_rate": 1.8847026781661372e-05, + "loss": 0.0694, + "step": 8420 + }, + { + "epoch": 3.83, + "learning_rate": 1.8774398547435316e-05, + "loss": 0.0786, + "step": 8430 + }, + { + "epoch": 3.83, + "learning_rate": 1.8701770313209262e-05, + "loss": 0.0801, + "step": 8440 + }, + { + "epoch": 3.84, + "learning_rate": 1.8629142078983206e-05, + "loss": 0.0862, + "step": 8450 + }, + { + "epoch": 3.84, + "learning_rate": 1.8556513844757153e-05, + "loss": 0.0799, + "step": 8460 + }, + { + "epoch": 3.84, + "learning_rate": 1.8483885610531096e-05, + "loss": 0.0841, + "step": 8470 + }, + { + "epoch": 3.85, + "learning_rate": 1.841125737630504e-05, + "loss": 0.0777, + "step": 8480 + }, + { + "epoch": 3.85, + "learning_rate": 1.8338629142078986e-05, + "loss": 0.0887, + "step": 8490 + }, + { + "epoch": 3.86, + "learning_rate": 1.826600090785293e-05, + "loss": 0.0896, + "step": 8500 + }, + { + "epoch": 3.86, + "eval_loss": 0.1432206928730011, + "eval_mean_accuracy": 0.8011006689138838, + "eval_mean_iou": 0.6992310087988485, + "eval_overall_accuracy": 0.9620774086941494, + "eval_per_category_accuracy": [ + 0.9925154674607272, + 0.7424990689616089, + 0.9183053400462199, + 0.7421323928377646, + 0.8784132318503581, + 0.752926836808147, + 0.8923345408484621, + 0.8249977871830027, + 0.3462493533367822, + 0.7310476190476191, + 0.7342461980365539, + 0.9241775489951884, + 0.9014351743610957, + 0.8902073104200764, + 0.8597387480600104, + 0.8217754367934224, + 0.8713006020248524, + 0.5955093833780161 + ], + "eval_per_category_iou": [ + 0.9850162649833359, + 0.6722866483895087, + 0.8149722191278287, + 0.6246503396700348, + 0.7913935183725647, + 0.6781338313177295, + 0.8403050858565998, + 0.5984890775903867, + 0.29448257655755017, + 0.5724327301096817, + 0.5893751085703457, + 0.8525572523260182, + 0.8102996035194219, + 0.809072915208829, + 0.7390627605759075, + 0.7261797995163872, + 0.8113796888559393, + 0.3760687378312029 + ], + "eval_runtime": 85.2104, + "eval_samples_per_second": 1.044, + "eval_steps_per_second": 0.141, + "step": 8500 + }, + { + "epoch": 3.86, + "learning_rate": 1.8193372673626876e-05, + "loss": 0.076, + "step": 8510 + }, + { + "epoch": 3.87, + "learning_rate": 1.812074443940082e-05, + "loss": 0.0866, + "step": 8520 + }, + { 
+ "epoch": 3.87, + "learning_rate": 1.8048116205174763e-05, + "loss": 0.075, + "step": 8530 + }, + { + "epoch": 3.88, + "learning_rate": 1.797548797094871e-05, + "loss": 0.1002, + "step": 8540 + }, + { + "epoch": 3.88, + "learning_rate": 1.7902859736722653e-05, + "loss": 0.0928, + "step": 8550 + }, + { + "epoch": 3.89, + "learning_rate": 1.7830231502496596e-05, + "loss": 0.0759, + "step": 8560 + }, + { + "epoch": 3.89, + "learning_rate": 1.7757603268270543e-05, + "loss": 0.0882, + "step": 8570 + }, + { + "epoch": 3.89, + "learning_rate": 1.7684975034044486e-05, + "loss": 0.0862, + "step": 8580 + }, + { + "epoch": 3.9, + "learning_rate": 1.7612346799818433e-05, + "loss": 0.0934, + "step": 8590 + }, + { + "epoch": 3.9, + "learning_rate": 1.7539718565592376e-05, + "loss": 0.0839, + "step": 8600 + }, + { + "epoch": 3.9, + "eval_loss": 0.1454068124294281, + "eval_mean_accuracy": 0.7997635750166031, + "eval_mean_iou": 0.6893662286822321, + "eval_overall_accuracy": 0.9600287876772077, + "eval_per_category_accuracy": [ + 0.9921701128354435, + 0.7270033517382082, + 0.9204925305381314, + 0.7166304937601736, + 0.8673377460542467, + 0.76710725625886, + 0.9120828070028918, + 0.7287068297467634, + 0.3685980341438179, + 0.7192869352869353, + 0.7775187393023668, + 0.92664460548841, + 0.8900913619768718, + 0.8818744170494343, + 0.847468313502328, + 0.8173047276464542, + 0.911029305125697, + 0.6243967828418231 + ], + "eval_per_category_iou": [ + 0.98499100347911, + 0.6740377109229568, + 0.8138506749288889, + 0.6264674493062967, + 0.7778713799496764, + 0.6535076993690768, + 0.8535941965267092, + 0.5348813774613862, + 0.30828141225337485, + 0.5664123194314109, + 0.5975008693285759, + 0.8513121333744983, + 0.8093334150978108, + 0.8059208806938117, + 0.7369089401103046, + 0.7218235350847833, + 0.8204598065288858, + 0.2714373124326214 + ], + "eval_runtime": 87.0327, + "eval_samples_per_second": 1.023, + "eval_steps_per_second": 0.138, + "step": 8600 + }, + { + "epoch": 3.91, + "learning_rate": 1.746709033136632e-05, + "loss": 0.0763, + "step": 8610 + }, + { + "epoch": 3.91, + "learning_rate": 1.7394462097140266e-05, + "loss": 0.085, + "step": 8620 + }, + { + "epoch": 3.92, + "learning_rate": 1.732183386291421e-05, + "loss": 0.0936, + "step": 8630 + }, + { + "epoch": 3.92, + "learning_rate": 1.7249205628688156e-05, + "loss": 0.0972, + "step": 8640 + }, + { + "epoch": 3.93, + "learning_rate": 1.71765773944621e-05, + "loss": 0.0845, + "step": 8650 + }, + { + "epoch": 3.93, + "learning_rate": 1.7103949160236043e-05, + "loss": 0.0784, + "step": 8660 + }, + { + "epoch": 3.94, + "learning_rate": 1.703132092600999e-05, + "loss": 0.0805, + "step": 8670 + }, + { + "epoch": 3.94, + "learning_rate": 1.6958692691783933e-05, + "loss": 0.0815, + "step": 8680 + }, + { + "epoch": 3.94, + "learning_rate": 1.6886064457557876e-05, + "loss": 0.0906, + "step": 8690 + }, + { + "epoch": 3.95, + "learning_rate": 1.6813436223331823e-05, + "loss": 0.0882, + "step": 8700 + }, + { + "epoch": 3.95, + "eval_loss": 0.14950454235076904, + "eval_mean_accuracy": 0.797425182067469, + "eval_mean_iou": 0.6949051175362407, + "eval_overall_accuracy": 0.9602914017237975, + "eval_per_category_accuracy": [ + 0.9922564792266364, + 0.7303550899464045, + 0.9147993011995158, + 0.7272788931090614, + 0.8826363525959328, + 0.7583786361889957, + 0.904900353902849, + 0.7235459985946354, + 0.3380237972064149, + 0.7512185592185592, + 0.7655373900725964, + 0.9206469697508827, + 0.8773897091883531, + 0.8885442514474773, + 0.8606656320055182, + 0.8113116649537513, + 
0.8995288098669127, + 0.6066353887399464 + ], + "eval_per_category_iou": [ + 0.9853183642068217, + 0.6753305086014583, + 0.8137482085143938, + 0.6271860560332222, + 0.7828199631106115, + 0.6470856436604248, + 0.8389340018002148, + 0.5378408753694257, + 0.289833215046132, + 0.5719449071870538, + 0.5950771536496964, + 0.8532900450349763, + 0.8113708842496539, + 0.8133570677760639, + 0.7382999574712006, + 0.7222917989100288, + 0.8034911027320235, + 0.40107236229893206 + ], + "eval_runtime": 83.3489, + "eval_samples_per_second": 1.068, + "eval_steps_per_second": 0.144, + "step": 8700 + }, + { + "epoch": 3.95, + "learning_rate": 1.6740807989105766e-05, + "loss": 0.0761, + "step": 8710 + }, + { + "epoch": 3.96, + "learning_rate": 1.6668179754879713e-05, + "loss": 0.1032, + "step": 8720 + }, + { + "epoch": 3.96, + "learning_rate": 1.6595551520653653e-05, + "loss": 0.0881, + "step": 8730 + }, + { + "epoch": 3.97, + "learning_rate": 1.65229232864276e-05, + "loss": 0.0764, + "step": 8740 + }, + { + "epoch": 3.97, + "learning_rate": 1.6450295052201543e-05, + "loss": 0.0841, + "step": 8750 + }, + { + "epoch": 3.98, + "learning_rate": 1.637766681797549e-05, + "loss": 0.0933, + "step": 8760 + }, + { + "epoch": 3.98, + "learning_rate": 1.6305038583749433e-05, + "loss": 0.0821, + "step": 8770 + }, + { + "epoch": 3.99, + "learning_rate": 1.6232410349523377e-05, + "loss": 0.0974, + "step": 8780 + }, + { + "epoch": 3.99, + "learning_rate": 1.6159782115297323e-05, + "loss": 0.0939, + "step": 8790 + }, + { + "epoch": 3.99, + "learning_rate": 1.6087153881071267e-05, + "loss": 0.079, + "step": 8800 + }, + { + "epoch": 3.99, + "eval_loss": 0.1423356533050537, + "eval_mean_accuracy": 0.7975943044649596, + "eval_mean_iou": 0.6952737592404045, + "eval_overall_accuracy": 0.9609540875038404, + "eval_per_category_accuracy": [ + 0.9924551163794055, + 0.720510370958079, + 0.9179837955320788, + 0.7331795984807379, + 0.8734463478410625, + 0.7626137723625347, + 0.9092954305817996, + 0.765502815245135, + 0.3264355923435075, + 0.740971916971917, + 0.7363611324244034, + 0.9256996912225768, + 0.8719718360520116, + 0.8972907097478133, + 0.8565916537334023, + 0.824441161356629, + 0.8905524372595064, + 0.6113941018766756 + ], + "eval_per_category_iou": [ + 0.9853967989607081, + 0.67163751075423, + 0.8135335458585021, + 0.6271392933805187, + 0.7852261105676173, + 0.6634668766989079, + 0.8395876308534634, + 0.5557512491311817, + 0.2873275351759938, + 0.5724192574705704, + 0.590634369575509, + 0.8507994186046511, + 0.8067195780401162, + 0.8133511998053831, + 0.7381697610268508, + 0.7266587027045389, + 0.814508549963452, + 0.37260027775508536 + ], + "eval_runtime": 86.6765, + "eval_samples_per_second": 1.027, + "eval_steps_per_second": 0.138, + "step": 8800 + }, + { + "epoch": 4.0, + "learning_rate": 1.601452564684521e-05, + "loss": 0.0973, + "step": 8810 + }, + { + "epoch": 4.0, + "learning_rate": 1.5941897412619157e-05, + "loss": 0.0778, + "step": 8820 + }, + { + "epoch": 4.01, + "learning_rate": 1.58692691783931e-05, + "loss": 0.0686, + "step": 8830 + }, + { + "epoch": 4.01, + "learning_rate": 1.5796640944167047e-05, + "loss": 0.0774, + "step": 8840 + }, + { + "epoch": 4.02, + "learning_rate": 1.572401270994099e-05, + "loss": 0.0732, + "step": 8850 + }, + { + "epoch": 4.02, + "learning_rate": 1.5651384475714933e-05, + "loss": 0.0742, + "step": 8860 + }, + { + "epoch": 4.03, + "learning_rate": 1.557875624148888e-05, + "loss": 0.0801, + "step": 8870 + }, + { + "epoch": 4.03, + "learning_rate": 1.5506128007262823e-05, + "loss": 
0.0733, + "step": 8880 + }, + { + "epoch": 4.04, + "learning_rate": 1.543349977303677e-05, + "loss": 0.0754, + "step": 8890 + }, + { + "epoch": 4.04, + "learning_rate": 1.5360871538810714e-05, + "loss": 0.0812, + "step": 8900 + }, + { + "epoch": 4.04, + "eval_loss": 0.14698636531829834, + "eval_mean_accuracy": 0.7996153806330561, + "eval_mean_iou": 0.6972965406743133, + "eval_overall_accuracy": 0.9617347288667486, + "eval_per_category_accuracy": [ + 0.9925776490435962, + 0.7288816204925598, + 0.9133944783757015, + 0.7270754205100379, + 0.8765661740035527, + 0.7589814475421637, + 0.90428789666176, + 0.7953893925681864, + 0.3345059493016037, + 0.7661929181929182, + 0.720907356036908, + 0.9212360585609628, + 0.8905679549664587, + 0.8877083223342661, + 0.8648366097603035, + 0.8118191161356629, + 0.8911109533963026, + 0.6070375335120644 + ], + "eval_per_category_iou": [ + 0.9854201188515314, + 0.674301207345936, + 0.8140986541037422, + 0.6266074351180734, + 0.7923066093873357, + 0.6654663236008743, + 0.8454510997493212, + 0.5763550473378125, + 0.28996816000717524, + 0.5647766137451848, + 0.5786224103082365, + 0.8535889339067617, + 0.8102106180283142, + 0.8070114390848732, + 0.7428222301422363, + 0.727096996893338, + 0.8217051837266607, + 0.3755286508002322 + ], + "eval_runtime": 84.7255, + "eval_samples_per_second": 1.05, + "eval_steps_per_second": 0.142, + "step": 8900 + }, + { + "epoch": 4.04, + "learning_rate": 1.5288243304584657e-05, + "loss": 0.0733, + "step": 8910 + }, + { + "epoch": 4.05, + "learning_rate": 1.5215615070358602e-05, + "loss": 0.0821, + "step": 8920 + }, + { + "epoch": 4.05, + "learning_rate": 1.5142986836132547e-05, + "loss": 0.0899, + "step": 8930 + }, + { + "epoch": 4.06, + "learning_rate": 1.5070358601906492e-05, + "loss": 0.0831, + "step": 8940 + }, + { + "epoch": 4.06, + "learning_rate": 1.4997730367680437e-05, + "loss": 0.0876, + "step": 8950 + }, + { + "epoch": 4.07, + "learning_rate": 1.492510213345438e-05, + "loss": 0.0681, + "step": 8960 + }, + { + "epoch": 4.07, + "learning_rate": 1.4852473899228325e-05, + "loss": 0.0744, + "step": 8970 + }, + { + "epoch": 4.08, + "learning_rate": 1.477984566500227e-05, + "loss": 0.0736, + "step": 8980 + }, + { + "epoch": 4.08, + "learning_rate": 1.4707217430776215e-05, + "loss": 0.0774, + "step": 8990 + }, + { + "epoch": 4.09, + "learning_rate": 1.463458919655016e-05, + "loss": 0.0764, + "step": 9000 + }, + { + "epoch": 4.09, + "eval_loss": 0.14982560276985168, + "eval_mean_accuracy": 0.8015064865594175, + "eval_mean_iou": 0.6920986484162183, + "eval_overall_accuracy": 0.9613027679786253, + "eval_per_category_accuracy": [ + 0.9924349253926398, + 0.7299826745899383, + 0.9157725321888412, + 0.7145279435702658, + 0.8650234347179635, + 0.7611056399295174, + 0.9126813447612286, + 0.7848672219482544, + 0.3223486808070357, + 0.7378754578754578, + 0.7460012984713451, + 0.9237347439835846, + 0.8957475316080068, + 0.8944353518821604, + 0.8760616054492154, + 0.8233299075025694, + 0.8943231926140098, + 0.6368632707774798 + ], + "eval_per_category_iou": [ + 0.9855178734297589, + 0.6729709961039543, + 0.8143660567657411, + 0.627307371680362, + 0.7865533688324895, + 0.6713737833726459, + 0.8465178204000613, + 0.5682239641774492, + 0.28379486245217705, + 0.5729933097683451, + 0.5916953397467406, + 0.8528486326271372, + 0.8104870041373586, + 0.8055202253735424, + 0.7439990480854161, + 0.7292381391273675, + 0.8059565356432753, + 0.2884113397681054 + ], + "eval_runtime": 86.8211, + "eval_samples_per_second": 1.025, + "eval_steps_per_second": 
0.138, + "step": 9000 + }, + { + "epoch": 4.09, + "learning_rate": 1.4561960962324104e-05, + "loss": 0.0809, + "step": 9010 + }, + { + "epoch": 4.09, + "learning_rate": 1.4489332728098049e-05, + "loss": 0.0832, + "step": 9020 + }, + { + "epoch": 4.1, + "learning_rate": 1.4416704493871994e-05, + "loss": 0.0721, + "step": 9030 + }, + { + "epoch": 4.1, + "learning_rate": 1.4344076259645939e-05, + "loss": 0.0727, + "step": 9040 + }, + { + "epoch": 4.11, + "learning_rate": 1.4271448025419882e-05, + "loss": 0.0814, + "step": 9050 + }, + { + "epoch": 4.11, + "learning_rate": 1.4198819791193827e-05, + "loss": 0.0735, + "step": 9060 + }, + { + "epoch": 4.12, + "learning_rate": 1.4126191556967772e-05, + "loss": 0.0731, + "step": 9070 + }, + { + "epoch": 4.12, + "learning_rate": 1.4053563322741717e-05, + "loss": 0.0808, + "step": 9080 + }, + { + "epoch": 4.13, + "learning_rate": 1.398093508851566e-05, + "loss": 0.0947, + "step": 9090 + }, + { + "epoch": 4.13, + "learning_rate": 1.3908306854289606e-05, + "loss": 0.0749, + "step": 9100 + }, + { + "epoch": 4.13, + "eval_loss": 0.15402460098266602, + "eval_mean_accuracy": 0.8009089659315938, + "eval_mean_iou": 0.694753290646456, + "eval_overall_accuracy": 0.96209283893028, + "eval_per_category_accuracy": [ + 0.9929531237397967, + 0.7231496623973834, + 0.9194608369098712, + 0.7209712425393381, + 0.8628079258816579, + 0.7717906367719342, + 0.8977840183458783, + 0.8285148114228323, + 0.3270563890325918, + 0.7431013431013431, + 0.7329772374038442, + 0.9246519829361926, + 0.8941109292664065, + 0.8941405768790807, + 0.8529110622521124, + 0.8189298561151079, + 0.8821623523095106, + 0.628887399463807 + ], + "eval_per_category_iou": [ + 0.9853784134118165, + 0.6752698902295201, + 0.8145830953088226, + 0.6272866753216099, + 0.7905764692125179, + 0.6927389913548095, + 0.8495780816504622, + 0.5937353964212252, + 0.28859673148908976, + 0.5682879275699014, + 0.5839439511610228, + 0.8511850169600094, + 0.8083813348834752, + 0.8079117157156998, + 0.7390650609610886, + 0.7236816711131293, + 0.7995642579659965, + 0.30579455090600965 + ], + "eval_runtime": 85.3031, + "eval_samples_per_second": 1.043, + "eval_steps_per_second": 0.141, + "step": 9100 + }, + { + "epoch": 4.14, + "learning_rate": 1.383567862006355e-05, + "loss": 0.0708, + "step": 9110 + }, + { + "epoch": 4.14, + "learning_rate": 1.3763050385837496e-05, + "loss": 0.0728, + "step": 9120 + }, + { + "epoch": 4.14, + "learning_rate": 1.369042215161144e-05, + "loss": 0.0869, + "step": 9130 + }, + { + "epoch": 4.15, + "learning_rate": 1.3617793917385384e-05, + "loss": 0.0803, + "step": 9140 + }, + { + "epoch": 4.15, + "learning_rate": 1.3545165683159329e-05, + "loss": 0.0928, + "step": 9150 + }, + { + "epoch": 4.16, + "learning_rate": 1.3472537448933274e-05, + "loss": 0.0717, + "step": 9160 + }, + { + "epoch": 4.16, + "learning_rate": 1.3399909214707219e-05, + "loss": 0.0688, + "step": 9170 + }, + { + "epoch": 4.17, + "learning_rate": 1.3327280980481162e-05, + "loss": 0.0926, + "step": 9180 + }, + { + "epoch": 4.17, + "learning_rate": 1.3254652746255108e-05, + "loss": 0.0712, + "step": 9190 + }, + { + "epoch": 4.18, + "learning_rate": 1.3182024512029053e-05, + "loss": 0.0697, + "step": 9200 + }, + { + "epoch": 4.18, + "eval_loss": 0.15140081942081451, + "eval_mean_accuracy": 0.8005548648580436, + "eval_mean_iou": 0.6961780056115205, + "eval_overall_accuracy": 0.9621129410990169, + "eval_per_category_accuracy": [ + 0.992782831626141, + 0.7380624686280542, + 0.9173097556949489, + 0.7299240368963646, + 
0.8709996137096703, + 0.7526817376865292, + 0.9037885352180313, + 0.8238380904382823, + 0.31841696844283496, + 0.7420952380952381, + 0.7270554211178658, + 0.9283248923627996, + 0.8986790281099941, + 0.8894769723527445, + 0.8599543024659424, + 0.8222764645426516, + 0.8756885022818933, + 0.618632707774799 + ], + "eval_per_category_iou": [ + 0.9855298212243018, + 0.6737816144624618, + 0.8137645522661264, + 0.6260616637579989, + 0.7914383525346843, + 0.6828467489593224, + 0.8360892428585915, + 0.5947329986307622, + 0.2835229628264775, + 0.5737156493305442, + 0.586409076483656, + 0.8514208841137287, + 0.8150094803156027, + 0.8100124203694058, + 0.742073788863778, + 0.7272541344498668, + 0.8038699699172299, + 0.33367073964283134 + ], + "eval_runtime": 81.7415, + "eval_samples_per_second": 1.089, + "eval_steps_per_second": 0.147, + "step": 9200 + }, + { + "epoch": 4.18, + "learning_rate": 1.3109396277802998e-05, + "loss": 0.0786, + "step": 9210 + }, + { + "epoch": 4.19, + "learning_rate": 1.3036768043576941e-05, + "loss": 0.0687, + "step": 9220 + }, + { + "epoch": 4.19, + "learning_rate": 1.2964139809350886e-05, + "loss": 0.077, + "step": 9230 + }, + { + "epoch": 4.19, + "learning_rate": 1.2891511575124831e-05, + "loss": 0.0727, + "step": 9240 + }, + { + "epoch": 4.2, + "learning_rate": 1.2818883340898776e-05, + "loss": 0.075, + "step": 9250 + }, + { + "epoch": 4.2, + "learning_rate": 1.2746255106672721e-05, + "loss": 0.0908, + "step": 9260 + }, + { + "epoch": 4.21, + "learning_rate": 1.2673626872446664e-05, + "loss": 0.0789, + "step": 9270 + }, + { + "epoch": 4.21, + "learning_rate": 1.260099863822061e-05, + "loss": 0.0802, + "step": 9280 + }, + { + "epoch": 4.22, + "learning_rate": 1.2528370403994554e-05, + "loss": 0.0796, + "step": 9290 + }, + { + "epoch": 4.22, + "learning_rate": 1.24557421697685e-05, + "loss": 0.0763, + "step": 9300 + }, + { + "epoch": 4.22, + "eval_loss": 0.1495458036661148, + "eval_mean_accuracy": 0.8018444069331001, + "eval_mean_iou": 0.6997454892585403, + "eval_overall_accuracy": 0.9618220382861877, + "eval_per_category_accuracy": [ + 0.9929321007068731, + 0.7280720218915462, + 0.9101068146803125, + 0.7258545849158979, + 0.8758930819724546, + 0.7673435229796988, + 0.8916803251591171, + 0.7866609585020041, + 0.3637351267459907, + 0.7121465201465201, + 0.7782466702079521, + 0.9229044845868274, + 0.8839855763178246, + 0.8962172007813738, + 0.8624331781341611, + 0.813225847893114, + 0.9073511215065742, + 0.6144101876675603 + ], + "eval_per_category_iou": [ + 0.9855452046413777, + 0.6761450783435085, + 0.8157453058607579, + 0.628457337483117, + 0.7905338290736328, + 0.6714772913909204, + 0.837535892947904, + 0.5760236393361092, + 0.3095856633349478, + 0.565627569941192, + 0.5921559821862954, + 0.8530411331345378, + 0.8028986335829855, + 0.8034290176775446, + 0.74795532083937, + 0.7289440350069093, + 0.8228870785840817, + 0.38743079328853386 + ], + "eval_runtime": 85.8083, + "eval_samples_per_second": 1.037, + "eval_steps_per_second": 0.14, + "step": 9300 + }, + { + "epoch": 4.23, + "learning_rate": 1.2383113935542443e-05, + "loss": 0.0892, + "step": 9310 + }, + { + "epoch": 4.23, + "learning_rate": 1.2310485701316388e-05, + "loss": 0.0864, + "step": 9320 + }, + { + "epoch": 4.24, + "learning_rate": 1.2237857467090333e-05, + "loss": 0.0665, + "step": 9330 + }, + { + "epoch": 4.24, + "learning_rate": 1.2165229232864278e-05, + "loss": 0.0729, + "step": 9340 + }, + { + "epoch": 4.24, + "learning_rate": 1.2092600998638221e-05, + "loss": 0.0737, + "step": 9350 + }, + { + "epoch": 
4.25, + "learning_rate": 1.2019972764412166e-05, + "loss": 0.0797, + "step": 9360 + }, + { + "epoch": 4.25, + "learning_rate": 1.1947344530186111e-05, + "loss": 0.0736, + "step": 9370 + }, + { + "epoch": 4.26, + "learning_rate": 1.1874716295960056e-05, + "loss": 0.0785, + "step": 9380 + }, + { + "epoch": 4.26, + "learning_rate": 1.1802088061734001e-05, + "loss": 0.091, + "step": 9390 + }, + { + "epoch": 4.27, + "learning_rate": 1.1729459827507945e-05, + "loss": 0.0847, + "step": 9400 + }, + { + "epoch": 4.27, + "eval_loss": 0.14821504056453705, + "eval_mean_accuracy": 0.8064361413894966, + "eval_mean_iou": 0.6972761209085664, + "eval_overall_accuracy": 0.9622693865486746, + "eval_per_category_accuracy": [ + 0.9929973885953984, + 0.7311646885474182, + 0.9175608011444921, + 0.7520347259902334, + 0.864897187079103, + 0.7666568038191302, + 0.8978501358889504, + 0.8139553787710466, + 0.37211588204862905, + 0.7375921855921856, + 0.7173562336461469, + 0.922758200788351, + 0.8940524791827779, + 0.8880382943526389, + 0.876913045352647, + 0.8234712230215827, + 0.9088353549971766, + 0.6376005361930295 + ], + "eval_per_category_iou": [ + 0.9855848828569402, + 0.6755127380435921, + 0.8165006502945452, + 0.6316868911297214, + 0.7884445422970183, + 0.6859489927157852, + 0.8430362210281603, + 0.589946556550292, + 0.3199875439298901, + 0.562432033845283, + 0.5734720518385706, + 0.8528903392252232, + 0.8178703486626441, + 0.81427124893599, + 0.7500702930181747, + 0.7368294000666721, + 0.829614753013219, + 0.27687068890247096 + ], + "eval_runtime": 75.2666, + "eval_samples_per_second": 1.182, + "eval_steps_per_second": 0.159, + "step": 9400 + }, + { + "epoch": 4.27, + "learning_rate": 1.165683159328189e-05, + "loss": 0.0917, + "step": 9410 + }, + { + "epoch": 4.28, + "learning_rate": 1.1584203359055835e-05, + "loss": 0.0773, + "step": 9420 + }, + { + "epoch": 4.28, + "learning_rate": 1.151157512482978e-05, + "loss": 0.0734, + "step": 9430 + }, + { + "epoch": 4.29, + "learning_rate": 1.1438946890603723e-05, + "loss": 0.0798, + "step": 9440 + }, + { + "epoch": 4.29, + "learning_rate": 1.1366318656377668e-05, + "loss": 0.0875, + "step": 9450 + }, + { + "epoch": 4.29, + "learning_rate": 1.1293690422151613e-05, + "loss": 0.068, + "step": 9460 + }, + { + "epoch": 4.3, + "learning_rate": 1.1221062187925558e-05, + "loss": 0.0806, + "step": 9470 + }, + { + "epoch": 4.3, + "learning_rate": 1.1148433953699501e-05, + "loss": 0.0884, + "step": 9480 + }, + { + "epoch": 4.31, + "learning_rate": 1.1075805719473447e-05, + "loss": 0.0987, + "step": 9490 + }, + { + "epoch": 4.31, + "learning_rate": 1.1003177485247392e-05, + "loss": 0.0896, + "step": 9500 + }, + { + "epoch": 4.31, + "eval_loss": 0.14339512586593628, + "eval_mean_accuracy": 0.805191471580113, + "eval_mean_iou": 0.7010373604415414, + "eval_overall_accuracy": 0.9629785773459445, + "eval_per_category_accuracy": [ + 0.9929541221951863, + 0.7355365209928917, + 0.9082514856388247, + 0.7241589799240369, + 0.8703498500466225, + 0.7673280662783355, + 0.9190181892840862, + 0.82143921617506, + 0.34997413347128814, + 0.7357362637362638, + 0.748381829270692, + 0.9288349088493791, + 0.8834865025268421, + 0.8941273779983457, + 0.8731677875495775, + 0.8239208633093525, + 0.8968072008467228, + 0.6199731903485255 + ], + "eval_per_category_iou": [ + 0.985366983039312, + 0.6760626265031552, + 0.8168545355290974, + 0.6312895405900787, + 0.8007127860896879, + 0.6777893720560556, + 0.8562505572532078, + 0.5992923036373221, + 0.3036900700305261, + 0.5772520347634157, + 
0.6002998382451572, + 0.8507595647214325, + 0.8116230561120175, + 0.8071746029224274, + 0.7468426777779826, + 0.7331317622985957, + 0.8194452276495201, + 0.324834948728754 + ], + "eval_runtime": 73.2841, + "eval_samples_per_second": 1.214, + "eval_steps_per_second": 0.164, + "step": 9500 + }, + { + "epoch": 4.32, + "learning_rate": 1.0930549251021337e-05, + "loss": 0.0843, + "step": 9510 + }, + { + "epoch": 4.32, + "learning_rate": 1.0857921016795282e-05, + "loss": 0.0712, + "step": 9520 + }, + { + "epoch": 4.33, + "learning_rate": 1.0785292782569225e-05, + "loss": 0.0696, + "step": 9530 + }, + { + "epoch": 4.33, + "learning_rate": 1.071266454834317e-05, + "loss": 0.077, + "step": 9540 + }, + { + "epoch": 4.33, + "learning_rate": 1.0640036314117115e-05, + "loss": 0.078, + "step": 9550 + }, + { + "epoch": 4.34, + "learning_rate": 1.056740807989106e-05, + "loss": 0.0828, + "step": 9560 + }, + { + "epoch": 4.34, + "learning_rate": 1.0494779845665003e-05, + "loss": 0.0753, + "step": 9570 + }, + { + "epoch": 4.35, + "learning_rate": 1.0422151611438948e-05, + "loss": 0.0753, + "step": 9580 + }, + { + "epoch": 4.35, + "learning_rate": 1.0349523377212893e-05, + "loss": 0.0761, + "step": 9590 + }, + { + "epoch": 4.36, + "learning_rate": 1.0276895142986838e-05, + "loss": 0.0869, + "step": 9600 + }, + { + "epoch": 4.36, + "eval_loss": 0.14896970987319946, + "eval_mean_accuracy": 0.8013344143351102, + "eval_mean_iou": 0.6940233914355434, + "eval_overall_accuracy": 0.9610248094194391, + "eval_per_category_accuracy": [ + 0.9930181342796028, + 0.7365728072021892, + 0.9134374656102124, + 0.7518312533912099, + 0.8709138476506618, + 0.733261496473664, + 0.9116895816151471, + 0.765255341017679, + 0.3265390584583549, + 0.7439609279609279, + 0.7598910071022448, + 0.9220228281797946, + 0.8925462654892722, + 0.8905504813191841, + 0.8588441972753923, + 0.8227582219938335, + 0.9009667574898248, + 0.6299597855227882 + ], + "eval_per_category_iou": [ + 0.9855236516057857, + 0.6757981370611917, + 0.8169465043105967, + 0.6337907375643225, + 0.7813916266013309, + 0.6447653275395748, + 0.8497944355695392, + 0.5533401688860444, + 0.28893161219445207, + 0.5823527162900944, + 0.6036257081461223, + 0.8525782806587823, + 0.8160930408472012, + 0.8133297438040439, + 0.7446861201317665, + 0.7286737474471074, + 0.8354884954688658, + 0.28531099171295876 + ], + "eval_runtime": 77.035, + "eval_samples_per_second": 1.155, + "eval_steps_per_second": 0.156, + "step": 9600 + }, + { + "epoch": 4.36, + "learning_rate": 1.0204266908760782e-05, + "loss": 0.0711, + "step": 9610 + }, + { + "epoch": 4.37, + "learning_rate": 1.0131638674534727e-05, + "loss": 0.0716, + "step": 9620 + }, + { + "epoch": 4.37, + "learning_rate": 1.0059010440308672e-05, + "loss": 0.0704, + "step": 9630 + }, + { + "epoch": 4.38, + "learning_rate": 9.986382206082615e-06, + "loss": 0.0795, + "step": 9640 + }, + { + "epoch": 4.38, + "learning_rate": 9.91375397185656e-06, + "loss": 0.0751, + "step": 9650 + }, + { + "epoch": 4.38, + "learning_rate": 9.841125737630505e-06, + "loss": 0.0773, + "step": 9660 + }, + { + "epoch": 4.39, + "learning_rate": 9.768497503404449e-06, + "loss": 0.0723, + "step": 9670 + }, + { + "epoch": 4.39, + "learning_rate": 9.695869269178394e-06, + "loss": 0.0899, + "step": 9680 + }, + { + "epoch": 4.4, + "learning_rate": 9.623241034952339e-06, + "loss": 0.081, + "step": 9690 + }, + { + "epoch": 4.4, + "learning_rate": 9.550612800726284e-06, + "loss": 0.0758, + "step": 9700 + }, + { + "epoch": 4.4, + "eval_loss": 0.1480877697467804, + 
"eval_mean_accuracy": 0.8042308847884129, + "eval_mean_iou": 0.6978584772456488, + "eval_overall_accuracy": 0.9620015005047402, + "eval_per_category_accuracy": [ + 0.9928457897854241, + 0.7331401091338914, + 0.911350005502366, + 0.7341291372761801, + 0.871964310341397, + 0.766100362570052, + 0.9007192892712107, + 0.7903893293448436, + 0.34749094671495084, + 0.753992673992674, + 0.7530248480198312, + 0.9246559365523677, + 0.891799902882938, + 0.8920507540960526, + 0.8524799534402483, + 0.8266829393627955, + 0.9091439274484453, + 0.624195710455764 + ], + "eval_per_category_iou": [ + 0.9855239967930458, + 0.6758414807075155, + 0.8167526035326061, + 0.6312106368089573, + 0.789750648624295, + 0.6701058803987995, + 0.8434524469007227, + 0.5789336498221074, + 0.2992648696814435, + 0.5821793827495702, + 0.6023558849912658, + 0.8526471596170533, + 0.8104330274329702, + 0.8094245770357773, + 0.7416351387462905, + 0.7291547452451234, + 0.8305421107675134, + 0.31224435056661975 + ], + "eval_runtime": 76.325, + "eval_samples_per_second": 1.166, + "eval_steps_per_second": 0.157, + "step": 9700 + }, + { + "epoch": 4.41, + "learning_rate": 9.477984566500229e-06, + "loss": 0.0793, + "step": 9710 + }, + { + "epoch": 4.41, + "learning_rate": 9.405356332274172e-06, + "loss": 0.0692, + "step": 9720 + }, + { + "epoch": 4.42, + "learning_rate": 9.332728098048117e-06, + "loss": 0.0896, + "step": 9730 + }, + { + "epoch": 4.42, + "learning_rate": 9.260099863822062e-06, + "loss": 0.0738, + "step": 9740 + }, + { + "epoch": 4.43, + "learning_rate": 9.187471629596007e-06, + "loss": 0.0739, + "step": 9750 + }, + { + "epoch": 4.43, + "learning_rate": 9.11484339536995e-06, + "loss": 0.0746, + "step": 9760 + }, + { + "epoch": 4.43, + "learning_rate": 9.042215161143895e-06, + "loss": 0.0716, + "step": 9770 + }, + { + "epoch": 4.44, + "learning_rate": 8.96958692691784e-06, + "loss": 0.074, + "step": 9780 + }, + { + "epoch": 4.44, + "learning_rate": 8.896958692691786e-06, + "loss": 0.0761, + "step": 9790 + }, + { + "epoch": 4.45, + "learning_rate": 8.824330458465729e-06, + "loss": 0.12, + "step": 9800 + }, + { + "epoch": 4.45, + "eval_loss": 0.14720258116722107, + "eval_mean_accuracy": 0.8020529731503411, + "eval_mean_iou": 0.6962903154679329, + "eval_overall_accuracy": 0.9608089575606785, + "eval_per_category_accuracy": [ + 0.9929007048318473, + 0.7357955925452161, + 0.9132844310553538, + 0.7322300596852958, + 0.8734134136744032, + 0.7652546601954611, + 0.9059147362084025, + 0.7283581981708583, + 0.3609415416451112, + 0.7462075702075702, + 0.7708886659190619, + 0.9268106573677615, + 0.8765444310558783, + 0.8904976857962444, + 0.8645348335919987, + 0.8191675231243577, + 0.9104152459476723, + 0.6237935656836461 + ], + "eval_per_category_iou": [ + 0.9854089528456684, + 0.6768547894603572, + 0.8160466209634548, + 0.6293575842369127, + 0.782235372556972, + 0.6477415665026288, + 0.8502126103058845, + 0.5364941748761263, + 0.3084711291891414, + 0.5842900302114804, + 0.6064477685860876, + 0.8520062513629425, + 0.8130943245136966, + 0.8141156879859383, + 0.7465680115407883, + 0.7336109896684231, + 0.8317550976683084, + 0.3185147159479808 + ], + "eval_runtime": 75.2135, + "eval_samples_per_second": 1.183, + "eval_steps_per_second": 0.16, + "step": 9800 + }, + { + "epoch": 4.45, + "learning_rate": 8.751702224239674e-06, + "loss": 0.0803, + "step": 9810 + }, + { + "epoch": 4.46, + "learning_rate": 8.679073990013619e-06, + "loss": 0.0799, + "step": 9820 + }, + { + "epoch": 4.46, + "learning_rate": 8.606445755787564e-06, + "loss": 0.0719, 
+ "step": 9830 + }, + { + "epoch": 4.47, + "learning_rate": 8.533817521561509e-06, + "loss": 0.0769, + "step": 9840 + }, + { + "epoch": 4.47, + "learning_rate": 8.461189287335452e-06, + "loss": 0.0745, + "step": 9850 + }, + { + "epoch": 4.48, + "learning_rate": 8.388561053109397e-06, + "loss": 0.0698, + "step": 9860 + }, + { + "epoch": 4.48, + "learning_rate": 8.31593281888334e-06, + "loss": 0.0881, + "step": 9870 + }, + { + "epoch": 4.48, + "learning_rate": 8.243304584657286e-06, + "loss": 0.0781, + "step": 9880 + }, + { + "epoch": 4.49, + "learning_rate": 8.17067635043123e-06, + "loss": 0.0815, + "step": 9890 + }, + { + "epoch": 4.49, + "learning_rate": 8.098048116205174e-06, + "loss": 0.0728, + "step": 9900 + }, + { + "epoch": 4.49, + "eval_loss": 0.14797358214855194, + "eval_mean_accuracy": 0.8018117289656665, + "eval_mean_iou": 0.6966242338337922, + "eval_overall_accuracy": 0.9613266419828608, + "eval_per_category_accuracy": [ + 0.9929076940195739, + 0.7325248141971211, + 0.9085902250467701, + 0.7393516006511123, + 0.8735046687611882, + 0.7645370276321658, + 0.8980763274836707, + 0.7640017124493987, + 0.33300569063631663, + 0.758896214896215, + 0.7537429420212871, + 0.9265853012457844, + 0.8966107943815981, + 0.887404748077363, + 0.8670137092602173, + 0.819263874614594, + 0.9001058403507851, + 0.6164879356568365 + ], + "eval_per_category_iou": [ + 0.9854493616568202, + 0.6770731999341485, + 0.8165316100479652, + 0.6310640268611787, + 0.7888095041888148, + 0.6502368137378259, + 0.8400098948187279, + 0.560748183964797, + 0.2940746493672621, + 0.5783322663728804, + 0.6029350434748397, + 0.8528126398678374, + 0.814758371602739, + 0.8124743206554578, + 0.7471613996795691, + 0.7346523817752434, + 0.8230719743569486, + 0.3290405666452028 + ], + "eval_runtime": 75.3594, + "eval_samples_per_second": 1.181, + "eval_steps_per_second": 0.159, + "step": 9900 + }, + { + "epoch": 4.5, + "learning_rate": 8.025419881979119e-06, + "loss": 0.0681, + "step": 9910 + }, + { + "epoch": 4.5, + "learning_rate": 7.952791647753064e-06, + "loss": 0.0963, + "step": 9920 + }, + { + "epoch": 4.51, + "learning_rate": 7.88016341352701e-06, + "loss": 0.0803, + "step": 9930 + }, + { + "epoch": 4.51, + "learning_rate": 7.807535179300954e-06, + "loss": 0.0691, + "step": 9940 + }, + { + "epoch": 4.52, + "learning_rate": 7.734906945074898e-06, + "loss": 0.0783, + "step": 9950 + }, + { + "epoch": 4.52, + "learning_rate": 7.662278710848843e-06, + "loss": 0.0832, + "step": 9960 + }, + { + "epoch": 4.53, + "learning_rate": 7.589650476622788e-06, + "loss": 0.0745, + "step": 9970 + }, + { + "epoch": 4.53, + "learning_rate": 7.517022242396732e-06, + "loss": 0.0848, + "step": 9980 + }, + { + "epoch": 4.53, + "learning_rate": 7.444394008170677e-06, + "loss": 0.0798, + "step": 9990 + }, + { + "epoch": 4.54, + "learning_rate": 7.371765773944621e-06, + "loss": 0.0903, + "step": 10000 + }, + { + "epoch": 4.54, + "eval_loss": 0.15016531944274902, + "eval_mean_accuracy": 0.8001159626297416, + "eval_mean_iou": 0.6937524225865573, + "eval_overall_accuracy": 0.9609915058264572, + "eval_per_category_accuracy": [ + 0.993084254214286, + 0.7336420602665199, + 0.9131640667987234, + 0.738537710255019, + 0.8735005519903558, + 0.7647092594473567, + 0.9077590676730453, + 0.7338965629984483, + 0.32948784273150544, + 0.7564151404151405, + 0.7450274449625214, + 0.9219042196945436, + 0.8985081740193874, + 0.8839686394593739, + 0.8614685721676151, + 0.8171569886947585, + 0.9040524820025118, + 0.625804289544236 + ], + "eval_per_category_iou": [ + 
0.9854932998596498, + 0.674642644431209, + 0.8165864534481433, + 0.629786003470214, + 0.7797532642688882, + 0.6523827643758924, + 0.8486701911346075, + 0.5482705618329305, + 0.2901462347956813, + 0.5782321052552979, + 0.5995203077629402, + 0.8533952085727461, + 0.8229923400049419, + 0.8208716186678542, + 0.7445229978203767, + 0.7306587789328586, + 0.8232707265533675, + 0.28834810537043326 + ], + "eval_runtime": 72.1924, + "eval_samples_per_second": 1.233, + "eval_steps_per_second": 0.166, + "step": 10000 + }, + { + "epoch": 4.54, + "learning_rate": 7.299137539718566e-06, + "loss": 0.0784, + "step": 10010 + }, + { + "epoch": 4.55, + "learning_rate": 7.22650930549251e-06, + "loss": 0.0775, + "step": 10020 + }, + { + "epoch": 4.55, + "learning_rate": 7.153881071266455e-06, + "loss": 0.0812, + "step": 10030 + }, + { + "epoch": 4.56, + "learning_rate": 7.0812528370404e-06, + "loss": 0.0859, + "step": 10040 + }, + { + "epoch": 4.56, + "learning_rate": 7.0086246028143445e-06, + "loss": 0.0688, + "step": 10050 + }, + { + "epoch": 4.57, + "learning_rate": 6.9359963685882895e-06, + "loss": 0.0938, + "step": 10060 + }, + { + "epoch": 4.57, + "learning_rate": 6.863368134362234e-06, + "loss": 0.0843, + "step": 10070 + }, + { + "epoch": 4.58, + "learning_rate": 6.790739900136179e-06, + "loss": 0.0816, + "step": 10080 + }, + { + "epoch": 4.58, + "learning_rate": 6.718111665910123e-06, + "loss": 0.0735, + "step": 10090 + }, + { + "epoch": 4.58, + "learning_rate": 6.645483431684068e-06, + "loss": 0.0839, + "step": 10100 + }, + { + "epoch": 4.58, + "eval_loss": 0.14849814772605896, + "eval_mean_accuracy": 0.8015473405558757, + "eval_mean_iou": 0.6948947776696578, + "eval_overall_accuracy": 0.9613145121027914, + "eval_per_category_accuracy": [ + 0.9926776055220352, + 0.7325086222251008, + 0.918262352811709, + 0.7271432447097125, + 0.8698908300988094, + 0.7637376953616647, + 0.9021964943782689, + 0.7724086106580105, + 0.3287635799275737, + 0.7487863247863248, + 0.7622518640933326, + 0.9257194593034519, + 0.8912918367713972, + 0.893115463808669, + 0.8630528970512157, + 0.8211973278520042, + 0.8931321029521126, + 0.62171581769437 + ], + "eval_per_category_iou": [ + 0.9856376280793812, + 0.6757334050307702, + 0.8159314047541198, + 0.6285764540337712, + 0.7863529090700465, + 0.6570806000885276, + 0.8424802428055216, + 0.5606736475518387, + 0.29176805472659656, + 0.5810121497381326, + 0.6037837290301468, + 0.8526052078667846, + 0.8174800201242093, + 0.8123364306465941, + 0.7446264430609864, + 0.7302700726591418, + 0.824526119679351, + 0.297231479107921 + ], + "eval_runtime": 72.5017, + "eval_samples_per_second": 1.228, + "eval_steps_per_second": 0.166, + "step": 10100 + }, + { + "epoch": 4.59, + "learning_rate": 6.572855197458012e-06, + "loss": 0.0899, + "step": 10110 + }, + { + "epoch": 4.59, + "learning_rate": 6.500226963231957e-06, + "loss": 0.084, + "step": 10120 + }, + { + "epoch": 4.6, + "learning_rate": 6.427598729005901e-06, + "loss": 0.0784, + "step": 10130 + }, + { + "epoch": 4.6, + "learning_rate": 6.354970494779846e-06, + "loss": 0.078, + "step": 10140 + }, + { + "epoch": 4.61, + "learning_rate": 6.2823422605537905e-06, + "loss": 0.0816, + "step": 10150 + }, + { + "epoch": 4.61, + "learning_rate": 6.2097140263277356e-06, + "loss": 0.0701, + "step": 10160 + }, + { + "epoch": 4.62, + "learning_rate": 6.137085792101681e-06, + "loss": 0.0771, + "step": 10170 + }, + { + "epoch": 4.62, + "learning_rate": 6.064457557875625e-06, + "loss": 0.0777, + "step": 10180 + }, + { + "epoch": 4.63, + "learning_rate": 
5.99182932364957e-06, + "loss": 0.092, + "step": 10190 + }, + { + "epoch": 4.63, + "learning_rate": 5.919201089423514e-06, + "loss": 0.0789, + "step": 10200 + }, + { + "epoch": 4.63, + "eval_loss": 0.14652346074581146, + "eval_mean_accuracy": 0.8004928725821632, + "eval_mean_iou": 0.6951126828621068, + "eval_overall_accuracy": 0.961037710811315, + "eval_per_category_accuracy": [ + 0.9928591579936948, + 0.7318933272883305, + 0.9138329481677121, + 0.730127509495388, + 0.8733523482403892, + 0.7613772362534722, + 0.8967261376567247, + 0.7599229036494319, + 0.3265907915157786, + 0.7421343101343101, + 0.7789746011135376, + 0.9237070686703593, + 0.8920471916982896, + 0.895046900022878, + 0.8626002327987584, + 0.8174010791366907, + 0.8885621449488232, + 0.62171581769437 + ], + "eval_per_category_iou": [ + 0.9854643947814263, + 0.6756098290087289, + 0.8167550853552833, + 0.6272213482491406, + 0.7792095694014227, + 0.6550510269990957, + 0.841196133669782, + 0.5596696431422216, + 0.2894013019162006, + 0.5838738434109003, + 0.6083786607664178, + 0.853801682490261, + 0.8179704313266324, + 0.8128701002916849, + 0.7456156286245301, + 0.7325895350109094, + 0.8218969685379853, + 0.30545310853530033 + ], + "eval_runtime": 73.6237, + "eval_samples_per_second": 1.209, + "eval_steps_per_second": 0.163, + "step": 10200 + }, + { + "epoch": 4.63, + "learning_rate": 5.846572855197459e-06, + "loss": 0.0784, + "step": 10210 + }, + { + "epoch": 4.64, + "learning_rate": 5.773944620971403e-06, + "loss": 0.0698, + "step": 10220 + }, + { + "epoch": 4.64, + "learning_rate": 5.701316386745348e-06, + "loss": 0.0752, + "step": 10230 + }, + { + "epoch": 4.65, + "learning_rate": 5.628688152519292e-06, + "loss": 0.08, + "step": 10240 + }, + { + "epoch": 4.65, + "learning_rate": 5.5560599182932374e-06, + "loss": 0.069, + "step": 10250 + }, + { + "epoch": 4.66, + "learning_rate": 5.483431684067182e-06, + "loss": 0.0816, + "step": 10260 + }, + { + "epoch": 4.66, + "learning_rate": 5.410803449841127e-06, + "loss": 0.0765, + "step": 10270 + }, + { + "epoch": 4.67, + "learning_rate": 5.338175215615071e-06, + "loss": 0.0703, + "step": 10280 + }, + { + "epoch": 4.67, + "learning_rate": 5.265546981389016e-06, + "loss": 0.0827, + "step": 10290 + }, + { + "epoch": 4.68, + "learning_rate": 5.192918747162961e-06, + "loss": 0.0692, + "step": 10300 + }, + { + "epoch": 4.68, + "eval_loss": 0.14556331932544708, + "eval_mean_accuracy": 0.7999499482492319, + "eval_mean_iou": 0.6912676735256027, + "eval_overall_accuracy": 0.9605477150906337, + "eval_per_category_accuracy": [ + 0.9925063704227338, + 0.733771596042682, + 0.914716765709255, + 0.7370455778621813, + 0.8745688540213646, + 0.7683504166685068, + 0.8941318940588168, + 0.7407301031624316, + 0.31345059493016036, + 0.7543443223443224, + 0.7648881544000472, + 0.9230112322235533, + 0.8903341546319443, + 0.8931066645548458, + 0.8511435161234696, + 0.8249743062692703, + 0.8998744110123337, + 0.6281501340482574 + ], + "eval_per_category_iou": [ + 0.9855265545887547, + 0.6755362759566507, + 0.8157647766027921, + 0.6272076647812536, + 0.7768120941162705, + 0.6519911860926135, + 0.8405735465867566, + 0.5492412928493073, + 0.2809645258520751, + 0.579652925811391, + 0.6032023086411133, + 0.8529403168293681, + 0.8117577610979704, + 0.8102921100741651, + 0.7390242418854488, + 0.7305449850116323, + 0.8277511282677188, + 0.28403442841556553 + ], + "eval_runtime": 72.7214, + "eval_samples_per_second": 1.224, + "eval_steps_per_second": 0.165, + "step": 10300 + }, + { + "epoch": 4.68, + "learning_rate": 
5.120290512936905e-06, + "loss": 0.0789, + "step": 10310 + }, + { + "epoch": 4.68, + "learning_rate": 5.04766227871085e-06, + "loss": 0.0821, + "step": 10320 + }, + { + "epoch": 4.69, + "learning_rate": 4.975034044484794e-06, + "loss": 0.0674, + "step": 10330 + }, + { + "epoch": 4.69, + "learning_rate": 4.9024058102587385e-06, + "loss": 0.0747, + "step": 10340 + }, + { + "epoch": 4.7, + "learning_rate": 4.8297775760326835e-06, + "loss": 0.0791, + "step": 10350 + }, + { + "epoch": 4.7, + "learning_rate": 4.757149341806628e-06, + "loss": 0.0746, + "step": 10360 + }, + { + "epoch": 4.71, + "learning_rate": 4.684521107580573e-06, + "loss": 0.0674, + "step": 10370 + }, + { + "epoch": 4.71, + "learning_rate": 4.611892873354517e-06, + "loss": 0.078, + "step": 10380 + }, + { + "epoch": 4.72, + "learning_rate": 4.539264639128462e-06, + "loss": 0.0716, + "step": 10390 + }, + { + "epoch": 4.72, + "learning_rate": 4.466636404902406e-06, + "loss": 0.0704, + "step": 10400 + }, + { + "epoch": 4.72, + "eval_loss": 0.14813955128192902, + "eval_mean_accuracy": 0.8019139040240622, + "eval_mean_iou": 0.6936253644894181, + "eval_overall_accuracy": 0.9608258022351212, + "eval_per_category_accuracy": [ + 0.9925674980804695, + 0.735083145776324, + 0.9134323071420711, + 0.7302631578947368, + 0.8735074132750764, + 0.7665508150097818, + 0.9013752448958997, + 0.7417001298788098, + 0.3291774443869633, + 0.7587496947496948, + 0.7666096126227154, + 0.9250038547757706, + 0.8937647249249141, + 0.8924555197719234, + 0.8613500172443525, + 0.8203044707091469, + 0.9015900738413876, + 0.6309651474530831 + ], + "eval_per_category_iou": [ + 0.9855797066964888, + 0.6743512425543292, + 0.815467107949539, + 0.6274109900355457, + 0.7761384385125846, + 0.6540092575672228, + 0.8437669999055335, + 0.552212751192247, + 0.29070723684210525, + 0.5791302273218666, + 0.6012065480690603, + 0.853086167668164, + 0.8174390058352078, + 0.8141503413565159, + 0.7431675950120421, + 0.7328417307471594, + 0.833127081527442, + 0.2914641320164711 + ], + "eval_runtime": 74.519, + "eval_samples_per_second": 1.194, + "eval_steps_per_second": 0.161, + "step": 10400 + }, + { + "epoch": 4.73, + "learning_rate": 4.394008170676351e-06, + "loss": 0.0734, + "step": 10410 + }, + { + "epoch": 4.73, + "learning_rate": 4.321379936450295e-06, + "loss": 0.0759, + "step": 10420 + }, + { + "epoch": 4.73, + "learning_rate": 4.24875170222424e-06, + "loss": 0.0776, + "step": 10430 + }, + { + "epoch": 4.74, + "learning_rate": 4.1761234679981845e-06, + "loss": 0.0915, + "step": 10440 + }, + { + "epoch": 4.74, + "learning_rate": 4.103495233772129e-06, + "loss": 0.0803, + "step": 10450 + }, + { + "epoch": 4.75, + "learning_rate": 4.030866999546074e-06, + "loss": 0.0704, + "step": 10460 + }, + { + "epoch": 4.75, + "learning_rate": 3.958238765320018e-06, + "loss": 0.0696, + "step": 10470 + }, + { + "epoch": 4.76, + "learning_rate": 3.885610531093963e-06, + "loss": 0.0731, + "step": 10480 + }, + { + "epoch": 4.76, + "learning_rate": 3.8129822968679076e-06, + "loss": 0.0765, + "step": 10490 + }, + { + "epoch": 4.77, + "learning_rate": 3.740354062641852e-06, + "loss": 0.0833, + "step": 10500 + }, + { + "epoch": 4.77, + "eval_loss": 0.1479746252298355, + "eval_mean_accuracy": 0.799709068394295, + "eval_mean_iou": 0.6929739311875632, + "eval_overall_accuracy": 0.9607193336058198, + "eval_per_category_accuracy": [ + 0.9929768093204258, + 0.7261289852491135, + 0.9176381781666116, + 0.7276180141074335, + 0.8688746738316776, + 0.7668378680351, + 0.9009628802193711, + 0.7443591230380442, 
+ 0.336316606311433, + 0.7475164835164835, + 0.7497786696570855, + 0.925845975021053, + 0.8911164865205115, + 0.8976162821392746, + 0.860536299361959, + 0.8214285714285714, + 0.8879110570766463, + 0.6313002680965147 + ], + "eval_per_category_iou": [ + 0.9854424395099749, + 0.6760280993728895, + 0.8149795746955293, + 0.6285814730181051, + 0.7788548531433264, + 0.6537910520816664, + 0.8440724989445304, + 0.54918256729976, + 0.29528524709302323, + 0.5783960274811237, + 0.6000803035790203, + 0.8532219396492047, + 0.8131010207095737, + 0.8117977081012255, + 0.7413590592342583, + 0.7304840570769213, + 0.8240961371030564, + 0.2947767032829468 + ], + "eval_runtime": 77.3271, + "eval_samples_per_second": 1.151, + "eval_steps_per_second": 0.155, + "step": 10500 + }, + { + "epoch": 4.77, + "learning_rate": 3.6677258284157968e-06, + "loss": 0.0725, + "step": 10510 + }, + { + "epoch": 4.78, + "learning_rate": 3.5950975941897414e-06, + "loss": 0.0795, + "step": 10520 + }, + { + "epoch": 4.78, + "learning_rate": 3.522469359963686e-06, + "loss": 0.0694, + "step": 10530 + }, + { + "epoch": 4.78, + "learning_rate": 3.4498411257376306e-06, + "loss": 0.0786, + "step": 10540 + }, + { + "epoch": 4.79, + "learning_rate": 3.377212891511575e-06, + "loss": 0.0776, + "step": 10550 + }, + { + "epoch": 4.79, + "learning_rate": 3.3045846572855202e-06, + "loss": 0.0772, + "step": 10560 + }, + { + "epoch": 4.8, + "learning_rate": 3.231956423059465e-06, + "loss": 0.0785, + "step": 10570 + }, + { + "epoch": 4.8, + "learning_rate": 3.1593281888334094e-06, + "loss": 0.0695, + "step": 10580 + }, + { + "epoch": 4.81, + "learning_rate": 3.086699954607354e-06, + "loss": 0.0665, + "step": 10590 + }, + { + "epoch": 4.81, + "learning_rate": 3.0140717203812986e-06, + "loss": 0.0666, + "step": 10600 + }, + { + "epoch": 4.81, + "eval_loss": 0.14828309416770935, + "eval_mean_accuracy": 0.8017485365453704, + "eval_mean_iou": 0.6961283653552043, + "eval_overall_accuracy": 0.9611695964684647, + "eval_per_category_accuracy": [ + 0.9927109428380961, + 0.7328972295535874, + 0.9128700341146693, + 0.7346717308735757, + 0.8750923700455521, + 0.768122982348447, + 0.8967122181739726, + 0.7593141531775149, + 0.33678220382824625, + 0.7482881562881563, + 0.7686065041610104, + 0.9261741251635809, + 0.8872318040393504, + 0.8957376414480052, + 0.8616032936713226, + 0.8175488180883864, + 0.8934499325769194, + 0.6236595174262735 + ], + "eval_per_category_iou": [ + 0.9855292226790823, + 0.6760817936040867, + 0.8162783331744533, + 0.6303905022405867, + 0.7806234430754729, + 0.6596454367378207, + 0.8434200857869009, + 0.5597664260793139, + 0.295882192527952, + 0.579330268013794, + 0.6043016906680692, + 0.8535430087154225, + 0.8135113185223052, + 0.8111767190201844, + 0.7431131685234503, + 0.7315974018508938, + 0.8272586234975129, + 0.31886094167637585 + ], + "eval_runtime": 77.2989, + "eval_samples_per_second": 1.151, + "eval_steps_per_second": 0.155, + "step": 10600 + }, + { + "epoch": 4.82, + "learning_rate": 2.9414434861552432e-06, + "loss": 0.0955, + "step": 10610 + }, + { + "epoch": 4.82, + "learning_rate": 2.868815251929188e-06, + "loss": 0.0779, + "step": 10620 + }, + { + "epoch": 4.83, + "learning_rate": 2.7961870177031325e-06, + "loss": 0.0716, + "step": 10630 + }, + { + "epoch": 4.83, + "learning_rate": 2.723558783477077e-06, + "loss": 0.083, + "step": 10640 + }, + { + "epoch": 4.83, + "learning_rate": 2.6509305492510217e-06, + "loss": 0.0848, + "step": 10650 + }, + { + "epoch": 4.84, + "learning_rate": 2.5783023150249663e-06, + "loss": 0.0807, + 
"step": 10660 + }, + { + "epoch": 4.84, + "learning_rate": 2.505674080798911e-06, + "loss": 0.0868, + "step": 10670 + }, + { + "epoch": 4.85, + "learning_rate": 2.4330458465728555e-06, + "loss": 0.0808, + "step": 10680 + }, + { + "epoch": 4.85, + "learning_rate": 2.3604176123468e-06, + "loss": 0.0851, + "step": 10690 + }, + { + "epoch": 4.86, + "learning_rate": 2.2877893781207447e-06, + "loss": 0.071, + "step": 10700 + }, + { + "epoch": 4.86, + "eval_loss": 0.14659462869167328, + "eval_mean_accuracy": 0.8008380411850532, + "eval_mean_iou": 0.6962789687736569, + "eval_overall_accuracy": 0.9611141333419285, + "eval_per_category_accuracy": [ + 0.992941419623842, + 0.7355689049369323, + 0.9125588065368109, + 0.735485621269669, + 0.8745901240039987, + 0.7635787121476424, + 0.8998684608879934, + 0.7497475582241827, + 0.33652353854112776, + 0.7513748473748474, + 0.7542741348442819, + 0.9236477644277338, + 0.8949292304372066, + 0.8923235309645742, + 0.8587094757716848, + 0.8168743576567318, + 0.9007075566307591, + 0.6213806970509383 + ], + "eval_per_category_iou": [ + 0.9854991646107671, + 0.6764246042972647, + 0.8162711290893151, + 0.6299889618311741, + 0.7792439461713443, + 0.6550150680286927, + 0.8416806760981469, + 0.5569122436027313, + 0.2950380986937591, + 0.5779741376071651, + 0.6005717642451537, + 0.8539996563837682, + 0.8179121037167923, + 0.8140430023801019, + 0.7433952405611305, + 0.7322987446735, + 0.8326487183436881, + 0.3241041775913302 + ], + "eval_runtime": 72.8009, + "eval_samples_per_second": 1.223, + "eval_steps_per_second": 0.165, + "step": 10700 + }, + { + "epoch": 4.86, + "learning_rate": 2.2151611438946893e-06, + "loss": 0.0792, + "step": 10710 + }, + { + "epoch": 4.87, + "learning_rate": 2.142532909668634e-06, + "loss": 0.0867, + "step": 10720 + }, + { + "epoch": 4.87, + "learning_rate": 2.0699046754425785e-06, + "loss": 0.0793, + "step": 10730 + }, + { + "epoch": 4.88, + "learning_rate": 1.997276441216523e-06, + "loss": 0.0739, + "step": 10740 + }, + { + "epoch": 4.88, + "learning_rate": 1.9246482069904677e-06, + "loss": 0.0698, + "step": 10750 + }, + { + "epoch": 4.88, + "learning_rate": 1.8520199727644123e-06, + "loss": 0.0847, + "step": 10760 + }, + { + "epoch": 4.89, + "learning_rate": 1.779391738538357e-06, + "loss": 0.0827, + "step": 10770 + }, + { + "epoch": 4.89, + "learning_rate": 1.7067635043123015e-06, + "loss": 0.073, + "step": 10780 + }, + { + "epoch": 4.9, + "learning_rate": 1.6341352700862462e-06, + "loss": 0.0716, + "step": 10790 + }, + { + "epoch": 4.9, + "learning_rate": 1.5615070358601908e-06, + "loss": 0.0727, + "step": 10800 + }, + { + "epoch": 4.9, + "eval_loss": 0.14799156785011292, + "eval_mean_accuracy": 0.8013999895039033, + "eval_mean_iou": 0.6942882071400502, + "eval_overall_accuracy": 0.960845904403858, + "eval_per_category_accuracy": [ + 0.992796088894924, + 0.7337392120986415, + 0.9124745515571696, + 0.7321622354856213, + 0.875763403691234, + 0.763996043084451, + 0.8989167162548239, + 0.733038531917853, + 0.344593895499224, + 0.749030525030525, + 0.7541364181864684, + 0.9257748099299024, + 0.89742010323184, + 0.8935730250074794, + 0.8617056820141403, + 0.8198805241521069, + 0.9070425490553055, + 0.6291554959785522 + ], + "eval_per_category_iou": [ + 0.9855499280467153, + 0.6765452373842938, + 0.8160740126747936, + 0.6306595781971139, + 0.7764124356504543, + 0.6527826516789489, + 0.8405210000911066, + 0.5506681756254308, + 0.2954928577765948, + 0.5789724036392465, + 0.6016685109756001, + 0.8536446656094493, + 0.8161573784434712, + 
0.8133254310862653, + 0.7438929646394395, + 0.7333804483975133, + 0.8340819644686327, + 0.29735808413583376 + ], + "eval_runtime": 77.2539, + "eval_samples_per_second": 1.152, + "eval_steps_per_second": 0.155, + "step": 10800 + }, + { + "epoch": 4.91, + "learning_rate": 1.4888788016341354e-06, + "loss": 0.0762, + "step": 10810 + }, + { + "epoch": 4.91, + "learning_rate": 1.4162505674080802e-06, + "loss": 0.0863, + "step": 10820 + }, + { + "epoch": 4.92, + "learning_rate": 1.3436223331820248e-06, + "loss": 0.0763, + "step": 10830 + }, + { + "epoch": 4.92, + "learning_rate": 1.2709940989559694e-06, + "loss": 0.0731, + "step": 10840 + }, + { + "epoch": 4.93, + "learning_rate": 1.1983658647299138e-06, + "loss": 0.0808, + "step": 10850 + }, + { + "epoch": 4.93, + "learning_rate": 1.1257376305038586e-06, + "loss": 0.0837, + "step": 10860 + }, + { + "epoch": 4.93, + "learning_rate": 1.0531093962778032e-06, + "loss": 0.0825, + "step": 10870 + }, + { + "epoch": 4.94, + "learning_rate": 9.804811620517476e-07, + "loss": 0.0758, + "step": 10880 + }, + { + "epoch": 4.94, + "learning_rate": 9.078529278256923e-07, + "loss": 0.0887, + "step": 10890 + }, + { + "epoch": 4.95, + "learning_rate": 8.352246935996369e-07, + "loss": 0.0735, + "step": 10900 + }, + { + "epoch": 4.95, + "eval_loss": 0.14808058738708496, + "eval_mean_accuracy": 0.8014247245983838, + "eval_mean_iou": 0.6946340248335119, + "eval_overall_accuracy": 0.9609660030750746, + "eval_per_category_accuracy": [ + 0.9928440147536206, + 0.731634255736006, + 0.914145895234951, + 0.7319587628865979, + 0.8729063647335454, + 0.7649786476711167, + 0.9010411773098511, + 0.7426069332524075, + 0.34319710294878425, + 0.7453089133089134, + 0.7533199551437172, + 0.9252252572815726, + 0.8937917018865889, + 0.8945321436742164, + 0.8619050698396276, + 0.8202081192189106, + 0.9052096286947694, + 0.6308310991957105 + ], + "eval_per_category_iou": [ + 0.9855369404856755, + 0.6766247379454927, + 0.8163059116530241, + 0.6302633884249256, + 0.7772533853856423, + 0.6539899119561442, + 0.8418459327607872, + 0.5540958550165581, + 0.2964253798033959, + 0.5797948328267477, + 0.6018815429595398, + 0.853853872115297, + 0.8169328955317111, + 0.814172332876833, + 0.7441031700985363, + 0.7332648057011927, + 0.8343330403893027, + 0.29273451107240606 + ], + "eval_runtime": 77.3655, + "eval_samples_per_second": 1.15, + "eval_steps_per_second": 0.155, + "step": 10900 + }, + { + "epoch": 4.95, + "learning_rate": 7.625964593735815e-07, + "loss": 0.0855, + "step": 10910 + }, + { + "epoch": 4.96, + "learning_rate": 6.899682251475262e-07, + "loss": 0.0784, + "step": 10920 + }, + { + "epoch": 4.96, + "learning_rate": 6.173399909214707e-07, + "loss": 0.0872, + "step": 10930 + }, + { + "epoch": 4.97, + "learning_rate": 5.447117566954155e-07, + "loss": 0.077, + "step": 10940 + }, + { + "epoch": 4.97, + "learning_rate": 4.7208352246936e-07, + "loss": 0.0806, + "step": 10950 + }, + { + "epoch": 4.98, + "learning_rate": 3.994552882433046e-07, + "loss": 0.0794, + "step": 10960 + }, + { + "epoch": 4.98, + "learning_rate": 3.2682705401724927e-07, + "loss": 0.0754, + "step": 10970 + }, + { + "epoch": 4.98, + "learning_rate": 2.541988197911938e-07, + "loss": 0.0779, + "step": 10980 + }, + { + "epoch": 4.99, + "learning_rate": 1.8157058556513846e-07, + "loss": 0.0931, + "step": 10990 + }, + { + "epoch": 4.99, + "learning_rate": 1.0894235133908308e-07, + "loss": 0.0853, + "step": 11000 + }, + { + "epoch": 4.99, + "eval_loss": 0.1483118236064911, + "eval_mean_accuracy": 0.8013773443925859, + 
"eval_mean_iou": 0.6942808214048504, + "eval_overall_accuracy": 0.9608961812565836, + "eval_per_category_accuracy": [ + 0.99286914254759, + 0.7324762382810602, + 0.914249064597777, + 0.7332474226804123, + 0.8738827255492974, + 0.7648064158559259, + 0.8971837406521974, + 0.738701537049782, + 0.34500775995861355, + 0.743003663003663, + 0.754480709831002, + 0.9248298956640691, + 0.8952934194198154, + 0.896274395931225, + 0.8613392395240559, + 0.8215313463514903, + 0.9057897449031546, + 0.6298257372654156 + ], + "eval_per_category_iou": [ + 0.9855223246429033, + 0.6765220512360357, + 0.8160509554140127, + 0.6297914482115811, + 0.7767318840844862, + 0.6542057486939079, + 0.8407320751640647, + 0.5527613058975714, + 0.29650542415080916, + 0.5793839404049175, + 0.6019384711976142, + 0.8538285772687122, + 0.8154169345492815, + 0.8131239148222803, + 0.7433172737080993, + 0.7330964117849363, + 0.835010226344999, + 0.2931158177110952 + ], + "eval_runtime": 72.1878, + "eval_samples_per_second": 1.233, + "eval_steps_per_second": 0.166, + "step": 11000 + } + ], + "max_steps": 11015, + "num_train_epochs": 5, + "total_flos": 1.1357438206053188e+19, + "trial_name": null, + "trial_params": null +} diff --git a/segformer_b2_clothes/training_args.bin b/segformer_b2_clothes/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..15e36311880aac16148c8af049028f0b27e16a95 --- /dev/null +++ b/segformer_b2_clothes/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:210f58c34439201a03f7a2e923b10e2a9b03a8943740f452ae4e8f57ebcfc186 +size 3323 diff --git a/segformer_b3_clothes/.gitattributes b/segformer_b3_clothes/.gitattributes new file mode 100644 index 0000000000000000000000000000000000000000..a6344aac8c09253b3b630fb776ae94478aa0275b --- /dev/null +++ b/segformer_b3_clothes/.gitattributes @@ -0,0 +1,35 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text diff --git a/segformer_b3_clothes/README.md b/segformer_b3_clothes/README.md new file mode 100644 index 
0000000000000000000000000000000000000000..4532afdab4370411f96b728ae990365969d799f3 --- /dev/null +++ b/segformer_b3_clothes/README.md @@ -0,0 +1,112 @@ +--- +license: mit +tags: +- vision +- image-segmentation +widget: +- src: >- + https://images.unsplash.com/photo-1643310325061-2beef64926a5?ixlib=rb-4.0.3&ixid=MnwxMjA3fDB8MHxzZWFyY2h8Nnx8cmFjb29uc3xlbnwwfHwwfHw%3D&w=1000&q=80 + example_title: Person +- src: >- + https://freerangestock.com/sample/139043/young-man-standing-and-leaning-on-car.jpg + example_title: Person +datasets: +- mattmdjaga/human_parsing_dataset +pipeline_tag: image-segmentation +--- +# Segformer B3 fine-tuned for clothes segmentation + +A SegFormer model fine-tuned on the [ATR dataset](https://github.com/lemondan/HumanParsing-Dataset) for clothes segmentation; it can also be used for human segmentation. +The dataset on Hugging Face is called "mattmdjaga/human_parsing_dataset". + + +**NEW** - +**[Training code](https://github.com/mattmdjaga/segformer_b2_clothes)**. Right now it only contains the raw code with some comments, but I'll soon add a Colab notebook version and a blog post to make it more user-friendly. + +```python +from transformers import SegformerImageProcessor, AutoModelForSemanticSegmentation +from PIL import Image +import requests +import matplotlib.pyplot as plt +import torch.nn as nn + +processor = SegformerImageProcessor.from_pretrained("sayeed99/segformer_b3_clothes") +model = AutoModelForSemanticSegmentation.from_pretrained("sayeed99/segformer_b3_clothes") + +url = "https://plus.unsplash.com/premium_photo-1673210886161-bfcc40f54d1f?ixlib=rb-4.0.3&ixid=MnwxMjA3fDB8MHxzZWFyY2h8MXx8cGVyc29uJTIwc3RhbmRpbmd8ZW58MHx8MHx8&w=1000&q=80" + +image = Image.open(requests.get(url, stream=True).raw) +inputs = processor(images=image, return_tensors="pt") + +outputs = model(**inputs) +logits = outputs.logits.cpu() + +upsampled_logits = nn.functional.interpolate( + logits, + size=image.size[::-1], + mode="bilinear", + align_corners=False, +) + +pred_seg = upsampled_logits.argmax(dim=1)[0] +plt.imshow(pred_seg) +``` + +Labels: 0: "Background", 1: "Hat", 2: "Hair", 3: "Sunglasses", 4: "Upper-clothes", 5: "Skirt", 6: "Pants", 7: "Dress", 8: "Belt", 9: "Left-shoe", 10: "Right-shoe", 11: "Face", 12: "Left-leg", 13: "Right-leg", 14: "Left-arm", 15: "Right-arm", 16: "Bag", 17: "Scarf" + +### Evaluation + +| Label Index | Label Name | Category Accuracy | Category IoU | +|:-------------:|:----------------:|:-----------------:|:------------:| +| 0 | Background | 0.99 | 0.99 | +| 1 | Hat | 0.73 | 0.68 | +| 2 | Hair | 0.91 | 0.82 | +| 3 | Sunglasses | 0.73 | 0.63 | +| 4 | Upper-clothes | 0.87 | 0.78 | +| 5 | Skirt | 0.76 | 0.65 | +| 6 | Pants | 0.90 | 0.84 | +| 7 | Dress | 0.74 | 0.55 | +| 8 | Belt | 0.35 | 0.30 | +| 9 | Left-shoe | 0.74 | 0.58 | +| 10 | Right-shoe | 0.75 | 0.60 | +| 11 | Face | 0.92 | 0.85 | +| 12 | Left-leg | 0.90 | 0.82 | +| 13 | Right-leg | 0.90 | 0.81 | +| 14 | Left-arm | 0.86 | 0.74 | +| 15 | Right-arm | 0.82 | 0.73 | +| 16 | Bag | 0.91 | 0.84 | +| 17 | Scarf | 0.63 | 0.29 | + +Overall Evaluation Metrics: +- Evaluation Loss: 0.15 +- Mean Accuracy: 0.80 +- Mean IoU: 0.69 + +### License + +The license for this model can be found [here](https://github.com/NVlabs/SegFormer/blob/master/LICENSE). + +### BibTeX entry and citation info + +```bibtex +@article{DBLP:journals/corr/abs-2105-15203, + author = {Enze Xie and + Wenhai Wang and + Zhiding Yu and + Anima Anandkumar and + Jose M.
Alvarez and + Ping Luo}, + title = {SegFormer: Simple and Efficient Design for Semantic Segmentation with + Transformers}, + journal = {CoRR}, + volume = {abs/2105.15203}, + year = {2021}, + url = {https://arxiv.org/abs/2105.15203}, + eprinttype = {arXiv}, + eprint = {2105.15203}, + timestamp = {Wed, 02 Jun 2021 11:46:42 +0200}, + biburl = {https://dblp.org/rec/journals/corr/abs-2105-15203.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} +``` \ No newline at end of file diff --git a/segformer_b3_clothes/config.json b/segformer_b3_clothes/config.json new file mode 100644 index 0000000000000000000000000000000000000000..8352c4562bb0e1f72767dcb170ad6f3f56007836 --- /dev/null +++ b/segformer_b3_clothes/config.json @@ -0,0 +1,110 @@ +{ + "_name_or_path": "nvidia/mit-b3", + "architectures": [ + "SegformerForSemanticSegmentation" + ], + "attention_probs_dropout_prob": 0.0, + "classifier_dropout_prob": 0.1, + "decoder_hidden_size": 768, + "depths": [ + 3, + 4, + 18, + 3 + ], + "downsampling_rates": [ + 1, + 4, + 8, + 16 + ], + "drop_path_rate": 0.1, + "hidden_act": "gelu", + "hidden_dropout_prob": 0.0, + "hidden_sizes": [ + 64, + 128, + 320, + 512 + ], + "id2label": { + "0": "Background", + "1": "Hat", + "10": "Right-shoe", + "11": "Face", + "12": "Left-leg", + "13": "Right-leg", + "14": "Left-arm", + "15": "Right-arm", + "16": "Bag", + "17": "Scarf", + "2": "Hair", + "3": "Sunglasses", + "4": "Upper-clothes", + "5": "Skirt", + "6": "Pants", + "7": "Dress", + "8": "Belt", + "9": "Left-shoe" + }, + "image_size": 224, + "initializer_range": 0.02, + "label2id": { + "Background": "0", + "Bag": "16", + "Belt": "8", + "Dress": "7", + "Face": "11", + "Hair": "2", + "Hat": "1", + "Left-arm": "14", + "Left-leg": "12", + "Left-shoe": "9", + "Pants": "6", + "Right-arm": "15", + "Right-leg": "13", + "Right-shoe": "10", + "Scarf": "17", + "Skirt": "5", + "Sunglasses": "3", + "Upper-clothes": "4" + }, + "layer_norm_eps": 1e-06, + "mlp_ratios": [ + 4, + 4, + 4, + 4 + ], + "model_type": "segformer", + "num_attention_heads": [ + 1, + 2, + 5, + 8 + ], + "num_channels": 3, + "num_encoder_blocks": 4, + "patch_sizes": [ + 7, + 3, + 3, + 3 + ], + "reshape_last_stage": true, + "semantic_loss_ignore_index": 255, + "sr_ratios": [ + 8, + 4, + 2, + 1 + ], + "strides": [ + 4, + 2, + 2, + 2 + ], + "torch_dtype": "float32", + "transformers_version": "4.38.1" +} diff --git a/segformer_b3_clothes/model.safetensors b/segformer_b3_clothes/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fb5506b45fc6b66daa05add78f13412a4776d0cd --- /dev/null +++ b/segformer_b3_clothes/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f70ae566c5773fb335796ebaa8acc924ac25eb97222c2b2967d44d2fc11568e6 +size 189029000 diff --git a/segformer_b3_clothes/preprocessor_config.json b/segformer_b3_clothes/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..b2340cf4e53b37fda4f5b92d28f11c0f33c3d0fd --- /dev/null +++ b/segformer_b3_clothes/preprocessor_config.json @@ -0,0 +1,23 @@ +{ + "do_normalize": true, + "do_reduce_labels": false, + "do_rescale": true, + "do_resize": true, + "image_mean": [ + 0.485, + 0.456, + 0.406 + ], + "image_processor_type": "SegformerImageProcessor", + "image_std": [ + 0.229, + 0.224, + 0.225 + ], + "resample": 2, + "rescale_factor": 0.00392156862745098, + "size": { + "height": 512, + "width": 512 + } +} diff --git a/segformer_b3_fashion/.gitattributes b/segformer_b3_fashion/.gitattributes 
new file mode 100644 index 0000000000000000000000000000000000000000..a6344aac8c09253b3b630fb776ae94478aa0275b --- /dev/null +++ b/segformer_b3_fashion/.gitattributes @@ -0,0 +1,35 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text diff --git a/segformer_b3_fashion/.gitignore b/segformer_b3_fashion/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..0348ea97130c017c407fcfb6fd4003859f17b84c --- /dev/null +++ b/segformer_b3_fashion/.gitignore @@ -0,0 +1 @@ +checkpoint-*/ \ No newline at end of file diff --git a/segformer_b3_fashion/README.md b/segformer_b3_fashion/README.md new file mode 100644 index 0000000000000000000000000000000000000000..4aa7d6b75f0baf02db7064559e1864163d7a35b0 --- /dev/null +++ b/segformer_b3_fashion/README.md @@ -0,0 +1,92 @@ +--- +license: other +tags: +- vision +- image-segmentation +- generated_from_trainer +widget: +- src: >- + https://media.istockphoto.com/id/515788534/photo/cheerful-and-confidant.jpg?s=612x612&w=0&k=20&c=T0Z4DfameRpyGhzevPomrm-wjZp7wmGjpAyjGcTzpkA= + example_title: Person +- src: >- + https://storage.googleapis.com/pai-images/1484fd9ea9d746eb9f1de0d6778dbea2.jpeg + example_title: Person +datasets: +- sayeed99/fashion_segmentation +model-index: +- name: segformer-b3-fashion + results: [] +pipeline_tag: image-segmentation +--- + + +# segformer-b3-fashion + +This model is a fine-tuned version of [nvidia/mit-b3](https://huggingface.co/nvidia/mit-b3) on the sayeed99/fashion_segmentation dataset using original image sizes without resizing. 
+ + +```python +from transformers import SegformerImageProcessor, AutoModelForSemanticSegmentation +from PIL import Image +import requests +import matplotlib.pyplot as plt +import torch.nn as nn + +processor = SegformerImageProcessor.from_pretrained("sayeed99/segformer-b3-fashion") +model = AutoModelForSemanticSegmentation.from_pretrained("sayeed99/segformer-b3-fashion") + +url = "https://plus.unsplash.com/premium_photo-1673210886161-bfcc40f54d1f?ixlib=rb-4.0.3&ixid=MnwxMjA3fDB8MHxzZWFyY2h8MXx8cGVyc29uJTIwc3RhbmRpbmd8ZW58MHx8MHx8&w=1000&q=80" + +image = Image.open(requests.get(url, stream=True).raw) +inputs = processor(images=image, return_tensors="pt") + +outputs = model(**inputs) +logits = outputs.logits.cpu() + +upsampled_logits = nn.functional.interpolate( + logits, + size=image.size[::-1], + mode="bilinear", + align_corners=False, +) + +pred_seg = upsampled_logits.argmax(dim=1)[0] +plt.imshow(pred_seg) +``` + +Labels : {"0":"Unlabelled", "1": "shirt, blouse", "2": "top, t-shirt, sweatshirt", "3": "sweater", "4": "cardigan", "5": "jacket", "6": "vest", "7": "pants", "8": "shorts", "9": "skirt", "10": "coat", "11": "dress", "12": "jumpsuit", "13": "cape", "14": "glasses", "15": "hat", "16": "headband, head covering, hair accessory", "17": "tie", "18": "glove", "19": "watch", "20": "belt", "21": "leg warmer", "22": "tights, stockings", "23": "sock", "24": "shoe", "25": "bag, wallet", "26": "scarf", "27": "umbrella", "28": "hood", "29": "collar", "30": "lapel", "31": "epaulette", "32": "sleeve", "33": "pocket", "34": "neckline", "35": "buckle", "36": "zipper", "37": "applique", "38": "bead", "39": "bow", "40": "flower", "41": "fringe", "42": "ribbon", "43": "rivet", "44": "ruffle", "45": "sequin", "46": "tassel"} + +### Framework versions + +- Transformers 4.30.0 +- Pytorch 2.2.2+cu121 +- Datasets 2.18.0 +- Tokenizers 0.13.3 + + +### License + +The license for this model can be found [here](https://github.com/NVlabs/SegFormer/blob/master/LICENSE). + +### BibTeX entry and citation info + +```bibtex +@article{DBLP:journals/corr/abs-2105-15203, + author = {Enze Xie and + Wenhai Wang and + Zhiding Yu and + Anima Anandkumar and + Jose M. 
Alvarez and + Ping Luo}, + title = {SegFormer: Simple and Efficient Design for Semantic Segmentation with + Transformers}, + journal = {CoRR}, + volume = {abs/2105.15203}, + year = {2021}, + url = {https://arxiv.org/abs/2105.15203}, + eprinttype = {arXiv}, + eprint = {2105.15203}, + timestamp = {Wed, 02 Jun 2021 11:46:42 +0200}, + biburl = {https://dblp.org/rec/journals/corr/abs-2105-15203.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} \ No newline at end of file diff --git a/segformer_b3_fashion/config.json b/segformer_b3_fashion/config.json new file mode 100644 index 0000000000000000000000000000000000000000..8fb2769b75445217cf9bc737f56abd740f313f1d --- /dev/null +++ b/segformer_b3_fashion/config.json @@ -0,0 +1,168 @@ +{ + "_name_or_path": "nvidia/mit-b3", + "architectures": [ + "SegformerForSemanticSegmentation" + ], + "attention_probs_dropout_prob": 0.0, + "classifier_dropout_prob": 0.1, + "decoder_hidden_size": 768, + "depths": [ + 3, + 4, + 18, + 3 + ], + "downsampling_rates": [ + 1, + 4, + 8, + 16 + ], + "drop_path_rate": 0.1, + "hidden_act": "gelu", + "hidden_dropout_prob": 0.0, + "hidden_sizes": [ + 64, + 128, + 320, + 512 + ], + "id2label": { + "0": "unlabelled", + "1": "shirt, blouse", + "2": "top, t-shirt, sweatshirt", + "3": "sweater", + "4": "cardigan", + "5": "jacket", + "6": "vest", + "7": "pants", + "8": "shorts", + "9": "skirt", + "10": "coat", + "11": "dress", + "12": "jumpsuit", + "13": "cape", + "14": "glasses", + "15": "hat", + "16": "headband, head covering, hair accessory", + "17": "tie", + "18": "glove", + "19": "watch", + "20": "belt", + "21": "leg warmer", + "22": "tights, stockings", + "23": "sock", + "24": "shoe", + "25": "bag, wallet", + "26": "scarf", + "27": "umbrella", + "28": "hood", + "29": "collar", + "30": "lapel", + "31": "epaulette", + "32": "sleeve", + "33": "pocket", + "34": "neckline", + "35": "buckle", + "36": "zipper", + "37": "applique", + "38": "bead", + "39": "bow", + "40": "flower", + "41": "fringe", + "42": "ribbon", + "43": "rivet", + "44": "ruffle", + "45": "sequin", + "46": "tassel" + }, + "image_size": 224, + "initializer_range": 0.02, + "label2id": { + "applique": 37, + "bag, wallet": 25, + "bead": 38, + "belt": 20, + "bow": 39, + "buckle": 35, + "cape": 13, + "cardigan": 4, + "coat": 10, + "collar": 29, + "dress": 11, + "epaulette": 31, + "flower": 40, + "fringe": 41, + "glasses": 14, + "glove": 18, + "hat": 15, + "headband, head covering, hair accessory": 16, + "hood": 28, + "jacket": 5, + "jumpsuit": 12, + "lapel": 30, + "leg warmer": 21, + "neckline": 34, + "pants": 7, + "pocket": 33, + "ribbon": 42, + "rivet": 43, + "ruffle": 44, + "scarf": 26, + "sequin": 45, + "shirt, blouse": 1, + "shoe": 24, + "shorts": 8, + "skirt": 9, + "sleeve": 32, + "sock": 23, + "sweater": 3, + "tassel": 46, + "tie": 17, + "tights, stockings": 22, + "top, t-shirt, sweatshirt": 2, + "umbrella": 27, + "unlabelled": 0, + "vest": 6, + "watch": 19, + "zipper": 36 + }, + "layer_norm_eps": 1e-06, + "mlp_ratios": [ + 4, + 4, + 4, + 4 + ], + "model_type": "segformer", + "num_attention_heads": [ + 1, + 2, + 5, + 8 + ], + "num_channels": 3, + "num_encoder_blocks": 4, + "patch_sizes": [ + 7, + 3, + 3, + 3 + ], + "reshape_last_stage": true, + "semantic_loss_ignore_index": 255, + "sr_ratios": [ + 8, + 4, + 2, + 1 + ], + "strides": [ + 4, + 2, + 2, + 2 + ], + "torch_dtype": "float32", + "transformers_version": "4.30.0" +} diff --git a/segformer_b3_fashion/model.safetensors b/segformer_b3_fashion/model.safetensors new file mode 100644 index 
0000000000000000000000000000000000000000..0644c245dfa3ba121e7330a75d9a908ce8e47e60 --- /dev/null +++ b/segformer_b3_fashion/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3f5b30179f1480d329224d089f6d286580142c2b12846d08de814a48a81f42f +size 189118204 diff --git a/segformer_b3_fashion/preprocessor_config.json b/segformer_b3_fashion/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..b2340cf4e53b37fda4f5b92d28f11c0f33c3d0fd --- /dev/null +++ b/segformer_b3_fashion/preprocessor_config.json @@ -0,0 +1,23 @@ +{ + "do_normalize": true, + "do_reduce_labels": false, + "do_rescale": true, + "do_resize": true, + "image_mean": [ + 0.485, + 0.456, + 0.406 + ], + "image_processor_type": "SegformerImageProcessor", + "image_std": [ + 0.229, + 0.224, + 0.225 + ], + "resample": 2, + "rescale_factor": 0.00392156862745098, + "size": { + "height": 512, + "width": 512 + } +} diff --git a/segformer_b3_fashion/pytorch_model.bin b/segformer_b3_fashion/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..b8a03b44cb42b12c963f20ef4413ce76b693c4e6 --- /dev/null +++ b/segformer_b3_fashion/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec5749e86e5efad5d9dbbf7c2e4b996d675548dc22f26b06c0f1b6fc2e8bc1e2 +size 189264154 diff --git a/segformer_b3_fashion/training_args.bin b/segformer_b3_fashion/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..26d59012065dc479a57956e8299cba6a6ebf82c1 --- /dev/null +++ b/segformer_b3_fashion/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f871f7bdbc3af72746e7b76beb628f6365db08040b58b3071238dca986de97ca +size 4408 diff --git a/unet/.gitattributes b/unet/.gitattributes new file mode 100644 index 0000000000000000000000000000000000000000..a6344aac8c09253b3b630fb776ae94478aa0275b --- /dev/null +++ b/unet/.gitattributes @@ -0,0 +1,35 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text 
diff --git a/unet/svdq-int4-flux.1-dev/README.md b/unet/svdq-int4-flux.1-dev/README.md new file mode 100644 index 0000000000000000000000000000000000000000..6ac595e67c2977a23ad08abf114607e857638087 --- /dev/null +++ b/unet/svdq-int4-flux.1-dev/README.md @@ -0,0 +1,102 @@ +--- +license: other +license_name: flux-1-dev-non-commercial-license +tags: +- text-to-image +- SVDQuant +- FLUX.1-dev +- INT4 +- FLUX.1 +- Diffusion +- Quantization +- ICLR2025 +language: +- en +base_model: +- black-forest-labs/FLUX.1-dev +base_model_relation: quantized +pipeline_tag: text-to-image +datasets: +- mit-han-lab/svdquant-datasets +library_name: diffusers +--- + +

+<!-- logo -->
+Quantization Library: DeepCompressor   Inference Engine: Nunchaku
+
+[Paper]  [Code]  [Demo]  [Website]  [Blog]
+ +![teaser](https://github.com/mit-han-lab/nunchaku/raw/refs/heads/main/assets/teaser.jpg) +SVDQuant is a post-training quantization technique for 4-bit weights and activations that maintains visual fidelity well. On 12B FLUX.1-dev, it achieves 3.6× memory reduction compared to the BF16 model. By eliminating CPU offloading, it offers an 8.7× speedup over the 16-bit model on a 16GB laptop 4090 GPU, 3× faster than the NF4 W4A16 baseline. On PixArt-Σ, it demonstrates significantly superior visual quality over other W4A4 or even W4A8 baselines. "E2E" means the end-to-end latency including the text encoder and VAE decoder. + +## Method +#### Quantization Method -- SVDQuant + +![intuition](https://github.com/mit-han-lab/nunchaku/raw/refs/heads/main/assets/intuition.gif) +Overview of SVDQuant. Stage 1: Originally, both the activation ***X*** and weights ***W*** contain outliers, making 4-bit quantization challenging. Stage 2: We migrate the outliers from activations to weights, resulting in the updated activation and weight. While the activation becomes easier to quantize, the weight now becomes more difficult. Stage 3: SVDQuant further decomposes the weight into a low-rank component and a residual with SVD. Thus, the quantization difficulty is alleviated by the low-rank branch, which runs at 16-bit precision. + +#### Nunchaku Engine Design + +![engine](https://github.com/mit-han-lab/nunchaku/raw/refs/heads/main/assets/engine.jpg) (a) Naïvely running the low-rank branch with rank 32 introduces a 57% latency overhead due to the extra read of 16-bit inputs in *Down Projection* and the extra write of 16-bit outputs in *Up Projection*. Nunchaku optimizes this overhead with kernel fusion. (b) *Down Projection* and *Quantize* kernels use the same input, while *Up Projection* and *4-Bit Compute* kernels share the same output. To reduce data movement overhead, we fuse the first two and the latter two kernels together. + +## Model Description + +- **Developed by:** MIT, NVIDIA, CMU, Princeton, UC Berkeley, SJTU and Pika Labs +- **Model type:** INT W4A4 model +- **Model size:** 6.64GB +- **Model resolution:** The number of pixels needs to be a multiple of 65,536 (e.g., 1024×1024 = 16 × 65,536). +- **License:** Apache-2.0 + +## Usage + +### Diffusers + +Please follow the instructions in [mit-han-lab/nunchaku](https://github.com/mit-han-lab/nunchaku) to set up the environment. Then you can run the model with: + +```python +import torch +from diffusers import FluxPipeline + +from nunchaku.models.transformer_flux import NunchakuFluxTransformer2dModel + +transformer = NunchakuFluxTransformer2dModel.from_pretrained("mit-han-lab/svdq-int4-flux.1-dev") +pipeline = FluxPipeline.from_pretrained( + "black-forest-labs/FLUX.1-dev", transformer=transformer, torch_dtype=torch.bfloat16 +).to("cuda") +image = pipeline("A cat holding a sign that says hello world", num_inference_steps=50, guidance_scale=3.5).images[0] +image.save("example.png") +``` + +### Comfy UI + +![comfyui](https://github.com/mit-han-lab/nunchaku/blob/main/assets/comfyui.jpg?raw=true) +Please check [comfyui/README.md](comfyui/README.md) for the usage. + +## Limitations + +- The model is only runnable on NVIDIA GPUs with architectures sm_86 (Ampere: RTX 3090, A6000), sm_89 (Ada: RTX 4090), and sm_80 (A100); a minimal capability-check sketch is included below. See this [issue](https://github.com/mit-han-lab/nunchaku/issues/1) for more details. +- You may observe some slight differences from the BF16 models in detail. 
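+
+The first limitation above can be checked programmatically before loading the model. The snippet below is a minimal sketch, not part of the official nunchaku instructions; it only assumes that PyTorch with CUDA support is installed and uses `torch.cuda.get_device_capability` to map the local GPU to the `sm_XX` names listed above:
+
+```python
+import torch
+
+# Hypothetical pre-flight check (illustrative only, not part of nunchaku):
+# verify that the visible GPU reports one of the architectures listed as supported.
+assert torch.cuda.is_available(), "A CUDA-capable NVIDIA GPU is required."
+major, minor = torch.cuda.get_device_capability(0)
+arch = f"sm_{major}{minor}"
+supported = {"sm_80", "sm_86", "sm_89"}  # A100, Ampere (RTX 3090/A6000), Ada (RTX 4090)
+if arch not in supported:
+    raise RuntimeError(f"{arch} is not listed as supported for this INT4 model.")
+print(f"Detected {arch}; architecture is listed as supported.")
+```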
+ +### Citation + +If you find this model useful or relevant to your research, please cite + +```bibtex +@inproceedings{ + li2024svdquant, + title={SVDQuant: Absorbing Outliers by Low-Rank Components for 4-Bit Diffusion Models}, + author={Li*, Muyang and Lin*, Yujun and Zhang*, Zhekai and Cai, Tianle and Li, Xiuyu and Guo, Junxian and Xie, Enze and Meng, Chenlin and Zhu, Jun-Yan and Han, Song}, + booktitle={The Thirteenth International Conference on Learning Representations}, + year={2025} +} +``` \ No newline at end of file diff --git a/unet/svdq-int4-flux.1-dev/comfy_config.json b/unet/svdq-int4-flux.1-dev/comfy_config.json new file mode 100644 index 0000000000000000000000000000000000000000..66d9f3d4a3a816070585c25ce9a52d6f4212ba77 --- /dev/null +++ b/unet/svdq-int4-flux.1-dev/comfy_config.json @@ -0,0 +1,25 @@ +{ + "model_class": "Flux", + "model_config": { + "axes_dim": [ + 16, + 56, + 56 + ], + "context_in_dim": 4096, + "depth": 19, + "depth_single_blocks": 38, + "disable_unet_model_creation": true, + "guidance_embed": true, + "hidden_size": 3072, + "image_model": "flux", + "in_channels": 16, + "mlp_ratio": 4.0, + "num_heads": 24, + "out_channels": 16, + "patch_size": 2, + "qkv_bias": true, + "theta": 10000, + "vec_in_dim": 768 + } +} \ No newline at end of file diff --git a/unet/svdq-int4-flux.1-dev/config.json b/unet/svdq-int4-flux.1-dev/config.json new file mode 100644 index 0000000000000000000000000000000000000000..dbb93438161bb7fbfdcbd8fabf2b67654a38be38 --- /dev/null +++ b/unet/svdq-int4-flux.1-dev/config.json @@ -0,0 +1,14 @@ +{ + "_class_name": "FluxTransformer2DModel", + "_diffusers_version": "0.30.0.dev0", + "_name_or_path": "../checkpoints/flux-dev/transformer", + "attention_head_dim": 128, + "guidance_embeds": true, + "in_channels": 64, + "joint_attention_dim": 4096, + "num_attention_heads": 24, + "num_layers": 19, + "num_single_layers": 38, + "patch_size": 1, + "pooled_projection_dim": 768 +} diff --git a/unet/svdq-int4-flux.1-dev/unquantized_layers.safetensors b/unet/svdq-int4-flux.1-dev/unquantized_layers.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a68a34bf5f6a26c570770b85c759ef40ee7f6f24 --- /dev/null +++ b/unet/svdq-int4-flux.1-dev/unquantized_layers.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28f9c33d9a2d96d7febe5e61342bfc58033c201226c9338a65a8720e099fe812 +size 128252144 diff --git a/unet/svdq-int4-flux.1-fill-dev/.gitattributes b/unet/svdq-int4-flux.1-fill-dev/.gitattributes new file mode 100644 index 0000000000000000000000000000000000000000..8aaab9ede0abed7e7be7955f74c10363fc93e989 --- /dev/null +++ b/unet/svdq-int4-flux.1-fill-dev/.gitattributes @@ -0,0 +1,38 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text 
+*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text +sa_1543979.png filter=lfs diff=lfs merge=lfs -text +example.png filter=lfs diff=lfs merge=lfs -text +demo.jpg filter=lfs diff=lfs merge=lfs -text diff --git a/unet/svdq-int4-flux.1-fill-dev/README.md b/unet/svdq-int4-flux.1-fill-dev/README.md new file mode 100644 index 0000000000000000000000000000000000000000..b1d0162062d67ed8003d514e609829017f32e9f7 --- /dev/null +++ b/unet/svdq-int4-flux.1-fill-dev/README.md @@ -0,0 +1,117 @@ +--- +license: other +license_name: flux-1-dev-non-commercial-license +tags: +- image-to-image +- SVDQuant +- INT4 +- FLUX.1 +- Diffusion +- Quantization +- inpainting +- image-generation +- text-to-image +- ICLR2025 +- FLUX.1-Fill-dev +language: +- en +base_model: +- black-forest-labs/FLUX.1-Fill-dev +base_model_relation: quantized +pipeline_tag: image-to-image +datasets: +- mit-han-lab/svdquant-datasets +library_name: diffusers +--- + +

+<!-- logo -->
+Quantization Library: DeepCompressor   Inference Engine: Nunchaku
+
+[Paper]  [Code]  [Demo]  [Website]  [Blog]
+ +![teaser](https://huggingface.co/mit-han-lab/svdq-int4-flux.1-fill-dev/resolve/main/demo.jpg) +`svdq-int4-flux.1-fill-dev` is an INT4-quantized version of [`FLUX.1-Fill-dev`](https://huggingface.co/black-forest-labs/FLUX.1-Fill-dev), which can fill areas in existing images based on a text description. It offers approximately 4× memory savings while also running 2–3× faster than the original BF16 model. + +## Method +#### Quantization Method -- SVDQuant + +![intuition](https://github.com/mit-han-lab/nunchaku/raw/refs/heads/main/assets/intuition.gif) +Overview of SVDQuant. Stage 1: Originally, both the activation ***X*** and weights ***W*** contain outliers, making 4-bit quantization challenging. Stage 2: We migrate the outliers from activations to weights, resulting in the updated activation and weight. While the activation becomes easier to quantize, the weight now becomes more difficult. Stage 3: SVDQuant further decomposes the weight into a low-rank component and a residual with SVD. Thus, the quantization difficulty is alleviated by the low-rank branch, which runs at 16-bit precision. + +#### Nunchaku Engine Design + +![engine](https://github.com/mit-han-lab/nunchaku/raw/refs/heads/main/assets/engine.jpg) (a) Naïvely running the low-rank branch with rank 32 introduces a 57% latency overhead due to the extra read of 16-bit inputs in *Down Projection* and the extra write of 16-bit outputs in *Up Projection*. Nunchaku optimizes this overhead with kernel fusion. (b) *Down Projection* and *Quantize* kernels use the same input, while *Up Projection* and *4-Bit Compute* kernels share the same output. To reduce data movement overhead, we fuse the first two and the latter two kernels together. + +## Model Description + +- **Developed by:** MIT, NVIDIA, CMU, Princeton, UC Berkeley, SJTU and Pika Labs +- **Model type:** INT W4A4 model +- **Model size:** 6.64GB +- **Model resolution:** The number of pixels needs to be a multiple of 65,536 (e.g., 1024×1024 = 16 × 65,536). +- **License:** Apache-2.0 + +## Usage + +### Diffusers + +Please follow the instructions in [mit-han-lab/nunchaku](https://github.com/mit-han-lab/nunchaku) to set up the environment. Then you can run the model with: + +```python +import torch +from diffusers import FluxFillPipeline +from diffusers.utils import load_image + +from nunchaku.models.transformer_flux import NunchakuFluxTransformer2dModel + +image = load_image("https://huggingface.co/mit-han-lab/svdq-int4-flux.1-fill-dev/resolve/main/example.png") +mask = load_image("https://huggingface.co/mit-han-lab/svdq-int4-flux.1-fill-dev/resolve/main/mask.png") + +transformer = NunchakuFluxTransformer2dModel.from_pretrained("mit-han-lab/svdq-int4-flux.1-fill-dev") +pipe = FluxFillPipeline.from_pretrained( + "black-forest-labs/FLUX.1-Fill-dev", transformer=transformer, torch_dtype=torch.bfloat16 +).to("cuda") +image = pipe( + prompt="A wooden basket of a cat.", + image=image, + mask_image=mask, + height=1024, + width=1024, + guidance_scale=30, + num_inference_steps=50, + max_sequence_length=512, +).images[0] +image.save("flux.1-fill-dev.png") +``` + +### Comfy UI + +Work in progress. Stay tuned! + +## Limitations + +- The model is only runnable on NVIDIA GPUs with architectures sm_86 (Ampere: RTX 3090, A6000), sm_89 (Ada: RTX 4090), and sm_80 (A100). See this [issue](https://github.com/mit-han-lab/nunchaku/issues/1) for more details. +- You may observe some slight differences from the BF16 models in detail. 
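+
+The usage example above downloads a ready-made `mask.png` from the model repository. As an illustration only (the 1024×1024 size and the rectangle coordinates are arbitrary assumptions, not taken from the model card), the following minimal PIL sketch builds a mask of the same kind, using the common diffusers inpainting convention that white pixels mark the region to repaint:
+
+```python
+from PIL import Image, ImageDraw
+
+# Hypothetical example: create a 1024x1024 single-channel mask where the
+# central square is repainted by FLUX.1-Fill-dev and the rest is kept.
+mask = Image.new("L", (1024, 1024), 0)          # 0 = keep original pixels
+draw = ImageDraw.Draw(mask)
+draw.rectangle((256, 256, 768, 768), fill=255)  # 255 = region to repaint
+mask.save("mask.png")
+```
+
+The saved file can then be passed as the `mask_image` argument of `FluxFillPipeline`, exactly as in the example above.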
+ +### Citation + +If you find this model useful or relevant to your research, please cite + +```bibtex +@inproceedings{ + li2024svdquant, + title={SVDQuant: Absorbing Outliers by Low-Rank Components for 4-Bit Diffusion Models}, + author={Li*, Muyang and Lin*, Yujun and Zhang*, Zhekai and Cai, Tianle and Li, Xiuyu and Guo, Junxian and Xie, Enze and Meng, Chenlin and Zhu, Jun-Yan and Han, Song}, + booktitle={The Thirteenth International Conference on Learning Representations}, + year={2025} +} +``` \ No newline at end of file diff --git a/unet/svdq-int4-flux.1-fill-dev/comfy_config.json b/unet/svdq-int4-flux.1-fill-dev/comfy_config.json new file mode 100644 index 0000000000000000000000000000000000000000..1f8394335be113135694f6a5e5e7363f6300762e --- /dev/null +++ b/unet/svdq-int4-flux.1-fill-dev/comfy_config.json @@ -0,0 +1,25 @@ +{ + "model_class": "Flux", + "model_config": { + "axes_dim": [ + 16, + 56, + 56 + ], + "context_in_dim": 4096, + "depth": 19, + "depth_single_blocks": 38, + "disable_unet_model_creation": true, + "guidance_embed": true, + "hidden_size": 3072, + "image_model": "flux", + "in_channels": 64, + "mlp_ratio": 4.0, + "num_heads": 24, + "out_channels": 16, + "patch_size": 2, + "qkv_bias": true, + "theta": 10000, + "vec_in_dim": 768 + } +} \ No newline at end of file diff --git a/unet/svdq-int4-flux.1-fill-dev/config.json b/unet/svdq-int4-flux.1-fill-dev/config.json new file mode 100644 index 0000000000000000000000000000000000000000..9a19ee2be2959dc5e225b4d9c61f3e969dbfefb1 --- /dev/null +++ b/unet/svdq-int4-flux.1-fill-dev/config.json @@ -0,0 +1,19 @@ +{ + "_class_name": "FluxTransformer2DModel", + "_diffusers_version": "0.32.0.dev0", + "attention_head_dim": 128, + "axes_dims_rope": [ + 16, + 56, + 56 + ], + "guidance_embeds": true, + "in_channels": 384, + "joint_attention_dim": 4096, + "num_attention_heads": 24, + "num_layers": 19, + "num_single_layers": 38, + "out_channels": 64, + "patch_size": 1, + "pooled_projection_dim": 768 +} diff --git a/unet/svdq-int4-flux.1-fill-dev/demo.jpg b/unet/svdq-int4-flux.1-fill-dev/demo.jpg new file mode 100644 index 0000000000000000000000000000000000000000..d981715d7bdeffd8bd70250dea4c200da9323f22 --- /dev/null +++ b/unet/svdq-int4-flux.1-fill-dev/demo.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:29cb289fae8a82413239e7262b0d823213450c423c1e7dcf019680a7f220e482 +size 458662 diff --git a/unet/svdq-int4-flux.1-fill-dev/example.png b/unet/svdq-int4-flux.1-fill-dev/example.png new file mode 100644 index 0000000000000000000000000000000000000000..0cc3e18e52232e780bf2c0d52e08a81871d5c388 --- /dev/null +++ b/unet/svdq-int4-flux.1-fill-dev/example.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7dbcf85df1d557c3f67772c29d1d2baab1028f54e0e00ef0cbd9bd1170b22d83 +size 1360347 diff --git a/unet/svdq-int4-flux.1-fill-dev/mask.png b/unet/svdq-int4-flux.1-fill-dev/mask.png new file mode 100644 index 0000000000000000000000000000000000000000..387a210b725130f308dcb3b16216cfbc3afc14af Binary files /dev/null and b/unet/svdq-int4-flux.1-fill-dev/mask.png differ diff --git a/unet/svdq-int4-flux.1-fill-dev/unquantized_layers.safetensors b/unet/svdq-int4-flux.1-fill-dev/unquantized_layers.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cef5dee2542d0ad3513a8d17f5e27e5367bbe3c8 --- /dev/null +++ b/unet/svdq-int4-flux.1-fill-dev/unquantized_layers.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:adb0791e9f0c79286bb9f3927f9228d69d4998b3a0d5b8381efb70fc6bf84f56 +size 130218224 diff --git a/upscale_models/4xNomos8kSCHAT-L.pth b/upscale_models/4xNomos8kSCHAT-L.pth new file mode 100644 index 0000000000000000000000000000000000000000..bf2865a65cf56bdf4d372949839f9cc45540d2de --- /dev/null +++ b/upscale_models/4xNomos8kSCHAT-L.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e7726ba191fdf05b87ea8585d78164b6e96e2ee04fdbb3285c3efa37db4b5b0 +size 331564661 diff --git a/upscale_models/RealESRGAN_x2.pth b/upscale_models/RealESRGAN_x2.pth new file mode 100644 index 0000000000000000000000000000000000000000..313b87ab9359a04b0f450695b1a01a88edd4ac95 --- /dev/null +++ b/upscale_models/RealESRGAN_x2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c830d067d54fc767b9543a8432f36d91bc2de313584e8bbfe4ac26a47339e899 +size 67061725 diff --git a/vae/sdxl/animagine_vae.safetensors b/vae/sdxl/animagine_vae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5c5bfd44fb0132b3cbd9d122244f502a7cd2bb24 --- /dev/null +++ b/vae/sdxl/animagine_vae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:63aeecb90ff7bc1c115395962d3e803571385b61938377bc7089b36e81e92e2e +size 334641164 diff --git a/vae/wan/Wan2_1_VAE_bf16.safetensors b/vae/wan/Wan2_1_VAE_bf16.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ae11fc7308cd02af140c441d24dcdda7f8687b34 --- /dev/null +++ b/vae/wan/Wan2_1_VAE_bf16.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ab9a32cc2c740f6e39d80d367ce5dcc28db8c71b79b28670546b8973e9d75f9 +size 253806278