hysts HF Staff committed on
Commit
3d33f91
·
1 Parent(s): 97a2fc2
Files changed (3) hide show
  1. README.md +6 -0
  2. SOLOv2.pth +3 -0
  3. SOLOv2.yaml +493 -0
README.md ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ # Yet-Another-Anime-Segmenter
2
+
3
+ - Repo: https://github.com/zymk9/Yet-Another-Anime-Segmenter
4
+ - https://drive.google.com/file/d/1-wFdQ4jwSTeJ7wGD3YKNJdcpSS5Ho8c9/view?usp=sharing
5
+ - https://raw.githubusercontent.com/zymk9/Yet-Another-Anime-Segmenter/main/configs/SOLOv2.yaml
6
+
SOLOv2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:638a3afc4c8415d2b48cd8e3746df80a3fd83725b3c48a881c304aa197d08887
3
+ size 261510761
SOLOv2.yaml ADDED
@@ -0,0 +1,493 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ INPUT:
2
+ FORMAT: BGR
3
+ MAX_SIZE_TEST: 1600
4
+ MIN_SIZE_TEST: 800
5
+ MODEL:
6
+ ANCHOR_GENERATOR:
7
+ ANGLES:
8
+ - - -90
9
+ - 0
10
+ - 90
11
+ ASPECT_RATIOS:
12
+ - - 0.5
13
+ - 1.0
14
+ - 2.0
15
+ NAME: DefaultAnchorGenerator
16
+ OFFSET: 0.0
17
+ SIZES:
18
+ - - 32
19
+ - 64
20
+ - 128
21
+ - 256
22
+ - 512
23
+ BACKBONE:
24
+ ANTI_ALIAS: false
25
+ FREEZE_AT: 2
26
+ NAME: build_resnet_fpn_backbone
27
+ BASIS_MODULE:
28
+ ANN_SET: coco
29
+ COMMON_STRIDE: 8
30
+ CONVS_DIM: 128
31
+ IN_FEATURES:
32
+ - p3
33
+ - p4
34
+ - p5
35
+ LOSS_ON: false
36
+ LOSS_WEIGHT: 0.3
37
+ NAME: ProtoNet
38
+ NORM: SyncBN
39
+ NUM_BASES: 4
40
+ NUM_CLASSES: 1
41
+ NUM_CONVS: 3
42
+ BATEXT:
43
+ CANONICAL_SIZE: 96
44
+ CONV_DIM: 256
45
+ IN_FEATURES:
46
+ - p2
47
+ - p3
48
+ - p4
49
+ NUM_CHARS: 25
50
+ NUM_CONV: 2
51
+ POOLER_RESOLUTION:
52
+ - 8
53
+ - 32
54
+ POOLER_SCALES:
55
+ - 0.25
56
+ - 0.125
57
+ - 0.0625
58
+ RECOGNITION_LOSS: ctc
59
+ RECOGNIZER: attn
60
+ SAMPLING_RATIO: 1
61
+ VOC_SIZE: 96
62
+ BLENDMASK:
63
+ ATTN_SIZE: 14
64
+ BOTTOM_RESOLUTION: 56
65
+ INSTANCE_LOSS_WEIGHT: 1.0
66
+ POOLER_SAMPLING_RATIO: 1
67
+ POOLER_SCALES:
68
+ - 0.25
69
+ POOLER_TYPE: ROIAlignV2
70
+ TOP_INTERP: bilinear
71
+ VISUALIZE: false
72
+ BiFPN:
73
+ IN_FEATURES:
74
+ - res2
75
+ - res3
76
+ - res4
77
+ - res5
78
+ NORM: ''
79
+ NUM_REPEATS: 6
80
+ OUT_CHANNELS: 160
81
+ CONDINST:
82
+ MASK_BRANCH:
83
+ CHANNELS: 128
84
+ IN_FEATURES:
85
+ - p3
86
+ - p4
87
+ - p5
88
+ NORM: BN
89
+ NUM_CONVS: 4
90
+ OUT_CHANNELS: 8
91
+ SEMANTIC_LOSS_ON: false
92
+ MASK_HEAD:
93
+ CHANNELS: 8
94
+ DISABLE_REL_COORDS: false
95
+ NUM_LAYERS: 3
96
+ USE_FP16: false
97
+ MASK_OUT_STRIDE: 4
98
+ MAX_PROPOSALS: -1
99
+ DEVICE: cuda
100
+ DLA:
101
+ CONV_BODY: DLA34
102
+ NORM: FrozenBN
103
+ OUT_FEATURES:
104
+ - stage2
105
+ - stage3
106
+ - stage4
107
+ - stage5
108
+ FCOS:
109
+ CENTER_SAMPLE: true
110
+ FPN_STRIDES:
111
+ - 8
112
+ - 16
113
+ - 32
114
+ - 64
115
+ - 128
116
+ INFERENCE_TH_TEST: 0.05
117
+ INFERENCE_TH_TRAIN: 0.05
118
+ IN_FEATURES:
119
+ - p3
120
+ - p4
121
+ - p5
122
+ - p6
123
+ - p7
124
+ LOC_LOSS_TYPE: giou
125
+ LOSS_ALPHA: 0.25
126
+ LOSS_GAMMA: 2.0
127
+ NMS_TH: 0.6
128
+ NORM: GN
129
+ NUM_BOX_CONVS: 4
130
+ NUM_CLASSES: 1
131
+ NUM_CLS_CONVS: 4
132
+ NUM_SHARE_CONVS: 0
133
+ POST_NMS_TOPK_TEST: 100
134
+ POST_NMS_TOPK_TRAIN: 100
135
+ POS_RADIUS: 1.5
136
+ PRE_NMS_TOPK_TEST: 1000
137
+ PRE_NMS_TOPK_TRAIN: 1000
138
+ PRIOR_PROB: 0.01
139
+ SIZES_OF_INTEREST:
140
+ - 64
141
+ - 128
142
+ - 256
143
+ - 512
144
+ THRESH_WITH_CTR: false
145
+ TOP_LEVELS: 2
146
+ USE_DEFORMABLE: false
147
+ USE_RELU: true
148
+ USE_SCALE: true
149
+ YIELD_PROPOSAL: false
150
+ FPN:
151
+ FUSE_TYPE: sum
152
+ IN_FEATURES:
153
+ - res2
154
+ - res3
155
+ - res4
156
+ - res5
157
+ NORM: ''
158
+ OUT_CHANNELS: 256
159
+ KEYPOINT_ON: false
160
+ LOAD_PROPOSALS: false
161
+ MASK_ON: true
162
+ MEInst:
163
+ AGNOSTIC: true
164
+ CENTER_SAMPLE: true
165
+ DIM_MASK: 60
166
+ FLAG_PARAMETERS: false
167
+ FPN_STRIDES:
168
+ - 8
169
+ - 16
170
+ - 32
171
+ - 64
172
+ - 128
173
+ GCN_KERNEL_SIZE: 9
174
+ INFERENCE_TH_TEST: 0.05
175
+ INFERENCE_TH_TRAIN: 0.05
176
+ IN_FEATURES:
177
+ - p3
178
+ - p4
179
+ - p5
180
+ - p6
181
+ - p7
182
+ IOU_LABELS:
183
+ - 0
184
+ - 1
185
+ IOU_THRESHOLDS:
186
+ - 0.5
187
+ LAST_DEFORMABLE: false
188
+ LOC_LOSS_TYPE: giou
189
+ LOSS_ALPHA: 0.25
190
+ LOSS_GAMMA: 2.0
191
+ LOSS_ON_MASK: false
192
+ MASK_LOSS_TYPE: mse
193
+ MASK_ON: true
194
+ MASK_SIZE: 28
195
+ NMS_TH: 0.6
196
+ NORM: GN
197
+ NUM_BOX_CONVS: 4
198
+ NUM_CLASSES: 1
199
+ NUM_CLS_CONVS: 4
200
+ NUM_MASK_CONVS: 4
201
+ NUM_SHARE_CONVS: 0
202
+ PATH_COMPONENTS: datasets/coco/components/coco_2017_train_class_agnosticTrue_whitenTrue_sigmoidTrue_60.npz
203
+ POST_NMS_TOPK_TEST: 100
204
+ POST_NMS_TOPK_TRAIN: 100
205
+ POS_RADIUS: 1.5
206
+ PRE_NMS_TOPK_TEST: 1000
207
+ PRE_NMS_TOPK_TRAIN: 1000
208
+ PRIOR_PROB: 0.01
209
+ SIGMOID: true
210
+ SIZES_OF_INTEREST:
211
+ - 64
212
+ - 128
213
+ - 256
214
+ - 512
215
+ THRESH_WITH_CTR: false
216
+ TOP_LEVELS: 2
217
+ TYPE_DEFORMABLE: DCNv1
218
+ USE_DEFORMABLE: false
219
+ USE_GCN_IN_MASK: false
220
+ USE_RELU: true
221
+ USE_SCALE: true
222
+ WHITEN: true
223
+ META_ARCHITECTURE: SOLOv2
224
+ MOBILENET: false
225
+ PANOPTIC_FPN:
226
+ COMBINE:
227
+ ENABLED: true
228
+ INSTANCES_CONFIDENCE_THRESH: 0.5
229
+ OVERLAP_THRESH: 0.5
230
+ STUFF_AREA_LIMIT: 4096
231
+ INSTANCE_LOSS_WEIGHT: 1.0
232
+ PIXEL_MEAN:
233
+ - 103.53
234
+ - 116.28
235
+ - 123.675
236
+ PIXEL_STD:
237
+ - 1.0
238
+ - 1.0
239
+ - 1.0
240
+ PROPOSAL_GENERATOR:
241
+ MIN_SIZE: 0
242
+ NAME: RPN
243
+ RESNETS:
244
+ DEFORM_INTERVAL: 1
245
+ DEFORM_MODULATED: false
246
+ DEFORM_NUM_GROUPS: 1
247
+ DEFORM_ON_PER_STAGE:
248
+ - false
249
+ - false
250
+ - false
251
+ - false
252
+ DEPTH: 101
253
+ NORM: FrozenBN
254
+ NUM_GROUPS: 1
255
+ OUT_FEATURES:
256
+ - res2
257
+ - res3
258
+ - res4
259
+ - res5
260
+ RES2_OUT_CHANNELS: 256
261
+ RES5_DILATION: 1
262
+ STEM_OUT_CHANNELS: 64
263
+ STRIDE_IN_1X1: true
264
+ WIDTH_PER_GROUP: 64
265
+ RETINANET:
266
+ BBOX_REG_LOSS_TYPE: smooth_l1
267
+ BBOX_REG_WEIGHTS:
268
+ - 1.0
269
+ - 1.0
270
+ - 1.0
271
+ - 1.0
272
+ FOCAL_LOSS_ALPHA: 0.25
273
+ FOCAL_LOSS_GAMMA: 2.0
274
+ IN_FEATURES:
275
+ - p3
276
+ - p4
277
+ - p5
278
+ - p6
279
+ - p7
280
+ IOU_LABELS:
281
+ - 0
282
+ - -1
283
+ - 1
284
+ IOU_THRESHOLDS:
285
+ - 0.4
286
+ - 0.5
287
+ NMS_THRESH_TEST: 0.5
288
+ NORM: ''
289
+ NUM_CLASSES: 1
290
+ NUM_CONVS: 4
291
+ PRIOR_PROB: 0.01
292
+ SCORE_THRESH_TEST: 0.05
293
+ SMOOTH_L1_LOSS_BETA: 0.1
294
+ TOPK_CANDIDATES_TEST: 1000
295
+ ROI_BOX_CASCADE_HEAD:
296
+ BBOX_REG_WEIGHTS:
297
+ - - 10.0
298
+ - 10.0
299
+ - 5.0
300
+ - 5.0
301
+ - - 20.0
302
+ - 20.0
303
+ - 10.0
304
+ - 10.0
305
+ - - 30.0
306
+ - 30.0
307
+ - 15.0
308
+ - 15.0
309
+ IOUS:
310
+ - 0.5
311
+ - 0.6
312
+ - 0.7
313
+ ROI_BOX_HEAD:
314
+ BBOX_REG_LOSS_TYPE: smooth_l1
315
+ BBOX_REG_LOSS_WEIGHT: 1.0
316
+ BBOX_REG_WEIGHTS:
317
+ - 10.0
318
+ - 10.0
319
+ - 5.0
320
+ - 5.0
321
+ CLS_AGNOSTIC_BBOX_REG: false
322
+ CONV_DIM: 256
323
+ FC_DIM: 1024
324
+ NAME: ''
325
+ NORM: ''
326
+ NUM_CONV: 0
327
+ NUM_FC: 0
328
+ POOLER_RESOLUTION: 14
329
+ POOLER_SAMPLING_RATIO: 0
330
+ POOLER_TYPE: ROIAlignV2
331
+ SMOOTH_L1_BETA: 0.0
332
+ TRAIN_ON_PRED_BOXES: false
333
+ ROI_HEADS:
334
+ BATCH_SIZE_PER_IMAGE: 512
335
+ IN_FEATURES:
336
+ - res4
337
+ IOU_LABELS:
338
+ - 0
339
+ - 1
340
+ IOU_THRESHOLDS:
341
+ - 0.5
342
+ NAME: Res5ROIHeads
343
+ NMS_THRESH_TEST: 0.5
344
+ NUM_CLASSES: 1
345
+ POSITIVE_FRACTION: 0.25
346
+ PROPOSAL_APPEND_GT: true
347
+ SCORE_THRESH_TEST: 0.05
348
+ ROI_KEYPOINT_HEAD:
349
+ CONV_DIMS:
350
+ - 512
351
+ - 512
352
+ - 512
353
+ - 512
354
+ - 512
355
+ - 512
356
+ - 512
357
+ - 512
358
+ LOSS_WEIGHT: 1.0
359
+ MIN_KEYPOINTS_PER_IMAGE: 1
360
+ NAME: KRCNNConvDeconvUpsampleHead
361
+ NORMALIZE_LOSS_BY_VISIBLE_KEYPOINTS: true
362
+ NUM_KEYPOINTS: 17
363
+ POOLER_RESOLUTION: 14
364
+ POOLER_SAMPLING_RATIO: 0
365
+ POOLER_TYPE: ROIAlignV2
366
+ ROI_MASK_HEAD:
367
+ CLS_AGNOSTIC_MASK: false
368
+ CONV_DIM: 256
369
+ NAME: MaskRCNNConvUpsampleHead
370
+ NORM: ''
371
+ NUM_CONV: 0
372
+ POOLER_RESOLUTION: 14
373
+ POOLER_SAMPLING_RATIO: 0
374
+ POOLER_TYPE: ROIAlignV2
375
+ RPN:
376
+ BATCH_SIZE_PER_IMAGE: 256
377
+ BBOX_REG_LOSS_TYPE: smooth_l1
378
+ BBOX_REG_LOSS_WEIGHT: 1.0
379
+ BBOX_REG_WEIGHTS:
380
+ - 1.0
381
+ - 1.0
382
+ - 1.0
383
+ - 1.0
384
+ BOUNDARY_THRESH: -1
385
+ HEAD_NAME: StandardRPNHead
386
+ IN_FEATURES:
387
+ - res4
388
+ IOU_LABELS:
389
+ - 0
390
+ - -1
391
+ - 1
392
+ IOU_THRESHOLDS:
393
+ - 0.3
394
+ - 0.7
395
+ LOSS_WEIGHT: 1.0
396
+ NMS_THRESH: 0.7
397
+ POSITIVE_FRACTION: 0.5
398
+ POST_NMS_TOPK_TEST: 1000
399
+ POST_NMS_TOPK_TRAIN: 2000
400
+ PRE_NMS_TOPK_TEST: 6000
401
+ PRE_NMS_TOPK_TRAIN: 12000
402
+ SMOOTH_L1_BETA: 0.0
403
+ SEM_SEG_HEAD:
404
+ COMMON_STRIDE: 4
405
+ CONVS_DIM: 128
406
+ IGNORE_VALUE: 255
407
+ IN_FEATURES:
408
+ - p2
409
+ - p3
410
+ - p4
411
+ - p5
412
+ LOSS_WEIGHT: 1.0
413
+ NAME: SemSegFPNHead
414
+ NORM: GN
415
+ NUM_CLASSES: 1
416
+ SOLOV2:
417
+ FPN_INSTANCE_STRIDES:
418
+ - 8
419
+ - 8
420
+ - 16
421
+ - 32
422
+ - 32
423
+ FPN_SCALE_RANGES:
424
+ - - 1
425
+ - 96
426
+ - - 48
427
+ - 192
428
+ - - 96
429
+ - 384
430
+ - - 192
431
+ - 768
432
+ - - 384
433
+ - 2048
434
+ INSTANCE_CHANNELS: 512
435
+ INSTANCE_IN_CHANNELS: 256
436
+ INSTANCE_IN_FEATURES:
437
+ - p2
438
+ - p3
439
+ - p4
440
+ - p5
441
+ - p6
442
+ LOSS:
443
+ DICE_WEIGHT: 3.0
444
+ FOCAL_ALPHA: 0.25
445
+ FOCAL_GAMMA: 2.0
446
+ FOCAL_USE_SIGMOID: true
447
+ FOCAL_WEIGHT: 1.0
448
+ MASK_CHANNELS: 128
449
+ MASK_IN_CHANNELS: 256
450
+ MASK_IN_FEATURES:
451
+ - p2
452
+ - p3
453
+ - p4
454
+ - p5
455
+ MASK_THR: 0.5
456
+ MAX_PER_IMG: 100
457
+ NMS_KERNEL: gaussian
458
+ NMS_PRE: 500
459
+ NMS_SIGMA: 2
460
+ NMS_TYPE: mask
461
+ NORM: GN
462
+ NUM_CLASSES: 1
463
+ NUM_GRIDS:
464
+ - 40
465
+ - 36
466
+ - 24
467
+ - 16
468
+ - 12
469
+ NUM_INSTANCE_CONVS: 4
470
+ NUM_KERNELS: 256
471
+ NUM_MASKS: 256
472
+ PRIOR_PROB: 0.01
473
+ SCORE_THR: 0.1
474
+ SIGMA: 0.2
475
+ TYPE_DCN: DCN
476
+ UPDATE_THR: 0.05
477
+ USE_COORD_CONV: true
478
+ USE_DCN_IN_INSTANCE: false
479
+ TOP_MODULE:
480
+ DIM: 16
481
+ NAME: conv
482
+ VOVNET:
483
+ BACKBONE_OUT_CHANNELS: 256
484
+ CONV_BODY: V-39-eSE
485
+ NORM: FrozenBN
486
+ OUT_CHANNELS: 256
487
+ OUT_FEATURES:
488
+ - stage2
489
+ - stage3
490
+ - stage4
491
+ - stage5
492
+ WEIGHTS: SOLOv2.pth
493
+ VERSION: 2