Ctrl+K
- attn_norm=layernorm_teacher_only_affine, attn_projector=mlp_256_l2, attn_weight=5, learning_rate=0.0002, per_device_train_batch_size=16, warmup_ratio=0
- attn_norm=layernorm_teacher_only_affine, attn_projector=mlp_256_l3, attn_weight=5, learning_rate=0.0002, per_device_train_batch_size=16, warmup_ratio=0
- attn_norm=layernorm_teacher_only_affine, attn_projector=mlp_256_l4, attn_weight=5, learning_rate=0.0002, per_device_train_batch_size=16, warmup_ratio=0
- attn_norm=layernorm_teacher_only_affine, attn_projector=mlp_64_l2, attn_weight=5, learning_rate=0.0002, per_device_train_batch_size=16, warmup_ratio=0
- attn_norm=layernorm_teacher_only_affine, attn_projector=mlp_64_l3, attn_weight=5, learning_rate=0.0002, per_device_train_batch_size=16, warmup_ratio=0
- attn_norm=layernorm_teacher_only_affine, attn_projector=mlp_64_l4, attn_weight=5, learning_rate=0.0002, per_device_train_batch_size=16, warmup_ratio=0
- attn_norm=layernorm_teacher_only_affine, attn_projector=orthogonal, attn_weight=5, learning_rate=0.0002, per_device_train_batch_size=16, warmup_ratio=0
- attn_weight=0