# sam2_model_stub.py
import torch
import torch.nn as nn
import torch.nn.functional as F


class SAM2Hierarchical(nn.Module):
    """Lightweight stand-in model with a small CNN backbone and a segmentation head.

    The backbone is two stride-2 3x3 convolutions (each followed by
    BatchNorm and ReLU), so the segmentation logits come out at roughly one
    quarter of the input's spatial resolution; no upsampling is applied.

    Args:
        num_classes: Number of output channels of the segmentation head (and
            output features of the optional classification head).
        in_channels: Number of channels of the input image tensor.
        backbone: Accepted for interface compatibility only; this stub does
            not read it and always builds the same small CNN.
        freeze_backbone: If true, ``requires_grad`` is set to ``False`` on
            every backbone parameter.
        use_cls_head: If true, an ``nn.Linear(128, num_classes)`` is
            registered as ``self.cls_head``. ``forward`` does not call it.
    """

    def __init__(
        self,
        num_classes: int = 6,
        in_channels: int = 3,
        backbone: str = "vit_b",
        freeze_backbone: bool = True,
        use_cls_head: bool = True,
    ) -> None:
        super().__init__()
        self.use_cls_head = use_cls_head

        # Minimal vision backbone stub (fake transformer or CNN).
        # Layer order/indices are kept stable so state_dict keys do not change.
        self.backbone = nn.Sequential(
            nn.Conv2d(in_channels, 64, kernel_size=3, stride=2, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.Conv2d(64, 128, kernel_size=3, stride=2, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
        )

        # Segmentation head stub: 128 -> 64 -> num_classes channels,
        # spatial size unchanged.
        self.segmentation_head = nn.Sequential(
            nn.Conv2d(128, 64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(64, num_classes, kernel_size=1),
        )

        # Optional classification head. It is only registered here (so its
        # weights appear in the state_dict); forward() never invokes it.
        if self.use_cls_head:
            self.cls_head = nn.Linear(128, num_classes)

        if freeze_backbone:
            # NOTE: this only disables gradients. The BatchNorm layers still
            # update their running statistics while the module is in train
            # mode; call .eval() on the backbone if that is not wanted.
            for param in self.backbone.parameters():
                param.requires_grad = False

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return segmentation logits for a batch of images.

        Args:
            x: Batched image tensor with ``in_channels`` channels
                (4-D, as required by ``BatchNorm2d``).

        Returns:
            Segmentation logits with ``num_classes`` channels at the
            backbone's reduced spatial resolution.
        """
        features = self.backbone(x)
        # Only the segmentation output is returned, regardless of
        # use_cls_head; inference only cares about these logits.
        return self.segmentation_head(features)