Spaces:
Sleeping
Sleeping
Upload 4 files
Browse files
- CC_net.pt +3 -0
- ResNet_for_CC.py +93 -0
- app.py +89 -0
- requirements.txt +7 -0
CC_net.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b61ad39bb8f2872cff371265b3ad4ecbf9c5a201d64225f92d6bcc937d9e112b
|
| 3 |
+
size 95648689
|
ResNet_for_CC.py
ADDED
|
@@ -0,0 +1,93 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
import torch.nn as nn
|
| 3 |
+
import torchvision.models as models
|
| 4 |
+
|
| 5 |
+
class ResClassifier(nn.Module):
    """
    MLP classification head: two hidden blocks followed by a linear output layer.

    Each hidden block is Linear -> BatchNorm1d -> ReLU -> Dropout. The final
    linear layer maps the hidden representation to class logits.

    Args:
        num_classes: Number of output logits.
        in_features: Size of the incoming feature vector (default 128).
        hidden_features: Width of both hidden layers (default 64).
        dropout: Dropout probability used in both hidden blocks
            (default 0.5, the same value ``nn.Dropout()`` uses).
    """

    def __init__(self, num_classes=14, in_features=128, hidden_features=64, dropout=0.5):
        super().__init__()

        # Attribute names and the Sequential layout are kept unchanged so that
        # state_dict keys (fc1.0.weight, fc1.1.running_mean, ...) stay the same.

        # First hidden block: in_features -> hidden_features
        self.fc1 = nn.Sequential(
            nn.Linear(in_features, hidden_features),
            nn.BatchNorm1d(hidden_features, affine=True),
            nn.ReLU(inplace=True),
            nn.Dropout(p=dropout),
        )

        # Second hidden block: keeps the hidden width
        self.fc2 = nn.Sequential(
            nn.Linear(hidden_features, hidden_features),
            nn.BatchNorm1d(hidden_features, affine=True),
            nn.ReLU(inplace=True),
            nn.Dropout(p=dropout),
        )

        # Output layer: hidden_features -> class logits
        self.fc3 = nn.Linear(hidden_features, num_classes)

    def forward(self, x):
        """
        Run the features through both hidden blocks and the output layer.

        Args:
            x: Feature tensor whose last dimension equals ``in_features``.

        Returns:
            Raw class logits (no softmax applied).
        """
        x = self.fc1(x)
        x = self.fc2(x)
        return self.fc3(x)
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
class CC_model(nn.Module):
    """
    Clothing classification model built on a ResNet-50 backbone.

    The backbone's original ``fc`` layer is replaced by ``nn.Identity`` so it
    acts as a feature extractor. A linear layer reduces those features to 128
    dimensions, and two independent ``ResClassifier`` heads produce logits
    that are averaged into the final prediction.

    Args:
        num_classes1: Number of classes predicted by the first head.
        num_classes2: Number of classes predicted by the second head.
            ``None`` means "same as ``num_classes1``". Both values must be
            equal because the two heads' logits are averaged element-wise.

    Raises:
        ValueError: If ``num_classes1`` and ``num_classes2`` differ.
    """

    def __init__(self, num_classes1=14, num_classes2=None):
        super().__init__()

        # If num_classes2 is not specified, default to num_classes1
        if num_classes2 is None:
            num_classes2 = num_classes1

        # Explicit raise instead of assert: asserts are stripped under ``python -O``
        if num_classes1 != num_classes2:
            raise ValueError(
                "num_classes1 and num_classes2 must be equal, "
                f"got {num_classes1} and {num_classes2}"
            )

        self.num_classes = num_classes1

        # Load a pretrained ResNet-50 model as the feature extractor
        self.model_resnet = models.resnet50(weights='ResNet50_Weights.DEFAULT')

        # Remember the backbone's feature width, then drop its classification layer
        num_ftrs = self.model_resnet.fc.in_features
        self.model_resnet.fc = nn.Identity()

        # Dimensionality reduction: backbone features -> 128-D embedding
        self.dr = nn.Linear(num_ftrs, 128)

        # Two independent classifier heads operating on the 128-D embedding
        self.fc1 = ResClassifier(num_classes1)
        self.fc2 = ResClassifier(num_classes2)

    def forward(self, x, detach_feature=False):
        """
        Extract features with the backbone and classify them with both heads.

        Args:
            x: Input image batch passed directly to the ResNet-50 backbone
                (presumably shaped (N, 3, H, W); confirm against the caller).
            detach_feature: When True, the 128-D embedding is detached from
                the autograd graph before it reaches the heads, so no gradient
                flows back into ``self.dr``. The returned embedding is the
                detached tensor in that case.

        Returns:
            Tuple ``(dr_feature, output_mean)``: the 128-D embedding and the
            element-wise mean of the two heads' logits.
        """
        # The backbone always runs without gradient tracking
        with torch.no_grad():
            feature = self.model_resnet(x)

        # Project backbone features to the 128-D embedding
        dr_feature = self.dr(feature)

        if detach_feature:
            dr_feature = dr_feature.detach()

        # Pass the embedding through both independent heads
        out1 = self.fc1(dr_feature)
        out2 = self.fc2(dr_feature)

        # Average the two heads' logits
        output_mean = (out1 + out2) / 2

        return dr_feature, output_mean
|
app.py
ADDED
|
@@ -0,0 +1,89 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
import torch
|
| 3 |
+
import torch.nn as nn
|
| 4 |
+
import torch.nn.functional as F
|
| 5 |
+
import torchvision.transforms as transforms
|
| 6 |
+
from PIL import Image
|
| 7 |
+
from ResNet_for_CC import CC_model # Ensure the correct import
|
| 8 |
+
|
| 9 |
+
# ✅ Detect available device (CPU/GPU)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"[INFO] Running on: {device}")

# ✅ Load the trained CC_model
model_path = "CC_net.pt"
print(f"[INFO] Loading model from: {model_path}")

# Initialize and load model weights
try:
    # CC_model's constructor parameter is ``num_classes1`` (there is no
    # ``num_classes``). Construction happens inside the try block so that a
    # failure here is reported like any other loading problem.
    model = CC_model(num_classes1=14)
    state_dict = torch.load(model_path, map_location=device)

    # strict=False tolerates key mismatches; report them instead of hiding
    # them, since a mismatch means part of the network keeps its initial weights.
    load_result = model.load_state_dict(state_dict, strict=False)
    if load_result.missing_keys:
        print(f"[WARN] Keys missing from checkpoint: {load_result.missing_keys}")
    if load_result.unexpected_keys:
        print(f"[WARN] Unexpected keys in checkpoint: {load_result.unexpected_keys}")

    model.to(device)
    model.eval()  # Set to evaluation mode
    print("[✅] Model loaded successfully!")
except Exception as e:
    print(f"[❌ ERROR] Failed to load model: {e}")
    # Never leave a half-initialised network around: inference will fail
    # visibly instead of returning predictions from untrained weights.
    model = None

# ✅ Define class labels for Clothing1M dataset
class_labels = [
    "T-Shirt", "Shirt", "Knitwear", "Chiffon", "Sweater", "Hoodie",
    "Windbreaker", "Jacket", "Downcoat", "Suit", "Shawl", "Dress",
    "Vest", "Underwear"
]

# ✅ Image Preprocessing Pipeline
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # Resize to fixed dimensions
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])
|
| 41 |
+
|
| 42 |
+
# ✅ Inference Function
|
| 43 |
+
def classify_image(image):
    """
    Classify a clothing image and return a human-readable prediction.

    Args:
        image: PIL image supplied by the Gradio ``Image`` input. May be
            ``None`` (presumably when nothing was uploaded).

    Returns:
        A string with the predicted class and its confidence in percent, or
        an error message if the input is missing, the model output has an
        unexpected number of classes, or any exception occurs.
    """
    if image is None:
        return "[ERROR] No image provided. Please upload an image."

    try:
        print("\n[DEBUG] Processing input image...")

        # Normalize() is configured for exactly three channels, so force RGB
        # (uploads may be RGBA, palette or greyscale), then add a batch axis.
        image = transform(image.convert("RGB")).unsqueeze(0).to(device)

        # Forward pass through the model
        with torch.no_grad():
            output = model(image)

        # CC_model.forward returns (features, logits); keep only the logits.
        # A bare tensor is still accepted in case a different model is plugged in.
        logits = output[1] if isinstance(output, (tuple, list)) else output

        # Validate model output shape
        if logits.shape[1] != len(class_labels):
            return f"[❌ ERROR] Model output mismatch! Expected {len(class_labels)}, but got {logits.shape[1]}."

        # Convert logits to probabilities for the single image in the batch
        probabilities = F.softmax(logits, dim=1)[0]
        predicted_index = torch.argmax(probabilities).item()
        confidence = probabilities[predicted_index].item() * 100

        predicted_label = class_labels[predicted_index]
        print(f"[INFO] Prediction: {predicted_label} (Confidence: {confidence:.2f}%)")

        # Return label and confidence
        return f"Predicted Class: {predicted_label} (Confidence: {confidence:.2f}%)"

    except Exception as e:
        # Top-level UI boundary: log the cause and show a generic message
        print(f"[❌ ERROR] Exception during classification: {e}")
        return "[ERROR] Failed to process image. Please check logs."
|
| 76 |
+
|
| 77 |
+
# ✅ Create Gradio Interface
# Single image input (delivered to classify_image as a PIL image), plain-text output.
interface = gr.Interface(
    fn=classify_image,
    inputs=gr.Image(type="pil"),
    outputs="text",
    title="👕 Clothing1M Classifier",
    description="Upload a clothing image, and the AI model will classify it into one of 14 categories.",
)

# ✅ Run the Gradio Interface
# Only launch the server when executed as a script, not when imported.
if __name__ == "__main__":
    print("[INFO] Launching Gradio interface...")
    interface.launch()
|
requirements.txt
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
clip==0.2.0
|
| 2 |
+
numpy==1.23.4
|
| 3 |
+
openai_clip==1.0.1
|
| 4 |
+
Pillow==9.4.0
|
| 5 |
+
torch==2.6.0
|
| 6 |
+
torchvision==0.21.0
|
| 7 |
+
tqdm==4.64.1
|