🥅ControlNet2Anything🥅 Collection

A collection of GANs that use control nets as a translation medium. Training code and data are available at https://github.com/Leoleojames1/CycleGANControlNet2Anything
This model uses a CycleGAN architecture to translate depth maps into robot-style images and, in the reverse direction, to estimate depth maps from robot-style images.
```bash
# Clone the repository
git clone https://huggingface.co/Borcherding/CycleGAN_Depth2RobotsV2_Blend
cd CycleGAN_Depth2RobotsV2_Blend

# Install dependencies
pip install torch torchvision gradio pyvirtualcam
```
Run the simple test interface to quickly try out the model:
```bash
python cycleGANtest.py
```
This launches a Gradio interface where you can:
- Upload an input image (a depth map or a robot-style image)
- Choose the conversion direction (depth2image or image2depth)
- View the transformed result
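As a point of reference, a minimal version of such a test interface could be wired up as sketched below. This is only an illustration that reuses the `Generator`, `download_model`, `preprocess_image`, and `postprocess_image` helpers defined in the usage example later in this card; the actual `cycleGANtest.py` may be organized differently.

```python
import gradio as gr
import torch

# Minimal sketch of a test UI; assumes the Generator and helper functions
# from the usage example below. The real cycleGANtest.py may differ.
def convert(image, direction):
    model = Generator()
    state = torch.load(download_model(direction), map_location="cpu")
    model.load_state_dict(state, strict=False)
    model.eval()
    with torch.no_grad():
        output = model(preprocess_image(image))
    return postprocess_image(output)

demo = gr.Interface(
    fn=convert,
    inputs=[
        gr.Image(type="pil", label="Input image"),
        gr.Radio(["depth2image", "image2depth"], value="depth2image", label="Direction"),
    ],
    outputs=gr.Image(label="Output"),
)
demo.launch()
```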
For a more advanced setup that includes real-time webcam processing with Depth Anything V2:
```bash
# Set the path to Depth Anything V2
export DEPTH_ANYTHING_V2_PATH=/path/to/depth-anything-v2

# Run the integrated application
python discordDepth2AnythingGAN.py
```
This launches a Gradio interface that allows you to:
- Capture live frames from your webcam
- Estimate depth in real time with Depth Anything V2
- Translate the depth maps into robot-style images with the CycleGAN generator
- Stream the result to a virtual camera (via pyvirtualcam), e.g. for use in Discord
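Inside the integrated script, the environment variable is presumably used to make the Depth Anything V2 repository importable. A hedged sketch of that step (the actual `discordDepth2AnythingGAN.py` may resolve the path differently):

```python
import os
import sys

# Hypothetical sketch: locate Depth Anything V2 via the environment variable
# set above; the real script may handle this differently.
depth_anything_path = os.environ.get("DEPTH_ANYTHING_V2_PATH")
if not depth_anything_path:
    raise RuntimeError("Please set DEPTH_ANYTHING_V2_PATH to your Depth Anything V2 checkout")
sys.path.insert(0, depth_anything_path)  # make the repository importable
```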
To use the model programmatically, the following standalone example defines the generator, downloads a checkpoint from the Hub, and runs inference:

```python
import torch
import torch.nn as nn
import numpy as np
import torchvision.transforms as transforms
from PIL import Image
from huggingface_hub import hf_hub_download

# Define the Generator architecture
class ResidualBlock(nn.Module):
    def __init__(self, channels):
        super(ResidualBlock, self).__init__()
        self.conv_block = nn.Sequential(
            nn.ReflectionPad2d(1),
            nn.Conv2d(channels, channels, 3),
            nn.InstanceNorm2d(channels),
            nn.ReLU(inplace=True),
            nn.ReflectionPad2d(1),
            nn.Conv2d(channels, channels, 3),
            nn.InstanceNorm2d(channels)
        )

    def forward(self, x):
        return x + self.conv_block(x)

class Generator(nn.Module):
    def __init__(self, input_channels=3, output_channels=3, n_residual_blocks=9):
        super(Generator, self).__init__()

        # Initial convolution
        model = [
            nn.ReflectionPad2d(3),
            nn.Conv2d(input_channels, 64, 7),
            nn.InstanceNorm2d(64),
            nn.ReLU(inplace=True)
        ]

        # Downsampling
        in_features = 64
        out_features = in_features * 2
        for _ in range(2):
            model += [
                nn.Conv2d(in_features, out_features, 3, stride=2, padding=1),
                nn.InstanceNorm2d(out_features),
                nn.ReLU(inplace=True)
            ]
            in_features = out_features
            out_features = in_features * 2

        # Residual blocks
        for _ in range(n_residual_blocks):
            model += [ResidualBlock(in_features)]

        # Upsampling
        out_features = in_features // 2
        for _ in range(2):
            model += [
                nn.ConvTranspose2d(in_features, out_features, 3, stride=2, padding=1, output_padding=1),
                nn.InstanceNorm2d(out_features),
                nn.ReLU(inplace=True)
            ]
            in_features = out_features
            out_features = in_features // 2

        # Output layer
        model += [
            nn.ReflectionPad2d(3),
            nn.Conv2d(64, output_channels, 7),
            nn.Tanh()
        ]

        self.model = nn.Sequential(*model)

    def forward(self, x):
        return self.model(x)

# Download the requested generator checkpoint from the Hub
def download_model(direction="depth2image"):
    if direction == "depth2image":
        filename = "latest_net_G_A.pth"
    else:  # "image2depth"
        filename = "latest_net_G_B.pth"

    model_path = hf_hub_download(
        repo_id="Borcherding/CycleGAN_Depth2RobotsV2_Blend",
        filename=filename
    )
    return model_path

# Image preprocessing
def preprocess_image(image):
    """
    Preprocess an image for model input.

    Args:
        image: PIL Image or numpy array

    Returns:
        torch.Tensor: Normalized tensor ready for model input
    """
    if isinstance(image, np.ndarray):
        image = Image.fromarray(image.astype('uint8'), 'RGB')

    transform = transforms.Compose([
        transforms.Resize(256),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])  # map to [-1, 1]
    ])
    return transform(image).unsqueeze(0)

# Image postprocessing
def postprocess_image(tensor):
    """
    Convert a model output tensor to a numpy image.

    Args:
        tensor: Model output tensor

    Returns:
        numpy.ndarray: RGB image array (0-255)
    """
    tensor = tensor.squeeze(0).cpu()
    tensor = (tensor + 1) / 2  # map from [-1, 1] back to [0, 1]
    tensor = tensor.clamp(0, 1)
    tensor = tensor.permute(1, 2, 0).numpy()
    return (tensor * 255).astype(np.uint8)

# Example usage
def transform_image(input_image_path, direction="depth2image"):
    """
    Transform an image using the Depth2Robot model.

    Args:
        input_image_path: Path to input image
        direction: "depth2image" or "image2depth"

    Returns:
        numpy.ndarray: Transformed image
    """
    # Load model
    model_path = download_model(direction)
    model = Generator()
    # strict=False tolerates minor key-name differences in the checkpoint
    model.load_state_dict(torch.load(model_path, map_location='cpu'), strict=False)
    model.eval()

    # Load and preprocess image
    input_image = Image.open(input_image_path).convert('RGB')
    input_tensor = preprocess_image(input_image)

    # Generate output
    with torch.no_grad():
        output_tensor = model(input_tensor)

    # Postprocess output
    output_image = postprocess_image(output_tensor)
    return output_image
```
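With the helpers above in place, a single end-to-end conversion looks like this (file names are placeholders):

```python
# Convert a depth map to a robot-style image and save it
robot_image = transform_image("example_depth_map.png", direction="depth2image")
Image.fromarray(robot_image).save("robot_style_output.png")
```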
The model checkpoints are available on Hugging Face:

- `latest_net_G_A.pth` - Generator for Depth to Robot Image transformation
- `latest_net_G_B.pth` - Generator for Robot Image to Depth transformation

The integrated application (`discordDepth2AnythingGAN.py`) also leverages Depth Anything V2 for real-time depth estimation, providing a complete pipeline: webcam capture → depth estimation → CycleGAN transformation → virtual camera output.
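A hedged sketch of what that loop could look like follows; `estimate_depth` is a hypothetical stand-in for Depth Anything V2 inference, and the frame size and FPS are illustrative:

```python
import cv2
import torch
import pyvirtualcam

# Hypothetical sketch of the real-time pipeline; the actual
# discordDepth2AnythingGAN.py may differ. estimate_depth() stands in for
# Depth Anything V2 inference returning an RGB depth visualization.
def run_pipeline(model):
    cap = cv2.VideoCapture(0)  # webcam capture
    with pyvirtualcam.Camera(width=256, height=256, fps=20) as cam:
        while True:
            ok, frame = cap.read()
            if not ok:
                break
            rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            depth = estimate_depth(rgb)               # Depth Anything V2 (hypothetical helper)
            with torch.no_grad():
                out = model(preprocess_image(depth))  # CycleGAN transformation
            styled = postprocess_image(out)           # HWC uint8 RGB
            cam.send(cv2.resize(styled, (cam.width, cam.height)))
            cam.sleep_until_next_frame()
    cap.release()
```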
[Insert your license information here]
Base model: keras-io/CycleGAN