import gradio as gr
import numpy as np
import PIL
import trimesh
from transformers_js import import_transformers_js, as_url
transformers = await import_transformers_js()
pipeline = transformers.pipeline
depth_estimator = await pipeline('depth-estimation', 'Xenova/depth-anything-small-hf');
def depthmap_to_glb_trimesh(depth_map, rgb_image, file_path):
    """Build a vertex-coloured grid mesh from a depth map and export it as GLB.

    Every pixel becomes one vertex at ``(x, y, depth_map[y, x])`` coloured
    with ``rgb_image[y, x]``. Each 2x2 pixel neighbourhood is covered by two
    triangles, so border pixels do not start a quad of their own.

    Args:
        depth_map: 2-D array of depth values indexed as ``[y, x]``.
        rgb_image: Array whose first two dimensions match ``depth_map``;
            its values are cast to ``uint8`` and used as vertex colours.
        file_path: Destination handed to ``Trimesh.export``.

    Raises:
        ValueError: If the inputs disagree on height/width, or if
            ``depth_map`` is not two-dimensional.
    """
    depth_map = np.asarray(depth_map)
    rgb_image = np.asarray(rgb_image)

    # Explicit exceptions instead of `assert`, which is stripped under `-O`.
    if depth_map.shape[:2] != rgb_image.shape[:2]:
        raise ValueError("Depth map and RGB image must have the same dimensions")
    if depth_map.ndim != 2:
        raise ValueError(
            f"Depth map must be 2-dimensional, got shape {depth_map.shape}"
        )

    height, width = depth_map.shape

    # Vertices in row-major order (y outer, x inner): vertex index = y * width + x.
    ys, xs = np.indices((height, width))
    vertices = np.column_stack(
        [xs.ravel(), ys.ravel(), depth_map.ravel()]
    ).astype(np.float64)

    # One colour per vertex, flattened in the same row-major order.
    colors = rgb_image.reshape((height * width,) + rgb_image.shape[2:]).astype(
        np.uint8
    )

    # Two triangles per quad. The quad whose top-left corner is pixel (y, x)
    # uses that pixel, its right neighbour and the two pixels on the row below.
    top_left = (
        np.arange(height - 1)[:, None] * width + np.arange(width - 1)
    ).ravel()
    top_right = top_left + 1
    bottom_left = top_left + width
    bottom_right = bottom_left + 1
    first_triangles = np.column_stack([top_left, bottom_left, top_right])
    second_triangles = np.column_stack([top_right, bottom_left, bottom_right])
    # Interleave so both triangles of a quad are adjacent in the face list.
    faces = (
        np.stack([first_triangles, second_triangles], axis=1)
        .reshape(-1, 3)
        .astype(np.int32)
    )

    # process=False is passed so the arrays are handed over as built here.
    mesh = trimesh.Trimesh(
        vertices=vertices, faces=faces, vertex_colors=colors, process=False
    )

    # Export to GLB
    mesh.export(file_path, file_type='glb')
def invert_depth(depth_map):
    """Return ``depth_map`` mirrored around its maximum value.

    The largest input value maps to 0 and every other value maps to its
    distance from that maximum.
    """
    return np.max(depth_map) - depth_map
def invert_xy(map):
    """Return ``map`` reversed along its first two axes.

    Any further axes (for example colour channels) keep their order.
    The parameter name shadows the ``map`` builtin; it is kept unchanged so
    keyword callers continue to work.
    """
    backwards = slice(None, None, -1)
    return map[backwards, backwards]
async def estimate(image_path, depth_scale):
    """Estimate depth for an image and build a coloured 3-D mesh from it.

    Args:
        image_path: Path of the input image file.
        depth_scale: Factor the inverted depth values are multiplied by
            before they are used as the mesh's z coordinates.

    Returns:
        A tuple ``(depth_image, glb_file_path, tensor_data)``:
        the ``"depth"`` prediction converted with ``to_pil()``, the path of
        the exported GLB file, and a dict holding the ``dims``, ``type`` and
        ``size`` attributes of the ``"predicted_depth"`` tensor.
    """
    # A bare `import PIL` does not load the `Image` submodule, so import it
    # explicitly instead of relying on another package having done so.
    from PIL import Image

    # Prepare the colour data inside a `with` block so the file handle is
    # released as soon as the pixels have been copied into a NumPy array.
    with Image.open(image_path) as source:
        # Palette, grayscale, CMYK, ... images do not yield per-pixel RGB(A)
        # colours; convert them. RGB and RGBA inputs are left untouched.
        image = source if source.mode in ("RGB", "RGBA") else source.convert("RGB")
        image.thumbnail((384, 384))  # Resize in place, keeping the aspect ratio
        target_size = image.size
        image_array = invert_xy(np.asarray(image))

    predictions = await depth_estimator(as_url(image_path))
    depth_image = predictions["depth"].to_pil()
    tensor = predictions["predicted_depth"]
    tensor_data = {
        "dims": tensor.dims,
        "type": tensor.type,
        "size": tensor.size,
    }

    # Construct the 3D model from the depth map and the RGB image.
    # NOTE(review): assumes `to_numpy()` yields a 2-D array in a dtype that
    # `Image.fromarray` accepts -- confirm against the model output.
    depth = invert_xy(invert_depth(tensor.to_numpy())) * depth_scale

    # The model outputs the depth map in a different size than the input image,
    # so resize the depth map to match the (thumbnailed) image size.
    depth = np.array(Image.fromarray(depth).resize(target_size))

    glb_file_path = "output.glb"
    depthmap_to_glb_trimesh(depth, image_array, glb_file_path)

    return depth_image, glb_file_path, tensor_data
# Gradio UI: an image (passed to `estimate` as a file path) and a depth-scale
# slider go in; the depth image, the 3-D model and the tensor metadata returned
# by `estimate` come out, in that order.
demo = gr.Interface(
    fn=estimate,
    inputs=[
        gr.Image(type="filepath"),
        gr.Slider(minimum=1, maximum=100, value=10, label="Depth Scale"),
    ],
    outputs=[
        gr.Image(label="Depth Image"),
        gr.Model3D(label="3D Model"),
        gr.JSON(label="Tensor"),
    ],
    # Each example row provides a value for the image input only.
    examples=[["bread_small.png"], ["cats.jpg"]],
)

demo.launch()
transformers_js_py
trimesh