import torch
from torch import nn
from transformers import AutoImageProcessor, AutoModel
import gradio as gr
import numpy as np
from PIL import Image

# Model name on the Hugging Face Hub
MODEL_NAME = "facebook/dinov2-small"

# Load the image processor and model
processor = AutoImageProcessor.from_pretrained(MODEL_NAME)
model = AutoModel.from_pretrained(MODEL_NAME)

# Projection to 512D (if the model's output dimension is larger than 512, reduce it)
# Note: this linear layer is randomly initialized, so the projected values
# change between runs unless its weights are saved and reloaded.
projection = nn.Linear(model.config.hidden_size, 512)

def get_embedding(image: Image.Image):
    # Preprocessing
    inputs = processor(images=image, return_tensors="pt")
    with torch.no_grad():
        outputs = model(**inputs)
        # Use the CLS token as the image embedding
        last_hidden_state = outputs.last_hidden_state  # (batch, seq_len, hidden)
        embedding = last_hidden_state[:, 0]  # take the [CLS] token
        # Project down to 512D
        embedding_512 = projection(embedding)
    # Convert to a plain Python list
    return embedding_512.squeeze().tolist()

# Create the API with Gradio (no visual interface needed, just the endpoint)
iface = gr.Interface(
    fn=get_embedding,
    inputs=gr.Image(type="pil"),
    outputs=gr.JSON(),
    live=False,
    api_name="embed",  # endpoint at /embed
)

if __name__ == "__main__":
    iface.launch()
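A minimal sketch of how a client could call this endpoint once the app is running. It assumes a local launch at Gradio's default address (http://127.0.0.1:7860) and a hypothetical image file named "cat.jpg"; for a deployed Space, replace the URL with the Space identifier.

# Example client call (requires: pip install gradio_client).
# Assumptions: the app above is already running locally and "cat.jpg" exists.
from gradio_client import Client, handle_file

client = Client("http://127.0.0.1:7860/")

# Older gradio_client versions accept a plain file path instead of handle_file().
embedding = client.predict(handle_file("cat.jpg"), api_name="/embed")

print(len(embedding))  # expected: 512 values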