# Hugging Face Spaces page header captured with this file; reported status: Runtime error.
import os | |
import sys | |
import jax | |
import streamlit as st | |
import transformers | |
from huggingface_hub import snapshot_download | |
from transformers import AutoTokenizer | |
LOCAL_PATH = snapshot_download("flax-community/clip-spanish") | |
sys.path.append(LOCAL_PATH) | |
from modeling_hybrid_clip import FlaxHybridCLIP | |
from test_on_image import prepare_image, prepare_text | |
def save_file_to_disk(uplaoded_file):
    """Write an uploaded file's bytes to ``/tmp`` and return the path.

    Args:
        uplaoded_file: Object exposing a ``name`` attribute and a
            ``getbuffer()`` method returning the file's bytes. The
            parameter keeps its original spelling so existing keyword
            callers continue to work.

    Returns:
        The path of the file that was written inside ``/tmp``.
    """
    # The name comes from the uploader, so keep only its final component;
    # this stops names such as "../x" or "/etc/x" from escaping /tmp.
    safe_name = os.path.basename(uplaoded_file.name)
    temp_file = os.path.join("/tmp", safe_name)
    with open(temp_file, "wb") as f:
        # Read from the argument itself, not from a same-named global.
        f.write(uplaoded_file.getbuffer())
    return temp_file
def load_tokenizer_and_model():
    """Load the text tokenizer and the hybrid CLIP model.

    The tokenizer is fetched by its hub identifier; the model is loaded
    from ``LOCAL_PATH``.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    spanish_tokenizer = AutoTokenizer.from_pretrained(
        "bertin-project/bertin-roberta-base-spanish"
    )
    # Model files are read from the module-level LOCAL_PATH directory.
    clip_model = FlaxHybridCLIP.from_pretrained(LOCAL_PATH)
    return spanish_tokenizer, clip_model
def run_inference(image_path, text, model, tokenizer):
    """Score a caption against an image with the given model.

    Args:
        image_path: Path handed to ``prepare_image`` together with ``model``.
        text: Caption handed to ``prepare_text`` together with ``tokenizer``.
        model: Callable model; invoked with ``train=False`` and
            ``return_dict=True``.
        tokenizer: Tokenizer forwarded to ``prepare_text``.

    Returns:
        Element ``[0][0]`` of the sigmoid applied to the model output's
        ``"logits_per_image"`` entry.
    """
    image_features = prepare_image(image_path, model)
    encoded_text = prepare_text(text, tokenizer)

    outputs = model(
        encoded_text["input_ids"],
        image_features,
        attention_mask=encoded_text["attention_mask"],
        train=False,
        return_dict=True,
    )

    # Squash the image-to-text logits with a sigmoid and take the first entry.
    return jax.nn.sigmoid(outputs["logits_per_image"])[0][0]
# Load the tokenizer and model before building the page.
tokenizer, model = load_tokenizer_and_model()

st.title("Caption Scoring")
uploaded_file = st.file_uploader("Choose an image...", type=["png", "jpg"])
text_input = st.text_input("Type a caption")

# Score only once both an image and a non-empty caption are available.
if uploaded_file is not None and text_input:
    saved_image = None
    try:
        saved_image = save_file_to_disk(uploaded_file)
        raw_score = run_inference(saved_image, text_input, model, tokenizer)
        caption_score = raw_score.tolist()
        st.image(
            uploaded_file,
            caption=text_input,
            width=None,
            use_column_width=None,
            clamp=False,
            channels="RGB",
            output_format="auto",
        )
        st.write(f"## Score: {caption_score:.2f}")
    finally:
        # Remove the temporary copy whether or not scoring succeeded.
        if saved_image:
            os.remove(saved_image)