Spaces:

Shahabmoin
/

Real-Time-Image-to-Text-Generator

Sleeping

App Files Files Community

Shahabmoin committed on Dec 24, 2024

Commit

de9f3f9

verified ·

1 Parent(s): ac7c0b7

Create app.py

Browse files

Files changed (1) hide show

app.py +56 -0

app.py ADDED Viewed

	@@ -0,0 +1,56 @@

+import streamlit as st
+from PIL import Image
+import torch
+from transformers import CLIPProcessor, CLIPModel
+import matplotlib.pyplot as plt
+# Load the pre-trained CLIP model and processor
+processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch16")
+model = CLIPModel.from_pretrained("openai/clip-vit-base-patch16")
+# Function to make predictions from the image
+def predict_image_description(image):
+    # Preprocess the image and generate text inputs
+    inputs = processor(text=["a photo of a cat", "a photo of a dog", "a photo of a car", "a photo of a tree", "a photo of a house"],
+                       images=image,
+                       return_tensors="pt",
+                       padding=True)
+    # Get model predictions
+    outputs = model(**inputs)
+    logits_per_image = outputs.logits_per_image  # this is the image-text similarity score
+    probs = logits_per_image.softmax(dim=1)  # Softmax to get probabilities
+    # Return top 3 predictions
+    top_3_probabilities, top_3_indices = torch.topk(probs, 3)
+    labels = ["a cat", "a dog", "a car", "a tree", "a house"]
+    predictions = []
+    for i in range(3):
+        prediction = labels[top_3_indices[0][i]]  # Get the label
+        probability = top_3_probabilities[0][i].item()  # Get probability
+        predictions.append(f"{prediction}: {probability * 100:.2f}%")
+    return predictions
+# Streamlit UI
+st.title("Real-Time Image-to-Text Generator")
+st.markdown("Upload an image, and I will tell you what it is!")
+# Image upload feature
+uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])
+if uploaded_file is not None:
+    # Open the uploaded image
+    image = Image.open(uploaded_file)
+    # Display the image
+    st.image(image, caption="Uploaded Image", use_column_width=True)
+    # Predict the description
+    predictions = predict_image_description(image)
+    # Display the predictions
+    st.write("Predictions:")
+    for prediction in predictions:
+        st.write(prediction)