Spaces:

vivien
/

clip

Running

App Files Community

Vivien committed on Jan 30, 2022

Commit

5185219

1 Parent(s): dab4ce7

Add possibility to compose queries and use images as queries

Browse files

Files changed (2) hide show

app.py +75 -27
requirements.txt +1 -0

app.py CHANGED Viewed

@@ -1,8 +1,9 @@
 import streamlit as st
 import pandas as pd, numpy as np
-from html import escape
-import os
 from transformers import CLIPProcessor, CLIPModel
 @st.cache(
@@ -19,47 +20,72 @@ def load():
     df = {0: pd.read_csv("data.csv"), 1: pd.read_csv("data2.csv")}
     embeddings = {0: np.load("embeddings.npy"), 1: np.load("embeddings2.npy")}
     for k in [0, 1]:
-        embeddings[k] = np.divide(
-            embeddings[k], np.sqrt(np.sum(embeddings[k] ** 2, axis=1, keepdims=True))
         )
     return model, processor, df, embeddings
 model, processor, df, embeddings = load()
 source = {0: "\nSource: Unsplash", 1: "\nSource: The Movie Database (TMDB)"}
-def get_html(url_list, height=200):
-    html = "<div style='margin-top: 20px; max-width: 1200px; display: flex; flex-wrap: wrap; justify-content: space-evenly'>"
-    for url, title, link in url_list:
-        html2 = f"<img title='{escape(title)}' style='height: {height}px; margin: 5px' src='{escape(url)}'>"
-        if len(link) > 0:
-            html2 = f"<a href='{escape(link)}' target='_blank'>" + html2 + "</a>"
-        html = html + html2
-    html += "</div>"
-    return html
 def compute_text_embeddings(list_of_strings):
     inputs = processor(text=list_of_strings, return_tensors="pt", padding=True)
-    return model.get_text_features(**inputs)
-st.cache(show_spinner=False)
 def image_search(query, corpus, n_results=24):
-    text_embeddings = compute_text_embeddings([query]).detach().numpy()
     k = 0 if corpus == "Unsplash" else 1
-    results = np.argsort((embeddings[k] @ text_embeddings.T)[:, 0])[
-        -1 : -n_results - 1 : -1
-    ]
     return [
         (
             df[k].iloc[i]["path"],
             df[k].iloc[i]["tooltip"] + source[k],
-            df[k].iloc[i]["link"],
         )
         for i in results
     ]
@@ -112,11 +138,33 @@ def main():
     )
     st.sidebar.markdown(description)
     _, c, _ = st.columns((1, 3, 1))
-    query = c.text_input("", value="clouds at sunset")
     corpus = st.radio("", ["Unsplash", "Movies"])
     if len(query) > 0:
         results = image_search(query, corpus)
-        st.markdown(get_html(results), unsafe_allow_html=True)
 if __name__ == "__main__":

+from html import escape
+import re
 import streamlit as st
 import pandas as pd, numpy as np
 from transformers import CLIPProcessor, CLIPModel
+from st_clickable_images import clickable_images
 @st.cache(
     df = {0: pd.read_csv("data.csv"), 1: pd.read_csv("data2.csv")}
     embeddings = {0: np.load("embeddings.npy"), 1: np.load("embeddings2.npy")}
     for k in [0, 1]:
+        embeddings[k] = embeddings[k] - np.mean(embeddings[k], axis=0)
+        embeddings[k] = embeddings[k] / np.linalg.norm(
+            embeddings[k], axis=1, keepdims=True
         )
     return model, processor, df, embeddings
 model, processor, df, embeddings = load()
 source = {0: "\nSource: Unsplash", 1: "\nSource: The Movie Database (TMDB)"}
 def compute_text_embeddings(list_of_strings):
     inputs = processor(text=list_of_strings, return_tensors="pt", padding=True)
+    result = model.get_text_features(**inputs).detach().numpy()
+    return result / np.linalg.norm(result, axis=1, keepdims=True)
 def image_search(query, corpus, n_results=24):
+    positive_embeddings = None
+    def concatenate_embeddings(e1, e2):
+        if e1 is None:
+            return e2
+        else:
+            return np.concatenate((e1, e2), axis=0)
+    splitted_query = query.split("/")
+    positive_queries = splitted_query[0].split(";")
+    for positive_query in positive_queries:
+        match = re.match(r"\[(Movies|Unsplash):(\d{1,5})\](.*)", positive_query)
+        if match:
+            corpus2, idx, remainder = match.groups()
+            idx, remainder = int(idx), remainder.strip()
+            k = 0 if corpus2 == "Unsplash" else 1
+            positive_embeddings = concatenate_embeddings(
+                positive_embeddings, embeddings[k][idx : idx + 1, :]
+            )
+            if len(remainder) > 0:
+                positive_embeddings = concatenate_embeddings(
+                    positive_embeddings, compute_text_embeddings([remainder])
+                )
+        else:
+            positive_embeddings = concatenate_embeddings(
+                positive_embeddings, compute_text_embeddings([positive_query])
+            )
     k = 0 if corpus == "Unsplash" else 1
+    dot_product = embeddings[k] @ positive_embeddings.T
+    dot_product = dot_product - np.mean(dot_product, axis=0)
+    dot_product = dot_product / np.linalg.norm(dot_product, axis=0)
+    dot_product = np.min(dot_product, axis=1)
+    if len(splitted_query) > 1:
+        negative_queries = (" ".join(splitted_query[1:])).split(";")
+        negative_embeddings = compute_text_embeddings(negative_queries)
+        dot_product2 = embeddings[k] @ negative_embeddings.T
+        dot_product2 = dot_product2 - np.mean(dot_product2, axis=0)
+        dot_product2 = dot_product2 / np.linalg.norm(dot_product2, axis=0)
+        dot_product -= np.max(dot_product2, axis=1)
+    results = np.argsort(dot_product)[-1 : -n_results - 1 : -1]
     return [
         (
             df[k].iloc[i]["path"],
             df[k].iloc[i]["tooltip"] + source[k],
+            i,
         )
         for i in results
     ]
     )
     st.sidebar.markdown(description)
     _, c, _ = st.columns((1, 3, 1))
+    if "query" in st.session_state:
+        query = c.text_input("", value=st.session_state["query"])
+    else:
+        query = c.text_input("", value="clouds at sunset")
     corpus = st.radio("", ["Unsplash", "Movies"])
     if len(query) > 0:
         results = image_search(query, corpus)
+        clicked = clickable_images(
+            [result[0] for result in results],
+            titles=[result[1] for result in results],
+            div_style={
+                "display": "flex",
+                "justify-content": "center",
+                "flex-wrap": "wrap",
+            },
+            img_style={"margin": "2px", "height": "200px"},
+        )
+        if clicked >= 0:
+            change_query = False
+            if "last_clicked" not in st.session_state:
+                change_query = True
+            else:
+                if clicked != st.session_state["last_clicked"]:
+                    change_query = True
+            if change_query:
+                st.session_state["query"] = f"[{corpus}:{results[clicked][2]}]"
+                st.experimental_rerun()
 if __name__ == "__main__":

requirements.txt CHANGED Viewed

@@ -2,3 +2,4 @@ torch
 transformers
 numpy
 pandas

 transformers
 numpy
 pandas
+st-clickable-images