File size: 1,388 Bytes
c49578b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
import gradio as gr
import torch
from datasets import load_dataset
from sentence_transformers import SentenceTransformer
from sentence_transformers.util import semantic_search

# Blog-post titles; search hits are mapped back to text through this dataset.
title_dataset = load_dataset("pyimagesearch/blog-title", data_files="bp-title.csv")

# Embedding table from the same repo, converted step by step into a float
# tensor. Presumably one row per title in bp-title.csv, in the same order --
# TODO confirm against the dataset repo.
_embedding_rows = load_dataset("pyimagesearch/blog-title", data_files="embeddings.csv")
_embedding_matrix = _embedding_rows["train"].to_pandas().to_numpy()
title_embeddings = torch.from_numpy(_embedding_matrix).to(torch.float)
del _embedding_rows, _embedding_matrix  # keep only the final tensor at module level

# Sentence encoder used to embed incoming queries.
model = SentenceTransformer("paraphrase-MiniLM-L6-v2")

# Text passed to gr.Interface as its ``title`` and ``description``.
title = "Title Semantic Search"
description = (
    "Provide a blog post title, and we'll find the most similar titles "
    "from our already written blog posts."
)

# Sample queries passed to gr.Interface as its ``examples``.
examples = [
    "Introduction to Keras",
    "Conditional GANs with Keras",
    "A Gentle Introduction to PyTorch with Deep Learning",
]

def get_titles(query: str) -> str:
    """Return the stored blog titles most similar to *query*, one per line.

    Args:
        query: Free-text title to search for.

    Returns:
        Up to five titles joined by newlines, in the order
        ``semantic_search`` reports them. An empty string is returned when
        *query* is empty, blank, or ``None``.
    """
    # Nothing meaningful to search for; skip the encoder instead of returning
    # arbitrary neighbours of an empty string.
    if not query or not query.strip():
        return ""

    query_embed = model.encode(query)
    # semantic_search yields one hit list per query; there is a single query.
    hits = semantic_search(query_embed, title_embeddings, top_k=5)[0]

    # Look the title column up once rather than on every loop iteration.
    all_titles = title_dataset["train"]["title"]
    return "\n".join(all_titles[hit["corpus_id"]] for hit in hits)

# One text box for the query and one for the newline-separated results.
query_box = gr.Textbox(label="Input Title")
results_box = gr.Textbox(label="Similar Titles")

# Wire get_titles between the two text boxes.
space = gr.Interface(
    fn=get_titles,
    inputs=query_box,
    outputs=results_box,
    title=title,
    description=description,
    examples=examples,
)

# Start the app as soon as the module is executed.
space.launch()