t-m-6's picture
update:タイトルの変更
6c2dbae verified
import gradio as gr
import joblib
import pickle
import numpy as np
from janome.tokenizer import Tokenizer
# Trained model whose ``infer_vector`` method embeds the tokenized query
# (presumably a gensim Doc2Vec model, judging by the file name -- confirm).
model = joblib.load('./doc2vec.pkl')

# Precomputed document vectors that each query is compared against.
with open('document_vecs.pkl', 'rb') as vecs_file:
    document_vecs = pickle.load(vecs_file)

# Titles, looked up by the position of the matching document vector.
with open('Title.pkl', 'rb') as titles_file:
    Title = pickle.load(titles_file)
def sep_by_janome(text):
    """Split *text* into a list of token surface strings using Janome.

    A new ``Tokenizer`` is constructed on every call; each token it
    produces contributes its ``surface`` attribute, in tokenization order.
    """
    tokenizer = Tokenizer()
    return [tok.surface for tok in tokenizer.tokenize(text)]
def _cosine(query_vec, query_norm, doc_vec):
    """Return the cosine similarity of two vectors, or 0.0 if either norm is zero."""
    denom = query_norm * np.linalg.norm(doc_vec)
    if denom == 0:
        # 0/0 would yield NaN, which cannot be ranked meaningfully.
        return 0.0
    return np.dot(query_vec, doc_vec) / denom


def cos_calc(text):
    """Return the five entries of ``Title`` most similar to *text*.

    The query is tokenized with ``sep_by_janome``, embedded with
    ``model.infer_vector`` and compared by cosine similarity against every
    vector in ``document_vecs``.

    Args:
        text: The search query entered by the user.

    Returns:
        A tuple of exactly five strings, best match first, each formatted
        as ``'<title> \\nコサイン類似度:<score>'``. If fewer than five
        documents are available, the remaining slots are empty strings.
    """
    rank_size = 5

    tokens = sep_by_janome(text)
    input_vec = model.infer_vector(tokens)
    input_norm = np.linalg.norm(input_vec)

    cos_sim = [_cosine(input_vec, input_norm, doc_vec) for doc_vec in document_vecs]

    # A single ordering drives both the title and the score lookup, so the
    # two can never get out of step with each other.
    order = np.argsort(np.array(cos_sim))[::-1][:rank_size]
    results = [f'{Title[i]} \nコサイン類似度:{cos_sim[i]}' for i in order]

    # The UI expects one value per output box; pad when documents are scarce.
    results.extend([''] * (rank_size - len(results)))
    return tuple(results)
# Web UI: one query textbox in, five ranked-result textboxes out
# (labelled '1位' through '5位'), all served by ``cos_calc``.
demo = gr.Interface(
    fn=cos_calc,
    title="プログラミング参考書推薦アプリ",
    inputs=gr.Textbox(label="検索ワード"),
    outputs=[gr.Textbox(label=f'{rank}位') for rank in range(1, 6)],
)

demo.launch()