|
import streamlit as st |
|
# Page chrome: horizontal rules, custom CSS, header and a short intro line.

# A bare string expression on its own line is rendered by Streamlit's "magic"
# feature, so this draws a Markdown horizontal rule at the top of the page.
"---"

# Stylesheet injected through st.markdown: sets the background colour of text
# inputs and of the page body.
_PAGE_CSS = """
<style>
.stTextInput > div > div > input {
background-color: #d3d3d3;
}
body {
background-color: #f0f0f0;
}
</style>
"""

# unsafe_allow_html=True lets the <style> element through instead of having
# it escaped and shown as text.
st.markdown(_PAGE_CSS, unsafe_allow_html=True)

st.header("Watson Assistant VDF TOBi improvement")

# Second horizontal rule, again via Streamlit magic.
"---"

st.write("The model is trained on the TOBi 🤖 intents in Romanian language.")
|
|
|
|
|
|
|
import os |
|
import pandas as pd |
|
import re |
|
from time import time |
|
from src.E_Model_utils import load_model, train_model, get_embeddings |
|
from src.E_Faiss_utils import load_embeddings_and_index, normalize_embeddings |
|
from src.A_Preprocess import load_data, clean_text |
|
import warnings |
|
warnings.filterwarnings("ignore", category=FutureWarning) |
|
|
|
|
|
|
|
# Model selection: let the user pick an embedding model in the sidebar and
# load it into `model` for the lookup code further down the script.

# "other" is the first entry in the list and has no model behind it.
model_name = st.sidebar.radio(
    "Selectează modelul 👇",
    [
        "other",
        "e5_small_fine_tuned_model",
        "multilingual-e5-small",
        "all-MiniLM-L6-v2",
        "all-distilroberta-v1",
    ],
)

if model_name == "other":
    # Nothing is loaded for the placeholder entry, and the lookup code below
    # reads `model`; end this run here rather than failing later with a
    # NameError. Selecting a real model triggers a rerun of the script.
    st.info("Selectează un model din bara laterală pentru a continua.")
    st.stop()

model_path = "output/fine-tuned-model"
st.write("Model path:", model_path)

# Identifier handed to load_model for both e5 entries; every other entry is
# passed to load_model under its own name.
# NOTE(review): "e5_small_fine_tuned_model" loads the same identifier as the
# base "multilingual-e5-small" entry and never uses `model_path` -- confirm
# whether the fine-tuned weights should be loaded from `model_path` instead.
infloat_model_name = "intfloat/multilingual-e5-small"
e5_entries = ("multilingual-e5-small", "e5_small_fine_tuned_model")
model = load_model(infloat_model_name if model_name in e5_entries else model_name)

st.write(f"Modelul selectat: {model_name}")
st.write("Model loaded successfully!")

# Execution deliberately continues past this point so that the sections below
# can use `model` and `model_name`.
|
|
|
# Intent lookup: take an intents CSV, embed the user's text with the selected
# model, search the stored vector index for the nearest entry and report the
# intent found at that row of the CSV.

uploaded_file = st.file_uploader("Încarcă fișierul cu intenții", type="csv")

if uploaded_file is not None:
    data = pd.read_csv(uploaded_file)
    st.write("CSV file successfully uploaded!")
    # Keep the frame in session state so later runs without a fresh upload
    # can still use it.
    st.session_state.data = data
else:
    # No upload on this run: reuse the stored frame, or None if there is none.
    data = st.session_state.get("data")

if data is None:
    # Nothing to search against yet.
    st.stop()

# Both columns were required by the original code; report a missing one as a
# readable message instead of an uncaught KeyError.
missing_columns = [col for col in ("utterance", "intent") if col not in data.columns]
if missing_columns:
    st.error(f"CSV file is missing required column(s): {', '.join(missing_columns)}")
    st.stop()

intents = data["intent"].tolist()

user_text = st.text_input("Te rog introdu un text.")

if not user_text:
    st.write("Te rog introdu un text.")
    st.stop()

if st.button("Identifică Intenția"):
    start = time()
    st.write("Procesare text...")

    cleaned_text = clean_text(user_text)
    input_embedding = get_embeddings(model, [cleaned_text])
    normalized_embedding = normalize_embeddings(input_embedding)

    # Only the index is needed here; the stored embeddings are not used.
    _, index = load_embeddings_and_index(f"embeddings/{model_name}_vector_db.index")
    distances, positions = index.search(normalized_embedding, 1)

    # The returned position is used as a row number into the CSV. Reject
    # anything outside the list: a negative value would silently select from
    # the end, and a too-large one would raise IndexError.
    # NOTE(review): presumably a FAISS index, which reports -1 when it has no
    # neighbour to return -- confirm against src.E_Faiss_utils.
    match_position = int(positions[0][0])
    if not 0 <= match_position < len(intents):
        st.error(
            f"Could not map search result {match_position} to an intent; "
            "the index and the uploaded CSV appear to be out of sync."
        )
        st.stop()

    intent = intents[match_position]
    distance = distances[0][0]

    # Maps a distance of 0 to a score of 1, shrinking as distance grows.
    # NOTE(review): assumes the index returns non-negative distances, not
    # inner-product scores -- confirm how the index was built.
    similarity = 1 / (1 + distance)

    st.write(f"Intenția identificată: {intent}")
    st.write(f"Nivel de încredere: {similarity:.4f}")
    st.write(f"Timp de răspuns: {time() - start:.4f} secunde")

# End the run here; nothing below this section is executed.
st.stop()
|
|
|
# Second intent-lookup section: text box plus button, using the intents frame
# `data` and the selected `model` / `model_name` set earlier in the script.
# NOTE(review): this section follows an st.stop() call earlier in the file,
# so it looks unreachable -- confirm whether it is still needed.

input_text = st.text_input("Introdu mai jos textul! 👇", label_visibility="visible")

if st.button("Identifică Intenția"):
    if not input_text:
        st.write("Te rog introdu un text.")
    elif data is None:
        # Without the intents frame there is nothing to map a result onto.
        st.error("No intents CSV has been uploaded yet.")
    else:
        start = time()

        cleaned_text = clean_text(input_text)
        input_embedding = get_embeddings(model, [cleaned_text])
        normalized_embedding = normalize_embeddings(input_embedding)

        # Load the index for the selected model here; this section never
        # assigned `index` itself before using it.
        _, index = load_embeddings_and_index(f"embeddings/{model_name}_vector_db.index")
        distances, positions = index.search(normalized_embedding, 1)

        # The returned position is used as a row number into the CSV.
        intents = data["intent"].tolist()
        intent = intents[positions[0][0]]
        distance = distances[0][0]

        # Maps a distance of 0 to a score of 1, shrinking as distance grows.
        similarity = 1 / (1 + distance)

        st.write(f"Intenția identificată: {intent}")
        st.write(f"Nivel de încredere: {similarity:.4f}")
        st.write(f"Timp de răspuns: {time() - start:.4f} secunde")

st.stop()
|
|