Spaces:

georgeek
/

HF-LLM-Intent-Detection

Running

File size: 4,610 Bytes

5ecde30

import streamlit as st
'---'
# Page-wide CSS override injected as raw HTML: grey background for text
# inputs and a light-grey background for the page body.
_PAGE_STYLE = """
  <style>
    .stTextInput > div > div > input {
        background-color: #d3d3d3;
    }
    body {
        background-color: #f0f0f0;
    }
    </style>
    """
st.markdown(_PAGE_STYLE, unsafe_allow_html=True)

# Page heading, a separator and a short description of the model.
# NOTE(review): the bare '---' string statements appear to rely on Streamlit
# "magic" (bare literals are written to the page); keep them as statements.
st.header('Watson Assistant VDF TOBi improvement')
'---'
st.write('The model is trained on the TOBi 🤖 intents in Romanian language.')

import os
import pandas as pd
import re
from time import time
from src.E_Model_utils import load_model, train_model, get_embeddings
from src.E_Faiss_utils import load_embeddings_and_index, normalize_embeddings
from src.A_Preprocess import load_data, clean_text
import warnings
warnings.filterwarnings("ignore", category=FutureWarning)



# Sidebar selector for the embedding model. The "other" entry skips model
# loading entirely.
model_name = st.sidebar.radio(
    "Selectează modelul 👇",
    [
        "other",
        "e5_small_fine_tuned_model",
        "multilingual-e5-small",
        "all-MiniLM-L6-v2",
        "all-distilroberta-v1",
    ],
)

# Sidebar labels that are loaded under a different checkpoint id. Both e5
# entries currently resolve to the same base checkpoint; the fine-tuned
# weights under `model_path` are displayed below but not loaded.
_CHECKPOINT_ALIASES = {
    "multilingual-e5-small": "intfloat/multilingual-e5-small",
    "e5_small_fine_tuned_model": "intfloat/multilingual-e5-small",
}

# Load the selected model
if model_name != "other":
    # future improvement: add a loading spinner
    model_path = "output/fine-tuned-model"
    st.write("Model path:", model_path)

    # Any label without an alias is passed to load_model unchanged.
    checkpoint_id = _CHECKPOINT_ALIASES.get(model_name, model_name)
    model = load_model(checkpoint_id)

    st.write(f"Modelul selectat: {model_name}")
    st.write("Model loaded successfully!")

# NOTE(review): this unconditional stop ends the script run here, so nothing
# below this line is executed as the file stands.
st.stop()
# Upload the intents data (a CSV with `utterance` and `intent` columns).
uploaded_file = st.file_uploader("Încarcă fișierul cu intenții", type="csv")

if uploaded_file is not None:
    data = pd.read_csv(uploaded_file)
    st.write("CSV file successfully uploaded!")

    # Keep the parsed CSV in session state so it is still available on reruns.
    st.session_state.data = data
else:
    # If no file is uploaded, fall back to data stored by an earlier run.
    data = st.session_state.data if 'data' in st.session_state else None

if data is not None:
    # Report a missing column as a readable message instead of a KeyError.
    missing_columns = [col for col in ("utterance", "intent") if col not in data.columns]
    if missing_columns:
        st.error(f"Fișierul CSV nu conține coloanele necesare: {', '.join(missing_columns)}")
        st.stop()

    # Position i in this list is looked up with the label returned by the
    # index search below.
    intents = data['intent'].tolist()

    user_text = st.text_input("Te rog introdu un text.")

    if not user_text:
        st.write("Te rog introdu un text.")
    elif st.button("Identifică Intenția"):
        if model_name == "other":
            # No model is loaded for the "other" choice, so `model` would be
            # undefined in the embedding call below.
            st.error("Selectează un model din bara laterală înainte de a identifica intenția.")
            st.stop()

        start = time()
        st.write("Procesare text...")

        # Embed the cleaned query and normalise it before searching.
        cleaned_text = clean_text(user_text)
        input_embedding = get_embeddings(model, [cleaned_text])
        normalized_embedding = normalize_embeddings(input_embedding)

        # Only the index is needed here; the stored embeddings are ignored.
        _, index = load_embeddings_and_index(f"embeddings/{model_name}_vector_db.index")
        D, I = index.search(normalized_embedding, 1)  # Search for the nearest neighbour

        # Guard the label before indexing: a negative label (no neighbour
        # found) would otherwise silently select the last intent, and a label
        # beyond the CSV length would raise IndexError.
        nearest = int(I[0][0])
        if 0 <= nearest < len(intents):
            intent = intents[nearest]
            distance = D[0][0]

            # Map the returned distance into (0, 1]; smaller distance gives a
            # higher score.
            similarity = 1 / (1 + distance)

            st.write(f"Intenția identificată: {intent}")
            st.write(f"Nivel de încredere: {similarity:.4f}")
            st.write(f"Timp de răspuns: {time() - start:.4f} secunde")
        else:
            st.error("Nu am găsit nicio intenție potrivită pentru acest text.")


st.stop()
# Endpoint for intent identification.
# NOTE(review): as the file stands this section is never reached, because the
# script calls st.stop() immediately before it. It is kept runnable in case
# that stop is removed.
input_text = st.text_input("Introdu mai jos textul! 👇",  label_visibility="visible")

# Explicit key: another button in this script uses the same label, which would
# clash if both buttons were rendered in the same run.
if st.button("Identifică Intenția", key="identify_intent_standalone"):
    if not input_text:
        st.write("Te rog introdu un text.")
    elif model_name == "other":
        # No model is loaded for the "other" choice, so `model` is undefined.
        st.error("Selectează un model din bara laterală înainte de a identifica intenția.")
    elif data is None:
        # Without the uploaded CSV there are no intent labels to look up.
        st.error("Încarcă mai întâi fișierul cu intenții.")
    else:
        start = time()

        # Embed the cleaned query and normalise it before searching.
        cleaned_text = clean_text(input_text)
        input_embedding = get_embeddings(model, [cleaned_text])
        normalized_embedding = normalize_embeddings(input_embedding)

        # Load the index for the selected model; only the index is needed.
        _, index = load_embeddings_and_index(f"embeddings/{model_name}_vector_db.index")
        D, I = index.search(normalized_embedding, 1)  # Search for the nearest neighbour

        intents = data['intent'].tolist()

        # Guard the label before indexing: a negative label would silently
        # select the last intent, and an oversized one would raise IndexError.
        nearest = int(I[0][0])
        if 0 <= nearest < len(intents):
            intent = intents[nearest]
            distance = D[0][0]

            # Map the returned distance into (0, 1]; smaller distance gives a
            # higher score.
            similarity = 1 / (1 + distance)

            st.write(f"Intenția identificată: {intent}")
            st.write(f"Nivel de încredere: {similarity:.4f}")
            st.write(f"Timp de răspuns: {time() - start:.4f} secunde")
        else:
            st.error("Nu am găsit nicio intenție potrivită pentru acest text.")


st.stop()