File size: 1,183 Bytes
5ecde30
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
import pandas as pd
from E_Model_utils import train_model, get_embeddings
from E_Faiss_utils import load_faiss_index, normalize_embeddings
from A_Preprocess import load_data

# Load the labelled intents. The order of this list is what maps a search
# result position back to an intent label further down.
# NOTE(review): absolute, machine-specific path -- the script only runs on a
# machine where this file exists.
data_file_path = r"C:\Users\serban.tica\Documents\Intent_detection\data\Pager_Intents.csv"
data = load_data(data_file_path)
intentions = data['intent'].tolist()

# Models to evaluate: display name -> model identifier handed to train_model.
# The display name is also used to build the index file name below.
models = {
    "mBERT": "bert-base-multilingual-cased",
    "XLM-R": "xlm-roberta-base",
    "Romanian BERT": "dumitrescustefan/bert-base-romanian-cased-v1"
}

# Sample query used for every model; it does not change between iterations,
# so it is defined once instead of inside the loop.
input_text = "exemplu de text"

# Evaluate models
for model_name, model_path in models.items():
    print(f"Evaluating model: {model_name}")
    model = train_model(model_path)
    index = load_faiss_index(f"embeddings/{model_name}_vector_db.index")

    # Embed the sample text and normalize it before querying the index.
    input_embedding = get_embeddings(model, [input_text]).cpu().numpy()
    normalized_embedding = normalize_embeddings(input_embedding)

    # Search for the single nearest neighbour.
    D, I = index.search(normalized_embedding, 1)
    neighbour_position = int(I[0][0])

    # Assuming load_faiss_index returns a FAISS index (TODO confirm): FAISS
    # fills missing results with label -1, which plain list indexing would
    # silently turn into "last intent". A position past the end of the list
    # means the index and the CSV are out of sync. Report both cases instead
    # of printing a wrong label or aborting the remaining models.
    if not 0 <= neighbour_position < len(intentions):
        print(
            f"No valid nearest neighbour for {model_name}: index returned "
            f"position {neighbour_position}, but {len(intentions)} intents are loaded"
        )
        continue

    intent = intentions[neighbour_position]

    # NOTE(review): D[0][0] is printed as a confidence level; whether a higher
    # value means "closer" depends on the metric the index was built with --
    # confirm against the index-building code.
    print(f"Intenția identificată de {model_name}: {intent} cu nivel de încredere: {float(D[0][0])}")