"""Train the intent classifier and save it together with the per-tag responses.

Reads ``data/intents.csv`` (columns used: ``patterns``, ``tag``, ``responses``),
fits a TF-IDF + Multinomial Naive Bayes pipeline that predicts ``tag`` from
``patterns``, prints the accuracy on a 20% hold-out split, and writes two
joblib files under ``models/``:

* ``lms_chatbot.joblib`` -- the fitted pipeline
* ``responses.joblib``   -- dict mapping each tag to the list of its responses
"""

import os

import joblib
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import Pipeline

from preprocess import clean_text

DATA_PATH = "data/intents.csv"
MODEL_PATH = "models/lms_chatbot.joblib"
RESPONSES_PATH = "models/responses.joblib"

# Load dataset
df = pd.read_csv(DATA_PATH)

# Clean text; clean_text is a project helper applied to every pattern
# (its exact normalisation is defined in preprocess.py, not here).
df["patterns"] = df["patterns"].apply(clean_text)

# Features (patterns) and labels (tags)
X = df["patterns"]
y = df["tag"]

# Train/test split; the fixed random_state keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Build pipeline: raw text -> TF-IDF vectors -> Naive Bayes classifier
model = Pipeline(
    [
        ("tfidf", TfidfVectorizer()),
        ("clf", MultinomialNB()),
    ]
)

# Train
model.fit(X_train, y_train)

# Evaluate on the held-out split
y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))

# joblib.dump does not create missing parent directories, so make sure the
# output folder exists instead of failing after training has finished.
for _out_path in (MODEL_PATH, RESPONSES_PATH):
    os.makedirs(os.path.dirname(_out_path), exist_ok=True)

# Save model
joblib.dump(model, MODEL_PATH)

# Save responses grouped by tag: {tag: [response, ...]}
responses = df.groupby("tag")["responses"].apply(list).to_dict()
joblib.dump(responses, RESPONSES_PATH)

print("✅ Training complete. Model and responses saved.")