---
tags:
- chatbot
---
|
import nltk |
|
import numpy as np |
|
import random |
|
|
|
class Chatbot:
    """Toy chatbot built on a randomly initialised next-word scorer.

    The "model" is a dense ``num_words x num_words`` weight matrix ``W`` plus
    a bias vector ``b``, both drawn from a standard normal distribution in
    :meth:`build_model`.  No method of this class trains those weights, so the
    predictions are arbitrary, although fixed for the lifetime of an instance.
    """

    def __init__(self, train_data):
        """Build the vocabulary and the scoring model from *train_data*.

        Args:
            train_data: Re-iterable collection of sentences, each of which is
                an iterable of word strings.  It is iterated once here and
                again on every free-form :meth:`respond_to` call, so a
                one-shot generator is not suitable.
        """
        self.train_data = train_data
        # Sorted list of distinct words once create_vocabulary() has run.
        self.vocabulary = set()
        # Word -> row/column position in W, and the inverse mapping.
        self.word_to_index = {}
        self.index_to_word = {}
        self.create_vocabulary()
        self.build_model()

    def create_vocabulary(self) -> None:
        """Collect the distinct training words and index them in sorted order.

        The vocabulary is rebuilt from scratch on every call, so calling this
        method again (for example after replacing ``train_data``) is safe.
        """
        words = set()
        for sentence in self.train_data:
            words.update(sentence)
        self.vocabulary = sorted(words)
        self.word_to_index = {
            word: index for index, word in enumerate(self.vocabulary)
        }
        self.index_to_word = dict(enumerate(self.vocabulary))

    def build_model(self) -> None:
        """Allocate random weights sized to the current vocabulary.

        NOTE: ``W`` is a dense square matrix, so memory grows quadratically
        with the number of distinct words in the training data.
        """
        self.num_words = len(self.vocabulary)
        self.W = np.random.randn(self.num_words, self.num_words)
        self.b = np.random.randn(self.num_words)

    def predict(self, sentence) -> str:
        """Return the highest-scoring next word for *sentence*.

        The sentence is treated as a bag of words: the rows of ``W`` that
        belong to its words are summed (one row per occurrence) and the bias
        is added.  This equals multiplying a word-count vector by ``W``, but
        never materialises that vector.

        Args:
            sentence: Iterable of word strings.  A plain string is split on
                whitespace first so that it is not iterated per character.
                Words missing from the vocabulary are ignored.

        Returns:
            The vocabulary word with the largest score.

        Raises:
            ValueError: If the vocabulary is empty.
        """
        if not self.num_words:
            raise ValueError("cannot predict with an empty vocabulary")
        if isinstance(sentence, str):
            sentence = sentence.split()

        known = self.word_to_index
        indices = np.asarray(
            [known[word] for word in sentence if word in known],
            dtype=np.intp,
        )
        # With no known words the row sum is all zeros and the bias decides.
        scores = self.W[indices].sum(axis=0) + self.b
        return self.index_to_word[int(np.argmax(scores))]

    def generate_text(self, start_text: str, max_length: int = 100) -> str:
        """Extend *start_text* by *max_length* predicted words.

        Args:
            start_text: Seed text; it is split on whitespace into words.
            max_length: Number of words to append.

        Returns:
            The seed words followed by the generated words, joined by single
            spaces.
        """
        words = start_text.split()
        for _ in range(max_length):
            # Each prediction sees the seed plus everything generated so far.
            words.append(self.predict(words))
        return " ".join(words)

    def respond_to(self, input_text: str) -> str:
        """Produce a reply to one line of user input.

        The input is lower-cased and tokenised with ``nltk.word_tokenize``.
        Two commands are recognised by their first token:

        * ``repeat <words>`` echoes the remaining tokens.
        * ``generate <words>`` returns :meth:`generate_text` seeded with the
          remaining tokens.

        Any other input selects the training sentence with the smallest
        Jaccard distance to the input tokens (the first one wins a tie) and
        maps each of its words through :meth:`predict`.

        Args:
            input_text: Raw text typed by the user.

        Returns:
            The reply, or an empty string when the input has no tokens or
            there is no training sentence to compare against.
        """
        input_words = nltk.word_tokenize(input_text.lower())
        if not input_words:
            return ""

        command, *arguments = input_words
        if command == "repeat":
            return " ".join(arguments)
        if command == "generate":
            return self.generate_text(" ".join(arguments))

        query = set(input_words)
        best_sentence = None
        best_distance = None
        for sentence in self.train_data:
            # The input was lower-cased above, so compare case-insensitively.
            candidate = {word.lower() for word in sentence}
            distance = nltk.jaccard_distance(candidate, query)
            if best_distance is None or distance < best_distance:
                best_sentence, best_distance = sentence, distance

        if best_sentence is None:
            return ""
        return " ".join(self.predict([word]) for word in best_sentence)
|
|
|
def main():
    """Run an interactive console chat trained on the NLTK Reuters corpus.

    Reads lines from standard input until the user types one of ``bye``,
    ``goodbye``, ``exit`` or ``quit`` (case-insensitive, surrounding
    whitespace ignored) or closes the input stream / presses Ctrl-C.  Blank
    lines are skipped instead of being sent to the chatbot.

    NOTE(review): this presumably requires the NLTK ``reuters`` corpus (and
    the tokenizer data used by ``nltk.word_tokenize``) to be installed
    locally -- confirm for the deployment environment.
    """
    # Load the training data.
    # NOTE(review): Chatbot allocates a dense vocabulary x vocabulary weight
    # matrix, so the complete corpus may need a lot of memory -- consider
    # training on a slice of the sentences if construction fails.
    train_data = nltk.corpus.reuters.sents()

    # Create the chatbot.
    chatbot = Chatbot(train_data)

    # Start a conversation.
    farewells = {"bye", "goodbye", "exit", "quit"}
    print("Chatbot: Hi, I'm a chatbot. What can I help you with?")
    while True:
        try:
            user_input = input("User: ").strip()
        except (EOFError, KeyboardInterrupt):
            # End the session cleanly instead of dumping a traceback.
            print()
            print("Chatbot: Goodbye!")
            break
        if not user_input:
            # Nothing to answer; prompt again.
            continue
        if user_input.lower() in farewells:
            print("Chatbot: Goodbye!")
            break
        response = chatbot.respond_to(user_input)
        print("Chatbot:", response)
|
|
|
# Start the chat loop only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()