File size: 3,157 Bytes

d3a22aa

---
tags:
- chatbot
---
import nltk
import numpy as np
import random

class Chatbot:
    """Toy next-word chatbot backed by a randomly initialised linear model.

    The vocabulary is collected from ``train_data`` and every word is mapped
    to an integer index.  The "model" is a weight matrix ``W`` and a bias
    vector ``b`` drawn from a standard normal distribution; nothing in this
    class trains them, so predictions are arbitrary unless the caller
    assigns meaningful weights.

    NOTE: ``W`` has shape ``(num_words, num_words)``, so its memory
    footprint grows quadratically with the vocabulary size.
    """

    def __init__(self, train_data):
        """Build the vocabulary and the model from ``train_data``.

        Args:
            train_data: Collection of tokenised sentences, each an iterable
                of word strings.  It is iterated here and again on every
                ``respond_to`` call, so it must be re-iterable (not a
                one-shot generator).
        """
        self.train_data = train_data
        self.vocabulary = set()
        self.word_to_index = {}
        self.index_to_word = {}
        self.create_vocabulary()
        self.build_model()

    def create_vocabulary(self):
        """Collect the sorted vocabulary and both word/index lookup tables.

        The vocabulary is rebuilt from scratch on every call, so calling
        this method more than once is safe.
        """
        self.vocabulary = sorted(
            {word for sentence in self.train_data for word in sentence}
        )
        self.word_to_index = {
            word: i for i, word in enumerate(self.vocabulary)
        }
        self.index_to_word = dict(enumerate(self.vocabulary))

    def build_model(self):
        """Create randomly initialised weights sized to the vocabulary."""
        self.num_words = len(self.vocabulary)
        self.W = np.random.randn(self.num_words, self.num_words)
        self.b = np.random.randn(self.num_words)

    def predict(self, sentence):
        """Return the highest-scoring next word for ``sentence``.

        Args:
            sentence: Iterable of word tokens.  A plain string is split on
                whitespace first so it is not iterated character by
                character.  Words missing from the vocabulary are ignored.

        Returns:
            The vocabulary word with the largest score.  When no token is
            known, the scores reduce to the bias vector alone.

        Raises:
            ValueError: If the vocabulary is empty (there is nothing to
                take the argmax over).
        """
        if isinstance(sentence, str):
            sentence = sentence.split()

        # Skip out-of-vocabulary words instead of raising KeyError: user
        # input routinely contains words the training data never saw.
        known = [
            self.word_to_index[word]
            for word in sentence
            if word in self.word_to_index
        ]
        # An explicit integer dtype keeps an empty selection valid.
        indices = np.asarray(known, dtype=np.intp)

        # Summing the selected rows of W equals multiplying a bag-of-words
        # count vector by W, without requiring the sentence length to match
        # the vocabulary size (which np.dot on the raw index list did).
        scores = self.W[indices].sum(axis=0) + self.b

        return self.index_to_word[int(np.argmax(scores))]

    def generate_text(self, start_text, max_length=100):
        """Extend ``start_text`` by ``max_length`` predicted words.

        Args:
            start_text: Seed text; it is split on whitespace into tokens.
            max_length: Number of words to append.

        Returns:
            The seed tokens followed by the generated words, joined with
            single spaces.
        """
        # Work on a token list: predict() expects words, not characters.
        tokens = start_text.split()
        for _ in range(max_length):
            tokens.append(self.predict(tokens))
        return " ".join(tokens)

    def respond_to(self, input_text):
        """Produce a reply to ``input_text``.

        The input is lower-cased and tokenised with NLTK.  Two commands are
        recognised by their first token:

        * ``repeat <text>``   -- echo the remaining tokens.
        * ``generate <text>`` -- continue the remaining tokens with
          ``generate_text``.

        Any other input is matched against the training sentences by
        Jaccard distance, and the reply is the predicted next word for each
        word of the closest sentence.

        Returns:
            The reply string; an empty string when the input has no tokens
            or there is no training data to match against.
        """
        # Blank input has no first token to dispatch on.
        if not input_text or not input_text.strip():
            return ""
        input_words = nltk.word_tokenize(input_text.lower())
        if not input_words:
            return ""

        command = input_words[0]
        remainder = " ".join(input_words[1:])
        if command == "repeat":
            return remainder
        if command == "generate":
            return self.generate_text(remainder)

        # The query is lower-cased, so lower-case the candidate sentences
        # too; otherwise capitalised corpus tokens could never match.
        query = set(input_words)
        closest = min(
            self.train_data,
            key=lambda sentence: nltk.jaccard_distance(
                {word.lower() for word in sentence}, query
            ),
            default=None,
        )
        if closest is None:
            return ""

        return " ".join(self.predict([word]) for word in closest)

def main():
    """Run an interactive console chat backed by the NLTK Reuters corpus.

    Reads user lines until one of the farewell words is entered, printing
    the chatbot's reply to every other line.
    """
    farewells = ("bye", "goodbye", "exit", "quit")

    # Train on the tokenised Reuters sentences.
    chatbot = Chatbot(nltk.corpus.reuters.sents())

    print("Chatbot: Hi, I'm a chatbot. What can I help you with?")
    while True:
        user_input = input("User: ")
        if user_input.lower() in farewells:
            break
        print("Chatbot:", chatbot.respond_to(user_input))

    # Reached only through the farewell break above.
    print("Chatbot: Goodbye!")

# Start the interactive session only when run as a script, not on import.
if __name__ == "__main__":
    main()