File size: 3,157 Bytes
d3a22aa |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 |
---
tags:
- chatbot
---
import nltk
import numpy as np
import random
class Chatbot:
    """A toy next-word-prediction chatbot.

    Builds a vocabulary from tokenized training sentences and scores
    candidate next words with a randomly initialized linear model (W, b).
    NOTE(review): the weights are never trained, so predictions are
    effectively random — but deterministic for a fixed NumPy RNG state.
    """

    def __init__(self, train_data):
        """train_data: iterable of tokenized sentences (lists of words)."""
        self.train_data = train_data
        self.vocabulary = set()
        self.word_to_index = {}
        self.index_to_word = {}
        self.create_vocabulary()
        self.build_model()

    def create_vocabulary(self):
        """Collect every distinct training word and build index mappings."""
        for sentence in self.train_data:
            for word in sentence:
                self.vocabulary.add(word)
        # Sort so the word <-> index mapping is deterministic across runs.
        self.vocabulary = sorted(self.vocabulary)
        self.word_to_index = {word: i for i, word in enumerate(self.vocabulary)}
        self.index_to_word = {i: word for i, word in enumerate(self.vocabulary)}

    def build_model(self):
        """Initialize an (untrained) linear next-word scoring model."""
        self.num_words = len(self.vocabulary)
        self.W = np.random.randn(self.num_words, self.num_words)
        self.b = np.random.randn(self.num_words)

    def predict(self, sentence):
        """Return the most probable next word for a tokenized sentence.

        BUG FIX: the original computed ``np.dot(indices, self.W)``, which
        is only dimensionally valid when ``len(sentence) == num_words``
        and, worse, uses the raw index *values* as feature weights. The
        intended bag-of-words score is the sum of the W rows selected by
        the word indices. Unknown words are now skipped instead of
        raising KeyError; with no known words the bias alone decides.

        Raises:
            ValueError: if the model was built from an empty vocabulary.
        """
        if not self.vocabulary:
            raise ValueError("empty vocabulary: no training data")
        # Keep only in-vocabulary words (original raised KeyError here).
        indices = [self.word_to_index[w] for w in sentence if w in self.word_to_index]
        # Sum of selected rows; an empty selection contributes zeros.
        probabilities = self.W[indices].sum(axis=0) + self.b
        return self.index_to_word[int(np.argmax(probabilities))]

    def generate_text(self, start_text, max_length=100):
        """Extend start_text by up to max_length predicted words.

        BUG FIX: the original appended to a *string* and fed it back to
        predict(), which then iterated characters, not words. We tokenize
        once (whitespace split) and carry a word list instead.
        """
        words = start_text.split()
        for _ in range(max_length):
            words.append(self.predict(words))
        return " ".join(words)

    def respond_to(self, input_text):
        """Produce a reply for raw user input.

        Supports two commands ("repeat ...", "generate ..."); otherwise
        replies with a word-by-word prediction seeded by the training
        sentence closest to the input under Jaccard distance.
        """
        input_words = nltk.word_tokenize(input_text.lower())
        if not input_words:
            # BUG FIX: original indexed input_words[0] and crashed on
            # empty input.
            return "Please say something."
        if input_words[0] == "repeat":
            return " ".join(input_words[1:])
        if input_words[0] == "generate":
            return self.generate_text(" ".join(input_words[1:]))
        # Smallest Jaccard distance == most similar training sentence.
        similarity_scores = [
            nltk.jaccard_distance(set(sentence), set(input_words))
            for sentence in self.train_data
        ]
        most_similar_sentence = self.train_data[int(np.argmin(similarity_scores))]
        # Predict one follow-up word per word of the chosen sentence.
        response = [self.predict([word]) for word in most_similar_sentence]
        return " ".join(response)
def main():
    """Run an interactive chat loop over a Reuters-trained chatbot."""
    # Build the bot from the tokenized Reuters corpus sentences.
    chatbot = Chatbot(nltk.corpus.reuters.sents())
    print("Chatbot: Hi, I'm a chatbot. What can I help you with?")
    farewells = ["bye", "goodbye", "exit", "quit"]
    while True:
        user_input = input("User: ")
        # A farewell word (case-insensitive) ends the session.
        if user_input.lower() in farewells:
            print("Chatbot: Goodbye!")
            break
        print("Chatbot:", chatbot.respond_to(user_input))
# Entry-point guard: start the chat loop only when run as a script,
# not when this module is imported. (Stray trailing "|" artifact on the
# original final line removed — it was a syntax error as written.)
if __name__ == "__main__":
    main()