# Install nltk and numpy, import them, and download the NLTK 'punkt' resource.
import os
import subprocess
import sys

# Run pip through sys.executable so the packages are installed for the
# interpreter executing this script rather than whichever `pip` happens to be
# first on PATH. check=False keeps the install best-effort: a failed install
# does not raise here (the imports below will fail instead if a package is
# actually missing).
subprocess.run(
    [sys.executable, "-m", "pip", "install", "nltk", "numpy"],
    check=False,
)

import nltk
import numpy as np

# NOTE(review): presumably this is the tokenizer data nltk.word_tokenize
# relies on; newer NLTK releases may require 'punkt_tab' instead -- confirm
# against the installed NLTK version.
nltk.download('punkt')
def train_model(corpus):
    """Build a token-to-successors table from a text corpus.

    The corpus is split into tokens with ``nltk.word_tokenize``. Every token
    that has a following token becomes a key, and each token observed
    immediately after it is appended to that key's list.

    Args:
        corpus: The text to tokenize.

    Returns:
        A dict mapping each token to a list of the tokens that directly
        followed it, in order of occurrence. Repeated successors are kept,
        so a successor seen several times appears several times. A corpus
        with fewer than two tokens yields an empty dict.
    """
    tokens = nltk.word_tokenize(corpus)
    model = {}
    # zip stops at the shorter sequence, so this walks every adjacent
    # (token, next token) pair and never indexes past the end.
    for current, following in zip(tokens, tokens[1:]):
        model.setdefault(current, []).append(following)
    return model
import pickle

# Train the model on the corpus stored in corpus.txt.
# The file is read inside a context manager so the handle is closed as soon
# as the text has been loaded; the platform default text encoding is used.
with open('corpus.txt') as corpus_file:
    corpus = corpus_file.read()
model = train_model(corpus)

# Save the model to a file.
with open("model.pkl", "wb") as f:
    pickle.dump(model, f)