|
import operator
import random
from urllib.parse import quote_plus

import numpy as np
import requests
from bs4 import BeautifulSoup
from keras.layers import LSTM, Embedding, Dense
from keras.models import Sequential
from keras.preprocessing.sequence import pad_sequences
from keras.preprocessing.text import Tokenizer
|
|
|
|
|
# Topic catalogue: each entry maps a topic name to a search query string and a
# list of sites whose search pages will be scraped for training text.
# "Math" is special-cased: it has no query/urls because its text is generated
# locally (see the data-collection branch below) and it appends an arithmetic
# problem to the final output.
topics = {

    "Technology": {

        "query": "latest technology news",

        "urls": [

            "https://geeksforgeeks.org",

            "https://theverge.com",

        ]

    },

    "Science": {

        "query": "latest science discoveries",

        "urls": [

            "https://oercommons.org/hubs/NSDL",

        ]

    },

    "History": {

        "query": "historical events",

        "urls": [

            "https://history.com",

        ]

    },

    "Math": {

        "query": "",

        "urls": []

    }

}




# Pick one topic uniformly at random for this run.
selected_topic = random.choice(list(topics.keys()))

print(f"Selected topic: {selected_topic}")
|
|
|
|
|
def fetch_data(url, query):
    """Fetch a site's search-results page for *query* and return its visible text.

    Args:
        url: Base URL of the site (no trailing slash), e.g. "https://theverge.com".
        query: Free-text search query; it is percent-encoded before use.

    Returns:
        str: All text extracted from the fetched HTML page.

    Raises:
        requests.RequestException: On network failure, timeout, or a non-2xx
            HTTP status.
    """
    # quote_plus encodes spaces and special characters so the query string is
    # a valid URL component (the raw query contains spaces, e.g. "latest
    # technology news").
    search_url = f"{url}/search?q={quote_plus(query)}"
    # timeout prevents the script from hanging indefinitely on a stalled host.
    response = requests.get(search_url, timeout=10)
    # Fail loudly on HTTP errors instead of silently training on an error page.
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser')
    return soup.get_text()
|
|
|
|
|
def solve_math_problem():
    """Generate a random arithmetic problem and compute its solution.

    Operands are drawn uniformly from 1..100, the operator uniformly from
    +, -, *, /. Because randint(1, 100) can never return 0, division by zero
    is impossible (the original code carried a dead re-roll loop for this).

    Returns:
        tuple[str, int | float]: The problem as "a op b" and its numeric
        solution ('/' is true division, so it yields a float).
    """
    # Dispatch table instead of eval() on a built string: same results,
    # no code-execution primitive involved.
    ops = {
        '+': operator.add,
        '-': operator.sub,
        '*': operator.mul,
        '/': operator.truediv,
    }
    num1 = random.randint(1, 100)
    num2 = random.randint(1, 100)
    symbol = random.choice(list(ops))

    problem = f"{num1} {symbol} {num2}"
    solution = ops[symbol](num1, num2)
    return problem, solution
|
|
|
|
|
# Assemble the raw training text for the selected topic. The "Math" topic has
# no URLs to scrape, so it falls back to a fixed local sample string.
if selected_topic == "Math":
    data = "This is a sample text for math topic."
else:
    topic_info = topics[selected_topic]
    data = "".join(
        fetch_data(site, topic_info["query"]) for site in topic_info["urls"]
    )
|
|
|
|
|
# Fit a word-level tokenizer on the collected text and turn it into one long
# integer sequence.
tokenizer = Tokenizer()
tokenizer.fit_on_texts([data])
sequence_data = tokenizer.texts_to_sequences([data])[0]

# Build (current word -> next word) training pairs: X holds each token as a
# length-1 sequence, Y holds the token that follows it.
X = [[current] for current, _ in zip(sequence_data, sequence_data[1:])]
Y = np.array(sequence_data[1:])

# pad_sequences normalises X into a 2-D int array of shape (samples, 1).
X = pad_sequences(X, maxlen=1)
|
|
|
|
|
# One-word-in, one-word-out next-word predictor: embedding -> LSTM -> softmax
# over the whole vocabulary (index 0 is reserved by the tokenizer, hence +1).
vocab_size = len(tokenizer.word_index) + 1
model = Sequential([
    Embedding(vocab_size, 10, input_length=1),
    LSTM(10),
    Dense(vocab_size, activation='softmax'),
])

# Targets are integer class indices, so sparse categorical cross-entropy.
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()

model.fit(X, Y, epochs=10, verbose=1)
|
|
|
|
|
def generate_text(model, tokenizer, max_sequence_len, input_text, num_words):
    """Extend *input_text* by *num_words* words predicted one at a time.

    Each step tokenizes the text so far, keeps the last (max_sequence_len - 1)
    tokens pre-padded with zeros, asks the model for next-word probabilities,
    and appends the highest-probability word.

    Args:
        model: Trained Keras model; `predict` must return per-class
            probabilities of shape (1, vocab_size).
        tokenizer: Fitted Keras Tokenizer (uses `texts_to_sequences` and
            `word_index`).
        max_sequence_len: Model input length + 1 (matches training setup).
        input_text: Seed string to extend.
        num_words: Number of words to append.

    Returns:
        str: The seed text with the generated words appended.
    """
    maxlen = max_sequence_len - 1
    for _ in range(num_words):
        token_list = tokenizer.texts_to_sequences([input_text])[0]
        # Keep the last `maxlen` tokens and left-pad with zeros — equivalent to
        # keras pad_sequences(..., maxlen=maxlen, padding='pre').
        token_list = token_list[-maxlen:] if maxlen > 0 else []
        padded = np.array([[0] * (maxlen - len(token_list)) + token_list])
        # BUG FIX: model.predict_classes() was removed from Keras (TF >= 2.6);
        # take the argmax over the predicted probability distribution instead.
        predicted = int(np.argmax(model.predict(padded, verbose=0)[0]))
        output_word = ""
        for word, index in tokenizer.word_index.items():
            if index == predicted:
                output_word = word
                break
        input_text += " " + output_word
    return input_text
|
|
|
|
|
# Seed the generator and produce 100 words of continuation.
initial_input_text = "This is a generated text"
num_words = 100

generated_text = generate_text(model, tokenizer, 2, initial_input_text, num_words)

# For the "Math" topic, append a freshly generated arithmetic problem and its
# answer; other topics print the generated text alone.
final_text = generated_text
if selected_topic == "Math":
    problem, answer = solve_math_problem()
    final_text = f"{generated_text}\n\nMath Problem: {problem}\nSolution: {answer}"

print(final_text)
|
|