import random

import numpy as np
import requests
from bs4 import BeautifulSoup
# Tokenizer/pad_sequences were dropped from standalone Keras 3; the
# tensorflow.keras namespace in TF 2.x still provides them.
from tensorflow.keras.layers import LSTM, Dense, Embedding
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer

# Predefined topics, their search queries, and corresponding URLs
topics = {
    "Technology": {
        "query": "latest technology news",
        "urls": [
            "https://geeksforgeeks.org",
            "https://theverge.com",
        ],
    },
    "Science": {
        "query": "latest science discoveries",
        "urls": [
            "https://oercommons.org/hubs/NSDL",
        ],
    },
    "History": {
        "query": "historical events",
        "urls": [
            "https://history.com",
        ],
    },
    "Math": {
        "query": "",
        "urls": [],
    },
}

# Randomly select a topic
selected_topic = random.choice(list(topics.keys()))
print(f"Selected topic: {selected_topic}")


# Fetch data from a predefined URL with a search query
def fetch_data(url, query):
    search_url = f"{url}/search?q={query}"
    try:
        response = requests.get(search_url, timeout=10)
        response.raise_for_status()
    except requests.RequestException:
        return ""  # skip sources that are unreachable or reject the request
    soup = BeautifulSoup(response.content, 'html.parser')
    return soup.get_text()


# Generate a basic arithmetic problem and its solution
def solve_math_problem():
    operations = ['+', '-', '*', '/']
    # randint(1, 100) never returns 0, so division by zero cannot occur
    num1 = random.randint(1, 100)
    num2 = random.randint(1, 100)
    operation = random.choice(operations)
    problem = f"{num1} {operation} {num2}"
    solution = eval(problem)
    return problem, solution


# Load scraped data, or use a dummy string for the Math topic
if selected_topic != "Math":
    data = ""
    for url in topics[selected_topic]["urls"]:
        data += fetch_data(url, topics[selected_topic]["query"])
    if not data.strip():
        # Fall back to a dummy string so tokenization below still has input
        data = "This is a sample text for the selected topic."
else:
    # Dummy data string for tokenization and sequence generation
    data = "This is a sample text for math topic."

# Tokenization
tokenizer = Tokenizer()
tokenizer.fit_on_texts([data])
sequence_data = tokenizer.texts_to_sequences([data])[0]

# Create (current word -> next word) training pairs
X = []
Y = []
for i in range(len(sequence_data) - 1):
    X.append(sequence_data[i:i + 1])
    Y.append(sequence_data[i + 1])

# Padding sequences
X = pad_sequences(X, maxlen=1)
Y = np.array(Y)

# Defining a lighter model
model = Sequential()
model.add(Embedding(len(tokenizer.word_index) + 1, 10, input_length=1))
model.add(LSTM(10))  # small LSTM to keep training cheap
model.add(Dense(len(tokenizer.word_index) + 1, activation='softmax'))

# Compiling the model
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam',
              metrics=['accuracy'])
model.summary()

# Training the model with fewer epochs
model.fit(X, Y, epochs=10, verbose=1)


# Generate text based on input, one predicted word at a time
def generate_text(model, tokenizer, max_sequence_len, input_text, num_words):
    for _ in range(num_words):
        token_list = tokenizer.texts_to_sequences([input_text])[0]
        token_list = pad_sequences([token_list], maxlen=max_sequence_len - 1,
                                   padding='pre')
        # predict_classes() was removed in recent Keras; take the argmax of
        # the softmax output instead
        predicted = int(np.argmax(model.predict(token_list, verbose=0), axis=-1)[0])
        output_word = ""
        for word, index in tokenizer.word_index.items():
            if index == predicted:
                output_word = word
                break
        input_text += " " + output_word
    return input_text


# Initial input text and number of words to generate
initial_input_text = "This is a generated text"
num_words = 100

# Generate text
generated_text = generate_text(model, tokenizer, 2, initial_input_text, num_words)

# Append the math problem and solution if the topic is Math
if selected_topic == "Math":
    math_problem, math_solution = solve_math_problem()
    final_text = (f"{generated_text}\n\n"
                  f"Math Problem: {math_problem}\nSolution: {math_solution}")
else:
    final_text = generated_text

print(final_text)