import random

import numpy as np
import requests
from bs4 import BeautifulSoup
# Tokenizer/pad_sequences were dropped from standalone Keras 3; the
# tensorflow.keras namespace in TF 2.x still provides them.
from tensorflow.keras.layers import LSTM, Dense, Embedding
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer

# Predefined topics, their search queries, and corresponding URLs
topics = {
    "Technology": {
        "query": "latest technology news",
        "urls": [
            "https://geeksforgeeks.org",
            "https://theverge.com",
        ],
    },
    "Science": {
        "query": "latest science discoveries",
        "urls": [
            "https://oercommons.org/hubs/NSDL",
        ],
    },
    "History": {
        "query": "historical events",
        "urls": [
            "https://history.com",
        ],
    },
    "Math": {
        "query": "",
        "urls": [],
    },
}

# Randomly select a topic
selected_topic = random.choice(list(topics.keys()))
print(f"Selected topic: {selected_topic}")


# Fetch data from a predefined URL with a search query
def fetch_data(url, query):
    search_url = f"{url}/search?q={query}"
    try:
        response = requests.get(search_url, timeout=10)
        response.raise_for_status()
    except requests.RequestException:
        return ""  # skip sources that are unreachable or reject the request
    soup = BeautifulSoup(response.content, 'html.parser')
    return soup.get_text()


# Generate a basic arithmetic problem and its solution
def solve_math_problem():
    operations = ['+', '-', '*', '/']
    # randint(1, 100) never returns 0, so division by zero cannot occur
    num1 = random.randint(1, 100)
    num2 = random.randint(1, 100)
    operation = random.choice(operations)
    problem = f"{num1} {operation} {num2}"
    solution = eval(problem)
    return problem, solution


# Load scraped data, or use a dummy string for the Math topic
if selected_topic != "Math":
    data = ""
    for url in topics[selected_topic]["urls"]:
        data += fetch_data(url, topics[selected_topic]["query"])
    if not data.strip():
        # Fall back to a dummy string so tokenization below still has input
        data = "This is a sample text for the selected topic."
else:
    # Dummy data string for tokenization and sequence generation
    data = "This is a sample text for math topic."

# Tokenization
tokenizer = Tokenizer()
tokenizer.fit_on_texts([data])
sequence_data = tokenizer.texts_to_sequences([data])[0]

# Create (current word -> next word) training pairs
X = []
Y = []
for i in range(len(sequence_data) - 1):
    X.append(sequence_data[i:i + 1])
    Y.append(sequence_data[i + 1])

# Padding sequences
X = pad_sequences(X, maxlen=1)
Y = np.array(Y)

# Defining a lighter model
model = Sequential()
model.add(Embedding(len(tokenizer.word_index) + 1, 10, input_length=1))
model.add(LSTM(10))  # small LSTM to keep training cheap
model.add(Dense(len(tokenizer.word_index) + 1, activation='softmax'))

# Compiling the model
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam',
              metrics=['accuracy'])
model.summary()

# Training the model with fewer epochs
model.fit(X, Y, epochs=10, verbose=1)


# Generate text based on input, one predicted word at a time
def generate_text(model, tokenizer, max_sequence_len, input_text, num_words):
    for _ in range(num_words):
        token_list = tokenizer.texts_to_sequences([input_text])[0]
        token_list = pad_sequences([token_list], maxlen=max_sequence_len - 1,
                                   padding='pre')
        # predict_classes() was removed in recent Keras; take the argmax of
        # the softmax output instead
        predicted = int(np.argmax(model.predict(token_list, verbose=0), axis=-1)[0])
        output_word = ""
        for word, index in tokenizer.word_index.items():
            if index == predicted:
                output_word = word
                break
        input_text += " " + output_word
    return input_text


# Initial input text and number of words to generate
initial_input_text = "This is a generated text"
num_words = 100

# Generate text
generated_text = generate_text(model, tokenizer, 2, initial_input_text, num_words)

# Append the math problem and solution if the topic is Math
if selected_topic == "Math":
    math_problem, math_solution = solve_math_problem()
    final_text = (f"{generated_text}\n\n"
                  f"Math Problem: {math_problem}\nSolution: {math_solution}")
else:
    final_text = generated_text

print(final_text)