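"""
Randomly pick a topic, gather text for it (scraping predefined sites, or using
placeholder text for the "Math" topic), train a tiny word-level LSTM language
model on that text, and print generated output; for "Math", a random arithmetic
problem and its solution are appended.

Written against the Keras 2.x preprocessing API (Tokenizer / pad_sequences);
newer Keras releases may require different imports.
"""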
import requests
from bs4 import BeautifulSoup
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import LSTM, Embedding, Dense
import numpy as np
import random

# List of predefined topics, their queries, and corresponding URLs
topics = {
    "Technology": {
        "query": "latest technology news",
        "urls": [
            "https://geeksforgeeks.org",
            "https://theverge.com",
        ]
    },
    "Science": {
        "query": "latest science discoveries",
        "urls": [
            "https://oercommons.org/hubs/NSDL",
        ]
    },
    "History": {
        "query": "historical events",
        "urls": [
            "https://history.com",
        ]
    },
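    # "Math" has no query or URLs; an arithmetic problem is generated locally instead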
    "Math": {
        "query": "",
        "urls": []
    }
}

# Randomly select a topic
selected_topic = random.choice(list(topics.keys()))
print(f"Selected topic: {selected_topic}")

# Fetch data from predefined URLs with queries
def fetch_data(url, query):
    # The /search?q= pattern is an assumption and may not exist on every listed site
    response = requests.get(f"{url}/search", params={"q": query}, timeout=10)
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser')
    return soup.get_text()

# Function to generate a random arithmetic problem and its solution
def solve_math_problem():
    operations = ['+', '-', '*', '/']
    num1 = random.randint(1, 100)
    num2 = random.randint(1, 100)  # randint(1, 100) never returns 0, so division by zero cannot occur
    operation = random.choice(operations)

    problem = f"{num1} {operation} {num2}"
    # eval is safe here because the expression is built only from values generated above
    solution = eval(problem)
    return problem, solution

# Load scraped data, or fall back to placeholder text for the Math topic
if selected_topic != "Math":
    data = ""
    for url in topics[selected_topic]["urls"]:
        try:
            data += fetch_data(url, topics[selected_topic]["query"])
        except requests.RequestException as exc:
            print(f"Skipping {url}: {exc}")
else:
    # Create a dummy data string for tokenization and sequence generation
    data = "This is a sample text for math topic."

# Tokenization
tokenizer = Tokenizer()
tokenizer.fit_on_texts([data])
sequence_data = tokenizer.texts_to_sequences([data])[0]

# Creating training pairs: each single token predicts the next token (a word-level bigram model)
X = []
Y = []
for i in range(len(sequence_data) - 1):
    X.append(sequence_data[i:i+1])
    Y.append(sequence_data[i+1])

# Padding sequences
X = pad_sequences(X, maxlen=1)
Y = np.array(Y)

# Defining a small model so training stays fast on scraped text
model = Sequential()
model.add(Embedding(len(tokenizer.word_index) + 1, 10, input_length=1))
model.add(LSTM(10))  # small LSTM layer to keep the model light
model.add(Dense(len(tokenizer.word_index) + 1, activation='softmax'))

# Compiling the model
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()

# Training the model (a handful of epochs is enough for this toy example)
model.fit(X, Y, epochs=10, verbose=1)

# Function to generate text based on input
def generate_text(model, tokenizer, max_sequence_len, input_text, num_words):
    for _ in range(num_words):
        token_list = tokenizer.texts_to_sequences([input_text])[0]
        token_list = pad_sequences([token_list], maxlen=max_sequence_len - 1, padding='pre')
        # predict_classes was removed in newer Keras; take the argmax of the predicted distribution
        predicted = int(np.argmax(model.predict(token_list, verbose=0), axis=-1)[0])
        # Map the predicted index back to its word (empty string if not found)
        output_word = tokenizer.index_word.get(predicted, "")
        input_text += " " + output_word
    return input_text

# Get initial input text and number of words to generate
initial_input_text = "This is a generated text"
num_words = 100  # Number of words to generate

# Generate text (max_sequence_len=2 makes maxlen - 1 match the model's input length of 1)
generated_text = generate_text(model, tokenizer, 2, initial_input_text, num_words)

# Append the math problem and solution to the generated text if the topic is math
if selected_topic == "Math":
    math_problem, math_solution = solve_math_problem()
    final_text = f"{generated_text}\n\nMath Problem: {math_problem}\nSolution: {math_solution}"
else:
    final_text = generated_text

print(final_text)