SkillForge45 committed on
Commit
2f81cdc
verified
1 Parent(s): 0a1bc71

Create model.py

Browse files
Files changed (1) hide show
  1. model.py +246 -0
model.py ADDED
@@ -0,0 +1,246 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import re
import urllib.request
from io import StringIO
from urllib.parse import quote

import numpy as np
import pandas as pd
import pyttsx3
import requests
import speech_recognition as sr
from bs4 import BeautifulSoup
from googlesearch import search
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
13
+
14
class HybridChatBot:
    """Chatbot that answers from a question/answer table, with web search and voice I/O.

    Incoming messages are compared with the stored questions using TF-IDF
    vectors and cosine similarity. Messages that contain a search trigger
    phrase ("search the web for", "find online") are routed to a web search
    instead of the question/answer lookup.
    """

    # Seconds to wait for the dataset download before giving up.
    REQUEST_TIMEOUT = 10
    # A stored question must score strictly above this to count as a match.
    SIMILARITY_THRESHOLD = 0.5
    # Upper bound, in characters, on a web-search reply built by get_response.
    MAX_RESPONSE_CHARS = 2000
    # Messages that end the interactive loop (compared case-insensitively).
    _EXIT_WORDS = ("exit", "stop")
    # Matches either trigger phrase regardless of letter case.
    _SEARCH_TRIGGER_RE = re.compile(r"search the web for|find online", re.IGNORECASE)

    def __init__(self, dataset_url=None):
        """Set up the matcher and voice engines, then optionally load a dataset.

        Args:
            dataset_url: URL of a CSV or JSON file with "question" and
                "answer" columns. When given, the dataset is downloaded and
                the model is trained immediately.
        """
        self.dataset_url = dataset_url
        # Lower-cased question -> answer.
        self.qa_pairs = {}
        self.vectorizer = TfidfVectorizer()
        # TF-IDF matrix of the stored questions; None until training succeeds.
        self.X = None
        self.recognizer = sr.Recognizer()
        self.engine = pyttsx3.init()

        # Voice engine settings. Guard against an empty voice list so that a
        # machine without installed voices does not fail with IndexError.
        voices = self.engine.getProperty('voices')
        if voices:
            self.engine.setProperty('voice', voices[0].id)
        self.engine.setProperty('rate', 150)

        if dataset_url:
            self.load_dataset()
            self.train()

    def load_dataset(self):
        """Download the dataset at ``self.dataset_url`` into ``self.qa_pairs``.

        The format is chosen from the URL's extension (.csv or .json). Rows
        with a missing question or answer are skipped. Failures are reported
        on stdout rather than raised, so the bot can still start untrained.
        """
        # Decide the format before downloading; ignore any query string or
        # fragment and the letter case of the extension.
        url_path = self.dataset_url.partition('?')[0].partition('#')[0].lower()
        if url_path.endswith('.csv'):
            reader = pd.read_csv
        elif url_path.endswith('.json'):
            reader = pd.read_json
        else:
            print("File format not supported")
            return

        try:
            # Without a timeout the request could block forever.
            response = requests.get(self.dataset_url, timeout=self.REQUEST_TIMEOUT)
            response.raise_for_status()
            data = reader(StringIO(response.text))

            missing = {"question", "answer"} - set(data.columns)
            if missing:
                print(f"Error loading dataset: missing column(s) {sorted(missing)}")
                return

            rows = data.dropna(subset=["question", "answer"])
            for question, answer in zip(rows["question"], rows["answer"]):
                # str() keeps non-string questions (e.g. numbers) from
                # aborting the whole load.
                self.qa_pairs[str(question).lower()] = answer

            print(f"Loaded {len(self.qa_pairs)} question-answer pairs")

        except Exception as e:
            # Deliberate best-effort boundary: any download or parse problem
            # is reported and the bot continues without the dataset.
            print(f"Error loading dataset: {e}")

    def train(self):
        """Fit the TF-IDF vectorizer on the stored questions.

        On success ``self.X`` holds the question matrix. If there is no data,
        or the vectorizer rejects the questions with ValueError, a message is
        printed and ``self.X`` is left as / reset to None.
        """
        if not self.qa_pairs:
            print("No data available for training!")
            return

        questions = list(self.qa_pairs)
        try:
            self.X = self.vectorizer.fit_transform(questions)
        except ValueError as e:
            # Raised by the vectorizer when no usable terms can be extracted
            # (for example questions made only of punctuation).
            self.X = None
            print(f"Training failed: {e}")
            return
        print("Model trained on loaded data")

    def add_qa_pair(self, question, answer):
        """Store a question/answer pair (question lower-cased) and retrain."""
        self.qa_pairs[question.lower()] = answer
        self.train()

    def web_search(self, query, num_results=3, max_words=200):
        """Search the web and return the leading text of each result page.

        Args:
            query: Search terms.
            num_results: Number of results requested from the search call.
            max_words: Number of leading words kept from each page.

        Returns:
            A list of ``{'url': ..., 'content': ...}`` dicts (possibly empty).
            If the search itself fails, the pages collected so far are
            returned, or None when nothing was collected.
        """
        search_results = []
        try:
            print(f"\nSearching the web: {query}")

            # Perform Google search
            for url in search(query, num_results=num_results, lang='en'):
                try:
                    # Get page content
                    req = urllib.request.Request(url, headers={'User-Agent': 'Mozilla/5.0'})
                    with urllib.request.urlopen(req, timeout=5) as response:
                        html = response.read()

                    # Parse HTML
                    soup = BeautifulSoup(html, 'html.parser')

                    # Remove unwanted elements (scripts, styles, etc.)
                    for element in soup(["script", "style", "iframe", "nav", "footer"]):
                        element.extract()

                    # Get page text and keep only its first words
                    text = soup.get_text(separator=' ', strip=True)
                    text = ' '.join(text.split()[:max_words])

                    search_results.append({
                        'url': url,
                        'content': text
                    })

                except Exception as e:
                    # One unreachable or unparsable page must not abort the
                    # remaining results.
                    print(f"Error processing {url}: {e}")
                    continue

            return search_results

        except Exception as e:
            print(f"Search error: {e}")
            # Keep whatever was gathered before the failure.
            return search_results or None

    @classmethod
    def _extract_search_query(cls, user_input):
        """Return *user_input* with the trigger phrases removed, any letter case."""
        return cls._SEARCH_TRIGGER_RE.sub("", user_input).strip()

    @classmethod
    def _format_search_results(cls, search_results):
        """Render search results as a numbered list capped at MAX_RESPONSE_CHARS."""
        parts = ["Here's what I found online:\n"]
        parts.extend(
            f"\n{i}. {result['content']}\n(Source: {result['url']})\n"
            for i, result in enumerate(search_results, 1)
        )
        return "".join(parts)[:cls.MAX_RESPONSE_CHARS]

    def get_response(self, user_input):
        """Return the bot's reply to *user_input*.

        A message containing a search trigger phrase is answered from the
        web; this works even when no question/answer pairs are loaded.
        Otherwise the closest stored question is looked up and its answer is
        returned when the similarity exceeds SIMILARITY_THRESHOLD.
        """
        # Check if user wants to perform web search
        if self._SEARCH_TRIGGER_RE.search(user_input):
            query = self._extract_search_query(user_input)
            if not query:
                return "Please tell me what to search for."
            search_results = self.web_search(query)
            if search_results:
                return self._format_search_results(search_results)
            return "Couldn't find any information online."

        if not self.qa_pairs or self.X is None:
            return "I'm not trained yet. Please add questions and answers."

        # Regular question-answer search
        user_vec = self.vectorizer.transform([user_input.lower()])
        scores = cosine_similarity(user_vec, self.X)[0]
        best_match_idx = int(np.argmax(scores))

        if scores[best_match_idx] > self.SIMILARITY_THRESHOLD:
            # Rows of self.X follow the dict's key order used in train().
            best_question = list(self.qa_pairs)[best_match_idx]
            return self.qa_pairs[best_question]
        return "I don't know the answer to this question. Would you like me to search online? (Say 'search the web for...')"

    def text_to_speech(self, text):
        """Speak *text* through the voice engine and wait until it finishes."""
        self.engine.say(text)
        self.engine.runAndWait()

    def speech_to_text(self):
        """Listen on the microphone and return the recognized text.

        Returns:
            The recognized string, or None when nothing was heard within the
            timeout, the speech was not understood, or the recognition
            request failed.
        """
        with sr.Microphone() as source:
            print("\nSpeak now...")
            self.recognizer.adjust_for_ambient_noise(source)
            try:
                audio = self.recognizer.listen(source, timeout=5)
                text = self.recognizer.recognize_google(audio, language="en-US")
                print(f"Recognized: {text}")
                return text
            except sr.UnknownValueError:
                print("Speech not recognized")
                return None
            except sr.RequestError:
                print("Recognition service error")
                return None
            except sr.WaitTimeoutError:
                print("Timeout expired")
                return None

    def _is_exit_command(self, user_input):
        """Return True when *user_input* is one of the exit words."""
        return user_input.strip().lower() in self._EXIT_WORDS

    def _reply(self, user_input, fallback, speak_fallback=False):
        """Print and speak the answer to *user_input*, or print *fallback*."""
        response = self.get_response(user_input)
        if response:
            print(f"\nBot: {response}")
            self.text_to_speech(response)
            return
        print(f"\nBot: {fallback}")
        if speak_fallback:
            self.text_to_speech(fallback)

    def run(self):
        """Run the interactive menu loop until the user exits.

        The loop ends on menu option 6, on an "exit"/"stop" message, on
        Ctrl+C, or when standard input is closed.
        """
        print("\n" + "="*50)
        print("WELCOME TO INTELLIGENT CHATBOT".center(50))
        print("="*50)

        # NOTE(review): the mode is only shown in the menu header; options 1
        # and 2 pick text or voice input regardless of it.
        current_mode = "text"
        while True:
            print("\n" + "-"*50)
            print(f"Current input mode: {current_mode.upper()}")
            print("[1] Send text message")
            print("[2] Speak to the bot")
            print("[3] Switch input mode")
            print("[4] Teach the bot a new answer")
            print("[5] Web search")
            print("[6] Exit")

            try:
                choice = input("Choose action (1-6): ").strip()

                if choice == "1":
                    user_input = input("\nYour message: ")
                    if self._is_exit_command(user_input):
                        break
                    self._reply(
                        user_input,
                        "I don't know what to say. Would you like to teach me?",
                    )

                elif choice == "2":
                    user_input = self.speech_to_text()
                    if user_input:
                        if self._is_exit_command(user_input):
                            break
                        self._reply(
                            user_input,
                            "I don't know how to respond to that.",
                            speak_fallback=True,
                        )

                elif choice == "3":
                    current_mode = "voice" if current_mode == "text" else "text"
                    print(f"\nMode changed to: {current_mode.upper()}")

                elif choice == "4":
                    print("\nTeaching the bot:")
                    question = input("Enter question: ")
                    answer = input("Enter answer: ")
                    self.add_qa_pair(question, answer)
                    # train() already reported the problem if fitting failed.
                    if self.X is not None:
                        print("Bot successfully trained!")

                elif choice == "5":
                    query = input("\nEnter search query: ")
                    search_results = self.web_search(query)
                    if search_results:
                        print("\nSearch results:")
                        for i, result in enumerate(search_results, 1):
                            print(f"\n{i}. {result['content']}\n(Source: {result['url']})\n")
                    else:
                        print("\nNothing found.")

                elif choice == "6":
                    print("\nShutting down...")
                    break

                else:
                    print("\nPlease choose an option between 1 and 6")

            except (KeyboardInterrupt, EOFError):
                # EOFError: stdin was closed (e.g. piped input ran out).
                print("\nShutting down...")
                break
240
+
241
# Placeholder dataset location used when no URL is given on the command line.
DATASET_URL = "https://raw.githubusercontent.com/user/repo/main/qa_data.csv"


def main():
    """Start the chatbot.

    The first command-line argument, when present, is used as the dataset
    URL; otherwise DATASET_URL is used.
    """
    import sys

    dataset_url = sys.argv[1] if len(sys.argv) > 1 else DATASET_URL

    bot = HybridChatBot(dataset_url)
    bot.run()


if __name__ == "__main__":
    main()