File size: 9,704 Bytes
2f81cdc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
import re
import urllib.request
from io import StringIO
from urllib.parse import quote

import numpy as np
import pandas as pd
import pyttsx3
import requests
import speech_recognition as sr
from bs4 import BeautifulSoup
from googlesearch import search
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

class HybridChatBot:
    """Chatbot that answers from a Q/A table and can fall back to web search.

    Questions are stored lower-cased in ``qa_pairs`` (question -> answer) and
    vectorized with TF-IDF; a user message is answered with the entry whose
    question has the highest cosine similarity, provided that similarity is
    above ``_MATCH_THRESHOLD``.  Messages containing one of the trigger
    phrases ("search the web for", "find online") are routed to
    ``web_search`` instead.  Voice input and output are provided through
    ``speech_recognition`` and ``pyttsx3``.
    """

    # Phrases that turn a message into a web-search request.  The same
    # case-insensitive pattern is used both to detect and to strip them, so
    # "Search The Web For x" yields the query "x".
    _SEARCH_TRIGGER = re.compile(r"search the web for|find online", re.IGNORECASE)
    # Minimum cosine similarity for a stored question to count as a match.
    _MATCH_THRESHOLD = 0.5
    # Seconds to wait for the dataset download before giving up.
    _REQUEST_TIMEOUT = 10

    def __init__(self, dataset_url=None):
        """Set up the vectorizer and voice engines, optionally loading data.

        Args:
            dataset_url: Optional URL of a ``.csv`` or ``.json`` resource with
                ``question`` and ``answer`` columns.  When given, the dataset
                is downloaded and the model is trained immediately.
        """
        self.dataset_url = dataset_url
        self.qa_pairs = {}
        self.vectorizer = TfidfVectorizer()
        self.X = None
        self.recognizer = sr.Recognizer()
        self.engine = pyttsx3.init()

        # Voice engine settings; keep the engine default when the platform
        # reports no installed voices instead of failing on voices[0].
        voices = self.engine.getProperty('voices')
        if voices:
            self.engine.setProperty('voice', voices[0].id)
        self.engine.setProperty('rate', 150)

        if dataset_url:
            self.load_dataset()
            self.train()

    def load_dataset(self):
        """Download the dataset at ``dataset_url`` and merge it into ``qa_pairs``.

        Rows with a missing question or answer are skipped.  Errors are
        reported on stdout rather than raised.  This method does not retrain
        the model; ``get_response`` retrains lazily when it notices the
        mismatch, or call ``train()`` explicitly.
        """
        if not self.dataset_url:
            print("No dataset URL configured")
            return

        try:
            # A timeout keeps an unresponsive server from hanging start-up.
            response = requests.get(self.dataset_url, timeout=self._REQUEST_TIMEOUT)
            response.raise_for_status()

            # Sniff the format from the path only, ignoring query string,
            # fragment and letter case.
            path = self.dataset_url.split('?', 1)[0].split('#', 1)[0].lower()
            if path.endswith('.csv'):
                data = pd.read_csv(StringIO(response.text))
            elif path.endswith('.json'):
                data = pd.read_json(StringIO(response.text))
            else:
                print("File format not supported")
                return

            missing = {"question", "answer"} - set(data.columns)
            if missing:
                print(f"Error loading dataset: missing column(s) {', '.join(sorted(missing))}")
                return

            # Drop incomplete rows up front so one bad cell cannot abort the
            # whole import part-way through.
            data = data.dropna(subset=["question", "answer"])
            for question, answer in zip(data["question"], data["answer"]):
                self.qa_pairs[str(question).lower()] = answer

            print(f"Loaded {len(self.qa_pairs)} question-answer pairs")

        except Exception as e:
            # Best-effort load: network, HTTP and parse errors are reported,
            # and the bot keeps whatever pairs it already had.
            print(f"Error loading dataset: {e}")

    def train(self):
        """Fit the TF-IDF vectorizer on every stored question.

        On success ``self.X`` holds one row per entry of ``qa_pairs`` in dict
        order.  When there is nothing usable to fit, ``self.X`` is left as /
        reset to ``None`` and a message is printed.
        """
        if not self.qa_pairs:
            print("No data available for training!")
            return

        questions = list(self.qa_pairs.keys())
        try:
            self.X = self.vectorizer.fit_transform(questions)
        except ValueError as e:
            # Raised by the vectorizer when no question yields any token
            # (e.g. only punctuation or single characters).
            self.X = None
            print(f"Training failed: {e}")
            return
        print("Model trained on loaded data")

    def add_qa_pair(self, question, answer):
        """Store ``answer`` under the lower-cased ``question`` and retrain."""
        self.qa_pairs[question.lower()] = answer
        self.train()

    def web_search(self, query, num_results=3):
        """Search the web and return a text excerpt for each result page.

        Args:
            query: Search terms.
            num_results: Number of results requested from the search call.

        Returns:
            A list of ``{'url': ..., 'content': ...}`` dicts, where ``content``
            is the first 200 words of the page's visible text; an empty list
            for a blank query; ``None`` when the search itself fails.  Pages
            that cannot be fetched or parsed are skipped.
        """
        query = (query or "").strip()
        if not query:
            return []

        try:
            print(f"\nSearching the web: {query}")
            search_results = []

            # Perform Google search
            for url in search(query, num_results=num_results, lang='en'):
                # urlopen also accepts non-web schemes; only fetch web pages.
                if not url.startswith(("http://", "https://")):
                    continue

                try:
                    # Get page content
                    req = urllib.request.Request(url, headers={'User-Agent': 'Mozilla/5.0'})
                    with urllib.request.urlopen(req, timeout=5) as response:
                        html = response.read()

                    # Parse HTML
                    soup = BeautifulSoup(html, 'html.parser')

                    # Remove elements that carry no readable content
                    for element in soup(["script", "style", "iframe", "nav", "footer"]):
                        element.extract()

                    # Keep the first 200 words of the visible text
                    text = soup.get_text(separator=' ', strip=True)
                    text = ' '.join(text.split()[:200])

                    search_results.append({
                        'url': url,
                        'content': text
                    })

                except Exception as e:
                    # One broken page must not discard the other results.
                    print(f"Error processing {url}: {e}")
                    continue

            return search_results

        except Exception as e:
            print(f"Search error: {e}")
            return None

    @classmethod
    def _extract_search_query(cls, user_input):
        """Return the search terms of a web-search request, else ``None``.

        The trigger phrases are matched and removed case-insensitively and
        the remaining whitespace is collapsed.
        """
        if not cls._SEARCH_TRIGGER.search(user_input):
            return None
        return " ".join(cls._SEARCH_TRIGGER.sub(" ", user_input).split())

    def get_response(self, user_input):
        """Return the bot's reply to ``user_input``.

        Web-search requests are handled first and do not require a trained
        model.  Otherwise the answer of the most similar stored question is
        returned when its similarity exceeds ``_MATCH_THRESHOLD``.

        Returns:
            Always a non-empty string: search results (capped at 2000
            characters), a stored answer, or an explanatory message.
        """
        # Check if user wants to perform web search
        query = self._extract_search_query(user_input)
        if query is not None:
            search_results = self.web_search(query)
            if not search_results:
                return "Couldn't find any information online."
            entries = "".join(
                f"\n{i}. {result['content']}\n(Source: {result['url']})\n"
                for i, result in enumerate(search_results, 1)
            )
            return ("Here's what I found online:\n" + entries)[:2000]  # Limit response length

        if not self.qa_pairs:
            return "I'm not trained yet. Please add questions and answers."

        # Row i of self.X must correspond to the i-th key of qa_pairs; retrain
        # when pairs were added without training (e.g. load_dataset alone).
        if self.X is None or self.X.shape[0] != len(self.qa_pairs):
            self.train()
        if self.X is None:
            return "I'm not trained yet. Please add questions and answers."

        # Regular question-answer search
        user_vec = self.vectorizer.transform([user_input.lower()])
        similarities = cosine_similarity(user_vec, self.X)
        best_match_idx = int(np.argmax(similarities))
        best_match_score = similarities[0, best_match_idx]

        if best_match_score > self._MATCH_THRESHOLD:
            best_question = list(self.qa_pairs)[best_match_idx]
            return self.qa_pairs[best_question]
        return "I don't know the answer to this question. Would you like me to search online? (Say 'search the web for...')"

    def text_to_speech(self, text):
        """Speak ``text`` through the voice engine, blocking until done."""
        self.engine.say(text)
        self.engine.runAndWait()

    def speech_to_text(self):
        """Record one phrase from the microphone and return it as text.

        Returns:
            The recognized text, or ``None`` when the microphone is not
            available, nothing was said within 5 seconds, the speech was not
            understood, or the recognition service failed.
        """
        try:
            microphone = sr.Microphone()
        except (OSError, AttributeError) as e:
            # NOTE(review): the library appears to raise OSError when there is
            # no input device and AttributeError when PyAudio is missing -
            # confirm against the installed SpeechRecognition version.
            print(f"Microphone unavailable: {e}")
            return None

        try:
            with microphone as source:
                print("\nSpeak now...")
                self.recognizer.adjust_for_ambient_noise(source)
                audio = self.recognizer.listen(source, timeout=5)
            # Recognition runs after the microphone has been released.
            text = self.recognizer.recognize_google(audio, language="en-US")
        except sr.UnknownValueError:
            print("Speech not recognized")
            return None
        except sr.RequestError:
            print("Recognition service error")
            return None
        except sr.WaitTimeoutError:
            print("Timeout expired")
            return None
        except OSError as e:
            print(f"Microphone unavailable: {e}")
            return None

        print(f"Recognized: {text}")
        return text

    def _respond(self, user_input, fallback, speak_fallback=False):
        """Answer one user message; return ``False`` if the user asked to quit."""
        if user_input.strip().lower() in ("exit", "stop"):
            return False

        response = self.get_response(user_input)
        if response:
            print(f"\nBot: {response}")
            self.text_to_speech(response)
        else:
            print(f"\nBot: {fallback}")
            if speak_fallback:
                self.text_to_speech(fallback)
        return True

    def run(self):
        """Run the interactive console menu until the user exits.

        The loop ends on menu option 6, on the words "exit" / "stop" as a
        message, on Ctrl-C, or when standard input is closed.
        """
        print("\n" + "="*50)
        print("WELCOME TO INTELLIGENT CHATBOT".center(50))
        print("="*50)

        # NOTE: the mode is only displayed and toggled (option 3); text and
        # voice input are always chosen explicitly via options 1 and 2.
        current_mode = "text"
        while True:
            print("\n" + "-"*50)
            print(f"Current input mode: {current_mode.upper()}")
            print("[1] Send text message")
            print("[2] Speak to the bot")
            print("[3] Switch input mode")
            print("[4] Teach the bot a new answer")
            print("[5] Web search")
            print("[6] Exit")

            try:
                choice = input("Choose action (1-6): ").strip()

                if choice == "1":
                    user_input = input("\nYour message: ")
                    if not self._respond(
                        user_input,
                        "I don't know what to say. Would you like to teach me?",
                    ):
                        break

                elif choice == "2":
                    user_input = self.speech_to_text()
                    if user_input and not self._respond(
                        user_input,
                        "I don't know how to respond to that.",
                        speak_fallback=True,
                    ):
                        break

                elif choice == "3":
                    current_mode = "voice" if current_mode == "text" else "text"
                    print(f"\nMode changed to: {current_mode.upper()}")

                elif choice == "4":
                    print("\nTeaching the bot:")
                    question = input("Enter question: ")
                    answer = input("Enter answer: ")
                    if not question.strip() or not answer.strip():
                        print("Question and answer must not be empty")
                        continue
                    self.add_qa_pair(question, answer)
                    print("Bot successfully trained!")

                elif choice == "5":
                    query = input("\nEnter search query: ")
                    search_results = self.web_search(query)
                    if search_results:
                        print("\nSearch results:")
                        for i, result in enumerate(search_results, 1):
                            print(f"\n{i}. {result['content']}\n(Source: {result['url']})\n")
                    else:
                        print("\nNothing found.")

                elif choice == "6":
                    print("\nShutting down...")
                    break

                else:
                    print("\nPlease choose an option between 1 and 6")

            except (KeyboardInterrupt, EOFError):
                # EOFError: stdin was closed (piped input, Ctrl-D / Ctrl-Z).
                print("\nShutting down...")
                break

if __name__ == "__main__":
    # Script entry point: build a bot from the dataset at DATASET_URL and
    # start the interactive console menu.
    DATASET_URL = "https://raw.githubusercontent.com/user/repo/main/qa_data.csv"

    HybridChatBot(DATASET_URL).run()