KavyaBansal committed on
Commit fd8dafd · verified · 1 Parent(s): e2a2bfc

Create app.py

Files changed (1): app.py +222 -0
app.py ADDED
@@ -0,0 +1,222 @@
+ # Import necessary libraries
+ import os
+
+ import gradio as gr
+ import librosa
+ import openai
+ from gtts import gTTS
+ from transformers import pipeline, WhisperProcessor, WhisperForConditionalGeneration
+
+ # Set your OpenAI API key; prefer an environment variable over hardcoding it
+ openai.api_key = os.getenv("OPENAI_API_KEY", "your_api_key_here")  # Replace the fallback only for local testing
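+ # On a Hugging Face Space, OPENAI_API_KEY can be stored as a repository
+ # secret so the key never appears in the source (assumption: this app is
+ # deployed as a Space, as the commit page suggests).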
+
+ class MoodEnhancerModel:
+     def __init__(self):
+         print("Initializing Mood Enhancer Model...")
+         # Initialize Whisper for speech recognition
+         print("Loading Whisper model...")
+         self.whisper_processor = WhisperProcessor.from_pretrained("openai/whisper-base")
+         self.whisper_model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-base")
+
+         # Initialize BERT for sentiment analysis/mood detection
+         print("Loading BERT model...")
+         self.sentiment_analyzer = pipeline(
+             "sentiment-analysis",
+             model="nlptown/bert-base-multilingual-uncased-sentiment"
+         )
+
+         print("All models loaded successfully!")
+
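+     # whisper-base keeps the app light; larger checkpoints such as
+     # openai/whisper-small could be swapped in via from_pretrained for
+     # better accuracy at the cost of memory and latency.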
+     def transcribe_audio(self, audio_file):
+         """Transcribe audio using Whisper"""
+         print("Transcribing audio...")
+         # Try the OpenAI Whisper API first for better results
+         try:
+             with open(audio_file, "rb") as f:
+                 # The legacy Audio.transcribe endpoint expects a file object,
+                 # not raw bytes read from it
+                 transcript = openai.Audio.transcribe("whisper-1", f)
+             transcribed_text = transcript["text"]
+         except Exception as e:
+             print(f"OpenAI API error: {e}")
+             # Fall back to the local Whisper model
+             # Load and resample the audio to the 16 kHz rate Whisper expects
+             audio_array, sampling_rate = librosa.load(audio_file, sr=16000)
+             input_features = self.whisper_processor(
+                 audio_array, sampling_rate=16000, return_tensors="pt"
+             ).input_features
+
+             # Generate token ids
+             predicted_ids = self.whisper_model.generate(input_features)
+
+             # Decode token ids to text
+             transcribed_text = self.whisper_processor.batch_decode(
+                 predicted_ids, skip_special_tokens=True
+             )[0]
+
+         print(f"Transcribed text: {transcribed_text}")
+         return transcribed_text
+
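+     # Note: the local fallback runs on CPU by default; if a GPU is
+     # available, the model and input_features could be moved there with
+     # .to("cuda") before calling generate.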
+     def analyze_mood(self, text):
+         """Analyze mood using BERT"""
+         print("Analyzing mood...")
+         results = self.sentiment_analyzer(text)
+
+         # The nlptown model returns labels such as "4 stars"; take the
+         # leading digit to convert the 1-5 star rating to a mood scale
+         sentiment_score = int(results[0]['label'].split()[0])
+
+         moods = {
+             1: "very negative",
+             2: "negative",
+             3: "neutral",
+             4: "positive",
+             5: "very positive"
+         }
+
+         detected_mood = moods[sentiment_score]
+         print(f"Detected mood: {detected_mood}")
+         return detected_mood, sentiment_score
+
+     def generate_response(self, text, mood, mood_score):
+         """Generate mood enhancing response using OpenAI"""
+         print("Generating mood enhancing response...")
+
+         # Customize the prompt based on detected mood
+         if mood_score <= 2:
+             prompt = f"""
+             The user seems to be feeling {mood}. Their message was: "{text}"
+             Generate an empathetic and uplifting response that acknowledges their feelings
+             but helps shift their perspective to something more positive. Keep the response
+             conversational, warm and under 3 sentences.
+             """
+         elif mood_score == 3:
+             prompt = f"""
+             The user seems to be feeling {mood}. Their message was: "{text}"
+             Generate a cheerful response that builds on any positive aspects of their message
+             and adds some encouraging thoughts. Keep the response conversational,
+             warm and under 3 sentences.
+             """
+         else:
+             prompt = f"""
+             The user seems to be feeling {mood}. Their message was: "{text}"
+             Generate a response that celebrates their positive state and offers a way to
+             maintain or enhance this good feeling. Keep the response conversational,
+             warm and under 3 sentences.
+             """
+
+         try:
+             # Uses the legacy ChatCompletion endpoint (requires openai<1.0)
+             response = openai.ChatCompletion.create(
+                 model="gpt-3.5-turbo",
+                 messages=[
+                     {"role": "system", "content": "You are an empathetic AI assistant designed to enhance the user's mood."},
+                     {"role": "user", "content": prompt}
+                 ],
+                 max_tokens=150,
+                 temperature=0.7
+             )
+
+             enhanced_response = response['choices'][0]['message']['content'].strip()
+             print(f"Generated response: {enhanced_response}")
+             return enhanced_response
+
+         except Exception as e:
+             print(f"Error with OpenAI API: {e}")
+             # Fallback responses if the API fails
+             if mood_score <= 2:
+                 return "I notice you might be feeling down. Remember that challenging moments are temporary, and small positive steps can help shift your perspective."
+             elif mood_score == 3:
+                 return "I sense a neutral mood. What's one small thing that brought you joy today? Focusing on positive moments, even tiny ones, can boost your overall wellbeing."
+             else:
+                 return "It sounds like you're in a good mood! That's wonderful to hear. Savoring these positive feelings can help them last longer."
+
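+     # Note: with openai>=1.0 the equivalent call (not used here) would be
+     # openai.OpenAI().chat.completions.create(model="gpt-3.5-turbo",
+     # messages=[...]), and the response fields become attributes
+     # (response.choices[0].message.content) rather than dict keys.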
+     def text_to_speech(self, text):
+         """Convert text to speech (gTTS requires an internet connection)"""
+         print("Converting to speech...")
+         tts = gTTS(text=text, lang='en', slow=False)
+         output_path = "response.mp3"
+         tts.save(output_path)
+         return output_path
+
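+     # Note: a fixed "response.mp3" path means concurrent users overwrite
+     # each other's audio; tempfile.NamedTemporaryFile(suffix=".mp3",
+     # delete=False) would give each request its own file.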
+     def process_text_input(self, text_input):
+         """Process text input and return results"""
+         mood, mood_score = self.analyze_mood(text_input)
+         response = self.generate_response(text_input, mood, mood_score)
+         audio_file = self.text_to_speech(response)
+
+         return text_input, mood, mood_score, response, audio_file
+
+     def process_audio_input(self, audio_file):
+         """Process audio input and return results"""
+         text = self.transcribe_audio(audio_file)
+         mood, mood_score = self.analyze_mood(text)
+         response = self.generate_response(text, mood, mood_score)
+         audio_response = self.text_to_speech(response)
+
+         return text, mood, mood_score, response, audio_response
+
+ # Initialize the model
+ model = MoodEnhancerModel()
+
+ # Gradio handler for text input
+ def text_interface(text):
+     input_text, mood, mood_score, response, audio_file = model.process_text_input(text)
+     return mood, f"Mood score: {mood_score}/5", response, audio_file
+
+ # Gradio handler for audio input
+ def audio_interface(audio):
+     input_text, mood, mood_score, response, audio_file = model.process_audio_input(audio)
+     return input_text, mood, f"Mood score: {mood_score}/5", response, audio_file
+
+ # Build the Gradio UI with tabs for the two input types
+ with gr.Blocks(title="Mood Enhancer") as demo:
+     gr.Markdown("# Mood Enhancer")
+     gr.Markdown("Upload an audio file or enter text to analyze your mood and receive an uplifting response.")
+
+     with gr.Tabs():
+         with gr.TabItem("Text Input"):
+             with gr.Row():
+                 text_input = gr.Textbox(label="Enter your text", placeholder="How are you feeling today?", lines=3)
+
+             text_button = gr.Button("Analyze Mood")
+
+             with gr.Row():
+                 text_mood = gr.Textbox(label="Detected Mood")
+                 text_score = gr.Textbox(label="Mood Score")
+
+             text_response = gr.Textbox(label="Response", lines=3)
+             text_audio = gr.Audio(label="Audio Response")
+
+             text_button.click(
+                 fn=text_interface,
+                 inputs=text_input,
+                 outputs=[text_mood, text_score, text_response, text_audio]
+             )
+
+         with gr.TabItem("Audio Input"):
+             audio_input = gr.Audio(label="Upload or Record Audio", type="filepath")
+             audio_button = gr.Button("Analyze Audio")
+
+             with gr.Row():
+                 transcribed_text = gr.Textbox(label="Transcribed Text")
+
+             with gr.Row():
+                 audio_mood = gr.Textbox(label="Detected Mood")
+                 audio_score = gr.Textbox(label="Mood Score")
+
+             audio_response = gr.Textbox(label="Response", lines=3)
+             response_audio = gr.Audio(label="Audio Response")
+
+             audio_button.click(
+                 fn=audio_interface,
+                 inputs=audio_input,
+                 outputs=[transcribed_text, audio_mood, audio_score, audio_response, response_audio]
+             )
+
+ # Launch the Gradio interface (share=True creates a public link when run
+ # locally; it is ignored on Hugging Face Spaces)
+ demo.launch(share=True)
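
A minimal sketch of the requirements.txt this app would need, assuming the legacy openai<1.0 client used above (package names are the standard PyPI ones; no versions are pinned here beyond the openai constraint):

transformers
torch
openai<1.0
librosa
gTTS
gradio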