# Import necessary libraries
import os

import openai
import librosa
import gradio as gr
from gtts import gTTS
from transformers import pipeline, WhisperProcessor, WhisperForConditionalGeneration

# Set your OpenAI API key (consider using environment variables for security)
openai.api_key = "your_api_key_here"  # Replace with your actual API key
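# A safer alternative (sketch): read the key from an environment variable so it
# never lands in source control. The variable name OPENAI_API_KEY is the usual
# convention but is an assumption here.
# openai.api_key = os.environ.get("OPENAI_API_KEY", openai.api_key)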

class MoodEnhancerModel:
    def __init__(self):
        print("Initializing Mood Enhancer Model...")
        # Initialize Whisper for speech recognition
        print("Loading Whisper model...")
        self.whisper_processor = WhisperProcessor.from_pretrained("openai/whisper-base")
        self.whisper_model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-base")

        # Initialize BERT for sentiment analysis/mood detection
        print("Loading BERT model...")
        self.sentiment_analyzer = pipeline(
            "sentiment-analysis",
            model="nlptown/bert-base-multilingual-uncased-sentiment"
        )

        print("All models loaded successfully!")

    def transcribe_audio(self, audio_file):
        """Transcribe audio using Whisper"""
        print("Transcribing audio...")
        # Process through Whisper API for better results
        try:
            # The legacy (pre-1.0) openai SDK expects an open file object, not raw bytes
            with open(audio_file, "rb") as f:
                transcript = openai.Audio.transcribe("whisper-1", f)
            transcribed_text = transcript["text"]
        except Exception as e:
            print(f"OpenAI API error: {e}")
            # Fallback to local Whisper model
            
            # Load and preprocess the audio
            audio_array, sampling_rate = librosa.load(audio_file, sr=16000)
            input_features = self.whisper_processor(
                audio_array, sampling_rate=16000, return_tensors="pt"
            ).input_features
            
            # Generate token ids
            predicted_ids = self.whisper_model.generate(input_features)
            
            # Decode token ids to text
            transcribed_text = self.whisper_processor.batch_decode(
                predicted_ids, skip_special_tokens=True
            )[0]

        print(f"Transcribed text: {transcribed_text}")
        return transcribed_text

    def analyze_mood(self, text):
        """Analyze mood using BERT"""
        print("Analyzing mood...")
        results = self.sentiment_analyzer(text)

        # Convert 1-5 star rating to mood scale
        sentiment_score = int(results[0]['label'].split()[0])

        moods = {
            1: "very negative",
            2: "negative",
            3: "neutral",
            4: "positive",
            5: "very positive"
        }

        detected_mood = moods[sentiment_score]
        print(f"Detected mood: {detected_mood}")
        return detected_mood, sentiment_score

    def generate_response(self, text, mood, mood_score):
        """Generate mood enhancing response using OpenAI"""
        print("Generating mood enhancing response...")

        # Customize the prompt based on detected mood
        if mood_score <= 2:
            prompt = f"""
            The user seems to be feeling {mood}. Their message was: "{text}"
            Generate an empathetic and uplifting response that acknowledges their feelings
            but helps shift their perspective to something more positive. Keep the response
            conversational, warm and under 3 sentences.
            """
        elif mood_score == 3:
            prompt = f"""
            The user seems to be feeling {mood}. Their message was: "{text}"
            Generate a cheerful response that builds on any positive aspects of their message
            and adds some encouraging thoughts. Keep the response conversational,
            warm and under 3 sentences.
            """
        else:
            prompt = f"""
            The user seems to be feeling {mood}. Their message was: "{text}"
            Generate a response that celebrates their positive state and offers a way to
            maintain or enhance this good feeling. Keep the response conversational,
            warm and under 3 sentences.
            """

        try:
            # ChatCompletion is the legacy (pre-1.0) openai SDK interface
            response = openai.ChatCompletion.create(
                model="gpt-3.5-turbo",
                messages=[
                    {"role": "system", "content": "You are an empathetic AI assistant designed to enhance the user's mood."},
                    {"role": "user", "content": prompt}
                ],
                max_tokens=150,
                temperature=0.7
            )

            enhanced_response = response['choices'][0]['message']['content'].strip()
            print(f"Generated response: {enhanced_response}")
            return enhanced_response

        except Exception as e:
            print(f"Error with OpenAI API: {e}")
            # Fallback responses if API fails
            if mood_score <= 2:
                return "I notice you might be feeling down. Remember that challenging moments are temporary, and small positive steps can help shift your perspective."
            elif mood_score == 3:
                return "I sense a neutral mood. What's one small thing that brought you joy today? Focusing on positive moments, even tiny ones, can boost your overall wellbeing."
            else:
                return "It sounds like you're in a good mood! That's wonderful to hear. Savoring these positive feelings can help them last longer."

    def text_to_speech(self, text):
        """Convert text to speech"""
        print("Converting to speech...")
        tts = gTTS(text=text, lang='en', slow=False)
        output_path = "response.mp3"
        tts.save(output_path)
        return output_path

    def process_text_input(self, text_input):
        """Process text input and return results"""
        mood, mood_score = self.analyze_mood(text_input)
        response = self.generate_response(text_input, mood, mood_score)
        audio_file = self.text_to_speech(response)
        
        return text_input, mood, mood_score, response, audio_file
    
    def process_audio_input(self, audio_file):
        """Process audio input and return results"""
        text = self.transcribe_audio(audio_file)
        mood, mood_score = self.analyze_mood(text)
        response = self.generate_response(text, mood, mood_score)
        audio_response = self.text_to_speech(response)
        
        return text, mood, mood_score, response, audio_response

# Initialize the model
model = MoodEnhancerModel()

# Create a Gradio interface for text input
def text_interface(text):
    input_text, mood, mood_score, response, audio_file = model.process_text_input(text)
    return mood, f"Mood score: {mood_score}/5", response, audio_file

# Create a Gradio interface for audio input
def audio_interface(audio):
    input_text, mood, mood_score, response, audio_file = model.process_audio_input(audio)
    return input_text, mood, f"Mood score: {mood_score}/5", response, audio_file

# Create Gradio tabs for different input types
with gr.Blocks(title="Mood Enhancer") as demo:
    gr.Markdown("# Mood Enhancer")
    gr.Markdown("Upload an audio file or enter text to analyze your mood and receive an uplifting response.")
    
    with gr.Tabs():
        with gr.TabItem("Text Input"):
            with gr.Row():
                text_input = gr.Textbox(label="Enter your text", placeholder="How are you feeling today?", lines=3)
            
            text_button = gr.Button("Analyze Mood")
            
            with gr.Row():
                text_mood = gr.Textbox(label="Detected Mood")
                text_score = gr.Textbox(label="Mood Score")
            
            text_response = gr.Textbox(label="Response", lines=3)
            text_audio = gr.Audio(label="Audio Response")
            
            text_button.click(
                fn=text_interface,
                inputs=text_input,
                outputs=[text_mood, text_score, text_response, text_audio]
            )
        
        with gr.TabItem("Audio Input"):
            audio_input = gr.Audio(label="Upload or Record Audio", type="filepath")
            audio_button = gr.Button("Analyze Audio")
            
            with gr.Row():
                transcribed_text = gr.Textbox(label="Transcribed Text")
            
            with gr.Row():
                audio_mood = gr.Textbox(label="Detected Mood")
                audio_score = gr.Textbox(label="Mood Score")
            
            audio_response = gr.Textbox(label="Response", lines=3)
            response_audio = gr.Audio(label="Audio Response")
            
            audio_button.click(
                fn=audio_interface,
                inputs=audio_input,
                outputs=[transcribed_text, audio_mood, audio_score, audio_response, response_audio]
            )

# Launch the Gradio interface
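# Note: share=True requests a temporary public Gradio link; omit it to serve locally only.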
demo.launch(share=True)