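"""Mood Enhancer: a Gradio app that transcribes speech with Whisper, detects
mood with a BERT sentiment model, generates an uplifting reply with OpenAI,
and speaks the reply back with gTTS."""
# Assumed dependencies (no requirements file is shown here): gradio, librosa,
# gtts, transformers, torch, and openai<1.0 (the script uses the pre-1.0 SDK).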
# Import necessary libraries
import os

import gradio as gr
import librosa
import openai
from gtts import gTTS
from transformers import pipeline
from transformers import WhisperProcessor, WhisperForConditionalGeneration
# Set your OpenAI API key (read from an environment variable for security)
openai.api_key = os.environ.get("OPENAI_API_KEY", "your_api_key_here")
class MoodEnhancerModel:
    def __init__(self):
        print("Initializing Mood Enhancer Model...")

        # Initialize Whisper for speech recognition
        print("Loading Whisper model...")
        self.whisper_processor = WhisperProcessor.from_pretrained("openai/whisper-base")
        self.whisper_model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-base")

        # Initialize BERT for sentiment analysis/mood detection
        print("Loading BERT model...")
        self.sentiment_analyzer = pipeline(
            "sentiment-analysis",
            model="nlptown/bert-base-multilingual-uncased-sentiment"
        )
        print("All models loaded successfully!")
    def transcribe_audio(self, audio_file):
        """Transcribe audio using Whisper."""
        print("Transcribing audio...")
        # Try the hosted Whisper API first for better results
        try:
            with open(audio_file, "rb") as f:
                # The pre-1.0 SDK expects an open file object, not raw bytes
                transcript = openai.Audio.transcribe("whisper-1", f)
            transcribed_text = transcript["text"]
        except Exception as e:
            print(f"OpenAI API error: {e}")
            # Fall back to the local Whisper model:
            # load and resample the audio to the 16 kHz rate Whisper expects
            audio_array, sampling_rate = librosa.load(audio_file, sr=16000)
            input_features = self.whisper_processor(
                audio_array, sampling_rate=16000, return_tensors="pt"
            ).input_features
            # Generate token ids
            predicted_ids = self.whisper_model.generate(input_features)
            # Decode token ids to text
            transcribed_text = self.whisper_processor.batch_decode(
                predicted_ids, skip_special_tokens=True
            )[0]

        print(f"Transcribed text: {transcribed_text}")
        return transcribed_text
    def analyze_mood(self, text):
        """Analyze mood using BERT."""
        print("Analyzing mood...")
        results = self.sentiment_analyzer(text)

        # The nlptown model returns labels like "4 stars"; map the leading
        # digit of the 1-5 star rating onto a mood scale
        sentiment_score = int(results[0]['label'].split()[0])
        moods = {
            1: "very negative",
            2: "negative",
            3: "neutral",
            4: "positive",
            5: "very positive"
        }
        detected_mood = moods[sentiment_score]

        print(f"Detected mood: {detected_mood}")
        return detected_mood, sentiment_score
    def generate_response(self, text, mood, mood_score):
        """Generate a mood-enhancing response using OpenAI."""
        print("Generating mood enhancing response...")

        # Customize the prompt based on the detected mood
        if mood_score <= 2:
            prompt = f"""
            The user seems to be feeling {mood}. Their message was: "{text}"
            Generate an empathetic and uplifting response that acknowledges their feelings
            but helps shift their perspective to something more positive. Keep the response
            conversational, warm and under 3 sentences.
            """
        elif mood_score == 3:
            prompt = f"""
            The user seems to be feeling {mood}. Their message was: "{text}"
            Generate a cheerful response that builds on any positive aspects of their message
            and adds some encouraging thoughts. Keep the response conversational,
            warm and under 3 sentences.
            """
        else:
            prompt = f"""
            The user seems to be feeling {mood}. Their message was: "{text}"
            Generate a response that celebrates their positive state and offers a way to
            maintain or enhance this good feeling. Keep the response conversational,
            warm and under 3 sentences.
            """

        try:
            # Uses the pre-1.0 OpenAI SDK; openai>=1.0 replaced this call
            # with client.chat.completions.create
            response = openai.ChatCompletion.create(
                model="gpt-3.5-turbo",
                messages=[
                    {"role": "system", "content": "You are an empathetic AI assistant designed to enhance the user's mood."},
                    {"role": "user", "content": prompt}
                ],
                max_tokens=150,
                temperature=0.7
            )
            enhanced_response = response['choices'][0]['message']['content'].strip()
            print(f"Generated response: {enhanced_response}")
            return enhanced_response
        except Exception as e:
            print(f"Error with OpenAI API: {e}")
            # Fallback responses if the API call fails
            if mood_score <= 2:
                return "I notice you might be feeling down. Remember that challenging moments are temporary, and small positive steps can help shift your perspective."
            elif mood_score == 3:
                return "I sense a neutral mood. What's one small thing that brought you joy today? Focusing on positive moments, even tiny ones, can boost your overall wellbeing."
            else:
                return "It sounds like you're in a good mood! That's wonderful to hear. Savoring these positive feelings can help them last longer."
    def text_to_speech(self, text):
        """Convert text to speech."""
        print("Converting to speech...")
        tts = gTTS(text=text, lang='en', slow=False)
        output_path = "response.mp3"
        tts.save(output_path)
        return output_path
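    # Note: gTTS calls Google's TTS endpoint, so this step also needs network
    # access, and the fixed "response.mp3" path is overwritten on each request.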
    def process_text_input(self, text_input):
        """Process text input and return results."""
        mood, mood_score = self.analyze_mood(text_input)
        response = self.generate_response(text_input, mood, mood_score)
        audio_file = self.text_to_speech(response)
        return text_input, mood, mood_score, response, audio_file
    def process_audio_input(self, audio_file):
        """Process audio input and return results."""
        text = self.transcribe_audio(audio_file)
        mood, mood_score = self.analyze_mood(text)
        response = self.generate_response(text, mood, mood_score)
        audio_response = self.text_to_speech(response)
        return text, mood, mood_score, response, audio_response
# Initialize the model
model = MoodEnhancerModel()
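# A minimal smoke test without the UI (assumes a valid OpenAI key is set):
#   text, mood, score, reply, audio = model.process_text_input("I had a rough day")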
# Create a Gradio interface for text input
def text_interface(text):
    input_text, mood, mood_score, response, audio_file = model.process_text_input(text)
    return mood, f"Mood score: {mood_score}/5", response, audio_file

# Create a Gradio interface for audio input
def audio_interface(audio):
    input_text, mood, mood_score, response, audio_file = model.process_audio_input(audio)
    return input_text, mood, f"Mood score: {mood_score}/5", response, audio_file
# Create Gradio tabs for the different input types
with gr.Blocks(title="Mood Enhancer") as demo:
    gr.Markdown("# Mood Enhancer")
    gr.Markdown("Upload an audio file or enter text to analyze your mood and receive an uplifting response.")

    with gr.Tabs():
        with gr.TabItem("Text Input"):
            with gr.Row():
                text_input = gr.Textbox(label="Enter your text", placeholder="How are you feeling today?", lines=3)
                text_button = gr.Button("Analyze Mood")
            with gr.Row():
                text_mood = gr.Textbox(label="Detected Mood")
                text_score = gr.Textbox(label="Mood Score")
            text_response = gr.Textbox(label="Response", lines=3)
            text_audio = gr.Audio(label="Audio Response")
            text_button.click(
                fn=text_interface,
                inputs=text_input,
                outputs=[text_mood, text_score, text_response, text_audio]
            )

        with gr.TabItem("Audio Input"):
            audio_input = gr.Audio(label="Upload or Record Audio", type="filepath")
            audio_button = gr.Button("Analyze Audio")
            with gr.Row():
                transcribed_text = gr.Textbox(label="Transcribed Text")
            with gr.Row():
                audio_mood = gr.Textbox(label="Detected Mood")
                audio_score = gr.Textbox(label="Mood Score")
            audio_response = gr.Textbox(label="Response", lines=3)
            response_audio = gr.Audio(label="Audio Response")
            audio_button.click(
                fn=audio_interface,
                inputs=audio_input,
                outputs=[transcribed_text, audio_mood, audio_score, audio_response, response_audio]
            )
# Launch the Gradio interface (share=True requests a temporary public link)
demo.launch(share=True)