Spaces:

usmanyousaf
/

virtual_therapist

Sleeping

App Files Community

usmanyousaf committed on Dec 20, 2024

Commit

61028a5

verified ·

1 Parent(s): 5affd96

Update app.py

Browse files

Files changed (1) hide show

app.py +16 -10

app.py CHANGED Viewed

@@ -25,15 +25,18 @@ app.secret_key = os.urandom(24)
 # Store conversation history
 conversation_history = []
-# Synthesize therapist response to speech without permanent storage
-def synthesize_audio(text, model="aura-asteria-en"):
     try:
         options = SpeakOptions(model=model)
-        # Use a temporary file to hold audio data briefly
         with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as tmp_file:
             tmp_filename = tmp_file.name
-        # Synthesize the response and save it to the temporary file
         deepgram.speak.v("1").save(tmp_filename, {"text": text}, options)
         # Read the audio data into memory
@@ -43,8 +46,10 @@ def synthesize_audio(text, model="aura-asteria-en"):
         # Remove the temporary file
         os.remove(tmp_filename)
-        # Return the audio data as base64 encoded string
-        return base64.b64encode(audio_data).decode('utf-8')
     except Exception as e:
         raise ValueError(f"Speech synthesis failed: {str(e)}")
@@ -63,7 +68,7 @@ def process_audio():
     global conversation_history
     # Step 1: Accept audio input
-    audio_data = request.files.get('audio_data')  # Retrieve audio blob from client
     if not audio_data:
         return jsonify({'error': 'No audio file uploaded'}), 400
@@ -106,13 +111,14 @@ def process_audio():
         # Append assistant reply to conversation history
         conversation_history.append({"role": "assistant", "content": assistant_reply})
-        # Step 4: Synthesize therapist response to speech (no permanent saving)
-        audio_base64 = synthesize_audio(assistant_reply)
         return jsonify({
             'transcription': user_input,
             'response': assistant_reply,
-            'audioBase64': audio_base64
         })
     except Exception as e:

 # Store conversation history
 conversation_history = []
+# Synthesize therapist response to speech without permanently storing the file
+def synthesize_audio(text):
     try:
+        # Retrieve the selected voice or default to "aura-asteria-en"
+        model = session.get('selected_voice', 'aura-asteria-en')
         options = SpeakOptions(model=model)
+        # Use a temporary file to store the synthesized audio
         with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as tmp_file:
             tmp_filename = tmp_file.name
+        # Synthesize the response and save to the temporary file
         deepgram.speak.v("1").save(tmp_filename, {"text": text}, options)
         # Read the audio data into memory
         # Remove the temporary file
         os.remove(tmp_filename)
+        # Encode the audio data as base64 and return a data URI
+        audio_base64 = base64.b64encode(audio_data).decode('utf-8')
+        audio_data_uri = f"data:audio/mpeg;base64,{audio_base64}"
+        return audio_data_uri
     except Exception as e:
         raise ValueError(f"Speech synthesis failed: {str(e)}")
     global conversation_history
     # Step 1: Accept audio input
+    audio_data = request.files.get('audio_data')
     if not audio_data:
         return jsonify({'error': 'No audio file uploaded'}), 400
         # Append assistant reply to conversation history
         conversation_history.append({"role": "assistant", "content": assistant_reply})
+        # Step 4: Synthesize therapist response to speech (in memory, no permanent files)
+        audio_url = synthesize_audio(assistant_reply)
+        # Return data URI instead of file URL
         return jsonify({
             'transcription': user_input,
             'response': assistant_reply,
+            'audioUrl': audio_url
         })
     except Exception as e: