VoiceBot

Sleeping

App Files Files Community

Chris4K committed on Feb 4

Commit

1e952bf

verified ·

1 Parent(s): c851a9c

Update app.py

Browse files

Files changed (1) hide show

app.py +57 -40

app.py CHANGED Viewed

@@ -68,36 +68,71 @@ async def detect_wakeword(audio_chunk: bytes) -> bool:
     # You might want to use libraries like Porcupine or build your own wake word detector
     return True
-async def process_audio_stream(websocket: WebSocket) -> AsyncGenerator[str, None]:
     buffer = []
     is_speaking = False
     silence_frames = 0
     while True:
         try:
-            # Add a timeout to prevent indefinite waiting
             try:
-                audio_data = await asyncio.wait_for(websocket.receive_bytes(), timeout=5.0)
             except asyncio.TimeoutError:
-                print("WebSocket receive timeout")
                 continue
-            except Exception as receive_error:
-                print(f"Error receiving audio data: {receive_error}")
-                # Break the loop if there's a persistent receive error
-                break
             # Validate audio data
-            if not audio_data or len(audio_data) == 0:
-                print("Received empty audio data")
-                continue
-            # Ensure audio data meets minimum size for VAD processing
-            if len(audio_data) < CHUNK_SIZE:
-                print(f"Audio chunk too small: {len(audio_data)} bytes")
                 continue
             try:
-                # Convert audio data to the right format for VAD
                 is_speech = vad.is_speech(audio_data, SAMPLE_RATE)
             except Exception as vad_error:
                 print(f"VAD processing error: {vad_error}")
@@ -133,7 +168,7 @@ async def process_audio_stream(websocket: WebSocket) -> AsyncGenerator[str, None
                             user_speech_text = stt(wav_buffer, desired_language)
                             if "computer" in user_speech_text.lower():
                                 translated_text = to_en_translation(user_speech_text, desired_language)
-                                response = await agent.arun(translated_text)  # Assuming agent.run is made async
                                 bot_response_de = from_en_translation(response, desired_language)
                                 # Stream the response
@@ -153,38 +188,20 @@ async def process_audio_stream(websocket: WebSocket) -> AsyncGenerator[str, None
                     except Exception as processing_error:
                         print(f"Error processing speech utterance: {processing_error}")
         except Exception as e:
             print(f"Unexpected error in audio stream processing: {e}")
-            # Add a small delay to prevent rapid reconnection attempts
             await asyncio.sleep(1)
-            break
 @app.get("/", response_class=HTMLResponse)
 async def get_index():
     with open("static/index.html") as f:
         return f.read()
-@app.websocket("/ws")
-async def websocket_endpoint(websocket: WebSocket):
-    await websocket.accept()
-    try:
-        # Keep the WebSocket connection open with a persistent loop
-        while True:
-            try:
-                async for response in process_audio_stream(websocket):
-                    await websocket.send_text(response)
-            except Exception as stream_error:
-                print(f"Audio stream error: {stream_error}")
-                # Attempt to restart the stream
-                await asyncio.sleep(1)
-    except Exception as e:
-        print(f"WebSocket endpoint error: {e}")
-    finally:
-        try:
-            await websocket.close(code=1000)
-        except Exception as close_error:
-            print(f"Error closing WebSocket: {close_error}")
 if __name__ == "__main__":
     import uvicorn
     uvicorn.run(app, host="0.0.0.0", port=8000)

     # You might want to use libraries like Porcupine or build your own wake word detector
     return True
+@app.websocket("/ws")
+async def websocket_endpoint(websocket: WebSocket):
+    """Receive audio over the WebSocket and stream bot responses back.
+
+    Incoming binary frames are put on an asyncio.Queue. A background task
+    iterates process_audio_stream(audio_queue) and sends every yielded
+    string to the client with send_text. When the receive loop ends the
+    task is cancelled and awaited, then the socket is closed.
+    """
+    await websocket.accept()
+    # Bound before the try block so the finally clause can always refer to
+    # it, even if creating the queue or the task fails.
+    stream_task = None
+    try:
+        # Use a queue to manage audio chunks
+        audio_queue = asyncio.Queue()
+        async def forward_responses():
+            # process_audio_stream is declared as returning an AsyncGenerator,
+            # so it has to be iterated; asyncio.create_task() only accepts
+            # coroutines and raises TypeError for an async-generator object.
+            async for response in process_audio_stream(audio_queue):
+                await websocket.send_text(response)
+        # Create a task to process the audio stream
+        stream_task = asyncio.create_task(forward_responses())
+        # Main receive loop
+        while True:
+            try:
+                # Try to receive audio data with a timeout
+                audio_data = await asyncio.wait_for(websocket.receive_bytes(), timeout=5.0)
+                # Put audio data into queue
+                await audio_queue.put(audio_data)
+            except asyncio.TimeoutError:
+                # Timeout is normal, just continue
+                continue
+            except WebSocketDisconnect:
+                # Handle clean disconnection
+                print("WebSocket disconnected")
+                break
+            except Exception as e:
+                print(f"WebSocket receive error: {e}")
+                break
+    except Exception as e:
+        print(f"WebSocket endpoint error: {e}")
+    finally:
+        # Cancel the stream processing task and wait for it to unwind
+        if stream_task is not None:
+            stream_task.cancel()
+            try:
+                await stream_task
+            except asyncio.CancelledError:
+                # Expected outcome of cancelling the task
+                pass
+            except Exception as task_error:
+                print(f"Audio stream task error: {task_error}")
+        try:
+            await websocket.close(code=1000)
+        except Exception as close_error:
+            print(f"Error closing WebSocket: {close_error}")
+async def process_audio_stream(audio_queue: asyncio.Queue) -> AsyncGenerator[str, None]:
     buffer = []
     is_speaking = False
     silence_frames = 0
     while True:
         try:
+            # Get audio data from queue with timeout
             try:
+                audio_data = await asyncio.wait_for(audio_queue.get(), timeout=5.0)
             except asyncio.TimeoutError:
+                # No audio for a while, reset state
+                buffer = []
+                is_speaking = False
+                silence_frames = 0
                 continue
             # Validate audio data
+            if not audio_data or len(audio_data) < CHUNK_SIZE:
                 continue
             try:
                 is_speech = vad.is_speech(audio_data, SAMPLE_RATE)
             except Exception as vad_error:
                 print(f"VAD processing error: {vad_error}")
                             user_speech_text = stt(wav_buffer, desired_language)
                             if "computer" in user_speech_text.lower():
                                 translated_text = to_en_translation(user_speech_text, desired_language)
+                                response = await agent.arun(translated_text)
                                 bot_response_de = from_en_translation(response, desired_language)
                                 # Stream the response
                     except Exception as processing_error:
                         print(f"Error processing speech utterance: {processing_error}")
+        except asyncio.CancelledError:
+            # Handle task cancellation
+            break
         except Exception as e:
             print(f"Unexpected error in audio stream processing: {e}")
+            # Prevent tight error loop
             await asyncio.sleep(1)
 @app.get("/", response_class=HTMLResponse)
 async def get_index():
     with open("static/index.html") as f:
         return f.read()
 if __name__ == "__main__":
     import uvicorn
     uvicorn.run(app, host="0.0.0.0", port=8000)