freddyaboulton HF staff committed on
Commit
b88286b
·
verified ·
1 Parent(s): 0b1f5d0

Upload folder using huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +25 -47
app.py CHANGED
@@ -62,44 +62,28 @@ class GeminiHandler(AsyncAudioVideoStreamHandler):
62
  api_key=os.getenv("GEMINI_API_KEY"), http_options={"api_version": "v1alpha"}
63
  )
64
  config = {"response_modalities": ["AUDIO"]}
65
- try:
66
- async with client.aio.live.connect(
67
- model="gemini-2.0-flash-exp", config=config
68
- ) as session:
69
- self.session = session
70
- print("set session")
71
- while not self.quit.is_set():
72
- turn = self.session.receive()
73
- async for response in turn:
74
- if data := response.data:
75
- audio = np.frombuffer(data, dtype=np.int16).reshape(1, -1)
76
- self.audio_queue.put_nowait(audio)
77
- except Exception as e:
78
- import traceback
79
-
80
- traceback.print_exc()
81
 
82
  async def video_receive(self, frame: np.ndarray):
83
- try:
84
- print("out")
85
- if self.session:
86
- print("here")
87
- # send image every 1 second
88
- print(time.time() - self.last_frame_time)
89
- if time.time() - self.last_frame_time > 1:
90
- self.last_frame_time = time.time()
91
- print("sending image")
92
- await self.session.send(input=encode_image(frame))
93
- print("sent image")
94
- if self.latest_args[1] is not None:
95
- print("sending image2")
96
- await self.session.send(input=encode_image(self.latest_args[1]))
97
- print("sent image2")
98
- except Exception as e:
99
- print(e)
100
- import traceback
101
-
102
- traceback.print_exc()
103
  self.video_queue.put_nowait(frame)
104
 
105
  async def video_emit(self):
@@ -110,13 +94,7 @@ class GeminiHandler(AsyncAudioVideoStreamHandler):
110
  array = array.squeeze()
111
  audio_message = encode_audio(array)
112
  if self.session:
113
- try:
114
- await self.session.send(input=audio_message)
115
- except Exception as e:
116
- print(e)
117
- import traceback
118
-
119
- traceback.print_exc()
120
 
121
  async def emit(self):
122
  array = await self.audio_queue.get()
@@ -142,8 +120,8 @@ stream = Stream(
142
  ],
143
  ui_args={
144
  "icon": "https://www.gstatic.com/lamda/images/gemini_favicon_f069958c85030456e93de685481c559f160ea06b.png",
145
- "pulse_color": "rgb(35, 157, 225)",
146
- "icon_button_color": "rgb(35, 157, 225)",
147
  "title": "Gemini Audio Video Chat",
148
  },
149
  )
@@ -179,8 +157,8 @@ with gr.Blocks(css=css) as demo:
179
  if get_space() == "spaces"
180
  else None,
181
  icon="https://www.gstatic.com/lamda/images/gemini_favicon_f069958c85030456e93de685481c559f160ea06b.png",
182
- pulse_color="rgb(35, 157, 225)",
183
- icon_button_color="rgb(35, 157, 225)",
184
  )
185
  with gr.Column():
186
  image_input = gr.Image(
 
62
  api_key=os.getenv("GEMINI_API_KEY"), http_options={"api_version": "v1alpha"}
63
  )
64
  config = {"response_modalities": ["AUDIO"]}
65
+ async with client.aio.live.connect(
66
+ model="gemini-2.0-flash-exp", config=config
67
+ ) as session:
68
+ self.session = session
69
+ print("set session")
70
+ while not self.quit.is_set():
71
+ turn = self.session.receive()
72
+ async for response in turn:
73
+ if data := response.data:
74
+ audio = np.frombuffer(data, dtype=np.int16).reshape(1, -1)
75
+ self.audio_queue.put_nowait(audio)
 
 
 
 
 
76
 
77
  async def video_receive(self, frame: np.ndarray):
78
+ if self.session:
79
+ # send image every 1 second
80
+ print(time.time() - self.last_frame_time)
81
+ if time.time() - self.last_frame_time > 1:
82
+ self.last_frame_time = time.time()
83
+ await self.session.send(input=encode_image(frame))
84
+ if self.latest_args[1] is not None:
85
+ await self.session.send(input=encode_image(self.latest_args[1]))
86
+
 
 
 
 
 
 
 
 
 
 
 
87
  self.video_queue.put_nowait(frame)
88
 
89
  async def video_emit(self):
 
94
  array = array.squeeze()
95
  audio_message = encode_audio(array)
96
  if self.session:
97
+ await self.session.send(input=audio_message)
 
 
 
 
 
 
98
 
99
  async def emit(self):
100
  array = await self.audio_queue.get()
 
120
  ],
121
  ui_args={
122
  "icon": "https://www.gstatic.com/lamda/images/gemini_favicon_f069958c85030456e93de685481c559f160ea06b.png",
123
+ "pulse_color": "rgb(255, 255, 255)",
124
+ "icon_button_color": "rgb(255, 255, 255)",
125
  "title": "Gemini Audio Video Chat",
126
  },
127
  )
 
157
  if get_space() == "spaces"
158
  else None,
159
  icon="https://www.gstatic.com/lamda/images/gemini_favicon_f069958c85030456e93de685481c559f160ea06b.png",
160
+ pulse_color="rgb(255, 255, 255)",
161
+ icon_button_color="rgb(255, 255, 255)",
162
  )
163
  with gr.Column():
164
  image_input = gr.Image(