freddyaboulton (HF Staff) committed
Commit 6a84ce9 · Parent: 52f5c65

first draft

Files changed (2):
  1. app.py +36 -50
  2. requirements.txt +1 -0
app.py CHANGED
@@ -9,8 +9,10 @@ import gradio as gr
 import spaces
 import torch
 from gradio.utils import get_upload_folder
+from gradio.processing_utils import save_audio_to_cache
 from transformers import AutoModelForImageTextToText, AutoProcessor
 from transformers.generation.streamers import TextIteratorStreamer
+from fastrtc import ReplyOnPause, WebRTCData, WebRTC, AdditionalOutputs, get_hf_turn_credentials
 
 model_id = "google/gemma-3n-E4B-it"
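
The two added imports carry the new audio path: save_audio_to_cache persists a raw microphone buffer into Gradio's upload cache so the existing file-based pipeline can treat it like an uploaded file, and the fastrtc names drive the WebRTC flow added below. A minimal sketch of the caching step, assuming the (sample_rate, samples) tuple layout that fastrtc uses for audio:

import numpy as np
from gradio.processing_utils import save_audio_to_cache
from gradio.utils import get_upload_folder

# Placeholder one-second mono buffer standing in for a real WebRTC capture.
sample_rate, samples = 16_000, np.zeros((1, 16_000), dtype=np.int16)

# Same call as in generate() below: write the buffer into Gradio's upload
# cache and get back a file path the existing pipeline can consume.
path = save_audio_to_cache(samples, sample_rate, format="mp3", cache_dir=get_upload_folder())
print(path)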
 
@@ -152,9 +154,8 @@ def process_history(history: list[dict]) -> list[dict]:
     return messages
 
 
-@spaces.GPU(duration=120)
 @torch.inference_mode()
-def generate(message: dict, history: list[dict], system_prompt: str = "", max_new_tokens: int = 512) -> Iterator[str]:
+def _generate(message: dict, history: list[dict], system_prompt: str = "", max_new_tokens: int = 512) -> Iterator[str]:
     if not validate_media_constraints(message):
         yield ""
         return
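
This hunk splits the old entry point in two: the model-facing generator is renamed _generate and keeps @torch.inference_mode(), while the spaces.GPU decorator moves to the new WebRTC handler below, so a ZeroGPU slot is only requested at the outer entry point, once per voice turn. A skeleton of the split, with bodies elided and the time_limit argument taken from this commit as written (the old code used duration):

from collections.abc import Iterator

import spaces
import torch

@torch.inference_mode()
def _generate(message: dict) -> Iterator[str]:
    yield "..."  # stands in for the real streaming body

@spaces.GPU(time_limit=120)  # a ZeroGPU slot is requested here, once per turn
def generate(message: dict) -> Iterator[str]:
    yield from _generate(message)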
@@ -199,54 +200,39 @@ def generate(message: dict, history: list[dict], system_prompt: str = "", max_ne
         yield output
 
 
-examples = [
-    [
-        {
-            "text": "What is the capital of France?",
-            "files": [],
-        }
-    ],
-    [
-        {
-            "text": "Describe this image in detail.",
-            "files": ["assets/cat.jpeg"],
-        }
-    ],
-    [
-        {
-            "text": "Transcribe the following speech segment in English.",
-            "files": ["assets/speech.wav"],
-        }
-    ],
-    [
-        {
-            "text": "Transcribe the following speech segment in English.",
-            "files": ["assets/speech2.wav"],
-        }
-    ],
-]
-
-demo = gr.ChatInterface(
-    fn=generate,
-    type="messages",
-    textbox=gr.MultimodalTextbox(
-        file_types=list(IMAGE_FILE_TYPES + VIDEO_FILE_TYPES + AUDIO_FILE_TYPES),
-        file_count="multiple",
-        autofocus=True,
-    ),
-    multimodal=True,
-    additional_inputs=[
-        gr.Textbox(label="System Prompt", value="You are a helpful assistant."),
-        gr.Slider(label="Max New Tokens", minimum=100, maximum=2000, step=10, value=700),
-    ],
-    stop_btn=False,
-    title="Gemma 3n E4B it",
-    examples=examples,
-    run_examples_on_click=False,
-    cache_examples=False,
-    css_paths="style.css",
-    delete_cache=(1800, 1800),
-)
+@spaces.GPU(time_limit=120)
+def generate(data: WebRTCData, history: list[dict], system_prompt: str = "", max_new_tokens: int = 512, image=None):
+    message = {"text": data.textbox, "files": [save_audio_to_cache(data.audio[1], data.audio[0], format="mp3", cache_dir=get_upload_folder())]}
+    new_message = {"role": "assistant", "content": ""}
+    for output in _generate(message, history, system_prompt, max_new_tokens):
+        new_message["content"] += output
+        yield AdditionalOutputs(history + [new_message])
+
+
+
+with gr.Blocks() as demo:
+    chatbot = gr.Chatbot(type="messages")
+    webrtc = WebRTC(
+        modality="audio",
+        mode="send",
+        variant="textbox",
+        rtc_configuration=get_hf_turn_credentials,
+        server_rtc_configuration=get_hf_turn_credentials(ttl=3_600 * 24 * 30),
+    )
+    with gr.Accordion(label="Additional Inputs"):
+        sp = gr.Textbox(label="System Prompt", value="You are a helpful assistant.")
+        slider = gr.Slider(label="Max New Tokens", minimum=100, maximum=2000, step=10, value=700)
+        image = gr.Image()
+
+    webrtc.stream(
+        ReplyOnPause(generate),  # type: ignore
+        inputs=[webrtc, chatbot, sp, slider, image],
+        outputs=[chatbot],
+        concurrency_limit=100,
+    )
+    webrtc.on_additional_outputs(
+        lambda old, new: new, inputs=[chatbot], outputs=[chatbot], concurrency_limit=100
+    )
 
 if __name__ == "__main__":
     demo.launch()
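
Taken together, the new Blocks wiring follows fastrtc's standard pattern: the WebRTC component streams each paused utterance into the handler, the handler yields AdditionalOutputs carrying the updated history, and on_additional_outputs copies that history into the Chatbot. A self-contained sketch of the same pattern, with a toy echo handler standing in for the Gemma pipeline and TURN credentials omitted so it runs locally:

import gradio as gr
from fastrtc import AdditionalOutputs, ReplyOnPause, WebRTC, WebRTCData

def respond(data: WebRTCData, history: list[dict]):
    # One turn: fastrtc hands over the textbox value (and the paused audio
    # in data.audio); the updated chat history goes back as an additional output.
    user = {"role": "user", "content": data.textbox}
    reply = {"role": "assistant", "content": f"echo: {data.textbox}"}
    yield AdditionalOutputs(history + [user, reply])

with gr.Blocks() as demo:
    chatbot = gr.Chatbot(type="messages")
    webrtc = WebRTC(modality="audio", mode="send", variant="textbox")

    webrtc.stream(ReplyOnPause(respond), inputs=[webrtc, chatbot], outputs=[chatbot])
    # Mirror the AdditionalOutputs payload into the visible Chatbot.
    webrtc.on_additional_outputs(lambda old, new: new, inputs=[chatbot], outputs=[chatbot])

if __name__ == "__main__":
    demo.launch()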
requirements.txt CHANGED
@@ -311,3 +311,4 @@ uvicorn==0.34.3
     # via gradio
 websockets==15.0.1
     # via gradio-client
+fastrtc[vad]