import gradio as gr
import numpy as np
import subprocess
import sys
import tempfile
import wave

# Install ggwave at startup if it is not already available
try:
    import ggwave
except ImportError:
    subprocess.run([sys.executable, "-m", "pip", "install", "ggwave"], check=True)
    import ggwave
class GGWaveTransceiver:
    def __init__(self):
        self.sample_rate = 48000
        # Protocol ids follow ggwave's TxProtocolId enumeration;
        # the Python bindings take the raw integer id.
        self.protocols = {
            'Normal': 0,              # audible, normal speed
            'Fast': 1,                # audible, fast
            'Fastest': 2,             # audible, fastest
            'Ultrasonic': 3,          # ultrasound, normal speed
            'DT (Data Transfer)': 6,  # "DT" protocol, normal speed
        }
    def encode_text_to_audio(self, text, protocol='Normal', volume=50):
        """Encode text to audio using ggwave"""
        try:
            protocol_id = self.protocols.get(protocol, 0)

            # Encode text into a float32 waveform (48 kHz mono)
            audio_data = ggwave.encode(text, protocolId=protocol_id, volume=int(volume))
            if audio_data is None:
                return None, "Failed to encode text"

            # Convert to numpy array
            audio_array = np.frombuffer(audio_data, dtype=np.float32)

            # Create temporary WAV file (16-bit PCM)
            with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as tmp_file:
                with wave.open(tmp_file.name, 'wb') as wav_file:
                    wav_file.setnchannels(1)
                    wav_file.setsampwidth(2)
                    wav_file.setframerate(self.sample_rate)
                    # Convert float32 [-1, 1] to int16, clipping to avoid overflow
                    audio_int16 = (np.clip(audio_array, -1.0, 1.0) * 32767).astype(np.int16)
                    wav_file.writeframes(audio_int16.tobytes())

            return tmp_file.name, f"Successfully encoded: '{text}' using {protocol} protocol"
        except Exception as e:
            return None, f"Error encoding: {str(e)}"
    def decode_audio_to_text(self, audio_file):
        """Decode audio file to text using ggwave"""
        try:
            if audio_file is None:
                return "No audio file provided"

            # Read audio file (expects 16-bit PCM WAV, ideally 48 kHz as produced by the encoder)
            with wave.open(audio_file, 'rb') as wav_file:
                n_channels = wav_file.getnchannels()
                frames = wav_file.readframes(wav_file.getnframes())

            audio_data = np.frombuffer(frames, dtype=np.int16)
            # Down-mix to mono if the recording is stereo
            if n_channels > 1:
                audio_data = audio_data.reshape(-1, n_channels).mean(axis=1)

            # Convert to float32 in [-1, 1], the format ggwave expects
            audio_float = audio_data.astype(np.float32) / 32767.0

            # Decode audio with a fresh ggwave instance (defaults to 48 kHz)
            instance = ggwave.init()
            try:
                decoded = ggwave.decode(instance, audio_float.tobytes())
            finally:
                ggwave.free(instance)

            if decoded:
                return f"Decoded message: {decoded.decode('utf-8')}"
            else:
                return "No valid ggwave signal detected in audio"
        except Exception as e:
            return f"Error decoding: {str(e)}"
# Initialize the transceiver
transceiver = GGWaveTransceiver()
# Gradio interface functions
def encode_text(text, protocol, volume):
"""Encode text to audio for Gradio"""
if not text.strip():
return None, "Please enter text to encode"
audio_file, message = transceiver.encode_text_to_audio(text, protocol, volume)
return audio_file, message
def decode_audio(audio_file):
"""Decode audio to text for Gradio"""
result = transceiver.decode_audio_to_text(audio_file)
return result
def create_demo_audio():
"""Create a demo audio file"""
demo_text = "Hello from ggwave!"
audio_file, _ = transceiver.encode_text_to_audio(demo_text, 'Normal', 50)
return audio_file
# Phone integration helper functions
def get_twilio_webhook_code():
"""Return sample Twilio webhook code"""
return '''
# Twilio Webhook Integration Example
from flask import Flask, request
from twilio.twiml.voice_response import VoiceResponse
import requests
app = Flask(__name__)
@app.route("/webhook", methods=['POST'])
def handle_call():
"""Handle incoming Twilio call"""
response = VoiceResponse()
# Record the call audio for ggwave processing
response.record(
max_length=30,
action='/process_recording',
transcribe=False,
play_beep=False
)
return str(response)
@app.route("/process_recording", methods=['POST'])
def process_recording():
"""Process recorded audio through ggwave"""
recording_url = request.form['RecordingUrl']
# Download the recording
audio_response = requests.get(recording_url)
# Send to your ggwave processing endpoint
# (This would be your Gradio space or separate service)
result = process_with_ggwave(audio_response.content)
# Respond back to caller
response = VoiceResponse()
response.say(f"Received data: {result}")
return str(response)
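
# A minimal sketch of the process_with_ggwave helper referenced above.
# It forwards the recorded audio to a separate ggwave decoding service; the URL
# and response shape are placeholders -- point them at wherever you host the
# decoder (for example, this Space's API).
def process_with_ggwave(audio_bytes):
    resp = requests.post(
        "https://your-ggwave-service.example.com/decode",  # placeholder endpoint
        files={"audio": ("call.wav", audio_bytes, "audio/wav")},
        timeout=30,
    )
    resp.raise_for_status()
    return resp.json().get("text", "")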
'''
def get_deployment_instructions():
"""Return deployment instructions"""
return '''
# Deployment Instructions for Phone Integration
## Option 1: Twilio Integration
1. Sign up for Twilio account
2. Get a phone number
3. Set up webhook URL pointing to your server
4. Use the provided webhook code to handle calls
## Option 2: Vonage/Nexmo Integration
Similar to Twilio, but with Vonage APIs
## Option 3: Direct SIP Integration
For more advanced users who want direct SIP handling
## Hugging Face Spaces Limitations:
- Spaces don't directly support phone call handling
- You'll need a separate service (like Heroku, Railway, etc.) for telephony
- Use this Space as your ggwave processing engine
- Call it via API from your phone handling service
## Architecture:
Phone Call → Telephony Service → Your Server → HF Space (ggwave) → Response
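
A minimal sketch of the "Your Server → HF Space" step, using a recent version of the
`gradio_client` package to send recorded call audio to this Space's decode endpoint
(the Space id and endpoint name are placeholders; check the Space's "Use via API"
page for the exact values):

```python
from gradio_client import Client, handle_file

client = Client("your-username/ggwavephone")   # placeholder Space id
result = client.predict(
    handle_file("call_recording.wav"),         # audio recorded from the call
    api_name="/decode_audio"                   # assumed endpoint name
)
print(result)
```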
'''
# Create Gradio interface
with gr.Blocks(title="GGWave Phone Data Transfer", theme=gr.themes.Soft()) as app:
    gr.Markdown("""
# 🎵 GGWave Phone Data Transfer System

This system demonstrates data transmission over audio using ggwave. Perfect for sending data through phone calls!

**How it works:**
1. Encode text into audio signals
2. Play the audio during a phone call
3. Decode the received audio back to text
    """)
with gr.Tabs():
# Encoding tab
        with gr.Tab("📤 Encode Text to Audio"):
with gr.Row():
with gr.Column():
encode_input = gr.Textbox(
label="Text to Encode",
placeholder="Enter your message here...",
lines=3
)
with gr.Row():
protocol_choice = gr.Dropdown(
choices=list(transceiver.protocols.keys()),
value="Normal",
label="Protocol"
)
volume_slider = gr.Slider(
minimum=10,
maximum=100,
value=50,
label="Volume %"
)
                    encode_btn = gr.Button("🎵 Generate Audio", variant="primary")
with gr.Column():
encoded_audio = gr.Audio(
label="Generated Audio",
type="filepath"
)
encode_status = gr.Textbox(
label="Status",
interactive=False
)
# Decoding tab
        with gr.Tab("📥 Decode Audio to Text"):
with gr.Row():
with gr.Column():
decode_audio_input = gr.Audio(
label="Upload Audio File",
type="filepath"
)
                    decode_btn = gr.Button("🔍 Decode Audio", variant="primary")
with gr.Column():
decoded_result = gr.Textbox(
label="Decoded Message",
interactive=False,
lines=5
)
# Phone integration tab
        with gr.Tab("📞 Phone Integration"):
gr.Markdown("""
## Setting up Phone Call Integration
Since Hugging Face Spaces can't directly handle phone calls, you'll need to integrate with a telephony service:
""")
            with gr.Accordion("📋 Twilio Webhook Code", open=False):
twilio_code = gr.Code(
value=get_twilio_webhook_code(),
language="python",
label="Twilio Integration Code"
)
            with gr.Accordion("🚀 Deployment Instructions", open=True):
deployment_info = gr.Markdown(get_deployment_instructions())
gr.Markdown("""
## API Usage
You can call this Space programmatically:
```python
import requests
# For encoding
response = requests.post(
"https://your-space-url/api/predict",
json={"data": ["your text", "Normal", 50]}
)
# Download the generated audio
audio_url = response.json()["data"][0]
```
""")
# Testing tab
        with gr.Tab("🧪 Test & Demo"):
gr.Markdown("## Try the Demo")
with gr.Row():
demo_btn = gr.Button("🎯 Generate Demo Audio", variant="secondary")
demo_audio = gr.Audio(label="Demo Audio")
gr.Markdown("""
## Testing Workflow:
1. Click "Generate Demo Audio" above
2. Download the audio file
3. Go to the "Decode" tab and upload it
4. Verify it decodes correctly
## Phone Testing:
1. Generate audio for your message
2. Call your phone integration service
3. Play the audio during the call
4. Verify the server receives the correct data
""")
# Event handlers
encode_btn.click(
fn=encode_text,
inputs=[encode_input, protocol_choice, volume_slider],
outputs=[encoded_audio, encode_status]
)
decode_btn.click(
fn=decode_audio,
inputs=[decode_audio_input],
outputs=[decoded_result]
)
demo_btn.click(
fn=create_demo_audio,
outputs=[demo_audio]
)
# Launch the app (share links are not needed on Hugging Face Spaces)
if __name__ == "__main__":
    app.launch(
        server_name="0.0.0.0",
        server_port=7860
    )