# Ggwavephone / app.py
# nakas's picture
# Update app.py
# 87484df verified
import gradio as gr
import numpy as np
import subprocess
import tempfile
import os
import json
from pathlib import Path
import base64
import wave
# Install ggwave on first run if it is missing from the environment
try:
    import ggwave
except ImportError:
    import sys

    # Invoke pip through the interpreter that is running this script so the
    # package is installed into the same environment; a bare "pip" found on
    # PATH may belong to a different Python installation.
    subprocess.run(
        [sys.executable, "-m", "pip", "install", "ggwave"],
        check=True,
    )
    import ggwave
class GGWaveTransceiver:
    """Encode text into ggwave audio WAV files and decode WAV files back to text."""

    def __init__(self):
        # Sample rate (Hz) written into generated WAV headers and assumed by
        # the default ggwave decoder instance.
        self.sample_rate = 48000
        # Human-readable protocol label -> ggwave protocol id.
        self.protocols = {
            'Normal': 0,              # Normal audible
            'Fast': 1,                # Fast audible
            'Fastest': 2,             # Fastest audible
            'Ultrasonic Normal': 3,   # Ultrasonic normal
            'Ultrasonic Fast': 4,     # Ultrasonic fast
            'Ultrasonic Fastest': 5,  # Ultrasonic fastest
            'DT Normal': 6,           # Data Transfer normal
            'DT Fast': 7,             # Data Transfer fast
            'DT Fastest': 8,          # Data Transfer fastest
        }

    def encode_text_to_audio(self, text, protocol='Normal', volume=50):
        """Encode text to a 16-bit mono WAV file using ggwave.

        Args:
            text: Message to encode.
            protocol: Key of ``self.protocols``; unknown labels fall back to
                protocol id 0.
            volume: Output volume in percent. Coerced to an integer and
                clamped to 0-100 before being handed to ggwave.

        Returns:
            A ``(path, message)`` tuple. ``path`` is the name of a temporary
            WAV file (the caller owns it; it is not deleted automatically),
            or ``None`` on failure. ``message`` is a human-readable status.
        """
        tmp_path = None
        try:
            protocol_id = self.protocols.get(protocol, 0)
            # UI sliders may deliver floats; keep the value an in-range int.
            volume = max(0, min(100, int(volume)))

            audio_data = ggwave.encode(text, protocolId=protocol_id, volume=volume)
            if audio_data is None:
                return None, "Failed to encode text"

            # The waveform bytes are interpreted as float32 samples.
            audio_array = np.frombuffer(audio_data, dtype=np.float32)
            # Clip before scaling so out-of-range samples saturate instead of
            # wrapping around when cast to int16.
            audio_int16 = (np.clip(audio_array, -1.0, 1.0) * 32767).astype(np.int16)

            with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as tmp_file:
                tmp_path = tmp_file.name
                # Write through the already-open handle; re-opening the file
                # by name while it is still open fails on some platforms.
                with wave.open(tmp_file, 'wb') as wav_file:
                    wav_file.setnchannels(1)
                    wav_file.setsampwidth(2)
                    wav_file.setframerate(self.sample_rate)
                    wav_file.writeframes(audio_int16.tobytes())

            return tmp_path, f"Successfully encoded: '{text}' using {protocol} protocol"
        except Exception as e:
            # Do not leave a half-written temp file behind on failure.
            if tmp_path is not None:
                try:
                    os.remove(tmp_path)
                except OSError:
                    pass
            return None, f"Error encoding: {str(e)}"

    def decode_audio_to_text(self, audio_file):
        """Decode a WAV file to text using ggwave.

        Args:
            audio_file: Path to a 16-bit PCM WAV file, or ``None``.
                Multi-channel audio is averaged down to mono.

        Returns:
            A status string: the decoded message, a "no signal" notice, or an
            error description. This method does not raise.
        """
        try:
            if audio_file is None:
                return "No audio file provided"

            # Read the samples together with the format needed to interpret them.
            with wave.open(audio_file, 'rb') as wav_file:
                channels = wav_file.getnchannels()
                sample_width = wav_file.getsampwidth()
                frame_rate = wav_file.getframerate()
                frames = wav_file.readframes(wav_file.getnframes())

            if sample_width != 2:
                return (
                    "Error decoding: only 16-bit PCM WAV files are supported "
                    f"(got {8 * sample_width}-bit samples)"
                )

            samples = np.frombuffer(frames, dtype=np.int16)
            if channels > 1:
                # Samples are interleaved per frame; drop any trailing partial
                # frame and average the channels into one mono stream.
                usable = len(samples) - len(samples) % channels
                samples = samples[:usable].reshape(-1, channels).mean(axis=1)

            # Convert to float32 in [-1, 1]
            audio_float = samples.astype(np.float32) / 32767.0

            # Initialize ggwave instance for decoding
            if frame_rate == self.sample_rate:
                instance = ggwave.init()
            else:
                # NOTE(review): relies on ggwave resampling its input when
                # 'sampleRateInp' differs from its operating rate - confirm
                # against the installed ggwave version.
                parameters = ggwave.getDefaultParameters()
                parameters['sampleRateInp'] = float(frame_rate)
                instance = ggwave.init(parameters)

            try:
                # Feed the audio in fixed-size chunks until a payload appears.
                chunk_size = 1024
                decoded_text = None
                for start in range(0, len(audio_float), chunk_size):
                    chunk_bytes = audio_float[start:start + chunk_size].tobytes()
                    result = ggwave.decode(instance, chunk_bytes)
                    if result is not None:
                        decoded_text = result.decode('utf-8')
                        break

                if decoded_text:
                    return f"Decoded message: {decoded_text}"
                return "No valid ggwave signal detected in audio"
            finally:
                # Clean up the instance
                ggwave.free(instance)
        except Exception as e:
            return f"Error decoding: {str(e)}"
# Initialize the transceiver: one module-level instance used by the
# encode_text() and decode_audio() Gradio callbacks defined below.
transceiver = GGWaveTransceiver()
# Gradio interface functions
def encode_text(text, protocol, volume):
    """Encode text to audio for Gradio.

    Args:
        text: Message typed by the user; ``None``, empty and whitespace-only
            values are rejected without calling the encoder.
        protocol: Protocol label forwarded to the transceiver.
        volume: Volume percentage forwarded to the transceiver.

    Returns:
        ``(audio_file_path_or_None, status_message)``.
    """
    # Guard against None as well as blank input (API callers may omit the text).
    if not text or not text.strip():
        return None, "Please enter text to encode"
    return transceiver.encode_text_to_audio(text, protocol, volume)
def decode_audio(audio_file):
    """Gradio callback: return the transceiver's decode status for audio_file."""
    return transceiver.decode_audio_to_text(audio_file)
def create_demo_audio():
    """Create a demo audio file.

    Delegates to the shared transceiver so the demo is produced by the same
    encode-and-write-WAV path (and sample rate) as the regular encode tab,
    instead of a second hand-maintained copy of that logic.

    Returns:
        Path to a temporary WAV file containing the demo message, or ``None``
        if encoding failed (the reason is printed).
    """
    demo_text = "Hello from ggwave!"
    audio_path, message = transceiver.encode_text_to_audio(
        demo_text, protocol='Normal', volume=50
    )
    if audio_path is None:
        print(f"Error creating demo: {message}")
    return audio_path
# Phone integration helper functions
def get_twilio_webhook_code():
    """Return sample Twilio webhook code.

    The returned text is a Flask application shown to users as a starting
    point. It is kept syntactically valid Python and imports VoiceResponse
    from ``twilio.twiml.voice_response``. ``process_with_ggwave`` inside the
    sample is a placeholder the user is expected to implement.
    """
    return '''
# Twilio Webhook Integration Example
from flask import Flask, request
from twilio.twiml.voice_response import VoiceResponse
import requests

app = Flask(__name__)


@app.route("/webhook", methods=['POST'])
def handle_call():
    """Handle incoming Twilio call"""
    response = VoiceResponse()
    # Record the call audio for ggwave processing
    response.record(
        max_length=30,
        action='/process_recording',
        transcribe=False,
        play_beep=False
    )
    return str(response)


@app.route("/process_recording", methods=['POST'])
def process_recording():
    """Process recorded audio through ggwave"""
    recording_url = request.form['RecordingUrl']
    # Download the recording
    audio_response = requests.get(recording_url)
    # Send to your ggwave processing endpoint
    # (This would be your Gradio space or separate service)
    result = process_with_ggwave(audio_response.content)
    # Respond back to caller
    response = VoiceResponse()
    response.say(f"Received data: {result}")
    return str(response)
'''
def get_deployment_instructions():
    """Return deployment instructions as a Markdown string.

    The text is rendered in the "Phone Integration" tab. The architecture
    line uses real arrow characters rather than mis-decoded byte sequences.
    """
    return '''
# Deployment Instructions for Phone Integration
## Option 1: Twilio Integration
1. Sign up for Twilio account
2. Get a phone number
3. Set up webhook URL pointing to your server
4. Use the provided webhook code to handle calls
## Option 2: Vonage/Nexmo Integration
Similar to Twilio, but with Vonage APIs
## Option 3: Direct SIP Integration
For more advanced users who want direct SIP handling
## Hugging Face Spaces Limitations:
- Spaces don't directly support phone call handling
- You'll need a separate service (like Heroku, Railway, etc.) for telephony
- Use this Space as your ggwave processing engine
- Call it via API from your phone handling service
## Architecture:
Phone Call → Telephony Service → Your Server → HF Space (ggwave) → Response
'''
# Create Gradio interface
# Long Markdown bodies are kept as private module-level constants so the
# layout code below stays readable. Emoji are written as real characters
# (the previous text contained mis-decoded byte sequences).
_INTRO_MARKDOWN = """
# 🎵 GGWave Phone Data Transfer System
This system demonstrates data transmission over audio using ggwave. Perfect for sending data through phone calls!
**How it works:**
1. Encode text into audio signals
2. Play the audio during a phone call
3. Decode the received audio back to text
"""

_PHONE_INTRO_MARKDOWN = """
## Setting up Phone Call Integration
Since Hugging Face Spaces can't directly handle phone calls, you'll need to integrate with a telephony service:
"""

_API_USAGE_MARKDOWN = """
## API Usage
You can call this Space programmatically:
```python
import requests
# For encoding
response = requests.post(
"https://your-space-url/api/predict",
json={"data": ["your text", "Normal", 50]}
)
# Download the generated audio
audio_url = response.json()["data"][0]
```
"""

_TESTING_MARKDOWN = """
## Testing Workflow:
1. Click "Generate Demo Audio" above
2. Download the audio file
3. Go to the "Decode" tab and upload it
4. Verify it decodes correctly
## Phone Testing:
1. Generate audio for your message
2. Call your phone integration service
3. Play the audio during the call
4. Verify the server receives the correct data
"""

with gr.Blocks(title="GGWave Phone Data Transfer", theme=gr.themes.Soft()) as app:
    gr.Markdown(_INTRO_MARKDOWN)

    with gr.Tabs():
        # Encoding tab
        with gr.Tab("📤 Encode Text to Audio"):
            with gr.Row():
                with gr.Column():
                    encode_input = gr.Textbox(
                        label="Text to Encode",
                        placeholder="Enter your message here...",
                        lines=3
                    )
                    with gr.Row():
                        protocol_choice = gr.Dropdown(
                            choices=list(transceiver.protocols.keys()),
                            value="Normal",
                            label="Protocol"
                        )
                        volume_slider = gr.Slider(
                            minimum=10,
                            maximum=100,
                            value=50,
                            label="Volume %"
                        )
                    encode_btn = gr.Button("🎵 Generate Audio", variant="primary")
                with gr.Column():
                    encoded_audio = gr.Audio(
                        label="Generated Audio",
                        type="filepath"
                    )
                    encode_status = gr.Textbox(
                        label="Status",
                        interactive=False
                    )

        # Decoding tab
        with gr.Tab("📥 Decode Audio to Text"):
            with gr.Row():
                with gr.Column():
                    decode_audio_input = gr.Audio(
                        label="Upload Audio File",
                        type="filepath"
                    )
                    decode_btn = gr.Button("🔍 Decode Audio", variant="primary")
                with gr.Column():
                    decoded_result = gr.Textbox(
                        label="Decoded Message",
                        interactive=False,
                        lines=5
                    )

        # Phone integration tab
        with gr.Tab("📞 Phone Integration"):
            gr.Markdown(_PHONE_INTRO_MARKDOWN)
            with gr.Accordion("📋 Twilio Webhook Code", open=False):
                twilio_code = gr.Code(
                    value=get_twilio_webhook_code(),
                    language="python",
                    label="Twilio Integration Code"
                )
            with gr.Accordion("🚀 Deployment Instructions", open=True):
                deployment_info = gr.Markdown(get_deployment_instructions())
            gr.Markdown(_API_USAGE_MARKDOWN)

        # Testing tab
        with gr.Tab("🧪 Test & Demo"):
            gr.Markdown("## Try the Demo")
            with gr.Row():
                demo_btn = gr.Button("🎯 Generate Demo Audio", variant="secondary")
                demo_audio = gr.Audio(label="Demo Audio")
            gr.Markdown(_TESTING_MARKDOWN)

    # Event handlers (registered while the Blocks context is still open)
    encode_btn.click(
        fn=encode_text,
        inputs=[encode_input, protocol_choice, volume_slider],
        outputs=[encoded_audio, encode_status]
    )
    decode_btn.click(
        fn=decode_audio,
        inputs=[decode_audio_input],
        outputs=[decoded_result]
    )
    demo_btn.click(
        fn=create_demo_audio,
        outputs=[demo_audio]
    )
# Launch the app
if __name__ == "__main__":
    # Start the server on all interfaces at port 7860 with sharing enabled.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": True,
    }
    app.launch(**launch_options)