Spaces:

RinggAI
/

STT

Running

App Files Community

utkarshshukla2912 committed on Oct 31

Commit

033af1b

1 Parent(s): 6d80c52

UI improvements

Browse files

Files changed (1) hide show

app.py +121 -89

app.py CHANGED Viewed

@@ -25,6 +25,58 @@ custom_css = """
     border-radius: 10px;
     margin-bottom: 20px;
 }
 footer {
     visibility: hidden !important;
     height: 50px !important;
@@ -42,68 +94,65 @@ footer:after {
 # Backend API endpoint (ngrok URL)
 # You can update this via Hugging Face Space Secrets
-API_ENDPOINT = os.environ.get("STT_API_ENDPOINT", "https://unintuitional-vibrational-jordy.ngrok-free.dev")
 class RinggSTTClient:
     """Client for Ringg STT API"""
     def __init__(self, api_endpoint: str):
-        self.api_endpoint = api_endpoint.rstrip('/')
         self.session = requests.Session()
-        self.session.headers.update({
-            'User-Agent': 'RinggSTT-HF-Space/1.0'
-        })
     def check_health(self) -> dict:
         """Check if the API is available"""
         try:
-            response = self.session.get(
-                f"{self.api_endpoint}/health",
-                timeout=5
-            )
             if response.status_code == 200:
                 return {"status": "healthy", "message": "✅ API is online"}
             else:
-                return {"status": "error", "message": f"❌ API returned status {response.status_code}"}
         except requests.exceptions.Timeout:
             return {"status": "error", "message": "⏱️ API request timed out"}
         except requests.exceptions.ConnectionError:
             return {"status": "error", "message": "❌ Cannot connect to API"}
         except Exception as e:
             return {"status": "error", "message": f"❌ Error: {str(e)}"}
     def transcribe_audio(self, audio_file_path: str) -> str:
         """Transcribe audio file via API"""
         try:
             # Read audio file and encode as base64
-            with open(audio_file_path, 'rb') as f:
                 audio_data = f.read()
-            audio_base64 = base64.b64encode(audio_data).decode('utf-8')
             # Make API request
             response = self.session.post(
                 f"{self.api_endpoint}/transcribe",
-                json={
-                    "audio_data": audio_base64,
-                    "sample_rate": 16000
-                },
-                timeout=30
             )
             if response.status_code == 200:
                 result = response.json()
                 return result.get("transcription", "No transcription received")
             else:
                 return f"❌ API Error: {response.status_code} - {response.text}"
         except requests.exceptions.Timeout:
             return "⏱️ Request timed out. The audio file might be too long."
         except requests.exceptions.ConnectionError:
             return "❌ Cannot connect to the transcription service. Please try again later."
         except Exception as e:
             return f"❌ Error: {str(e)}"
 # Initialize API client
 print(f"🔗 Connecting to STT API: {API_ENDPOINT}")
 stt_client = RinggSTTClient(API_ENDPOINT)
@@ -115,81 +164,35 @@ print(f"API Health: {health_status}")
 def create_interface():
     """Create Gradio interface"""
     def transcribe_audio(audio_file):
         """Transcribe uploaded audio"""
         if audio_file is None:
             return "Please upload an audio file!"
         return stt_client.transcribe_audio(audio_file)
     def check_api_status():
         """Check API health status"""
         health = stt_client.check_health()
         return health["message"]
     # Create interface
-    with gr.Blocks(title="Ringg STT V0", theme=gr.themes.Soft(), css=custom_css) as demo:
-        gr.Markdown("""
         <div class="main-header">
         <h1>🎙️ Ringg STT V0</h1>
-        <p>High-Accuracy Hindi Speech-to-Text</p>
         </div>
         """)
-        # API Status indicator
-        with gr.Row():
-            with gr.Column(scale=4):
-                api_status = gr.Textbox(
-                    label="🔌 API Status",
-                    value=health_status["message"],
-                    interactive=False
-                )
-            with gr.Column(scale=1):
-                check_btn = gr.Button("🔄 Check Status", size="sm")
-                check_btn.click(check_api_status, outputs=api_status)
-        gr.Markdown("""
-        ### 📁 File Upload
-        Upload an audio file for transcription (supports WAV, MP3, FLAC, M4A, etc.)
-        """)
-        audio_input = gr.Audio(
-            label="📁 Upload Audio File",
-            type="filepath",
-            sources=["upload"]
-        )
-        transcribe_btn = gr.Button("🔄 Transcribe", variant="primary", size="lg")
-        file_output = gr.Textbox(
-            label="Transcription Result",
-            lines=8,
-            interactive=False,
-            placeholder="Upload a file and click Transcribe..."
-        )
-        transcribe_btn.click(
-            transcribe_audio,
-            inputs=audio_input,
-            outputs=file_output
         )
-        gr.Markdown("""
-        ### ✨ Features
-        - 🌐 **Hindi Support**: Accurate transcription for Hindi audio
-        - 🎯 **High Accuracy**: Competitive with leading ASR models
-        - 📁 **File Upload**: Support for various audio formats (WAV, MP3, FLAC, etc.)
-        - ⚡ **Fast Processing**: Optimized for quick transcription
-        """)
-        # Performance Comparison Table
-        gr.Markdown("""
-        ## Performance Benchmarks
-        Our model achieves **state-of-the-art performance** on Hindi speech recognition benchmarks:
-        """)
         with gr.Row():
             gr.DataFrame(
                 value=[
@@ -203,20 +206,49 @@ def create_interface():
                 datatype=["str", "str", "str"],
                 row_count=5,
                 col_count=(3, "fixed"),
-                label="Word Error Rate Comparison (Lower is Better)"
             )
         gr.Markdown("""
-        **Ringg STT V0** ranks **2nd** among top models, outperforming OpenAI Whisper Large-v3 and other leading solutions.
-        Lower WER (Word Error Rate) indicates better accuracy. Our model delivers competitive performance for Hindi transcription tasks.
         """)
         gr.Markdown("""
-        ### 🙏 Acknowledgements
         - Special thanks to [@jeremylee12](https://huggingface.co/jeremylee12) for their contributions
         """)
     return demo

     border-radius: 10px;
     margin-bottom: 20px;
 }
+.status-dot {
+    display: inline-block;
+    width: 8px;
+    height: 8px;
+    border-radius: 50%;
+    margin-left: 8px;
+}
+.status-dot.healthy {
+    background-color: #22c55e;
+    animation: pulse-green 2s ease-in-out infinite;
+}
+.status-dot.error {
+    background-color: #ef4444;
+    animation: pulse-red 2s ease-in-out infinite;
+}
+@keyframes pulse-green {
+    0% {
+        box-shadow: 0 0 0 0 rgba(34, 197, 94, 0.7);
+    }
+    70% {
+        box-shadow: 0 0 0 6px rgba(34, 197, 94, 0);
+    }
+    100% {
+        box-shadow: 0 0 0 0 rgba(34, 197, 94, 0);
+    }
+}
+@keyframes pulse-red {
+    0% {
+        box-shadow: 0 0 0 0 rgba(239, 68, 68, 0.7);
+    }
+    70% {
+        box-shadow: 0 0 0 6px rgba(239, 68, 68, 0);
+    }
+    100% {
+        box-shadow: 0 0 0 0 rgba(239, 68, 68, 0);
+    }
+}
+div[data-testid="audio"] {
+    min-height: 60px !important;
+    max-height: 80px !important;
+}
+div[data-testid="audio"] > div {
+    height: auto !important;
+    min-height: auto !important;
+}
+.wrap.wrap.wrap.svelte-1w6y6zl {
+    height: auto !important;
+    min-height: auto !important;
+}
+.gradio-row {
+    min-height: auto !important;
+}
 footer {
     visibility: hidden !important;
     height: 50px !important;
 # Backend API endpoint (ngrok URL)
 # You can update this via Hugging Face Space Secrets
+API_ENDPOINT = os.environ.get("STT_API_ENDPOINT", "http://13.234.40.75:7861")
 class RinggSTTClient:
     """Client for Ringg STT API"""
     def __init__(self, api_endpoint: str):
+        self.api_endpoint = api_endpoint.rstrip("/")
         self.session = requests.Session()
+        self.session.headers.update({"User-Agent": "RinggSTT-HF-Space/1.0"})
     def check_health(self) -> dict:
         """Check if the API is available"""
         try:
+            response = self.session.get(f"{self.api_endpoint}/health", timeout=5)
             if response.status_code == 200:
                 return {"status": "healthy", "message": "✅ API is online"}
             else:
+                return {
+                    "status": "error",
+                    "message": f"❌ API returned status {response.status_code}",
+                }
         except requests.exceptions.Timeout:
             return {"status": "error", "message": "⏱️ API request timed out"}
         except requests.exceptions.ConnectionError:
             return {"status": "error", "message": "❌ Cannot connect to API"}
         except Exception as e:
             return {"status": "error", "message": f"❌ Error: {str(e)}"}
     def transcribe_audio(self, audio_file_path: str) -> str:
         """Transcribe audio file via API"""
         try:
             # Read audio file and encode as base64
+            with open(audio_file_path, "rb") as f:
                 audio_data = f.read()
+            audio_base64 = base64.b64encode(audio_data).decode("utf-8")
             # Make API request
             response = self.session.post(
                 f"{self.api_endpoint}/transcribe",
+                json={"audio_data": audio_base64, "sample_rate": 16000},
+                timeout=30,
             )
             if response.status_code == 200:
                 result = response.json()
                 return result.get("transcription", "No transcription received")
             else:
                 return f"❌ API Error: {response.status_code} - {response.text}"
         except requests.exceptions.Timeout:
             return "⏱️ Request timed out. The audio file might be too long."
         except requests.exceptions.ConnectionError:
             return "❌ Cannot connect to the transcription service. Please try again later."
         except Exception as e:
             return f"❌ Error: {str(e)}"
 # Initialize API client
 print(f"🔗 Connecting to STT API: {API_ENDPOINT}")
 stt_client = RinggSTTClient(API_ENDPOINT)
 def create_interface():
     """Create Gradio interface"""
     def transcribe_audio(audio_file):
         """Transcribe uploaded audio"""
         if audio_file is None:
             return "Please upload an audio file!"
         return stt_client.transcribe_audio(audio_file)
     def check_api_status():
         """Check API health status"""
         health = stt_client.check_health()
         return health["message"]
     # Create interface
+    with gr.Blocks(
+        title="Ringg STT V0", theme=gr.themes.Soft(), css=custom_css
+    ) as demo:
+        status_class = "healthy" if health_status["status"] == "healthy" else "error"
+        gr.Markdown(f"""
         <div class="main-header">
         <h1>🎙️ Ringg STT V0</h1>
+        <p>High-Accuracy Hindi Speech-to-Text <span class="status-dot {status_class}"></span></p>
         </div>
         """)
+        gr.Markdown(
+            """ # 🎯 Performance Benchmarks \n #### **Ringg STT V0** Ranks **2nd** Among Top Models, Outperforming OpenAI Whisper Large-v3 and Other leading Solutions."""
         )
         with gr.Row():
             gr.DataFrame(
                 value=[
                 datatype=["str", "str", "str"],
                 row_count=5,
                 col_count=(3, "fixed"),
             )
         gr.Markdown("""
+        -----------------
+        # 📁 Upload an audio file for transcription (supports WAV, MP3, FLAC, M4A, etc.)
         """)
+        with gr.Row():
+            audio_input = gr.Audio(
+                label="📁 Upload Audio File",
+                type="filepath",
+                sources=["upload"],
+                scale=3,
+            )
+            transcribe_btn = gr.Button(
+                "Transcribe", variant="primary", size="sm", scale=1
+            )
+        file_output = gr.Textbox(
+            label="Transcription Result",
+            lines=3,
+            interactive=True,
+            placeholder="Upload a file and click Transcribe...",
+        )
+        transcribe_btn.click(transcribe_audio, inputs=audio_input, outputs=file_output)
+        # gr.Markdown("""
+        # ### ✨ Features
+        # - 🌐 **Hindi Support**: Accurate transcription for Hindi audio
+        # - 🎯 **High Accuracy**: Competitive with leading ASR models
+        # - 📁 **File Upload**: Support for various audio formats (WAV, MP3, FLAC, etc.)
+        # - ⚡ **Fast Processing**: Optimized for quick transcription
+        # """)
         gr.Markdown("""
+        # 🙏 Acknowledgements
         - Special thanks to [@jeremylee12](https://huggingface.co/jeremylee12) for their contributions
+        - Built with [NVIDIA NeMo](https://github.com/NVIDIA/NeMo) models
         """)
     return demo