utkarshshukla2912 committed on
Commit
033af1b
·
1 Parent(s): 6d80c52

UI improvements

Browse files
Files changed (1) hide show
  1. app.py +121 -89
app.py CHANGED
@@ -25,6 +25,58 @@ custom_css = """
25
  border-radius: 10px;
26
  margin-bottom: 20px;
27
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
  footer {
29
  visibility: hidden !important;
30
  height: 50px !important;
@@ -42,68 +94,65 @@ footer:after {
42
 
43
  # Backend API endpoint (ngrok URL)
44
  # You can update this via Hugging Face Space Secrets
45
- API_ENDPOINT = os.environ.get("STT_API_ENDPOINT", "https://unintuitional-vibrational-jordy.ngrok-free.dev")
 
46
 
47
  class RinggSTTClient:
48
  """Client for Ringg STT API"""
49
-
50
  def __init__(self, api_endpoint: str):
51
- self.api_endpoint = api_endpoint.rstrip('/')
52
  self.session = requests.Session()
53
- self.session.headers.update({
54
- 'User-Agent': 'RinggSTT-HF-Space/1.0'
55
- })
56
-
57
  def check_health(self) -> dict:
58
  """Check if the API is available"""
59
  try:
60
- response = self.session.get(
61
- f"{self.api_endpoint}/health",
62
- timeout=5
63
- )
64
  if response.status_code == 200:
65
  return {"status": "healthy", "message": "✅ API is online"}
66
  else:
67
- return {"status": "error", "message": f"❌ API returned status {response.status_code}"}
 
 
 
68
  except requests.exceptions.Timeout:
69
  return {"status": "error", "message": "⏱️ API request timed out"}
70
  except requests.exceptions.ConnectionError:
71
  return {"status": "error", "message": "❌ Cannot connect to API"}
72
  except Exception as e:
73
  return {"status": "error", "message": f"❌ Error: {str(e)}"}
74
-
75
  def transcribe_audio(self, audio_file_path: str) -> str:
76
  """Transcribe audio file via API"""
77
  try:
78
  # Read audio file and encode as base64
79
- with open(audio_file_path, 'rb') as f:
80
  audio_data = f.read()
81
-
82
- audio_base64 = base64.b64encode(audio_data).decode('utf-8')
83
-
84
  # Make API request
85
  response = self.session.post(
86
  f"{self.api_endpoint}/transcribe",
87
- json={
88
- "audio_data": audio_base64,
89
- "sample_rate": 16000
90
- },
91
- timeout=30
92
  )
93
-
94
  if response.status_code == 200:
95
  result = response.json()
96
  return result.get("transcription", "No transcription received")
97
  else:
98
  return f"❌ API Error: {response.status_code} - {response.text}"
99
-
100
  except requests.exceptions.Timeout:
101
  return "⏱️ Request timed out. The audio file might be too long."
102
  except requests.exceptions.ConnectionError:
103
  return "❌ Cannot connect to the transcription service. Please try again later."
104
  except Exception as e:
105
  return f"❌ Error: {str(e)}"
106
-
 
107
  # Initialize API client
108
  print(f"🔗 Connecting to STT API: {API_ENDPOINT}")
109
  stt_client = RinggSTTClient(API_ENDPOINT)
@@ -115,81 +164,35 @@ print(f"API Health: {health_status}")
115
 
116
  def create_interface():
117
  """Create Gradio interface"""
118
-
119
  def transcribe_audio(audio_file):
120
  """Transcribe uploaded audio"""
121
  if audio_file is None:
122
  return "Please upload an audio file!"
123
-
124
  return stt_client.transcribe_audio(audio_file)
125
-
126
  def check_api_status():
127
  """Check API health status"""
128
  health = stt_client.check_health()
129
  return health["message"]
130
-
131
  # Create interface
132
- with gr.Blocks(title="Ringg STT V0", theme=gr.themes.Soft(), css=custom_css) as demo:
133
- gr.Markdown("""
 
 
 
134
  <div class="main-header">
135
  <h1>🎙️ Ringg STT V0</h1>
136
- <p>High-Accuracy Hindi Speech-to-Text</p>
137
  </div>
138
  """)
139
 
140
- # API Status indicator
141
- with gr.Row():
142
- with gr.Column(scale=4):
143
- api_status = gr.Textbox(
144
- label="🔌 API Status",
145
- value=health_status["message"],
146
- interactive=False
147
- )
148
- with gr.Column(scale=1):
149
- check_btn = gr.Button("🔄 Check Status", size="sm")
150
- check_btn.click(check_api_status, outputs=api_status)
151
-
152
- gr.Markdown("""
153
- ### 📁 File Upload
154
- Upload an audio file for transcription (supports WAV, MP3, FLAC, M4A, etc.)
155
- """)
156
-
157
- audio_input = gr.Audio(
158
- label="📁 Upload Audio File",
159
- type="filepath",
160
- sources=["upload"]
161
- )
162
-
163
- transcribe_btn = gr.Button("🔄 Transcribe", variant="primary", size="lg")
164
-
165
- file_output = gr.Textbox(
166
- label="Transcription Result",
167
- lines=8,
168
- interactive=False,
169
- placeholder="Upload a file and click Transcribe..."
170
- )
171
-
172
- transcribe_btn.click(
173
- transcribe_audio,
174
- inputs=audio_input,
175
- outputs=file_output
176
  )
177
 
178
- gr.Markdown("""
179
- ### ✨ Features
180
- - 🌐 **Hindi Support**: Accurate transcription for Hindi audio
181
- - 🎯 **High Accuracy**: Competitive with leading ASR models
182
- - 📁 **File Upload**: Support for various audio formats (WAV, MP3, FLAC, etc.)
183
- - ⚡ **Fast Processing**: Optimized for quick transcription
184
- """)
185
-
186
- # Performance Comparison Table
187
- gr.Markdown("""
188
- ## Performance Benchmarks
189
-
190
- Our model achieves **state-of-the-art performance** on Hindi speech recognition benchmarks:
191
- """)
192
-
193
  with gr.Row():
194
  gr.DataFrame(
195
  value=[
@@ -203,20 +206,49 @@ def create_interface():
203
  datatype=["str", "str", "str"],
204
  row_count=5,
205
  col_count=(3, "fixed"),
206
- label="Word Error Rate Comparison (Lower is Better)"
207
  )
208
-
209
  gr.Markdown("""
210
- **Ringg STT V0** ranks **2nd** among top models, outperforming OpenAI Whisper Large-v3 and other leading solutions.
 
211
 
212
- Lower WER (Word Error Rate) indicates better accuracy. Our model delivers competitive performance for Hindi transcription tasks.
213
  """)
214
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
215
  gr.Markdown("""
216
- ### 🙏 Acknowledgements
217
  - Special thanks to [@jeremylee12](https://huggingface.co/jeremylee12) for their contributions
 
218
  """)
219
-
220
  return demo
221
 
222
 
 
25
  border-radius: 10px;
26
  margin-bottom: 20px;
27
  }
28
+ .status-dot {
29
+ display: inline-block;
30
+ width: 8px;
31
+ height: 8px;
32
+ border-radius: 50%;
33
+ margin-left: 8px;
34
+ }
35
+ .status-dot.healthy {
36
+ background-color: #22c55e;
37
+ animation: pulse-green 2s ease-in-out infinite;
38
+ }
39
+ .status-dot.error {
40
+ background-color: #ef4444;
41
+ animation: pulse-red 2s ease-in-out infinite;
42
+ }
43
+ @keyframes pulse-green {
44
+ 0% {
45
+ box-shadow: 0 0 0 0 rgba(34, 197, 94, 0.7);
46
+ }
47
+ 70% {
48
+ box-shadow: 0 0 0 6px rgba(34, 197, 94, 0);
49
+ }
50
+ 100% {
51
+ box-shadow: 0 0 0 0 rgba(34, 197, 94, 0);
52
+ }
53
+ }
54
+ @keyframes pulse-red {
55
+ 0% {
56
+ box-shadow: 0 0 0 0 rgba(239, 68, 68, 0.7);
57
+ }
58
+ 70% {
59
+ box-shadow: 0 0 0 6px rgba(239, 68, 68, 0);
60
+ }
61
+ 100% {
62
+ box-shadow: 0 0 0 0 rgba(239, 68, 68, 0);
63
+ }
64
+ }
65
+ div[data-testid="audio"] {
66
+ min-height: 60px !important;
67
+ max-height: 80px !important;
68
+ }
69
+ div[data-testid="audio"] > div {
70
+ height: auto !important;
71
+ min-height: auto !important;
72
+ }
73
+ .wrap.wrap.wrap.svelte-1w6y6zl {
74
+ height: auto !important;
75
+ min-height: auto !important;
76
+ }
77
+ .gradio-row {
78
+ min-height: auto !important;
79
+ }
80
  footer {
81
  visibility: hidden !important;
82
  height: 50px !important;
 
94
 
95
  # Backend API endpoint (ngrok URL)
96
  # You can update this via Hugging Face Space Secrets
97
+ API_ENDPOINT = os.environ.get("STT_API_ENDPOINT", "http://13.234.40.75:7861")
98
+
99
 
100
  class RinggSTTClient:
101
  """Client for Ringg STT API"""
102
+
103
  def __init__(self, api_endpoint: str):
104
+ self.api_endpoint = api_endpoint.rstrip("/")
105
  self.session = requests.Session()
106
+ self.session.headers.update({"User-Agent": "RinggSTT-HF-Space/1.0"})
107
+
 
 
108
  def check_health(self) -> dict:
109
  """Check if the API is available"""
110
  try:
111
+ response = self.session.get(f"{self.api_endpoint}/health", timeout=5)
 
 
 
112
  if response.status_code == 200:
113
  return {"status": "healthy", "message": "✅ API is online"}
114
  else:
115
+ return {
116
+ "status": "error",
117
+ "message": f"❌ API returned status {response.status_code}",
118
+ }
119
  except requests.exceptions.Timeout:
120
  return {"status": "error", "message": "⏱️ API request timed out"}
121
  except requests.exceptions.ConnectionError:
122
  return {"status": "error", "message": "❌ Cannot connect to API"}
123
  except Exception as e:
124
  return {"status": "error", "message": f"❌ Error: {str(e)}"}
125
+
126
  def transcribe_audio(self, audio_file_path: str) -> str:
127
  """Transcribe audio file via API"""
128
  try:
129
  # Read audio file and encode as base64
130
+ with open(audio_file_path, "rb") as f:
131
  audio_data = f.read()
132
+
133
+ audio_base64 = base64.b64encode(audio_data).decode("utf-8")
134
+
135
  # Make API request
136
  response = self.session.post(
137
  f"{self.api_endpoint}/transcribe",
138
+ json={"audio_data": audio_base64, "sample_rate": 16000},
139
+ timeout=30,
 
 
 
140
  )
141
+
142
  if response.status_code == 200:
143
  result = response.json()
144
  return result.get("transcription", "No transcription received")
145
  else:
146
  return f"❌ API Error: {response.status_code} - {response.text}"
147
+
148
  except requests.exceptions.Timeout:
149
  return "⏱️ Request timed out. The audio file might be too long."
150
  except requests.exceptions.ConnectionError:
151
  return "❌ Cannot connect to the transcription service. Please try again later."
152
  except Exception as e:
153
  return f"❌ Error: {str(e)}"
154
+
155
+
156
  # Initialize API client
157
  print(f"🔗 Connecting to STT API: {API_ENDPOINT}")
158
  stt_client = RinggSTTClient(API_ENDPOINT)
 
164
 
165
  def create_interface():
166
  """Create Gradio interface"""
167
+
168
  def transcribe_audio(audio_file):
169
  """Transcribe uploaded audio"""
170
  if audio_file is None:
171
  return "Please upload an audio file!"
172
+
173
  return stt_client.transcribe_audio(audio_file)
174
+
175
  def check_api_status():
176
  """Check API health status"""
177
  health = stt_client.check_health()
178
  return health["message"]
179
+
180
  # Create interface
181
+ with gr.Blocks(
182
+ title="Ringg STT V0", theme=gr.themes.Soft(), css=custom_css
183
+ ) as demo:
184
+ status_class = "healthy" if health_status["status"] == "healthy" else "error"
185
+ gr.Markdown(f"""
186
  <div class="main-header">
187
  <h1>🎙️ Ringg STT V0</h1>
188
+ <p>High-Accuracy Hindi Speech-to-Text <span class="status-dot {status_class}"></span></p>
189
  </div>
190
  """)
191
 
192
+ gr.Markdown(
193
+ """ # 🎯 Performance Benchmarks \n #### **Ringg STT V0** Ranks **2nd** Among Top Models, Outperforming OpenAI Whisper Large-v3 and Other leading Solutions."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
194
  )
195
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
196
  with gr.Row():
197
  gr.DataFrame(
198
  value=[
 
206
  datatype=["str", "str", "str"],
207
  row_count=5,
208
  col_count=(3, "fixed"),
 
209
  )
210
+
211
  gr.Markdown("""
212
+ -----------------
213
+ # 📁 Upload an audio file for transcription (supports WAV, MP3, FLAC, M4A, etc.)
214
 
 
215
  """)
216
 
217
+ with gr.Row():
218
+ audio_input = gr.Audio(
219
+ label="📁 Upload Audio File",
220
+ type="filepath",
221
+ sources=["upload"],
222
+ scale=3,
223
+ )
224
+
225
+ transcribe_btn = gr.Button(
226
+ "Transcribe", variant="primary", size="sm", scale=1
227
+ )
228
+
229
+ file_output = gr.Textbox(
230
+ label="Transcription Result",
231
+ lines=3,
232
+ interactive=True,
233
+ placeholder="Upload a file and click Transcribe...",
234
+ )
235
+
236
+ transcribe_btn.click(transcribe_audio, inputs=audio_input, outputs=file_output)
237
+
238
+ # gr.Markdown("""
239
+ # ### ✨ Features
240
+ # - 🌐 **Hindi Support**: Accurate transcription for Hindi audio
241
+ # - 🎯 **High Accuracy**: Competitive with leading ASR models
242
+ # - 📁 **File Upload**: Support for various audio formats (WAV, MP3, FLAC, etc.)
243
+ # - ⚡ **Fast Processing**: Optimized for quick transcription
244
+ # """)
245
+
246
  gr.Markdown("""
247
+ # 🙏 Acknowledgements
248
  - Special thanks to [@jeremylee12](https://huggingface.co/jeremylee12) for their contributions
249
+ - Built with [NVIDIA NeMo](https://github.com/NVIDIA/NeMo) models
250
  """)
251
+
252
  return demo
253
 
254