Spaces:

MrSimple01
/

Demo_ScribeAPI_Trancription

Sleeping

App Files Files Community

MrSimple01 committed on Mar 25

Commit

f7df8d8

verified ·

1 Parent(s): a3967ea

Update app.py

Browse files

Files changed (1) hide show

app.py +52 -8

app.py CHANGED Viewed

@@ -69,30 +69,74 @@ def process_video_url(video_url, output_format, api_key, model_id):
     else:
         return None, message, None, "Audio extraction failed, cannot transcribe"
def transcribe_audio(audio_file, api_key, model_id="scribe_v1"):
    """Send an audio file to the ElevenLabs speech-to-text endpoint.

    Args:
        audio_file: Path of the audio file to upload.
        api_key: Value sent in the ``xi-api-key`` header. A falsy value
            short-circuits with an error dict and no request is made.
        model_id: Model identifier sent as the ``model_id`` form field.

    Returns:
        The decoded JSON response on success, otherwise a dict with a
        single ``"error"`` key describing what went wrong. This function
        reports failures through that dict instead of raising.
    """
    if not api_key:
        return {"error": "Please provide an API key"}

    url = "https://api.elevenlabs.io/v1/speech-to-text"
    headers = {"xi-api-key": api_key}

    # Open the file on its own so an unreadable path becomes an error dict
    # like every other failure. This cannot share the request's try block:
    # requests' RequestException derives from OSError, so one handler could
    # not tell the two apart.
    try:
        audio_handle = open(audio_file, "rb")
    except (OSError, TypeError, ValueError) as e:
        return {"error": f"Could not read audio file: {e}"}

    try:
        with audio_handle:
            files = {
                "file": audio_handle,
                "model_id": (None, model_id),
            }
            response = requests.post(url, headers=headers, files=files)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        return {"error": f"API request failed: {str(e)}"}

    # json.JSONDecodeError is a ValueError subclass; catching ValueError here,
    # outside the RequestException handler, keeps the parse-failure message
    # reachable on requests versions whose JSON error is a RequestException.
    try:
        return response.json()
    except ValueError:
        return {"error": "Failed to parse API response"}
 with gr.Blocks(title="Video to Audio to Transcription") as app:
     gr.Markdown("# Video => Audio => Transcription")

     else:
         return None, message, None, "Audio extraction failed, cannot transcribe"
def _probe_audio_duration(audio_path):
    """Return the duration of *audio_path* in seconds, or 0 if unknown.

    The duration only feeds the reported metrics, so this is deliberately
    best-effort: any failure falls through to the next strategy and finally
    to 0 rather than discarding an otherwise successful transcription.
    """
    try:
        # Reads only the header instead of decoding the whole file.
        return sf.info(audio_path).duration
    except Exception:
        pass  # unsupported format / decoder error: try librosa next
    try:
        import librosa
        try:
            return librosa.get_duration(path=audio_path)
        except TypeError:
            # Older librosa releases name this keyword ``filename``.
            return librosa.get_duration(filename=audio_path)
    except Exception:
        return 0


def transcribe_audio(audio_path, api_key, model_id="scribe_v1"):
    """Transcribe an audio file with ElevenLabs and report timing metrics.

    Args:
        audio_path: Path of the audio file to upload.
        api_key: Value sent in the ``xi-api-key`` header. A falsy value
            short-circuits with an error dict and no request is made.
        model_id: Model identifier sent as the ``model_id`` form field.

    Returns:
        On success, a dict with the keys ``service``, ``text``,
        ``processing_time`` (seconds), ``file_size_mb``, ``audio_duration``
        (seconds, 0 when it could not be determined), ``real_time_factor``,
        ``processing_speed`` and ``raw_response``. The two ratio fields are
        ``None`` when their divisor is not positive.
        On failure, a dict with an ``"error"`` key (plus ``"response_text"``
        for a non-200 HTTP status). Failures are reported through that dict
        rather than raised.
    """
    started = time.perf_counter()
    if not api_key:
        return {"error": "Please provide an API key"}

    url = "https://api.elevenlabs.io/v1/speech-to-text"
    # Do NOT set Content-Type by hand: with ``files=`` requests builds the
    # multipart header itself, including the boundary. A hand-written
    # "multipart/form-data" has no boundary and leaves the body unparseable.
    headers = {"xi-api-key": api_key}

    # Touch the file before the request so a bad path yields an error dict.
    # This cannot share the request's try block: requests' RequestException
    # derives from OSError, so one handler could not tell the two apart.
    try:
        file_size = os.path.getsize(audio_path) / (1024 * 1024)
        audio_file = open(audio_path, "rb")
    except (OSError, TypeError, ValueError) as e:
        return {"error": f"Unexpected error: {str(e)}"}

    try:
        with audio_file:
            files = {
                "file": (os.path.basename(audio_path), audio_file, "audio/mpeg"),
                "model_id": (None, model_id),
            }
            response = requests.post(url, headers=headers, files=files)
    except requests.exceptions.RequestException as e:
        return {"error": f"API request failed: {str(e)}"}
    except Exception as e:
        return {"error": f"Unexpected error: {str(e)}"}

    if response.status_code != 200:
        return {
            "error": f"API request failed with status {response.status_code}",
            "response_text": response.text,
        }

    # json.JSONDecodeError is a ValueError subclass; catching ValueError here,
    # outside the RequestException handler, keeps the parse-failure message
    # reachable on requests versions whose JSON error is a RequestException.
    try:
        result = response.json()
    except ValueError:
        return {"error": "Failed to parse API response"}

    # Stop the clock before the local duration probe so that only the
    # upload/transcription round trip is measured.
    processing_time = time.perf_counter() - started
    audio_duration = _probe_audio_duration(audio_path)

    return {
        "service": "ElevenLabs Scribe",
        "text": result.get('text', ''),
        "processing_time": processing_time,
        "file_size_mb": round(file_size, 2),
        "audio_duration": round(audio_duration, 2),
        "real_time_factor": round(processing_time / audio_duration, 2) if audio_duration > 0 else None,
        "processing_speed": round(audio_duration / processing_time, 2) if processing_time > 0 else None,
        "raw_response": result,
    }
 with gr.Blocks(title="Video to Audio to Transcription") as app:
     gr.Markdown("# Video => Audio => Transcription")