MrSimple01 committed on
Commit
f7df8d8
·
verified ·
1 Parent(s): a3967ea

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +52 -8
app.py CHANGED
@@ -69,30 +69,74 @@ def process_video_url(video_url, output_format, api_key, model_id):
69
  else:
70
  return None, message, None, "Audio extraction failed, cannot transcribe"
71
 
72
- def transcribe_audio(audio_file, api_key, model_id="scribe_v1"):
 
 
73
  if not api_key:
74
  return {"error": "Please provide an API key"}
75
 
76
  url = "https://api.elevenlabs.io/v1/speech-to-text"
77
  headers = {
78
- "xi-api-key": api_key
 
79
  }
80
 
81
  try:
82
- with open(audio_file, "rb") as f:
83
  files = {
84
- "file": f,
85
  "model_id": (None, model_id)
86
  }
87
- response = requests.post(url, headers=headers, files=files)
88
- response.raise_for_status()
 
 
 
 
 
 
 
 
 
 
 
89
  result = response.json()
90
- return result
91
  except requests.exceptions.RequestException as e:
92
  return {"error": f"API request failed: {str(e)}"}
93
  except json.JSONDecodeError:
94
  return {"error": "Failed to parse API response"}
95
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
96
  with gr.Blocks(title="Video to Audio to Transcription") as app:
97
  gr.Markdown("# Video => Audio => Transcription")
98
 
 
69
  else:
70
  return None, message, None, "Audio extraction failed, cannot transcribe"
71
 
72
def _get_audio_duration(audio_path):
    """Return the duration of *audio_path* in seconds, or 0 if it is unknown.

    Best-effort probe: tries soundfile first, then librosa, and never raises,
    so a failed measurement cannot discard an already successful
    transcription.
    """
    try:
        # sf.info only inspects the header; sf.read would decode the whole
        # file into memory just to count frames.
        return sf.info(audio_path).duration
    except Exception:
        # soundfile may be unavailable or unable to decode this file;
        # deliberately fall through to the librosa probe.
        pass

    try:
        import librosa
    except ImportError:
        return 0

    try:
        try:
            return librosa.get_duration(path=audio_path)
        except TypeError:
            # librosa < 0.10 only knows the older ``filename`` keyword.
            return librosa.get_duration(filename=audio_path)
    except Exception:
        return 0


def transcribe_audio(audio_path, api_key, model_id="scribe_v1"):
    """Transcribe an audio file with the ElevenLabs speech-to-text API.

    Args:
        audio_path: Path of the audio file to upload.
        api_key: ElevenLabs API key, sent in the ``xi-api-key`` header.
        model_id: Identifier of the transcription model to request.

    Returns:
        On success, a dict with the keys ``service``, ``text``,
        ``processing_time`` (seconds), ``file_size_mb``, ``audio_duration``
        (seconds, 0 when unknown), ``real_time_factor``,
        ``processing_speed`` and ``raw_response``.
        On failure, a dict with an ``error`` message; a non-200 reply also
        carries the body under ``response_text``. Failures are reported
        through the returned dict rather than raised.
    """
    import mimetypes

    start_time = time.time()

    if not api_key:
        return {"error": "Please provide an API key"}

    url = "https://api.elevenlabs.io/v1/speech-to-text"
    # Do NOT set Content-Type here: requests builds the multipart header
    # itself, including the boundary the server needs to parse the body.
    headers = {"xi-api-key": api_key}

    try:
        audio_file = open(audio_path, "rb")
    except Exception as e:
        return {"error": f"Unexpected error: {str(e)}"}

    with audio_file:
        try:
            # Derive the MIME type from the extension; keep the previous
            # hard-coded value as the fallback for unknown extensions.
            mime_type = mimetypes.guess_type(str(audio_path))[0] or "audio/mpeg"
            files = {
                "file": (os.path.basename(audio_path), audio_file, mime_type),
                "model_id": (None, model_id),
            }
            response = requests.post(url, headers=headers, files=files)
        except requests.exceptions.RequestException as e:
            return {"error": f"API request failed: {str(e)}"}
        except Exception as e:
            return {"error": f"Unexpected error: {str(e)}"}

    if response.status_code != 200:
        return {
            "error": f"API request failed with status {response.status_code}",
            "response_text": response.text,
        }

    try:
        # ValueError covers json.JSONDecodeError as well as the
        # requests-specific JSONDecodeError raised by newer versions.
        result = response.json()
    except ValueError:
        return {"error": "Failed to parse API response"}
    if not isinstance(result, dict):
        # The fields below are read from a JSON object; anything else is
        # treated as an unparseable reply.
        return {"error": "Failed to parse API response"}

    processing_time = time.time() - start_time
    file_size = os.path.getsize(audio_path) / (1024 * 1024)
    audio_duration = _get_audio_duration(audio_path)

    return {
        "service": "ElevenLabs Scribe",
        "text": result.get("text", ""),
        "processing_time": processing_time,
        "file_size_mb": round(file_size, 2),
        "audio_duration": round(audio_duration, 2),
        "real_time_factor": round(processing_time / audio_duration, 2) if audio_duration > 0 else None,
        "processing_speed": round(audio_duration / processing_time, 2) if processing_time > 0 else None,
        "raw_response": result,
    }
140
  with gr.Blocks(title="Video to Audio to Transcription") as app:
141
  gr.Markdown("# Video => Audio => Transcription")
142