Spaces:
Running
Running
New transcribe object
Browse files- tools/transcribe_youtube +57 -21
tools/transcribe_youtube
CHANGED
@@ -1,29 +1,65 @@
|
|
1 |
-
from
|
|
|
|
|
2 |
import whisper
|
3 |
import io
|
4 |
|
5 |
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
# Use a BytesIO buffer to store the audio in memory
|
13 |
-
audio_buffer = io.BytesIO()
|
14 |
-
audio_stream.stream_to_buffer(audio_buffer)
|
15 |
-
audio_buffer.seek(0) # Reset buffer position to the beginning
|
16 |
|
17 |
-
|
18 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
19 |
|
20 |
-
# Step 3: Transcribe audio from memory
|
21 |
-
result = model.transcribe(audio_buffer)
|
22 |
-
return result["text"]
|
23 |
-
except Exception as e:
|
24 |
-
return f"An error occurred: {str(e)}"
|
25 |
|
26 |
# Example usage
|
27 |
-
youtube_url = "https://www.youtube.com/watch?v=example"
|
28 |
-
lyrics = transcribe_youtube_audio(youtube_url)
|
29 |
-
print("Lyrics:", lyrics)
|
|
|
1 |
+
from typing import Any, Optional
|
2 |
+
from smolagents.tools import Tool
|
3 |
+
from pytube import youtube
|
4 |
import whisper
|
5 |
import io
|
6 |
|
7 |
|
8 |
+
class TranscribeYouTubeTool(Tool):
    """Agent tool that transcribes the audio track of a YouTube video.

    The tool downloads the first audio-only stream of the video with
    ``pytube``, keeps it in memory, and runs it through a Whisper model.
    ``forward`` always returns a string: the transcript on success, or an
    ``"An error occurred: ..."`` message on failure.
    """

    name = "transcribe_youtube"
    description = "Returns a youtube transcript."
    inputs = {'query': {'type': 'string', 'description': 'A YouTube URL.'}}
    output_type = "string"

    def __init__(self, max_results=10, *, model_name="base", **kwargs):
        """Prepare the tool without downloading or loading anything yet.

        Args:
            max_results: Kept only for backward compatibility with existing
                callers; transcription does not use it.
            model_name: Whisper checkpoint to load on first use. Use
                "small", "medium", or "large" for better accuracy.
            **kwargs: Extra keyword arguments forwarded to
                ``pytube.YouTube`` each time a video is opened.

        Raises:
            ImportError: If ``pytube`` is not installed.
        """
        super().__init__()
        self.max_results = max_results
        try:
            # NOTE(review): the module-level `from pytube import youtube`
            # (lowercase) is not a name pytube exports; the class is `YouTube`.
            from pytube import YouTube
        except ImportError as e:
            raise ImportError(
                "You must install package `pytube` to run this tool: for instance run `pip install pytube`."
            ) from e

        # The URL is only known when `forward` is called, so keep the class
        # and its options here instead of building a YouTube object now.
        self._youtube_cls = YouTube
        self._youtube_kwargs = kwargs
        self.model_name = model_name
        self._model = None  # Whisper model, loaded lazily by `get_text`.

    def forward(self, query: str) -> str:
        """Return the transcript of the video at the URL given in ``query``.

        Args:
            query: A YouTube video URL.

        Returns:
            The transcribed text, or ``"An error occurred: <reason>"`` if the
            download or the transcription fails.
        """
        try:
            audio_buffer = self.get_audio(query)
            return self.get_text(audio_buffer)
        except Exception as e:
            # Tool boundary: report the failure as text so the declared
            # "string" output type is always honoured.
            return f"An error occurred: {str(e)}"

    def get_audio(self, url: str) -> io.BytesIO:
        """Download the first audio-only stream of ``url`` into memory.

        Args:
            url: A YouTube video URL.

        Returns:
            A ``BytesIO`` holding the audio data, positioned at the start.

        Raises:
            ValueError: If ``url`` is empty or blank.
            RuntimeError: If the video exposes no audio-only stream.
        """
        if not url or not url.strip():
            raise ValueError("A YouTube URL is required.")

        yt = self._youtube_cls(url.strip(), **self._youtube_kwargs)
        audio_stream = yt.streams.filter(only_audio=True).first()
        if audio_stream is None:
            raise RuntimeError("No audio-only stream is available for this video.")

        # Use a BytesIO buffer to store the audio in memory
        audio_buffer = io.BytesIO()
        audio_stream.stream_to_buffer(audio_buffer)
        audio_buffer.seek(0)  # Reset buffer position to the beginning
        return audio_buffer

    def get_text(self, audio_buffer: io.BytesIO) -> str:
        """Transcribe in-memory audio with Whisper and return the text.

        Args:
            audio_buffer: Audio data as produced by ``get_audio``.

        Returns:
            The ``"text"`` field of Whisper's transcription result.
        """
        import os
        import tempfile

        # Load the model once and reuse it; loading is the expensive step.
        if self._model is None:
            self._model = whisper.load_model(self.model_name)

        # Whisper's `transcribe` expects a file path (or an audio array), not
        # a file-like object, so spill the buffer to a temporary file first.
        # delete=False lets another process (ffmpeg) reopen it on any OS.
        with tempfile.NamedTemporaryFile(delete=False) as tmp:
            tmp.write(audio_buffer.getvalue())
            tmp_path = tmp.name
        try:
            result = self._model.transcribe(tmp_path)
        finally:
            os.unlink(tmp_path)
        return result["text"]
|
60 |
|
|
|
|
|
|
|
|
|
|
|
61 |
|
62 |
# Example usage
|
63 |
+
#youtube_url = "https://www.youtube.com/watch?v=example"
|
64 |
+
#lyrics = TranscribeYouTubeTool().forward(youtube_url)
|
65 |
+
#print("Lyrics:", lyrics)
|