Spaces:
Sleeping
Sleeping
File size: 1,523 Bytes
16ed15e e3ae64b 16ed15e e3ae64b 16ed15e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 |
import traceback
import sys
from youtube_transcript_api import YouTubeTranscriptApi
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
def add_watch_word(link):
    """Normalize a YouTube URL so it contains a 'watch?v=' segment.

    A link that already looks like a watch/embed URL is returned
    unchanged; otherwise the last path segment is treated as the video
    id and rewritten as '.../watch?v=<id>'.
    """
    # Already in a recognized YouTube URL form — leave it alone.
    for marker in ('watch?', 'embed/', 'v='):
        if marker in link:
            return link
    segments = link.split('/')
    video_id = segments[-1]
    base = '/'.join(segments[:-1])
    return base + '/watch?v=' + video_id
def Summarizer(link, model):
    """Fetch a YouTube video's transcript and return a generated summary.

    Parameters
    ----------
    link : str
        A YouTube video URL in any form accepted by ``add_watch_word``.
    model : str
        Model selector: "Pegasus", "mT5" or "BART".

    Returns
    -------
    str or None
        The summary text, or None if the transcript fetch or
        summarization failed (the traceback is printed, preserving the
        original best-effort behavior).
    """
    # Map the user-facing model names to HF Hub checkpoints. A dict
    # lookup also fixes the original bug where an unknown `model` left
    # `checkpoint` unbound and raised a confusing NameError later.
    checkpoints = {
        "Pegasus": "google/pegasus-large",
        "mT5": "csebuetnlp/mT5_multilingual_XLSum",
        "BART": "sshleifer/distilbart-cnn-12-6",
    }
    link = add_watch_word(link)
    # BUG FIX: the original `link.split("=")[1]` kept trailing query
    # parameters (e.g. "...v=ID&t=42s" -> "ID&t") and raised IndexError
    # on links containing no "=". Strip anything after the id instead.
    video_id = link.split("v=")[-1].split("&")[0].split("?")[0]
    try:
        checkpoint = checkpoints[model]  # KeyError for unknown names, caught below
        transcript = YouTubeTranscriptApi.get_transcript(video_id)
        full_text = ' '.join(chunk['text'] for chunk in transcript)
        tokenizer = AutoTokenizer.from_pretrained(checkpoint)
        # Separate name so the `model` argument is no longer shadowed.
        seq2seq = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)
        inputs = tokenizer(full_text,
                           max_length=1024,
                           truncation=True,
                           return_tensors="pt")
        summary_ids = seq2seq.generate(inputs["input_ids"])
        summary = tokenizer.batch_decode(summary_ids,
                                         skip_special_tokens=True,
                                         clean_up_tokenization_spaces=False)
        return summary[0]
    except Exception:
        # Best-effort: report the failure and implicitly return None,
        # matching the original contract. (The original also printed
        # sys.exc_info()[2], which is just a traceback object's repr —
        # dropped as redundant with format_exc().)
        print(traceback.format_exc())
|