Spaces:
Build error
Build error
Commit
·
ba125ce
1
Parent(s):
8fe73ad
Update app.py
Browse files
app.py
CHANGED
@@ -141,19 +141,19 @@ def summarize_text(title,text,temperature,words,use_api=False,api_token=None,do_
|
|
141 |
summary_source = 'The summary was generated using {} hosted locally.'.format(llm_model_id)
|
142 |
|
143 |
|
144 |
-
# Map
|
145 |
map_template = """
|
146 |
-
|
147 |
-
You do not provide information that is not mentioned in the video. You only provide information that you are absolutely sure about.\n
|
148 |
-
Reply with the language same as the title.\n
|
149 |
-
Summarize the following text in a clear and concise way:\n
|
150 |
----------------------- \n
|
151 |
TITLE: `{title}`\n
|
152 |
TEXT:\n
|
153 |
-
`{docs}`\n
|
154 |
----------------------- \n
|
155 |
-
|
|
|
156 |
"""
|
|
|
|
|
157 |
map_prompt = PromptTemplate(
|
158 |
template = map_template,
|
159 |
input_variables = ['title','docs']
|
@@ -162,19 +162,17 @@ def summarize_text(title,text,temperature,words,use_api=False,api_token=None,do_
|
|
162 |
|
163 |
# Reduce - Collapse
|
164 |
collapse_template = """
|
165 |
-
|
166 |
-
You do not provide information that is not mentioned in the video. You only provide information that you are absolutely sure about.\n
|
167 |
-
Reply with the language same as the title.\n
|
168 |
-
The following is set of partial summaries of a video:\n
|
169 |
----------------------- \n
|
170 |
TITLE: `{title}`\n
|
171 |
PARTIAL SUMMARIES:\n
|
172 |
`{doc_summaries}`\n
|
173 |
----------------------- \n
|
174 |
-
|
175 |
-
SUMMARY:\n
|
176 |
"""
|
177 |
|
|
|
178 |
collapse_prompt = PromptTemplate(
|
179 |
template = collapse_template,
|
180 |
input_variables = ['title','doc_summaries']
|
@@ -187,18 +185,17 @@ def summarize_text(title,text,temperature,words,use_api=False,api_token=None,do_
|
|
187 |
)
|
188 |
|
189 |
# Final Reduce - Combine
|
190 |
-
combine_template = """
|
191 |
-
|
192 |
-
You do not provide information that is not mentioned in the video. You only provide information that you are absolutely sure about.\n
|
193 |
-
The following is a set of partial summaries of a video:\n
|
194 |
----------------------- \n
|
195 |
TITLE: `{title}`\n
|
196 |
PARTIAL SUMMARIES:\n
|
197 |
`{doc_summaries}`\n
|
198 |
----------------------- \n
|
199 |
-
|
200 |
EXECUTIVE SUMMARY:\n
|
201 |
"""
|
|
|
202 |
combine_prompt = PromptTemplate(
|
203 |
template = combine_template,
|
204 |
input_variables = ['title','doc_summaries','words']
|
@@ -271,7 +268,7 @@ def summarize_youtube_video(url,force_transcribe,api_token="",
|
|
271 |
if api_token == "":
|
272 |
api_token = HF_TOKEN
|
273 |
title,text,transcript_source = transcribe_youtube_video(url,force_transcribe,True,api_token)
|
274 |
-
print("Transcript:",text
|
275 |
summary, summary_source = summarize_text(title,text,temperature,words,True,api_token,do_sample)
|
276 |
print("Summary:",summary)
|
277 |
return summary, text, transcript_source, summary_source
|
@@ -298,9 +295,7 @@ with gr.Blocks() as demo:
|
|
298 |
with gr.Row():
|
299 |
with gr.Column(scale=4):
|
300 |
url = gr.Textbox(label="Enter YouTube video URL here:",placeholder="https://www.youtube.com/watch?v=",info="The video must not be age-restricted. Otherwise, the transcription will fail. The demo supports videos in English language only.")
|
301 |
-
with gr.Column(scale=
|
302 |
-
api_token = gr.Textbox(label="Paste your Hugging Face API token here (Optional):",placeholder="hf_...",visible=True,show_label=True,info='The API token passed via this field is not stored. It is only passed through the Hugging Face Hub API for inference.')
|
303 |
-
with gr.Column(scale=1):
|
304 |
sum_btn = gr.Button("Summarize!")
|
305 |
gr.Markdown("## Please like the repo if you find this helpful.")
|
306 |
|
@@ -330,14 +325,16 @@ with gr.Blocks() as demo:
|
|
330 |
with gr.Group():
|
331 |
transcript = gr.Textbox(label="Full Transcript",placeholder="transcript...",show_label=True)
|
332 |
transcript_source = gr.Textbox(visible=False)
|
333 |
-
with gr.Accordion("
|
334 |
gr.Markdown("""
|
335 |
-
|
336 |
-
|
337 |
-
|
338 |
-
|
|
|
|
|
339 |
""")
|
340 |
-
with gr.Accordion("
|
341 |
gr.Markdown("""
|
342 |
1. This app attempts to download the transcript from Youtube first. If the transcript is not available, or the prompts require, the video will be transcribed.\n
|
343 |
2. The app performs best on videos in which the number of speakers is limited or when the YouTube transcript includes annotations of the speakers.\n
|
|
|
141 |
summary_source = 'The summary was generated using {} hosted locally.'.format(llm_model_id)
|
142 |
|
143 |
|
144 |
+
# Map templates
|
145 |
map_template = """
|
146 |
+
As an AI tasked with summarizing a video, your objective is to distill the key insights without introducing new information. This prompt aims to provide a concise summary.\n
|
|
|
|
|
|
|
147 |
----------------------- \n
|
148 |
TITLE: `{title}`\n
|
149 |
TEXT:\n
|
150 |
+
`{docs}`\n
|
151 |
----------------------- \n
|
152 |
+
Summarize the provided content, emphasizing main points, key arguments, and relevant details. Keep the summary clear and succinct.\n
|
153 |
+
SUMMARY:\n
|
154 |
"""
|
155 |
+
|
156 |
+
|
157 |
map_prompt = PromptTemplate(
|
158 |
template = map_template,
|
159 |
input_variables = ['title','docs']
|
|
|
162 |
|
163 |
# Reduce - Collapse
|
164 |
collapse_template = """
|
165 |
+
As an AI tasked with combining partial summaries, your goal is to create a cohesive, comprehensive summary without duplications.\n
|
|
|
|
|
|
|
166 |
----------------------- \n
|
167 |
TITLE: `{title}`\n
|
168 |
PARTIAL SUMMARIES:\n
|
169 |
`{doc_summaries}`\n
|
170 |
----------------------- \n
|
171 |
+
Synthesize the information from the partial summaries into a consolidated, coherent summary. Ensure that the final summary covers all essential points without repeating redundant information.\n
|
172 |
+
CONSOLIDATED SUMMARY:\n
|
173 |
"""
|
174 |
|
175 |
+
|
176 |
collapse_prompt = PromptTemplate(
|
177 |
template = collapse_template,
|
178 |
input_variables = ['title','doc_summaries']
|
|
|
185 |
)
|
186 |
|
187 |
# Final Reduce - Combine
|
188 |
+
combine_template = """
|
189 |
+
As an AI tasked with summarizing a video, your goal is to distill the main insights without introducing new information. This prompt aims to generate a concise executive summary.\n
|
|
|
|
|
190 |
----------------------- \n
|
191 |
TITLE: `{title}`\n
|
192 |
PARTIAL SUMMARIES:\n
|
193 |
`{doc_summaries}`\n
|
194 |
----------------------- \n
|
195 |
+
Extract the most critical information from the partial summaries provided. Craft an executive summary in {words} words, focusing on the main arguments, key takeaways, and supporting evidence presented in the video. Aim for clarity, brevity, and avoid repeating redundant points. Ensure the summary encapsulates the essence of the content.\n
|
196 |
EXECUTIVE SUMMARY:\n
|
197 |
"""
|
198 |
+
|
199 |
combine_prompt = PromptTemplate(
|
200 |
template = combine_template,
|
201 |
input_variables = ['title','doc_summaries','words']
|
|
|
268 |
if api_token == "":
|
269 |
api_token = HF_TOKEN
|
270 |
title,text,transcript_source = transcribe_youtube_video(url,force_transcribe,True,api_token)
|
271 |
+
print("Transcript:",text)
|
272 |
summary, summary_source = summarize_text(title,text,temperature,words,True,api_token,do_sample)
|
273 |
print("Summary:",summary)
|
274 |
return summary, text, transcript_source, summary_source
|
|
|
295 |
with gr.Row():
|
296 |
with gr.Column(scale=4):
|
297 |
url = gr.Textbox(label="Enter YouTube video URL here:",placeholder="https://www.youtube.com/watch?v=",info="The video must not be age-restricted. Otherwise, the transcription will fail. The demo supports videos in English language only.")
|
298 |
+
with gr.Column(scale=2):
|
|
|
|
|
299 |
sum_btn = gr.Button("Summarize!")
|
300 |
gr.Markdown("## Please like the repo if you find this helpful.")
|
301 |
|
|
|
325 |
with gr.Group():
|
326 |
transcript = gr.Textbox(label="Full Transcript",placeholder="transcript...",show_label=True)
|
327 |
transcript_source = gr.Textbox(visible=False)
|
328 |
+
with gr.Accordion("Acknoledgement",open=True):
|
329 |
gr.Markdown("""
|
330 |
+
I sincerely appreciate the open source tools shared by [smakamali](https://huggingface.co/smakamali) (summary_method) and [Sanchit Gandhi](https://huggingface.co/sanchit-gandhi) (Whisper-Jax API)
|
331 |
+
which were instrumental in developing this project. Their publicly available innovations in AI model training and speech recognition directly enabled key capabilities. Please view their exceptional repositories on HuggingFace for additional details.\n
|
332 |
+
|
333 |
+
[summarize_youtube](https://huggingface.co/spaces/smakamali/summarize_youtube)\n
|
334 |
+
Detailed instructions for recreating this tool are provided [here](https://pub.towardsai.net/a-complete-guide-for-creating-an-ai-assistant-for-summarizing-youtube-videos-part-1-32fbadabc2cc?sk=34269402931178039c4c3589df4a6ec5) and [here](https://pub.towardsai.net/a-complete-guide-for-creating-an-ai-assistant-for-summarizing-youtube-videos-part-2-a008ee18f341?sk=d59046b36a52c74dfa8befa99183e5b6).\n
|
335 |
+
[Whisper-Jax-api](https://sanchit-gandhi-whisper-jax.hf.space/)\n
|
336 |
""")
|
337 |
+
with gr.Accordion("Disclaimer",open=False):
|
338 |
gr.Markdown("""
|
339 |
1. This app attempts to download the transcript from Youtube first. If the transcript is not available, or the prompts require, the video will be transcribed.\n
|
340 |
2. The app performs best on videos in which the number of speakers is limited or when the YouTube transcript includes annotations of the speakers.\n
|