Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -2,6 +2,7 @@ import streamlit as st
|
|
2 |
import re
|
3 |
import string
|
4 |
from transformers import pipeline
|
|
|
5 |
|
6 |
def img2text(url):
|
7 |
image_to_text_model = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base", use_fast=True)
|
@@ -50,7 +51,12 @@ def text2story(text):
|
|
50 |
|
51 |
return final_story
|
52 |
|
53 |
-
|
|
|
|
|
|
|
|
|
|
|
54 |
|
55 |
def main():
|
56 |
st.set_page_config(page_title="Once Upon A Time - Storytelling Application", page_icon="📖")
|
@@ -73,7 +79,9 @@ def main():
|
|
73 |
story = text2story(scenario)
|
74 |
st.write(story)
|
75 |
|
76 |
-
|
|
|
|
|
77 |
|
78 |
if __name__ == "__main__":
|
79 |
main()
|
|
|
2 |
import re
|
3 |
import string
|
4 |
from transformers import pipeline
|
5 |
+
from datasets import load_dataset
|
6 |
|
7 |
def img2text(url):
|
8 |
image_to_text_model = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base", use_fast=True)
|
|
|
51 |
|
52 |
return final_story
|
53 |
|
54 |
+
def text2audio(story_text):
    """Synthesize speech for ``story_text`` with the SpeechT5 text-to-speech pipeline.

    Args:
        story_text: The story text to narrate.

    Returns:
        The value returned by the ``text-to-speech`` pipeline call. The caller
        in ``main`` reads its ``'audio'`` and ``'sampling_rate'`` entries.
    """
    # Imported here because the file's top-level imports (streamlit, re,
    # string, transformers.pipeline, datasets.load_dataset) do not bring
    # ``torch`` into scope; without it the ``torch.tensor`` call below raises
    # NameError.
    import torch

    audio_generator = pipeline("text-to-speech", "microsoft/speecht5_tts")

    # The speaker embedding is taken from a fixed entry (index 7306) of the
    # CMU ARCTIC x-vector dataset; ``unsqueeze(0)`` adds a leading dimension
    # before it is handed to the model.
    embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
    speaker_embedding = torch.tensor(embeddings_dataset[7306]["xvector"]).unsqueeze(0)

    audio_output = audio_generator(story_text, forward_params={"speaker_embeddings": speaker_embedding})
    return audio_output
|
60 |
|
61 |
def main():
|
62 |
st.set_page_config(page_title="Once Upon A Time - Storytelling Application", page_icon="📖")
|
|
|
79 |
story = text2story(scenario)
|
80 |
st.write(story)
|
81 |
|
82 |
+
st.text('Your story is going to be told...🎧')
|
83 |
+
audio_data = text2audio(story)
|
84 |
+
st.audio(audio_data['audio'], format="audio/wav", start_time=0, sample_rate=audio_data['sampling_rate'])
|
85 |
|
86 |
if __name__ == "__main__":
|
87 |
main()
|