ElvisTsang committed on
Commit
e0d7f16
·
verified ·
1 Parent(s): 42d3434

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +40 -30
app.py CHANGED
@@ -1,47 +1,57 @@
 
1
  import streamlit as st
2
  from transformers import pipeline
3
 
4
  def img2text(url):
5
- image_to_text_model = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
6
- text = image_to_text_model(url)[0]["generated_text"]
7
  return text
8
 
9
  def text2story(text):
10
- story_generator = pipeline("text-generation", model="deepseek-ai/DeepSeek-V3", trust_remote_code=True)
 
 
 
 
 
11
  prompt = f"Create a story of maximum 100 words based upon {text}"
12
- generated = story_generator(prompt, max_length=100)
13
- story_text = generated[0]['generated_text']
14
- return story_text
15
-
16
- def text2audio(story_text):
17
- audio_data = pipeline("text-to-speech", model="facebook/mms-tts-eng")
18
- return audio_data
19
-
20
- st.set_page_config(page_title="Once Upon A Time - Storytelling Application", page_icon="πŸ“–πŸ°πŸ¦„πŸ§™")
 
 
 
21
  st.header("Create a story of yours with an image!")
 
22
  uploaded_file = st.file_uploader("Upload an image of your story!")
23
 
24
  if uploaded_file is not None:
25
- print(uploaded_file)
26
- bytes_data = uploaded_file.getvalue()
27
- with open(uploaded_file.name, "wb") as file:
28
- file.write(bytes_data)
29
  st.image(uploaded_file, caption="Uploaded Image", use_column_width=True)
30
 
31
- st.text('Processing img2text...')
32
- scenario = img2text(uploaded_file.name)
33
- st.write(scenario)
 
34
 
35
- st.text('Generating a story...')
36
- story = text2story(scenario)
37
- st.write(story)
 
38
 
39
- st.text('Generating audio data...')
40
- audio_data =text2audio(story)
41
 
42
- if st.button("Story Time!"):
43
- st.audio(audio_data['audio'],
44
- format="audio/wav",
45
- start_time=0,
46
- sample_rate = audio_data['sampling_rate'])
47
- st.audio(audio_data)
 
1
+ import torch
2
  import streamlit as st
3
  from transformers import pipeline
4
 
5
  def img2text(url):
6
+ image_to_text = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
7
+ text = image_to_text(url)[0]["generated_text"]
8
  return text
9
 
10
  def text2story(text):
11
+ generator = pipeline(
12
+ "text-generation",
13
+ model="deepseek-ai/DeepSeek-V3",
14
+ trust_remote_code=True,
15
+ torch_dtype=torch.float16,
16
+ device_map="auto")
17
  prompt = f"Create a story of maximum 100 words based upon {text}"
18
+ story = generator(prompt, max_new_tokens=100)[0]['generated_text']
19
+ return story
20
+
21
+ def text2audio(text):
22
+ synthesizer = pipeline(
23
+ "text-to-speech",
24
+ model="facebook/mms-tts-eng",
25
+ device_map="auto")
26
+ audio = synthesizer(text)
27
+ return audio
28
+
29
+ st.set_page_config(page_title="Once Upon A Time - Storytelling Application", page_icon="πŸ“–")
30
  st.header("Create a story of yours with an image!")
31
+
32
  uploaded_file = st.file_uploader("Upload an image of your story!")
33
 
34
  if uploaded_file is not None:
35
+ with open(uploaded_file.name, "wb") as f:
36
+ f.write(uploaded_file.getbuffer())
37
+
 
38
  st.image(uploaded_file, caption="Uploaded Image", use_column_width=True)
39
 
40
+ with st.spinner('Analyzing image...'):
41
+ scenario = img2text(uploaded_file.name)
42
+ st.subheader("Image Description")
43
+ st.write(scenario)
44
 
45
+ with st.spinner('Crafting your story...'):
46
+ story = text2story(scenario)
47
+ st.subheader("Your Story")
48
+ st.write(story)
49
 
50
+ with st.spinner('Generating audio...'):
51
+ audio_data = text2audio(story)
52
 
53
+ st.subheader("Story Time!")
54
+ st.audio(
55
+ audio_data["audio"],
56
+ format="audio/wav",
57
+ sample_rate=audio_data["sampling_rate"])