Spaces:

czhaobt
/

ISOM5240

Sleeping

File size: 1,928 Bytes

221ba92
 
 
 
fc48446
 
 
 
dadddbe
 
 
fc48446
221ba92
fc48446
dadddbe
 
 
fc48446
 
 
dadddbe
 
 
 
 
 
fc48446
 
 
 
 
 
 
dadddbe
221ba92
dadddbe
221ba92
dadddbe
 
df4ad39
cdac439
fc48446
dadddbe
 
fc48446
cdac439
dadddbe
 
 
fc48446
cdac439
dadddbe
 
df4ad39
dadddbe
 
 
 
 
 
 
 
 
cdac439

import os
import tempfile

import streamlit as st
from gtts import gTTS
from transformers import pipeline

# function part
# img2text
def img2text(url):
    """Caption an image with the BLIP base image-captioning model.

    Args:
        url: Image reference handed unchanged to the Hugging Face
            ``image-to-text`` pipeline (the caller in this file passes a
            local file path).

    Returns:
        The ``generated_text`` field of the first caption the pipeline
        returns.
    """
    # NOTE: the pipeline (and therefore the model) is built on every call.
    captioner = pipeline(
        "image-to-text",
        model="Salesforce/blip-image-captioning-base",
    )
    captions = captioner(url)
    first_caption = captions[0]
    return first_caption["generated_text"]

# text2story
def text2story(text):
    """Expand a short prompt into a story with a text-generation pipeline.

    Args:
        text: Prompt passed to the generator.

    Returns:
        The ``generated_text`` of the single sequence requested from the
        model.
    """
    # NOTE: the pipeline (and therefore the model) is built on every call.
    generator = pipeline(
        "text-generation",
        model="agentica-org/DeepScaleR-1.5B-Preview",
    )
    outputs = generator(text, max_length=200, num_return_sequences=1)
    return outputs[0]["generated_text"]

# text2audio
def text2audio(story_text):
    """Synthesize English speech for *story_text* and save it to disk.

    gTTS produces MP3 data, so the output file carries an ``.mp3``
    extension; the previous ``.wav`` name mislabelled the contents.

    Args:
        story_text: Text to be spoken.

    Returns:
        Relative path of the audio file that was written (in the current
        working directory). The caller is responsible for deleting it.
    """
    # Create the gTTS object for English speech.
    tts = gTTS(text=story_text, lang='en')
    # Save the audio file; gTTS.save() writes MP3 bytes.
    audio_file = "story_audio.mp3"
    tts.save(audio_file)
    return audio_file

# Streamlit page: upload an image, caption it, expand the caption into a
# story, and offer the story as playable audio.
st.set_page_config(page_title="Your Image to Audio Story",
                   page_icon="🦜")
st.header("Turn Your Image to Audio Story")
uploaded_file = st.file_uploader("Select an Image...")

if uploaded_file is not None:
    print(uploaded_file)

    # Persist the upload under a server-chosen temporary name. Writing to
    # `uploaded_file.name` directly would let a client-supplied filename
    # overwrite files in the working directory, and the copy was never
    # removed. Only the original extension is kept.
    suffix = os.path.splitext(uploaded_file.name)[1]
    with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as image_tmp:
        image_tmp.write(uploaded_file.getvalue())
        image_path = image_tmp.name

    audio_file = None
    try:
        st.image(uploaded_file, caption="Uploaded Image",
                 use_column_width=True)

        # Stage 1: Image to Text
        st.text('Processing img2text...')
        scenario = img2text(image_path)
        st.write(scenario)

        # Stage 2: Text to Story
        st.text('Generating a story...')
        story = text2story(scenario)
        st.write(story)

        # Stage 3: Story to Audio data
        st.text('Generating audio data...')
        audio_file = text2audio(story)

        # Play button
        if st.button("Play Audio"):
            with open(audio_file, "rb") as audio:
                audio_bytes = audio.read()
            # gTTS output is MP3 data, so advertise the matching MIME type.
            st.audio(audio_bytes,
                     format="audio/mpeg",
                     start_time=0)
    finally:
        # Delete the temporary image and audio files even if a stage failed.
        for temp_path in (image_path, audio_file):
            if temp_path and os.path.exists(temp_path):
                os.remove(temp_path)