# NOTE(review): the lines previously here were Hugging Face Spaces file-viewer
# residue (status text, file size, git blob hashes, a line-number gutter) that
# leaked into the source during extraction; they are not part of the program.
import functools
import re
import string

import streamlit as st
import torch
from datasets import load_dataset
from transformers import pipeline
@functools.lru_cache(maxsize=1)
def _get_caption_pipeline():
    # Build the BLIP image-captioning pipeline exactly once. Streamlit reruns
    # the whole script on every user interaction, so without memoization the
    # large model would be reloaded on each rerun.
    return pipeline("image-to-text", model="Salesforce/blip-image-captioning-large", use_fast=True)

def img2text(url):
    """Caption an image.

    Args:
        url: Filesystem path (or URL) of the image to caption.

    Returns:
        The caption string generated by the BLIP model.
    """
    text = _get_caption_pipeline()(url)[0]["generated_text"]
    return text
@functools.lru_cache(maxsize=1)
def _get_story_generator():
    # Build the text-generation pipeline exactly once; Streamlit reruns the
    # script per interaction and reloading the model each time is very costly.
    return pipeline("text-generation", model="Qwen/Qwen2.5-1.5B", device_map="auto", return_full_text=False)

def _polish_story(story_text):
    """Clean raw model output into a <=100-word story ending in punctuation."""
    # Remove leading punctuation the model sometimes emits, then any whitespace.
    story_text = re.sub(r'^\s*[.,!?;:]+\s*', '', story_text).lstrip()
    # Capitalize the first alphabetic character if it's lowercase.
    for i, char in enumerate(story_text):
        if char.isalpha():
            if char.islower():
                story_text = story_text[:i] + char.upper() + story_text[i + 1:]
            break
    # Split into sentences and accumulate whole sentences up to 100 words.
    sentences = re.split(r'(?<=[.!?])\s+', story_text.strip())
    final_sentences = []
    current_word_count = 0
    for sentence in sentences:
        word_count = len(sentence.split())
        if current_word_count + word_count > 100:
            break
        final_sentences.append(sentence)
        current_word_count += word_count
    if final_sentences:
        final_story = ' '.join(final_sentences)
    else:
        # Edge case: the very first sentence alone exceeds 100 words. The old
        # code returned just "." here; fall back to a hard 100-word cut instead.
        final_story = ' '.join(story_text.split()[:100])
    # Ensure the story ends with a punctuation mark.
    if not final_story.endswith(('.', '!', '?')):
        final_story += '.'
    return final_story

def text2story(text):
    """Generate a short story (at most 100 words) from a scene description.

    Args:
        text: Scene description, e.g. the caption produced by ``img2text``.

    Returns:
        A cleaned story string of at most 100 words ending in punctuation.
    """
    prompt = f"Give me a story under 100 words based upon: {text}."
    generated = _get_story_generator()(prompt, max_new_tokens=140, do_sample=True)
    return _polish_story(generated[0]['generated_text'])
@functools.lru_cache(maxsize=1)
def _get_tts_pipeline():
    # Build the SpeechT5 text-to-speech pipeline exactly once per process.
    return pipeline("text-to-speech", "microsoft/speecht5_tts")

@functools.lru_cache(maxsize=1)
def _get_speaker_embedding():
    # Load the CMU-Arctic x-vector dataset once and keep the chosen speaker
    # embedding cached -- previously the whole dataset was re-fetched on every
    # call. Index 7306 is the embedding commonly used in SpeechT5 examples;
    # presumably a US English speaker -- verify if the voice matters.
    embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
    return torch.tensor(embeddings_dataset[7306]["xvector"]).unsqueeze(0)

def text2audio(story_text):
    """Synthesize speech for a story.

    Args:
        story_text: The text to read aloud.

    Returns:
        The pipeline output dict (contains the waveform under ``'audio'`` and
        the rate under ``'sampling_rate'``, as consumed by ``main``).
    """
    audio_output = _get_tts_pipeline()(
        story_text, forward_params={"speaker_embeddings": _get_speaker_embedding()}
    )
    return audio_output
def main():
    """Streamlit entry point: upload an image, caption it, generate a story,
    and read the story aloud."""
    st.set_page_config(page_title="Once Upon A Time - Storytelling Application", page_icon="π")
    st.header("Create a story of yours with an image!π§")
    uploaded_file = st.file_uploader("Upload an image for creating your story!")
    if uploaded_file is not None:
        # Persist the upload to disk so the captioning pipeline can open it by
        # path. NOTE(review): the file lands in the working directory under the
        # client-supplied name -- consider a tempfile if that is a concern.
        bytes_data = uploaded_file.getvalue()
        with open(uploaded_file.name, "wb") as file:
            file.write(bytes_data)
        st.image(uploaded_file, caption="Uploaded Image", use_container_width=True)
        # Stage 1: image -> caption.
        st.text('Entering the scene...π°')
        scenario = img2text(uploaded_file.name)
        st.write(scenario)
        # Stage 2: caption -> short story.
        st.text('Your story is going to begin...π¦')
        story = text2story(scenario)
        st.write(story)
        # Stage 3: story -> speech.
        st.text('Your story is going to be told...π§')
        audio_data = text2audio(story)
        st.audio(audio_data['audio'], format="audio/wav", start_time=0, sample_rate=audio_data['sampling_rate'])

if __name__ == "__main__":
    main()