ElvisTsang committed on
Commit
6416df7
·
verified ·
1 Parent(s): c401df4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +35 -45
app.py CHANGED
@@ -1,57 +1,47 @@
1
- import torch
2
  import streamlit as st
3
  from transformers import pipeline
4
 
5
  def img2text(url):
6
- image_to_text = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
7
- text = image_to_text(url)[0]["generated_text"]
8
  return text
9
 
10
  def text2story(text):
11
- generator = pipeline(
12
- "text-generation",
13
- model="deepseek-ai/DeepSeek-V3",
14
- trust_remote_code=True,
15
- torch_dtype=torch.float16,
16
- device_map="auto")
17
  prompt = f"Create a story of maximum 100 words based upon {text}"
18
- story = generator(prompt, max_new_tokens=100)[0]['generated_text']
19
- return story
20
-
21
- def text2audio(text):
22
- synthesizer = pipeline(
23
- "text-to-speech",
24
- model="facebook/mms-tts-eng",
25
- device_map="auto")
26
- audio = synthesizer(text)
27
- return audio
28
-
29
- st.set_page_config(page_title="Once Upon A Time - Storytelling Application", page_icon="πŸ“–")
30
- st.header("Create a story of yours with an image!")
31
 
 
 
32
  uploaded_file = st.file_uploader("Upload an image of your story!")
33
 
34
  if uploaded_file is not None:
35
- with open(uploaded_file.name, "wb") as f:
36
- f.write(uploaded_file.getbuffer())
37
-
38
- st.image(uploaded_file, caption="Uploaded Image", use_column_width=True)
39
-
40
- with st.spinner('Analyzing image...'):
41
- scenario = img2text(uploaded_file.name)
42
- st.subheader("Image Description")
43
- st.write(scenario)
44
-
45
- with st.spinner('Crafting your story...'):
46
- story = text2story(scenario)
47
- st.subheader("Your Story")
48
- st.write(story)
49
-
50
- with st.spinner('Generating audio...'):
51
- audio_data = text2audio(story)
52
-
53
- st.subheader("Story Time!")
54
- st.audio(
55
- audio_data["audio"],
56
- format="audio/wav",
57
- sample_rate=audio_data["sampling_rate"])
 
 
1
  import streamlit as st
2
  from transformers import pipeline
3
 
4
def img2text(url):
    """Caption an image with a BLIP image-captioning model.

    Args:
        url: Path (or URL) of the image to describe.

    Returns:
        The generated caption string.
    """
    captioner = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
    caption = captioner(url)[0]["generated_text"]
    return caption
8
 
9
def text2story(text):
    """Generate a short story (at most ~100 words) from an image caption.

    Args:
        text: Scene description to base the story on.

    Returns:
        The generated story string.
    """
    # BUG FIX: google/flan-t5-base is a seq2seq (encoder-decoder) model, so it
    # must be loaded under the "text2text-generation" task; loading it with
    # "text-generation" (a causal-LM task) fails / misbehaves.
    story_generator = pipeline("text2text-generation", model="google/flan-t5-base")
    prompt = f"Create a story of maximum 100 words based upon {text}"
    # max_new_tokens bounds only the generated continuation, so the prompt
    # length cannot eat into the story budget (unlike max_length).
    generated = story_generator(prompt, max_new_tokens=100)
    story_text = generated[0]['generated_text']
    return story_text
15
+
16
def text2audio(story_text):
    """Synthesize speech for the story text.

    Args:
        story_text: The story to read aloud.

    Returns:
        The TTS pipeline output (a dict with "audio" and "sampling_rate"
        keys), ready for st.audio.
    """
    # BUG FIX: the original returned the pipeline object itself without ever
    # synthesizing, so the caller's audio_data['audio'] lookup would fail.
    synthesizer = pipeline("text-to-speech", model="facebook/mms-tts-eng")
    return synthesizer(story_text)
 
 
 
 
 
 
19
 
20
# --- Streamlit UI ----------------------------------------------------------
# NOTE(review): page_icon emoji were mojibake in the source (UTF-8 read as
# Latin-1); repaired here to the intended characters.
st.set_page_config(page_title="Once Upon A Time - Storytelling Application",
                   page_icon="📖🏰🦄🧙")
st.header("Create a story of yours with an image!")
uploaded_file = st.file_uploader("Upload an image of your story!")

if uploaded_file is not None:
    # Persist the upload to disk because img2text expects a file path,
    # not an in-memory buffer.  (Removed the stray debug print().)
    bytes_data = uploaded_file.getvalue()
    with open(uploaded_file.name, "wb") as file:
        file.write(bytes_data)
    st.image(uploaded_file, caption="Uploaded Image", use_container_width=True)

    st.text('Processing img2text...')
    scenario = img2text(uploaded_file.name)
    st.write(scenario)

    st.text('Generating a story...')
    story = text2story(scenario)
    st.write(story)

    st.text('Generating audio data...')
    audio_data = text2audio(story)

    if st.button("Story Time!"):
        st.audio(audio_data['audio'],
                 format="audio/wav",
                 start_time=0,
                 sample_rate=audio_data['sampling_rate'])
        # BUG FIX: removed the trailing st.audio(audio_data) — it passed the
        # whole result dict, which st.audio does not accept, and would raise
        # right after the valid call above.