ElvisTsang committed on
Commit
e0d7f16
·
verified ·
1 Parent(s): 42d3434

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +40 -30
app.py CHANGED
@@ -1,47 +1,57 @@
 
1
  import streamlit as st
2
  from transformers import pipeline
3
 
4
  def img2text(url):
5
- image_to_text_model = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
6
- text = image_to_text_model(url)[0]["generated_text"]
7
  return text
8
 
9
  def text2story(text):
10
- story_generator = pipeline("text-generation", model="deepseek-ai/DeepSeek-V3", trust_remote_code=True)
 
 
 
 
 
11
  prompt = f"Create a story of maximum 100 words based upon {text}"
12
- generated = story_generator(prompt, max_length=100)
13
- story_text = generated[0]['generated_text']
14
- return story_text
15
-
16
- def text2audio(story_text):
17
- audio_data = pipeline("text-to-speech", model="facebook/mms-tts-eng")
18
- return audio_data
19
-
20
- st.set_page_config(page_title="Once Upon A Time - Storytelling Application", page_icon="πŸ“–πŸ°πŸ¦„πŸ§™")
 
 
 
21
  st.header("Create a story of yours with an image!")
 
22
  uploaded_file = st.file_uploader("Upload an image of your story!")
23
 
24
  if uploaded_file is not None:
25
- print(uploaded_file)
26
- bytes_data = uploaded_file.getvalue()
27
- with open(uploaded_file.name, "wb") as file:
28
- file.write(bytes_data)
29
  st.image(uploaded_file, caption="Uploaded Image", use_column_width=True)
30
 
31
- st.text('Processing img2text...')
32
- scenario = img2text(uploaded_file.name)
33
- st.write(scenario)
 
34
 
35
- st.text('Generating a story...')
36
- story = text2story(scenario)
37
- st.write(story)
 
38
 
39
- st.text('Generating audio data...')
40
- audio_data =text2audio(story)
41
 
42
- if st.button("Story Time!"):
43
- st.audio(audio_data['audio'],
44
- format="audio/wav",
45
- start_time=0,
46
- sample_rate = audio_data['sampling_rate'])
47
- st.audio(audio_data)
 
1
+ import torch
2
  import streamlit as st
3
  from transformers import pipeline
4
 
5
  def img2text(url):
6
+ image_to_text = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
7
+ text = image_to_text(url)[0]["generated_text"]
8
  return text
9
 
10
  def text2story(text):
11
+ generator = pipeline(
12
+ "text-generation",
13
+ model="deepseek-ai/DeepSeek-V3",
14
+ trust_remote_code=True,
15
+ torch_dtype=torch.float16,
16
+ device_map="auto")
17
  prompt = f"Create a story of maximum 100 words based upon {text}"
18
+ story = generator(prompt, max_new_tokens=100)[0]['generated_text']
19
+ return story
20
+
21
+ def text2audio(text):
22
+ synthesizer = pipeline(
23
+ "text-to-speech",
24
+ model="facebook/mms-tts-eng",
25
+ device_map="auto")
26
+ audio = synthesizer(text)
27
+ return audio
28
+
29
+ st.set_page_config(page_title="Once Upon A Time - Storytelling Application", page_icon="πŸ“–")
30
  st.header("Create a story of yours with an image!")
31
+
32
  uploaded_file = st.file_uploader("Upload an image of your story!")
33
 
34
  if uploaded_file is not None:
35
+ with open(uploaded_file.name, "wb") as f:
36
+ f.write(uploaded_file.getbuffer())
37
+
 
38
  st.image(uploaded_file, caption="Uploaded Image", use_column_width=True)
39
 
40
+ with st.spinner('Analyzing image...'):
41
+ scenario = img2text(uploaded_file.name)
42
+ st.subheader("Image Description")
43
+ st.write(scenario)
44
 
45
+ with st.spinner('Crafting your story...'):
46
+ story = text2story(scenario)
47
+ st.subheader("Your Story")
48
+ st.write(story)
49
 
50
+ with st.spinner('Generating audio...'):
51
+ audio_data = text2audio(story)
52
 
53
+ st.subheader("Story Time!")
54
+ st.audio(
55
+ audio_data["audio"],
56
+ format="audio/wav",
57
+ sample_rate=audio_data["sampling_rate"])