Spaces: Running on Zero

Update app.py

app.py CHANGED
@@ -1,21 +1,19 @@
 import spaces
 import gradio as gr
-from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor
+from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor
 from qwen_vl_utils import process_vision_info
 from PIL import Image
 import torch
 import os, time
-from peft import PeftModel
 
 # Load the model and processor
-model =
+model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
     "daniel3303/QwenStoryteller",
-    torch_dtype=torch.
+    torch_dtype=torch.float16,
     device_map="auto"
 )
 processor = AutoProcessor.from_pretrained("daniel3303/QwenStoryteller")
 
-
 @spaces.GPU()
 @torch.no_grad()
 def generate_story(file_paths, progress=gr.Progress(track_tqdm=True)):
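The rewrite drops the separate adapter load (`from peft import PeftModel`) in favor of loading the merged checkpoint directly in float16, and gates inference behind ZeroGPU: `@spaces.GPU()` attaches a GPU to the process only while the decorated function runs. A minimal sketch of that pattern, assuming the standard `spaces` package on a ZeroGPU Space (the function name and `duration` value below are illustrative, not taken from the diff):

```python
import spaces
import torch

@spaces.GPU(duration=120)  # illustrative budget: hold the GPU for at most 120 s per call
@torch.no_grad()           # inference only, no autograd state
def run_on_gpu(prompt: str) -> str:
    # CUDA is only guaranteed to be available inside the decorated call.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    return f"ran on {device}: {prompt}"
```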
@@ -23,7 +21,7 @@ def generate_story(file_paths, progress=gr.Progress(track_tqdm=True)):
     images = [Image.open(file_path) for file_path in file_paths]
 
     image_content = []
-    for img in images[:
+    for img in images[:10]:  # Limit to 10 images
         image_content.append({
             "type": "image",
             "image": img,
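Each uploaded image becomes one `{"type": "image", ...}` entry in the Qwen2.5-VL chat content; the user turn normally ends with a `{"type": "text", ...}` instruction, which the diff elides. A hedged sketch of the structure being assembled (the prompt wording is an assumption, not the Space's actual text):

```python
from PIL import Image

def build_image_content(images: list[Image.Image]) -> list[dict]:
    # One content entry per image, capped at 10 as in the diff above.
    content = [{"type": "image", "image": img} for img in images[:10]]
    # Assumed closing text instruction; the real wording is hidden by the diff.
    content.append({"type": "text", "text": "Generate a story based on these images."})
    return content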
@@ -34,7 +32,7 @@ def generate_story(file_paths, progress=gr.Progress(track_tqdm=True)):
     messages = [
         {
             "role": "system",
-            "content": "You are an
+            "content": "You are an AI storyteller that can analyze sequences of images and create creative narratives. First think step-by-step to analyze characters, objects, settings, and narrative structure. Then create a grounded story that maintains consistent character identity and object references across frames. Use 🧠 tags to show your reasoning process before writing the final story."
         },
         {
             "role": "user",
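The hunk stops at the user turn; the lines between the message list and the UI, i.e. the actual generation call, are hidden by the diff. Following the standard Qwen2.5-VL inference pattern, and reusing `model`, `processor`, `messages`, and `process_vision_info` from the file above, that step typically looks like this (a sketch, not the Space's exact code; `max_new_tokens` is an assumed value):

```python
# apply_chat_template -> process_vision_info -> generate -> decode
text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
image_inputs, video_inputs = process_vision_info(messages)
inputs = processor(
    text=[text],
    images=image_inputs,
    videos=video_inputs,
    padding=True,
    return_tensors="pt",
).to(model.device)

generated_ids = model.generate(**inputs, max_new_tokens=1024)  # assumed budget
# Strip the prompt tokens from each sequence before decoding.
trimmed = [out[len(inp):] for inp, out in zip(inputs.input_ids, generated_ids)]
story = processor.batch_decode(trimmed, skip_special_tokens=True)[0]
```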
@@ -74,52 +72,48 @@ def generate_story(file_paths, progress=gr.Progress(track_tqdm=True)):
 
 
 with gr.Blocks(fill_height=True) as demo:
-    gr.Markdown("# Qwen Storyteller \n## Upload up to
+    gr.Markdown("# Qwen Storyteller \n## Upload up to 10 images to generate a creative story.")
 
     with gr.Row():
         with gr.Column():
-            upload_button = gr.UploadButton("Upload up to
+            upload_button = gr.UploadButton("Upload up to 10 images", file_types=["image"], file_count="multiple")
             output_file = gr.File(label="Uploaded Files")
             gen_button = gr.Button("Generate", variant="primary")
 
         with gr.Column():
-            outputs = gr.Markdown(label="Generated Story", show_copy_button=True
+            outputs = gr.Markdown(label="Generated Story", show_copy_button=True)
 
     with gr.Row():
-        …
-            """
-        )
-        with gr.Column():
-            gr.Markdown("")
+        gr.Markdown(
+            """
+            ### Key Features
+            * Cross-Frame Consistency: Maintains consistent character and object identity across multiple frames through visual similarity and face recognition techniques
+            * Structured Reasoning: Employs chain-of-thought reasoning to analyze scenes with explicit modeling of characters, objects, settings, and narrative structure
+            * Grounded Storytelling: Uses specialized XML tags to link narrative elements directly to visual entities
+            * Reduced Hallucinations: Achieves 12.3% fewer hallucinations compared to the non-fine-tuned base model
+
+            Model trained by daniel3303, [repository here.](https://huggingface.co/daniel3303/QwenStoryteller)
+            """
+        )
+        gr.Markdown(
+            """
+            ```
+            @misc{oliveira2025storyreasoningdatasetusingchainofthought,
+                title={StoryReasoning Dataset: Using Chain-of-Thought for Scene Understanding and Grounded Story Generation},
+                author={Daniel A. P. Oliveira and David Martins de Matos},
+                year={2025},
+                eprint={2505.10292},
+                archivePrefix={arXiv},
+                primaryClass={cs.CV},
+                url={https://arxiv.org/abs/2505.10292},
+            }
+            ```
+            """
+        )
 
     upload_button.upload(lambda files: [f.name for f in files], upload_button, output_file)
 
     gen_button.click(generate_story, upload_button, outputs)
 
 if __name__ == "__main__":
-    demo.queue().launch(show_error=True)
+    demo.queue().launch(show_error=True)
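The event wiring passes the UploadButton payload straight to `generate_story`, while the lambda merely echoes the temporary file paths into the `gr.File` display; `demo.queue()` enables the request queue that ZeroGPU Spaces use to serialize GPU access. The same wiring in isolation, with a stub in place of the real handler (the stub and its message are illustrative):

```python
import gradio as gr

def tell_story(files):
    # Stub standing in for generate_story; Gradio file objects expose .name paths.
    return f"Received {len(files)} image(s)."

with gr.Blocks() as demo:
    upload_button = gr.UploadButton("Upload up to 10 images", file_types=["image"], file_count="multiple")
    output_file = gr.File(label="Uploaded Files")
    outputs = gr.Markdown()
    gen_button = gr.Button("Generate", variant="primary")

    # Echo uploaded paths into the file display, then generate on click.
    upload_button.upload(lambda files: [f.name for f in files], upload_button, output_file)
    gen_button.click(tell_story, upload_button, outputs)

if __name__ == "__main__":
    demo.queue().launch(show_error=True)
```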