LPX55 committed on
Commit
24db381
Β·
verified Β·
1 Parent(s): 54fce6e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +35 -41
app.py CHANGED
@@ -1,21 +1,19 @@
1
  import spaces
2
  import gradio as gr
3
- from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor, AutoModelForVision2Seq
4
  from qwen_vl_utils import process_vision_info
5
  from PIL import Image
6
  import torch
7
  import os, time
8
- from peft import PeftModel
9
 
10
  # Load the model and processor
11
- model = AutoModelForVision2Seq.from_pretrained(
12
  "daniel3303/QwenStoryteller",
13
- torch_dtype=torch.bfloat16,
14
  device_map="auto"
15
  )
16
  processor = AutoProcessor.from_pretrained("daniel3303/QwenStoryteller")
17
 
18
-
19
  @spaces.GPU()
20
  @torch.no_grad()
21
  def generate_story(file_paths, progress=gr.Progress(track_tqdm=True)):
@@ -23,7 +21,7 @@ def generate_story(file_paths, progress=gr.Progress(track_tqdm=True)):
23
  images = [Image.open(file_path) for file_path in file_paths]
24
 
25
  image_content = []
26
- for img in images[:12]: # Limit to 6 images
27
  image_content.append({
28
  "type": "image",
29
  "image": img,
@@ -34,7 +32,7 @@ def generate_story(file_paths, progress=gr.Progress(track_tqdm=True)):
34
  messages = [
35
  {
36
  "role": "system",
37
- "content": "You are an English speaking AI storyteller that can analyze sequences of images and create creative narratives. First think step-by-step to analyze characters, objects, settings, and narrative structure. Then create a grounded story that maintains consistent character identity and object references across frames. Use `<think>` tags to show your reasoning process before writing the final story."
38
  },
39
  {
40
  "role": "user",
@@ -74,52 +72,48 @@ def generate_story(file_paths, progress=gr.Progress(track_tqdm=True)):
74
 
75
 
76
  with gr.Blocks(fill_height=True) as demo:
77
- gr.Markdown("# Qwen Storyteller \n## Upload up to 6 images to generate a creative story.")
78
 
79
  with gr.Row():
80
  with gr.Column():
81
- upload_button = gr.UploadButton("Upload up to 12 images", file_types=["image"], file_count="multiple")
82
  output_file = gr.File(label="Uploaded Files")
83
  gen_button = gr.Button("Generate", variant="primary")
84
 
85
  with gr.Column():
86
- outputs = gr.Markdown(label="Generated Story", show_copy_button=True, container=True)
87
 
88
  with gr.Row():
89
- with gr.Column():
90
- gr.Markdown(
91
- """
92
- ### Key Features
93
- * Cross-Frame Consistency: Maintains consistent character and object identity across multiple frames through visual similarity and face recognition techniques
94
- * Structured Reasoning: Employs chain-of-thought reasoning to analyze scenes with explicit modeling of characters, objects, settings, and narrative structure
95
- * Grounded Storytelling: Uses specialized XML tags to link narrative elements directly to visual entities
96
- * Reduced Hallucinations: Achieves 12.3% fewer hallucinations compared to the non-fine-tuned base model
97
-
98
- Model trained by daniel3303, [repository here.](https://huggingface.co/daniel3303/QwenStoryteller)
99
- """
100
- )
101
- with gr.Accordion():
102
- gr.Markdown(
103
- """
104
- ```
105
- @misc{oliveira2025storyreasoningdatasetusingchainofthought,
106
- title={StoryReasoning Dataset: Using Chain-of-Thought for Scene Understanding and Grounded Story Generation},
107
- author={Daniel A. P. Oliveira and David Martins de Matos},
108
- year={2025},
109
- eprint={2505.10292},
110
- archivePrefix={arXiv},
111
- primaryClass={cs.CV},
112
- url={https://arxiv.org/abs/2505.10292},
113
- }
114
- ```
115
- """
116
- )
117
- with gr.Column():
118
- gr.Markdown("")
119
 
120
  upload_button.upload(lambda files: [f.name for f in files], upload_button, output_file)
121
 
122
  gen_button.click(generate_story, upload_button, outputs)
123
 
124
  if __name__ == "__main__":
125
- demo.queue().launch(show_error=True)
 
1
  import spaces
2
  import gradio as gr
3
+ from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor
4
  from qwen_vl_utils import process_vision_info
5
  from PIL import Image
6
  import torch
7
  import os, time
 
8
 
9
  # Load the model and processor
10
+ model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
11
  "daniel3303/QwenStoryteller",
12
+ torch_dtype=torch.float16,
13
  device_map="auto"
14
  )
15
  processor = AutoProcessor.from_pretrained("daniel3303/QwenStoryteller")
16
 
 
17
  @spaces.GPU()
18
  @torch.no_grad()
19
  def generate_story(file_paths, progress=gr.Progress(track_tqdm=True)):
 
21
  images = [Image.open(file_path) for file_path in file_paths]
22
 
23
  image_content = []
24
+ for img in images[:10]: # Limit to 10 images
25
  image_content.append({
26
  "type": "image",
27
  "image": img,
 
32
  messages = [
33
  {
34
  "role": "system",
35
+ "content": "You are an AI storyteller that can analyze sequences of images and create creative narratives. First think step-by-step to analyze characters, objects, settings, and narrative structure. Then create a grounded story that maintains consistent character identity and object references across frames. Use 🧠 tags to show your reasoning process before writing the final story."
36
  },
37
  {
38
  "role": "user",
 
72
 
73
 
74
  with gr.Blocks(fill_height=True) as demo:
75
+ gr.Markdown("# Qwen Storyteller \n## Upload up to 10 images to generate a creative story.")
76
 
77
  with gr.Row():
78
  with gr.Column():
79
+ upload_button = gr.UploadButton("Upload up to 10 images", file_types=["image"], file_count="multiple")
80
  output_file = gr.File(label="Uploaded Files")
81
  gen_button = gr.Button("Generate", variant="primary")
82
 
83
  with gr.Column():
84
+ outputs = gr.Markdown(label="Generated Story", show_copy_button=True)
85
 
86
  with gr.Row():
87
+ gr.Markdown(
88
+ """
89
+ ### Key Features
90
+ * Cross-Frame Consistency: Maintains consistent character and object identity across multiple frames through visual similarity and face recognition techniques
91
+ * Structured Reasoning: Employs chain-of-thought reasoning to analyze scenes with explicit modeling of characters, objects, settings, and narrative structure
92
+ * Grounded Storytelling: Uses specialized XML tags to link narrative elements directly to visual entities
93
+ * Reduced Hallucinations: Achieves 12.3% fewer hallucinations compared to the non-fine-tuned base model
94
+
95
+ Model trained by daniel3303, [repository here.](https://huggingface.co/daniel3303/QwenStoryteller)
96
+ """
97
+ )
98
+ gr.Markdown(
99
+ """
100
+ ```
101
+ @misc{oliveira2025storyreasoningdatasetusingchainofthought,
102
+ title={StoryReasoning Dataset: Using Chain-of-Thought for Scene Understanding and Grounded Story Generation},
103
+ author={Daniel A. P. Oliveira and David Martins de Matos},
104
+ year={2025},
105
+ eprint={2505.10292},
106
+ archivePrefix={arXiv},
107
+ primaryClass={cs.CV},
108
+ url={https://arxiv.org/abs/2505.10292},
109
+ }
110
+ ```
111
+ """
112
+ )
 
 
 
 
113
 
114
  upload_button.upload(lambda files: [f.name for f in files], upload_button, output_file)
115
 
116
  gen_button.click(generate_story, upload_button, outputs)
117
 
118
  if __name__ == "__main__":
119
+ demo.queue().launch(show_error=True)