Update app.py
app.py CHANGED
@@ -1,192 +1,113 @@
app.py before this change:

```python
import gradio as gr
from datasets import load_dataset
from PIL import Image
import io
import time
import os
from datetime import datetime, timedelta
import json

access_token = os.environ.get("HUGGINGFACE_TOKEN")

# ... (lines 12-29 were not captured in the diff view) ...

dataset_size = (
    dataset_info.splits["train"].num_examples
    if dataset_info.splits.get("train")
    else "Unknown"
)

last_refresh_time = datetime.now()

def check_and_refresh_dataset():
    global last_refresh_time
    current_time = datetime.now()
    if (
        last_refresh_time is None
        or (current_time - last_refresh_time) >= REFRESH_INTERVAL
    ):
        load_and_prepare_dataset()

# Initial dataset load
load_and_prepare_dataset()

# (Optional) Load a different dataset for variety, as in your original code:
dataset = load_dataset(
    "taesiri/PhotoshopRequest-DailyDump",
    split="train",
    streaming=True,
    token=access_token,
)

# Get dataset info
dataset_info = dataset.info
dataset_size = (
    dataset_info.splits["train"].num_examples
    if dataset_info.splits.get("train")
    else "Unknown"
)

BUFFER_SIZE = 1
sample_iterator = None
sample_count = 0

def reshuffle_dataset():
    global sample_iterator, sample_count
    seed = int(time.time())  # Convert current time to an integer for randomness
    shuffled_dataset = dataset.shuffle(seed=seed, buffer_size=BUFFER_SIZE)
    sample_iterator = iter(shuffled_dataset)
    sample_count = 0

reshuffle_dataset()  # Initial shuffle

def get_next_samples(num_samples=5):
    """
    Fetch 'num_samples' items from the dataset and return
    the text + source/edited images for each sample.
    This yields 3 * num_samples outputs in a fixed order.
    """
    check_and_refresh_dataset()

    global sample_count

    results = []
    for _ in range(num_samples):
        if sample_count >= BUFFER_SIZE:
            reshuffle_dataset()

        sample = next(sample_iterator)
        sample_count += 1
        print(sample)

        post_id = sample["post_id"]
        title = sample["title"]
        reddit_url = f"https://www.reddit.com/r/PhotoshopRequest/comments/{post_id}"

        selftext = ""
        try:
            selftext = json.loads(sample["json_data"])["post"]["selftext"]
        except:
            print("No selftext found")

        markdown_text = f"# {title}\n\n{selftext}\n\n[View post on r/PhotoshopRequest]({reddit_url})"

        # Append the triple (post_info, source_image, edited_image)
        results.append(markdown_text)
        results.append(sample["source_image"])
        results.append(sample["edited_image"])

    return tuple(results)

def update_info():
    """
    Return a small HTML snippet with dataset stats and last refresh time.
    """
    return f"""
    <div style="text-align: center;">
    <hr>
    Dataset Size: {dataset_size} items<br>
    Last Refreshed: {last_refresh_time.strftime('%Y-%m-%d %H:%M:%S UTC') if last_refresh_time else 'Unknown'}
    </div>
    """

# Build the Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("# PhotoshopRequest Dataset Sampler")

    # ... (a multi-line description block on lines 132-142 was not captured in the diff view) ...

    # We'll define 5 sets of outputs, each is: (Markdown, source_image, edited_image).

    # Sample 1
    post_info1 = gr.Markdown()
    with gr.Row():
        source_image1 = gr.Image(label="Source Image 1")
        edited_image1 = gr.Image(label="Edited Image 1")

    # ... (lines 152-189 were not captured in the diff view) ...

if __name__ == "__main__":
    demo
```
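For context, the removed version sampled posts by streaming the dataset and repeatedly re-shuffling an iterator. A minimal sketch of that pattern, assuming the same dataset and token handling as the code above (the seed choice is illustrative), looks like this; note that with a shuffle buffer of 1, as in the old `BUFFER_SIZE`, the stream order is effectively not shuffled at all:

```python
# Minimal sketch of the streaming/shuffle pattern used by the removed code.
# Dataset name and token handling mirror the code above; the seed is illustrative.
import os
import time
from datasets import load_dataset

access_token = os.environ.get("HUGGINGFACE_TOKEN")

stream = load_dataset(
    "taesiri/PhotoshopRequest-DailyDump",
    split="train",
    streaming=True,
    token=access_token,
)

# shuffle() on a streaming dataset draws from a buffer of `buffer_size`
# examples; a buffer of 1 effectively leaves the stream order unchanged.
shuffled = stream.shuffle(seed=int(time.time()), buffer_size=1)
sample = next(iter(shuffled))  # a dict with "post_id", "title", the images, ...
```

The new version below drops this streaming machinery and instead loads a fixed random-sample dump into memory and indexes it directly.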
app.py after this change:

```python
import gradio as gr
from datasets import load_dataset
import json
import random
from datetime import datetime
import os

# Get access token from environment
access_token = os.environ.get("HUGGINGFACE_TOKEN")

class DatasetViewer:
    def __init__(self):
        self.dataset = None
        self.dataset_size = 0
        self.last_refresh_time = None
        self.load_dataset()

    def load_dataset(self):
        """Load the complete dataset into memory"""
        # Load the full dataset (non-streaming)
        self.dataset = load_dataset(
            "taesiri/PhotoshopRequest-DailyDump-January-2025-RandomSample",
            split="train",
            token=access_token
        )

        self.dataset_size = len(self.dataset)
        self.last_refresh_time = datetime.now()

    def get_next_samples(self, num_samples=5):
        """Get random samples from the dataset"""
        # Generate random indices
        indices = random.sample(range(self.dataset_size), min(num_samples, self.dataset_size))

        results = []
        for idx in indices:
            sample = self.dataset[idx]

            # Get post information
            post_id = sample["post_id"]
            title = sample["title"]
            reddit_url = f"https://www.reddit.com/r/PhotoshopRequest/comments/{post_id}"

            # Extract selftext if available
            selftext = ""
            try:
                selftext = json.loads(sample["json_data"])["post"]["selftext"]
            except:
                print(f"No selftext found for post {post_id}")

            # Create markdown text
            markdown_text = f"# {title}\n\n{selftext}\n\n[View post on r/PhotoshopRequest]({reddit_url})"

            # Append the triple (post_info, source_image, edited_image)
            results.append(markdown_text)
            results.append(sample["source_image"])
            results.append(sample["edited_image"])

        return tuple(results)

    def get_info(self):
        """Return dataset information"""
        return f"""
        <div style="text-align: center;">
        <hr>
        Dataset Size: {self.dataset_size} items<br>
        Last Refreshed: {self.last_refresh_time.strftime('%Y-%m-%d %H:%M:%S UTC')}
        </div>
        """
```
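`get_next_samples` above leans on `random.sample` to draw distinct row indices, with `min()` capping the request so it never asks for more unique indices than the dataset holds. A small standalone illustration with made-up sizes (the real code uses `len(self.dataset)`):

```python
# Standalone illustration of the index-sampling step in get_next_samples.
# The sizes are made up; only the random.sample/min pattern matches the app.
import random

dataset_size = 3
num_samples = 5

# Without the min() cap, random.sample would raise ValueError when
# num_samples exceeds the population size.
indices = random.sample(range(dataset_size), min(num_samples, dataset_size))
print(indices)  # e.g. [1, 0, 2] -- distinct indices in random order
```

Each selected row then contributes three values (markdown text, source image, edited image), so five samples produce the fifteen values consumed by the interface defined next.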
app.py after this change (continued):

```python
def create_interface():
    viewer = DatasetViewer()

    with gr.Blocks() as demo:
        gr.Markdown("# PhotoshopRequest Dataset Viewer")

        gr.Markdown("""
        This is a viewer for the PhotoshopRequest dataset. Each sample shows a Photoshop editing request post.
        Click the 'Show New Samples' button to see **5 random samples** from the dataset.

        **Layout**: For each sample, you'll see:
        1. The post title and description
        2. The source image (left) and edited result (right)
        """)

        # Create 5 sets of outputs
        outputs = []
        for i in range(5):
            post_info = gr.Markdown()
            outputs.append(post_info)

            with gr.Row():
                source = gr.Image(label=f"Source Image {i+1}")
                edited = gr.Image(label=f"Edited Image {i+1}")
            outputs.extend([source, edited])

        sample_button = gr.Button("Show New Samples")
        info_md = gr.Markdown()

        # Set up event handlers
        sample_button.click(
            viewer.get_next_samples,
            outputs=outputs
        ).then(
            viewer.get_info,
            outputs=[info_md]
        )

    return demo

if __name__ == "__main__":
    demo = create_interface()
    demo.launch()
```
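In `create_interface`, the tuple returned by `viewer.get_next_samples` is assigned positionally across the flat `outputs` list, and `.then()` chains a second event that refreshes the info line after the samples render. A minimal sketch of the same wiring pattern, with illustrative component and function names rather than the app's own:

```python
# Minimal sketch of the click -> then chaining used in create_interface.
# Names are illustrative; only the wiring pattern matches the app.
import gradio as gr
from datetime import datetime

def load_samples():
    # One value per output component, in the same order as `outputs=`.
    return "# Example post", None, None  # markdown, source image, edited image

def load_info():
    return f"Last refreshed: {datetime.now():%Y-%m-%d %H:%M:%S}"

with gr.Blocks() as sketch:
    md = gr.Markdown()
    with gr.Row():
        src = gr.Image(label="Source")
        dst = gr.Image(label="Edited")
    info = gr.Markdown()
    btn = gr.Button("Show New Samples")

    # The first event fills the sample outputs; the chained event then
    # updates the info line.
    btn.click(load_samples, outputs=[md, src, dst]).then(load_info, outputs=[info])

if __name__ == "__main__":
    sketch.launch()
```

Note that the app reads `HUGGINGFACE_TOKEN` from the environment before loading the dataset, so that variable needs to be set (for example as a Space secret) for `demo.launch()` to serve real data.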