Spaces:

taesiri
/

PhotoshopRequests-Preview

Running

App Files Files Community

PhotoshopRequests-Preview / app.py

taesiri

Update app.py

c384cc2 verified 5 months ago

raw

history blame

5.54 kB

	import gradio as gr
	from datasets import load_dataset
	from PIL import Image
	import io
	import time
	import os
	from datetime import datetime, timedelta
	import json

	# Hugging Face access token, read from the environment (None when unset).
	access_token = os.getenv("HUGGINGFACE_TOKEN")

	# Shared module state, filled in by load_and_prepare_dataset().
	dataset = None
	dataset_size = "Unknown"
	last_refresh_time = None

	# Minimum age of the loaded dataset before it is reloaded.
	REFRESH_INTERVAL = timedelta(days=1)

	def load_and_prepare_dataset():
	    """Load the streaming dataset and record its size and load time.

	    Rebinds three module globals:
	      * ``dataset`` - the "train" split of the random-sample repo, opened
	        with ``streaming=True`` and the module-level ``access_token``.
	      * ``dataset_size`` - the split's ``num_examples``, or "Unknown" when
	        the dataset info carries no "train" split entry.
	      * ``last_refresh_time`` - ``datetime.now()`` at the end of the load.
	    """
	    global dataset, dataset_size, last_refresh_time

	    dataset = load_dataset(
	        "taesiri/PhotoshopRequest-DailyDump-January-2025-RandomSample",
	        split="train",
	        streaming=True,
	        token=access_token,
	    )

	    # `info.splits` is optional and may be None; fall back to an empty
	    # mapping so the lookup yields "Unknown" instead of raising
	    # AttributeError on `.get`.
	    splits = dataset.info.splits or {}
	    train_split = splits.get("train")
	    dataset_size = train_split.num_examples if train_split else "Unknown"

	    last_refresh_time = datetime.now()

	def check_and_refresh_dataset():
	    """Reload the dataset if it was never loaded or is at least REFRESH_INTERVAL old."""
	    if last_refresh_time is not None:
	        age = datetime.now() - last_refresh_time
	        if age < REFRESH_INTERVAL:
	            # Still fresh: keep the dataset that is already loaded.
	            return
	    load_and_prepare_dataset()

	# Initial dataset load; also sets last_refresh_time.
	load_and_prepare_dataset()

	# Rebind `dataset` to the full daily-dump repo, replacing the one loaded above.
	# NOTE(review): once REFRESH_INTERVAL has elapsed, check_and_refresh_dataset()
	# calls load_and_prepare_dataset() again, which rebinds `dataset` to the
	# January-2025 random-sample repo - confirm that switching sources after a
	# refresh is intended.
	dataset = load_dataset(
	    "taesiri/PhotoshopRequest-DailyDump",
	    split="train",
	    streaming=True,
	    token=access_token,
	)

	# Get dataset info. `splits` is optional and may be None, so fall back to an
	# empty mapping rather than calling `.get` on None.
	dataset_info = dataset.info
	_splits = dataset_info.splits or {}
	dataset_size = _splits["train"].num_examples if _splits.get("train") else "Unknown"

	# Shuffle-buffer size passed to dataset.shuffle(); get_next_samples() also
	# reshuffles once this many samples have been drawn.
	BUFFER_SIZE = 1
	# Iterator over the shuffled dataset; assigned by reshuffle_dataset().
	sample_iterator = None
	# Number of samples drawn since the last reshuffle.
	sample_count = 0

	def reshuffle_dataset():
	    """Start a freshly shuffled pass over `dataset` and reset the draw counter.

	    Rebinds the module globals ``sample_iterator`` (an iterator over
	    ``dataset.shuffle(...)``) and ``sample_count`` (set back to 0).
	    """
	    global sample_iterator, sample_count
	    # Seed from the nanosecond clock. get_next_samples() reshuffles whenever
	    # sample_count reaches BUFFER_SIZE, so several reshuffles can happen in
	    # the same second; a whole-second seed would then repeat and produce the
	    # same order each time. Reduced modulo 2**32 to keep the seed small.
	    seed = time.time_ns() % 2**32
	    shuffled_dataset = dataset.shuffle(seed=seed, buffer_size=BUFFER_SIZE)
	    sample_iterator = iter(shuffled_dataset)
	    sample_count = 0

	# Build the first shuffled iterator at import time so sample_iterator is set before the UI is built.
	reshuffle_dataset() # Initial shuffle

	def get_next_samples(num_samples=5):
	    """
	    Fetch `num_samples` items from the dataset for display.

	    Calls check_and_refresh_dataset() first, then draws from the module-level
	    `sample_iterator`, calling reshuffle_dataset() whenever `sample_count`
	    has reached BUFFER_SIZE.

	    Returns a flat tuple of 3 * num_samples values, in a fixed order per
	    sample: (markdown_text, source_image, edited_image).

	    A StopIteration from next() propagates if the iterator yields nothing.
	    """
	    global sample_count

	    check_and_refresh_dataset()

	    results = []
	    for _ in range(num_samples):
	        if sample_count >= BUFFER_SIZE:
	            reshuffle_dataset()

	        sample = next(sample_iterator)
	        sample_count += 1
	        # Log the raw record to stdout.
	        print(sample)

	        post_id = sample["post_id"]
	        title = sample["title"]
	        reddit_url = f"https://www.reddit.com/r/PhotoshopRequest/comments/{post_id}"

	        # The post body is optional. Catch only the failures a missing or
	        # malformed payload can cause (absent key, non-string or non-dict
	        # value, invalid JSON), not a bare `except` that would also swallow
	        # KeyboardInterrupt and SystemExit.
	        try:
	            # `or ""` keeps a null selftext from rendering as "None".
	            selftext = json.loads(sample["json_data"])["post"]["selftext"] or ""
	        except (KeyError, TypeError, ValueError):
	            print("No selftext found")
	            selftext = ""

	        markdown_text = f"# {title}\n\n{selftext}\n\n[View post on r/PhotoshopRequest]({reddit_url})"

	        # Append the triple (post_info, source_image, edited_image)
	        results.extend((markdown_text, sample["source_image"], sample["edited_image"]))

	    return tuple(results)

	def update_info():
	    """
	    Return a small HTML snippet with dataset stats and last refresh time.

	    Reads the module globals `dataset_size` and `last_refresh_time`. The
	    timestamp is converted to UTC before formatting so that it matches the
	    "UTC" label; "Unknown" is shown when no refresh time is recorded.
	    """
	    # Local import: the file-level import only brings in datetime and timedelta.
	    from datetime import timezone

	    if last_refresh_time:
	        # astimezone() treats a naive datetime as system-local time, so the
	        # value written by datetime.now() is converted to real UTC here.
	        refreshed = last_refresh_time.astimezone(timezone.utc).strftime('%Y-%m-%d %H:%M:%S UTC')
	    else:
	        refreshed = 'Unknown'

	    return f"""
	<div style="text-align: center;">
	<hr>
	Dataset Size: {dataset_size} items<br>
	Last Refreshed: {refreshed}
	</div>
	"""

	# Assemble the Gradio UI
	def _sample_slot(index):
	    """Create one sample's widgets: a Markdown block, then a row with the source and edited images."""
	    info = gr.Markdown()
	    with gr.Row():
	        source = gr.Image(label=f"Source Image {index}")
	        edited = gr.Image(label=f"Edited Image {index}")
	    return info, source, edited


	with gr.Blocks() as demo:
	    gr.Markdown("# PhotoshopRequest Dataset Sampler")

	    gr.Markdown(
	        """
	This is a preview of the PhotoshopRequest dataset. Each sample represents a Photoshop editing request post.
	Click the 'Sample New Item' button to retrieve 5 random samples from the dataset.

	Layout: For each sample, you'll see:
	1. A text block with the post info.
	2. A row with two images (source on the left, edited on the right).
	"""
	    )

	    # Five display slots, each a (Markdown, source_image, edited_image) triple.
	    post_info1, source_image1, edited_image1 = _sample_slot(1)
	    post_info2, source_image2, edited_image2 = _sample_slot(2)
	    post_info3, source_image3, edited_image3 = _sample_slot(3)
	    post_info4, source_image4, edited_image4 = _sample_slot(4)
	    post_info5, source_image5, edited_image5 = _sample_slot(5)

	    sample_button = gr.Button("Sample New Item")
	    info_md = gr.Markdown()

	    # Output order must match the flat tuple returned by get_next_samples:
	    # (text, source, edited) repeated once per sample.
	    sample_outputs = [
	        post_info1, source_image1, edited_image1,
	        post_info2, source_image2, edited_image2,
	        post_info3, source_image3, edited_image3,
	        post_info4, source_image4, edited_image4,
	        post_info5, source_image5, edited_image5,
	    ]

	    # Fill the slots first, then update the dataset-info footer.
	    click_event = sample_button.click(get_next_samples, outputs=sample_outputs)
	    click_event.then(update_info, outputs=[info_md])

	# Launch the Gradio app only when this file is run as a script, not when imported.
	if __name__ == "__main__":
	demo.launch()