# Source: HiDream-ai-dev / inference.py (commit 755bbb7, "first commit" by blanchon)
import argparse
import re

import torch
from transformers import LlamaForCausalLM, PreTrainedTokenizerFast

from hi_diffusers import HiDreamImagePipeline
from hi_diffusers import HiDreamImageTransformer2DModel
from hi_diffusers.schedulers.flash_flow_match import FlashFlowMatchEulerDiscreteScheduler
from hi_diffusers.schedulers.fm_solvers_unipc import FlowUniPCMultistepScheduler
parser = argparse.ArgumentParser()
parser.add_argument("--model_type", type=str, default="dev")
args = parser.parse_args()
model_type = args.model_type
MODEL_PREFIX = "HiDream-ai"
LLAMA_MODEL_NAME = "meta-llama/Meta-Llama-3.1-8B-Instruct"
# Model configurations
MODEL_CONFIGS = {
"dev": {
"path": f"{MODEL_PREFIX}/HiDream-I1-Dev",
"guidance_scale": 0.0,
"num_inference_steps": 28,
"shift": 6.0,
"scheduler": FlashFlowMatchEulerDiscreteScheduler
},
"full": {
"path": f"{MODEL_PREFIX}/HiDream-I1-Full",
"guidance_scale": 5.0,
"num_inference_steps": 50,
"shift": 3.0,
"scheduler": FlowUniPCMultistepScheduler
},
"fast": {
"path": f"{MODEL_PREFIX}/HiDream-I1-Fast",
"guidance_scale": 0.0,
"num_inference_steps": 16,
"shift": 3.0,
"scheduler": FlashFlowMatchEulerDiscreteScheduler
}
}
# Resolution options
RESOLUTION_OPTIONS = [
"1024 Γ— 1024 (Square)",
"768 Γ— 1360 (Portrait)",
"1360 Γ— 768 (Landscape)",
"880 Γ— 1168 (Portrait)",
"1168 Γ— 880 (Landscape)",
"1248 Γ— 832 (Landscape)",
"832 Γ— 1248 (Portrait)"
]
# Load models
def load_models(model_type):
config = MODEL_CONFIGS[model_type]
pretrained_model_name_or_path = config["path"]
scheduler = FlowUniPCMultistepScheduler(num_train_timesteps=1000, shift=config["shift"], use_dynamic_shifting=False)
tokenizer_4 = PreTrainedTokenizerFast.from_pretrained(
LLAMA_MODEL_NAME,
use_fast=False)
text_encoder_4 = LlamaForCausalLM.from_pretrained(
LLAMA_MODEL_NAME,
output_hidden_states=True,
output_attentions=True,
torch_dtype=torch.bfloat16).to("cuda")
transformer = HiDreamImageTransformer2DModel.from_pretrained(
pretrained_model_name_or_path,
subfolder="transformer",
torch_dtype=torch.bfloat16).to("cuda")
pipe = HiDreamImagePipeline.from_pretrained(
pretrained_model_name_or_path,
scheduler=scheduler,
tokenizer_4=tokenizer_4,
text_encoder_4=text_encoder_4,
torch_dtype=torch.bfloat16
).to("cuda", torch.bfloat16)
pipe.transformer = transformer
return pipe, config
# Parse resolution string to get height and width
def parse_resolution(resolution_str):
if "1024 Γ— 1024" in resolution_str:
return 1024, 1024
elif "768 Γ— 1360" in resolution_str:
return 768, 1360
elif "1360 Γ— 768" in resolution_str:
return 1360, 768
elif "880 Γ— 1168" in resolution_str:
return 880, 1168
elif "1168 Γ— 880" in resolution_str:
return 1168, 880
elif "1248 Γ— 832" in resolution_str:
return 1248, 832
elif "832 Γ— 1248" in resolution_str:
return 832, 1248
else:
return 1024, 1024 # Default fallback
# Generate image function
def generate_image(pipe, model_type, prompt, resolution, seed):
# Get configuration for current model
config = MODEL_CONFIGS[model_type]
guidance_scale = config["guidance_scale"]
num_inference_steps = config["num_inference_steps"]
# Parse resolution
height, width = parse_resolution(resolution)
# Handle seed
if seed == -1:
seed = torch.randint(0, 1000000, (1,)).item()
generator = torch.Generator("cuda").manual_seed(seed)
images = pipe(
prompt,
height=height,
width=width,
guidance_scale=guidance_scale,
num_inference_steps=num_inference_steps,
num_images_per_prompt=1,
generator=generator
).images
return images[0], seed
# Initialize with default model
print("Loading default model (full)...")
pipe, _ = load_models(model_type)
print("Model loaded successfully!")
prompt = "A cat holding a sign that says \"Hi-Dreams.ai\"."
resolution = "1024 Γ— 1024 (Square)"
seed = -1
image, seed = generate_image(pipe, model_type, prompt, resolution, seed)
image.save("output.png")