"""Gradio front end that drives a ComfyUI node graph for image generation.

At import time the module downloads the checkpoint/LoRA files, puts the
ComfyUI checkout on ``sys.path``, instantiates the nodes it needs and loads
the models. It then exposes three callables to the UI: ``infer``
(text-to-image), ``infer_i2i`` (image-to-image) and ``infer_wd14tagger``
(image tagging).
"""
import argparse
import base64
import contextlib
import datetime
import hmac
import io
import json
import os
import random
import sys
import time
import uuid
from typing import Any, Mapping, Optional, Sequence, Union

import torch
import numpy as np
from PIL import Image, ImageOps, ImageSequence
from PIL.PngImagePlugin import PngInfo
import gradio as gr
from fastapi import HTTPException, Request
from huggingface_hub import hf_hub_download
import spaces

# NOTE(review): `token` is read but never passed to hf_hub_download below;
# confirm whether the hub client is expected to pick up HF_TOKEN on its own.
token = os.environ.get("HF_TOKEN")

MODEL_REPO_ID = "oimoyu/model"
MODEL_FILES = (
    ("chkp1.safetensors", "models/checkpoints"),
    ("lora1.safetensors", "models/loras"),
    ("lora2.safetensors", "models/loras"),
    ("lora3.safetensors", "models/loras"),
)
for _filename, _local_dir in MODEL_FILES:
    hf_hub_download(repo_id=MODEL_REPO_ID, filename=_filename, local_dir=_local_dir)

# Largest seed drawn when the caller asks for a random one. 2**64 itself does
# not fit in an unsigned 64-bit value, so the inclusive upper bound is one less.
_MAX_SEED = 2**64 - 1

DEFAULT_NEGATIVE_PROMPT = "nsfw, lowres, {bad}, error, fewer, extra, missing, worst quality, jpeg artifacts, bad quality, watermark, unfinished, displeasing, chromatic aberration, signature, extra digits, artistic error, username, scan, [abstract]"


def get_script_directory():
    """Return the absolute directory that contains this script."""
    return os.path.dirname(os.path.abspath(__file__))


def safe_execute(func, *args, **kwargs):
    """Call ``func(*args, **kwargs)``; print the error and return None on failure."""
    try:
        return func(*args, **kwargs)
    except Exception as e:
        print(f"Error executing {func.__name__}: {e}")
        return None


def _cleanup_directory(output_dir, keep_seconds, min_files_threshold):
    """Delete old files and empty sub-directories below ``output_dir``.

    Returns ``(deleted_files, deleted_dirs)``, or None when the tree holds
    fewer than ``min_files_threshold`` files and was therefore left alone.
    """
    all_files = [
        os.path.join(root, filename)
        for root, _dirs, files in os.walk(output_dir)
        for filename in files
    ]
    if len(all_files) < min_files_threshold:
        return None

    time_threshold = time.time() - keep_seconds
    deleted_count = 0
    for file_path in all_files:
        try:
            if os.path.getctime(file_path) < time_threshold:
                os.remove(file_path)
                deleted_count += 1
        except OSError:
            # Best effort: a file may vanish or be locked between walk and remove.
            pass

    # Bottom-up so that children are removed before their parents are checked.
    deleted_dirs = 0
    top_dir = output_dir.rstrip('/')
    for root, _dirs, _files in os.walk(output_dir, topdown=False):
        if root == top_dir:
            # Never remove the output directory itself.
            continue
        try:
            if not os.listdir(root):
                os.rmdir(root)
                deleted_dirs += 1
        except OSError:
            pass
    return deleted_count, deleted_dirs


def cleanup_output():
    """Occasionally prune old files from the temporary output directories.

    Runs with a probability of 1.5% per call. For each of ``/tmp/gradio`` and
    ``<script dir>/temp`` that holds at least 100 files, files whose
    ``os.path.getctime`` is older than 30 minutes are deleted, followed by any
    sub-directories left empty. A directory with too few files is skipped
    without preventing the remaining directories from being processed.
    """
    trigger_probability = 0.015
    keep_minutes = 30
    min_files_threshold = 100  # leave directories with fewer files untouched

    if random.random() > trigger_probability:
        return None

    for output_dir in ["/tmp/gradio", os.path.join(get_script_directory(), "temp")]:
        try:
            if not os.path.exists(output_dir):
                continue
            outcome = _cleanup_directory(output_dir, keep_minutes * 60, min_files_threshold)
            if outcome is None:
                # Too few files here; still look at the next directory.
                continue
            deleted_count, deleted_dirs = outcome
            print(f"cleanup done: dir: {output_dir}, deleted {deleted_count} files, {deleted_dirs} empty directories")
        except Exception as e:
            print(f"cleanup error:dir: {output_dir}, error: {str(e)}")


def get_value_at_index(obj: Union[Sequence, Mapping], index: int) -> Any:
    """Return ``obj[index]``, falling back to ``obj["result"][index]`` on KeyError."""
    try:
        return obj[index]
    except KeyError:
        return obj["result"][index]


def find_path(name: str, path: Optional[str] = None) -> Optional[str]:
    """Search ``path`` and then each of its parents for an entry called ``name``.

    ``path`` defaults to the current working directory. Returns the joined
    path as a string when found, or None once the filesystem root has been
    reached without a match.
    """
    if path is None:
        path = os.getcwd()

    if name in os.listdir(path):
        path_name = os.path.join(path, name)
        print(f"{name} found: {path_name}")
        return path_name

    parent_directory = os.path.dirname(path)
    # dirname() of the root is the root itself: nothing left to search.
    if parent_directory == path:
        return None

    return find_path(name, parent_directory)


def add_comfyui_directory_to_sys_path() -> None:
    """Append the 'ComfyUI' directory (located via ``find_path``) to ``sys.path``.

    Also sets ``__main__.__file__`` to ``<ComfyUI>/main.py`` when the main
    module has no ``__file__`` attribute.
    """
    comfyui_path = find_path("ComfyUI")
    if comfyui_path is not None and os.path.isdir(comfyui_path):
        sys.path.append(comfyui_path)
        import __main__

        if getattr(__main__, "__file__", None) is None:
            __main__.__file__ = os.path.join(comfyui_path, "main.py")

        print(f"'{comfyui_path}' added to sys.path")


def add_extra_model_paths() -> None:
    """Load the optional ``extra_model_paths.yaml`` config if one can be found."""
    from utils.extra_config import load_extra_path_config

    extra_model_paths = find_path("extra_model_paths.yaml")
    if extra_model_paths is not None:
        load_extra_path_config(extra_model_paths)
    else:
        print("Could not find the extra_model_paths config file.")


def import_custom_nodes() -> None:
    """Initialise ComfyUI's custom nodes.

    Creates a new asyncio event loop and installs it as the current loop,
    builds a ``PromptServer`` and ``PromptQueue`` on it, then calls
    ``init_extra_nodes(init_custom_nodes=True)``.
    """
    import asyncio
    import execution
    from nodes import init_extra_nodes
    import server

    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)

    server_instance = server.PromptServer(loop)
    execution.PromptQueue(server_instance)

    init_extra_nodes(init_custom_nodes=True)


def pil_to_tensor(image):
    """Convert a PIL image to a float32 tensor scaled to [0, 1] with a leading batch axis."""
    if image.mode != 'RGB':
        image = image.convert('RGB')
    img_array = np.array(image, dtype=np.float32) / 255.0
    return torch.from_numpy(img_array).unsqueeze(0)


PROMPT_DATA = json.loads("{}")

# The ComfyUI modules only become importable after the path setup below, which
# is why these imports cannot live at the top of the file.
add_comfyui_directory_to_sys_path()
add_extra_model_paths()
from nodes import NODE_CLASS_MAPPINGS

import_custom_nodes()

smz_cliptextencode = NODE_CLASS_MAPPINGS["smZ CLIPTextEncode"]()
imagescaleby = NODE_CLASS_MAPPINGS["ImageScaleBy"]()
vaeencode = NODE_CLASS_MAPPINGS["VAEEncode"]()
applyfbcacheonmodel = NODE_CLASS_MAPPINGS["ApplyFBCacheOnModel"]()
ksampler_efficient = NODE_CLASS_MAPPINGS["KSampler (Efficient)"]()
cliptextencode = NODE_CLASS_MAPPINGS["CLIPTextEncode"]()
ksampler = NODE_CLASS_MAPPINGS["KSampler"]()
vaedecode = NODE_CLASS_MAPPINGS["VAEDecode"]()
saveimage = NODE_CLASS_MAPPINGS["SaveImage"]()

checkpointloadersimple = NODE_CLASS_MAPPINGS["CheckpointLoaderSimple"]()
checkpointloadersimple_4 = checkpointloadersimple.load_checkpoint(
    ckpt_name="chkp1.safetensors"
)

emptylatentimage = NODE_CLASS_MAPPINGS["EmptyLatentImage"]()

clipsetlastlayer = NODE_CLASS_MAPPINGS["CLIPSetLastLayer"]()
clipsetlastlayer_14 = clipsetlastlayer.set_last_layer(
    stop_at_clip_layer=-2, clip=get_value_at_index(checkpointloadersimple_4, 1)
)

# The three LoRAs are chained: each one patches the model/clip of the previous.
loraloader = NODE_CLASS_MAPPINGS["LoraLoader"]()
loraloader_11 = loraloader.load_lora(
    lora_name="lora1.safetensors",
    strength_model=0.3,
    strength_clip=0.3,
    model=get_value_at_index(checkpointloadersimple_4, 0),
    clip=get_value_at_index(clipsetlastlayer_14, 0),
)
loraloader_12 = loraloader.load_lora(
    lora_name="lora2.safetensors",
    strength_model=0.5,
    strength_clip=0.5,
    model=get_value_at_index(loraloader_11, 0),
    clip=get_value_at_index(loraloader_11, 1),
)
loraloader_13 = loraloader.load_lora(
    lora_name="lora3.safetensors",
    strength_model=0.5,
    strength_clip=0.5,
    model=get_value_at_index(loraloader_12, 0),
    clip=get_value_at_index(loraloader_12, 1),
)

applyfbcacheonmodel_16 = applyfbcacheonmodel.patch(
    object_to_patch="diffusion_model",
    residual_diff_threshold=0.2,
    start=0.7,
    end=1,
    max_consecutive_cache_hits=-1,
    model=get_value_at_index(loraloader_13, 0),
)

wd14taggerpysssss = NODE_CLASS_MAPPINGS["WD14Tagger|pysssss"]()

from comfy import model_management

model_loaders = [checkpointloadersimple_4, loraloader_11, loraloader_12, loraloader_13, applyfbcacheonmodel_16]
model_management.load_models_gpu([
    loader[0].patcher if hasattr(loader[0], 'patcher') else loader[0] for loader in model_loaders
])

LOADED_MODEL = get_value_at_index(loraloader_13, 0)
LOADED_CLIP = get_value_at_index(loraloader_13, 1)
LOADED_VAE = get_value_at_index(checkpointloadersimple_4, 2)
LOADED_WAVESPEED_MODEL = get_value_at_index(applyfbcacheonmodel_16, 0)


class _StageTimer:
    """Record how long each pipeline stage took and print a one-line summary."""

    def __init__(self):
        self.start_time = time.time()
        self.stages = []

    def mark(self):
        """Append the time elapsed since the previous mark (or since creation)."""
        self.stages.append(time.time() - self.start_time - sum(self.stages))

    def report(self):
        """Print the total elapsed time followed by the per-stage durations."""
        consume_time = time.time() - self.start_time
        print(f"[{datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}] consume:{consume_time:.1f}s ({[f'{t:.1f}' for t in self.stages]})")


def _resolve_seed(seed):
    """Return ``seed`` unchanged when positive, otherwise draw a random one."""
    if seed <= 0:
        return random.randint(1, _MAX_SEED)
    return seed


def _encode_prompt(text, multi_conditioning):
    """Encode ``text`` with the smZ CLIP text encoder using the shared CLIP model."""
    return smz_cliptextencode.encode(
        text=text,
        parser="A1111",
        mean_normalization=True,
        multi_conditioning=multi_conditioning,
        use_old_emphasis_implementation=False,
        with_SDXL=False,
        ascore=6,
        width=1024,
        height=1024,
        crop_w=0,
        crop_h=0,
        target_width=1024,
        target_height=1024,
        text_g="",
        text_l="",
        smZ_steps=1,
        clip=LOADED_CLIP,
    )


def _sample(model, seed, steps, cfg, denoise, positive, negative, latent_image):
    """Run one ``KSampler (Efficient)`` pass with the sampler settings shared by all passes."""
    return ksampler_efficient.sample(
        seed=seed,
        steps=steps,
        cfg=cfg,
        sampler_name="dpmpp_2m",
        scheduler="karras",
        denoise=denoise,
        preview_method="auto",
        vae_decode="true",
        model=model,
        positive=positive,
        negative=negative,
        latent_image=latent_image,
        optional_vae=LOADED_VAE,
        prompt=PROMPT_DATA,
    )


def _upscale_and_refine(first_pass, seed, steps, cfg, timer):
    """Upscale the first-pass image by 1.5x and re-sample it at denoise 0.39.

    Outputs 1 and 2 of ``first_pass`` are reused as the positive/negative
    conditioning and output 5 as the image to upscale.
    """
    upscaled = imagescaleby.upscale(
        upscale_method="bicubic",
        scale_by=1.5,
        image=get_value_at_index(first_pass, 5),
    )
    timer.mark()

    upscaled_latent = vaeencode.encode(
        pixels=get_value_at_index(upscaled, 0),
        vae=LOADED_VAE,
    )
    timer.mark()

    refined = _sample(
        model=LOADED_WAVESPEED_MODEL,
        seed=seed,
        steps=steps,
        cfg=cfg,
        denoise=0.39,
        positive=get_value_at_index(first_pass, 1),
        negative=get_value_at_index(first_pass, 2),
        latent_image=get_value_at_index(upscaled_latent, 0),
    )
    timer.mark()
    return refined


def _finish(sampler_result, timer):
    """Turn output 5 of a sampler pass into a uint8 array and log the timings."""
    image_tensor = get_value_at_index(sampler_result, 5)[0]
    # Scale to 0..255 before moving off the device so only the final copy is made.
    image_tensor = torch.clamp(image_tensor * 255.0, 0, 255)
    image_uint8 = image_tensor.cpu().numpy().astype(np.uint8)
    timer.mark()
    timer.report()
    return image_uint8


@spaces.GPU(duration=60)
def infer(prompt_input, negative_prompt_input, seed, width, height, guidance_scale, num_inference_steps):
    """Generate an image from a text prompt.

    A seed of 0 or less is replaced by a random one. When both ``width`` and
    ``height`` are below 1024 the first sampler pass is returned directly;
    otherwise the result is upscaled and refined by a second pass.

    Returns the image as a uint8 numpy array.
    """
    safe_execute(cleanup_output)
    timer = _StageTimer()
    seed = _resolve_seed(seed)

    with torch.inference_mode():
        timer.mark()
        empty_latent = emptylatentimage.generate(width=width, height=height, batch_size=1)
        timer.mark()

        positive = _encode_prompt(prompt_input, multi_conditioning=True)
        negative = _encode_prompt(negative_prompt_input, multi_conditioning=False)
        timer.mark()

        first_pass = _sample(
            model=LOADED_MODEL,
            seed=seed,
            steps=num_inference_steps,
            cfg=guidance_scale,
            denoise=1,
            positive=get_value_at_index(positive, 0),
            negative=get_value_at_index(negative, 0),
            latent_image=get_value_at_index(empty_latent, 0),
        )
        timer.mark()

        if width < 1024 and height < 1024:
            return _finish(first_pass, timer)

        refined = _upscale_and_refine(first_pass, seed, num_inference_steps, guidance_scale, timer)
        return _finish(refined, timer)


def clamp_image_size(image, max_size=1280):
    """Shrink ``image`` so neither side exceeds ``max_size`` and return it as a tensor.

    Images already within the limit keep their size. Larger ones are scaled
    with LANCZOS resampling so the longer side equals ``max_size``; the shorter
    side is kept at one pixel or more. The result has the same format as
    ``pil_to_tensor``.
    """
    width, height = image.size
    if width > max_size or height > max_size:
        if width > height:
            new_width = max_size
            new_height = max(1, int(height * max_size / width))
        else:
            new_height = max_size
            new_width = max(1, int(width * max_size / height))
        image = image.resize((new_width, new_height), Image.Resampling.LANCZOS)

    return pil_to_tensor(image)


def string_to_pil(image):
    """Open ``image`` as a PIL image from a ``data:image`` URI or a file path."""
    if image.startswith('data:image'):
        # Drop the "data:image/...;base64," prefix and decode the payload.
        base64_str = image.split(',', 1)[1]
        image_data = base64.b64decode(base64_str)
        return Image.open(io.BytesIO(image_data))
    return Image.open(image)


@spaces.GPU(duration=60)
def infer_i2i(prompt_input, negative_prompt_input, image, seed, denoise_strength, guidance_scale, num_inference_steps):
    """Generate an image from a prompt and a source PIL image.

    The source image is clamped to at most 1280 pixels per side, encoded with
    the VAE and sampled at ``denoise_strength``; the result is then upscaled
    and refined by a second pass. A seed of 0 or less is replaced by a random
    one.

    Returns the image as a uint8 numpy array.

    Raises:
        gr.Error: if no image was supplied.
    """
    if image is None:
        raise gr.Error("Please upload an image first.")

    safe_execute(cleanup_output)
    timer = _StageTimer()
    seed = _resolve_seed(seed)

    with torch.inference_mode():
        timer.mark()
        source_latent = vaeencode.encode(
            pixels=clamp_image_size(image),
            vae=LOADED_VAE,
        )
        timer.mark()

        positive = _encode_prompt(prompt_input, multi_conditioning=True)
        negative = _encode_prompt(negative_prompt_input, multi_conditioning=False)
        timer.mark()

        first_pass = _sample(
            model=LOADED_MODEL,
            seed=seed,
            steps=num_inference_steps,
            cfg=guidance_scale,
            denoise=denoise_strength,
            positive=get_value_at_index(positive, 0),
            negative=get_value_at_index(negative, 0),
            latent_image=get_value_at_index(source_latent, 0),
        )
        timer.mark()

        refined = _upscale_and_refine(first_pass, seed, num_inference_steps, guidance_scale, timer)
        return _finish(refined, timer)


expected_secret = os.environ.get("API_SECRET", "")
# Only report whether a secret is set; the value itself must not reach the logs.
print(f"API secret configured: {bool(expected_secret)}")


def dep(request: Request):
    """Auth dependency: require a matching ``X-Secret`` header when API_SECRET is set.

    Raises:
        HTTPException: 401 when a secret is configured and the header differs.
    """
    if expected_secret:
        supplied = request.headers.get("X-Secret") or ""
        # Constant-time comparison so response timing does not leak the secret.
        if not hmac.compare_digest(supplied.encode("utf-8"), expected_secret.encode("utf-8")):
            raise HTTPException(
                status_code=401,
                detail="Invalid secret",
                headers={"WWW-Authenticate": "X-Secret"}
            )
    return {"authenticated": True}


# NOTE(review): the spaces.GPU decorator was commented out on this function in
# the original; confirm that running the tagger without it is intentional.
def infer_wd14tagger(image):
    """Return the WD14 tagger's tag string for ``image``, or a hint if none was given."""
    if image is None:
        return "Please upload an image first."

    with torch.inference_mode():
        image_tensor = pil_to_tensor(image)
        tagger_output = wd14taggerpysssss.tag(
            model="wd-v1-4-moat-tagger-v2",
            threshold=0.35,
            character_threshold=0.85,
            replace_underscore=False,
            trailing_comma=False,
            exclude_tags="",
            image=image_tensor,
        )
        wd14_result = get_value_at_index(tagger_output, 0)
        result = ""
        if wd14_result:
            result = wd14_result[0]
        return result


default_concurrency_limit = 6


def build_app():
    """Build the Gradio Blocks UI with the three tabs and return it."""
    with gr.Blocks() as app:
        gr.Markdown("# Your dream wifi generator")

        with gr.Tabs():
            # Text-to-Image tab
            with gr.TabItem("Text-to-Image"):
                with gr.Row():
                    prompt_input = gr.Textbox(
                        label="Prompt",
                        placeholder="Enter your prompt here...",
                        value="1boy"
                    )
                    negative_prompt_input = gr.Textbox(
                        label="Negative Prompt",
                        placeholder="Enter your negative prompt here...",
                        value=DEFAULT_NEGATIVE_PROMPT
                    )
                    use_negative_prompt = gr.Checkbox(label="Is use negative", value=True, visible=False)
                    seed = gr.Slider(
                        label="Seed",
                        minimum=0,
                        maximum=np.iinfo(np.int32).max,
                        step=1,
                        value=0,
                    )
                    with gr.Row(visible=True):
                        width = gr.Slider(
                            label="Width",
                            minimum=512,
                            maximum=1280,
                            step=64,
                            value=832,
                        )
                        height = gr.Slider(
                            label="Height",
                            minimum=512,
                            maximum=1280,
                            step=64,
                            value=832,
                        )
                    guidance_scale = gr.Slider(
                        label="Guidance Scale",
                        minimum=0.1,
                        maximum=10,
                        step=0.1,
                        value=7.0,
                    )
                    num_inference_steps = gr.Slider(
                        label="Step",
                        minimum=1,
                        maximum=50,
                        step=1,
                        value=28,
                    )
                    generate_btn = gr.Button("Generate")
                with gr.Column():
                    output_image = gr.Image(label="Generated Image", show_label=False, format="JPEG")

                # Clicking the button runs `infer` on the inputs and shows the image.
                generate_btn.click(
                    fn=infer,
                    inputs=[prompt_input, negative_prompt_input, seed, width, height, guidance_scale, num_inference_steps],
                    outputs=[output_image],
                    concurrency_id="inference_queue"
                )

            # Image-to-Image tab
            with gr.TabItem("Image-to-Image"):
                with gr.Row():
                    i2i_prompt_input = gr.Textbox(
                        label="Prompt",
                        placeholder="Enter your prompt here...",
                        value="1boy"
                    )
                    i2i_negative_prompt_input = gr.Textbox(
                        label="Negative Prompt",
                        placeholder="Enter your negative prompt here...",
                        value=DEFAULT_NEGATIVE_PROMPT
                    )
                    input_image_component = gr.Image(type="pil", label="Input Image")
                    i2i_use_negative_prompt = gr.Checkbox(label="Is use negative", value=True, visible=False)
                    i2i_seed = gr.Slider(
                        label="Seed",
                        minimum=0,
                        maximum=np.iinfo(np.int32).max,
                        step=1,
                        value=0,
                    )
                    denoise_strength = gr.Slider(
                        label="Denoise Strength",
                        minimum=0,
                        maximum=1.0,
                        step=0.05,
                        value=0.75,
                        info="Higher values will change the image more"
                    )
                    i2i_guidance_scale = gr.Slider(
                        label="Guidance Scale",
                        minimum=0.1,
                        maximum=10,
                        step=0.1,
                        value=7.0,
                    )
                    i2i_num_inference_steps = gr.Slider(
                        label="Step",
                        minimum=1,
                        maximum=50,
                        step=1,
                        value=28,
                    )
                    i2i_generate_btn = gr.Button("Generate")
                with gr.Column():
                    i2i_output_image = gr.Image(label="Generated Image", show_label=False, format="JPEG")

                i2i_generate_btn.click(
                    fn=infer_i2i,
                    inputs=[i2i_prompt_input, i2i_negative_prompt_input, input_image_component, i2i_seed, denoise_strength, i2i_guidance_scale, i2i_num_inference_steps],
                    outputs=[i2i_output_image],
                    concurrency_id="inference_queue"
                )

            # WD14-Tagger tab
            with gr.TabItem("WD14-Tagger"):
                with gr.Row():
                    input_image = gr.Image(type="pil", label="Extract Image Tags",)
                    tag_btn = gr.Button("Generate Tags")
                with gr.Column():
                    output_tags = gr.TextArea(label="Generated Tags", show_label=True)

                tag_btn.click(
                    fn=infer_wd14tagger,
                    inputs=[input_image],
                    outputs=[output_tags],
                    concurrency_id="inference_queue"
                )
    return app


if __name__ == "__main__":
    app = build_app()
    app.queue(
        default_concurrency_limit=default_concurrency_limit,  # default; individual events may override it
        max_size=15  # global queue size; cannot be overridden
    )
    app.launch(server_port=7860, auth_dependency=dep, server_name="0.0.0.0")