Upload 20 files
Browse files- README.md +57 -1
- app.py +154 -0
- feature_extractor/preprocessor_config.json +20 -0
- model_index.json +32 -0
- pipeline.py +22 -0
- requirements.txt +6 -0
- safety_checker/config.json +179 -0
- safety_checker/pytorch_model.bin +3 -0
- scheduler/scheduler_config.json +13 -0
- text_encoder/config.json +25 -0
- text_encoder/pytorch_model.bin +3 -0
- tokenizer/merges.txt +0 -0
- tokenizer/special_tokens_map.json +24 -0
- tokenizer/tokenizer_config.json +34 -0
- tokenizer/vocab.json +0 -0
- unet/config.json +36 -0
- unet/diffusion_pytorch_model.bin +3 -0
- vae/config.json +29 -0
- vae/diffusion_pytorch_model.bin +3 -0
README.md
CHANGED
@@ -1,3 +1,59 @@
|
|
1 |
---
|
2 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3 |
---
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
---
|
2 |
+
title: Hassanblend1.5
|
3 |
+
emoji: 📚
|
4 |
+
colorFrom: green
|
5 |
+
colorTo: indigo
|
6 |
+
sdk: gradio
|
7 |
+
sdk_version: 3.11.0
|
8 |
+
app_file: app.py
|
9 |
+
pinned: false
|
10 |
+
thumbnail: https://i.imgur.com/PVThZvk.png
|
11 |
+
license: creativeml-openrail-m
|
12 |
+
tags:
|
13 |
+
- text-to-image
|
14 |
+
inference: true
|
15 |
---
|
16 |
+
|
17 |
+
|
18 |
+
# HassanBlend1.5
|
19 |
+
I am Hassan, the creator of HassanBlend; the latest version is currently 1.5. I continue to iterate on and improve this model over time. Feel free to check out our Discord or Rentry page for more examples with prompts and generated outputs.
|
20 |
+
|
21 |
+
This blend is fine-tuned from SD1.5 on a dataset of thousands of images. In addition, a few minor merges were added to soften it up and increase its creativity.
|
22 |
+
I also have some custom-created content, such as enhancement hypernetworks and embeddings, available only to Patreon or Ko-fi subscribers on my pages below.
|
23 |
+
<b> Links </b><br>
|
24 |
+
<b>Patreon</b>
|
25 |
+
<a href="https://www.patreon.com/sd_hassan" target="_blank"><img src="https://i.imgur.com/sR32SqJ.jpg" alt="Patreon"></a>
|
26 |
+
<b>KoFi</b>
|
27 |
+
<a href="https://ko-fi.com/sdhassan" target="_blank"><img src="https://i.imgur.com/0P7CTN4.png" alt="Ko-fi"></a>
|
28 |
+
<b>Discord</b>
|
29 |
+
<a href="https://discord.gg/sdmodelers" target="_blank"><img src="https://i.imgur.com/HC1iHwg.png" alt="Discord"></a>
|
30 |
+
### Quicklinks:
|
31 |
+
|
32 |
+
* [Latest Setup](https://rentry.org/sdhassan#current-setup)
|
33 |
+
* [HassanBlend Model Finetune Updates](https://rentry.org/sdhassan#hassanblend-finetuning-updates)
|
34 |
+
* [Latest Patreon Posts](https://rentry.org/sdhassan#patreon-posts)
|
35 |
+
* [Models](https://rentry.org/sdhassan#models)
|
36 |
+
* [HassanBlend1.5](https://rentry.org/sdhassan#hassanblend15-downloads)
|
37 |
+
* [HassanBlend1.4](https://rentry.org/sdhassan#hassanblend14-downloads)
|
38 |
+
* [Prompts](https://rentry.org/sdhassan#prompts)
|
39 |
+
* [Photorealistic Tips](https://rentry.org/sdhassan#tips-for-photorealistic-images)
|
40 |
+
* [Embeddings](https://rentry.org/sdhassan#embeddings)
|
41 |
+
* [Hypernetworks](https://rentry.org/sdhassan#hypernetworks)
|
42 |
+
* [Wildcards](https://rentry.org/sdhassan#wildcards-i-made)
|
43 |
+
* [MyTools](https://rentry.org/sdhassan#my-tools)
|
44 |
+
* [Settings I use](https://rentry.org/sdhassan#settings)
|
45 |
+
|
46 |
+
|
47 |
+
|
48 |
+
Model details and examples with sample prompts: https://rentry.org/sdhassan
|
49 |
+
|
50 |
+
|
51 |
+
## License
|
52 |
+
|
53 |
+
This model is open access and available to all, with a CreativeML OpenRAIL-M license further specifying rights and usage.
|
54 |
+
The CreativeML OpenRAIL License specifies:
|
55 |
+
|
56 |
+
1. You can't use the model to deliberately produce nor share illegal or harmful outputs or content
|
57 |
+
2. The authors claim no rights on the outputs you generate; you are free to use them and are accountable for their use, which must not go against the provisions set in the license
|
58 |
+
3. You may re-distribute the weights and use the model commercially and/or as a service. If you do, please be aware you have to include the same use restrictions as the ones in the license and share a copy of the CreativeML OpenRAIL-M to all your users (please read the license entirely and carefully)
|
59 |
+
[Please read the full license here](https://huggingface.co/spaces/CompVis/stable-diffusion-license)
|
app.py
ADDED
@@ -0,0 +1,154 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from diffusers import StableDiffusionPipeline, StableDiffusionImg2ImgPipeline, DPMSolverMultistepScheduler
|
2 |
+
import gradio as gr
|
3 |
+
import torch
|
4 |
+
from PIL import Image
|
5 |
+
|
6 |
+
model_id = 'hassanblend/HassanBlend1.5'
|
7 |
+
prefix = ''
|
8 |
+
|
9 |
+
# Sampler handed to both pipelines below: DPM-Solver++ (midpoint) over a
# scaled-linear beta schedule.
scheduler = DPMSolverMultistepScheduler(
    num_train_timesteps=1000,
    beta_start=0.00085,
    beta_end=0.012,
    beta_schedule="scaled_linear",
    trained_betas=None,
    predict_epsilon=True,
    thresholding=False,
    algorithm_type="dpmsolver++",
    solver_type="midpoint",
    lower_order_final=True,
)

# Half precision is requested only when a CUDA device is available;
# otherwise the weights are loaded as float32.
_has_cuda = torch.cuda.is_available()
_weights_dtype = torch.float16 if _has_cuda else torch.float32

# Text-to-image and image-to-image pipelines are both loaded from the same
# model id and share the scheduler instance.
pipe = StableDiffusionPipeline.from_pretrained(
    model_id,
    torch_dtype=_weights_dtype,
    scheduler=scheduler,
)
pipe_i2i = StableDiffusionImg2ImgPipeline.from_pretrained(
    model_id,
    torch_dtype=_weights_dtype,
    scheduler=scheduler,
)

if _has_cuda:
    pipe = pipe.to("cuda")
    pipe_i2i = pipe_i2i.to("cuda")
|
35 |
+
|
36 |
+
def error_str(error, title="Error"):
    """Render *error* as Markdown beneath a level-4 *title* heading.

    Returns an empty string when *error* is falsy, so the caller can always
    assign the result to the error panel.
    """
    if not error:
        return ""
    return f"#### {title}\n{error}"
|
39 |
+
|
40 |
+
def inference(prompt, guidance, steps, width=512, height=512, seed=0, img=None, strength=0.5, neg_prompt="", auto_prefix=True):
    """Generate one image and return ``(image, error_markdown)``.

    Runs image-to-image when *img* is provided, text-to-image otherwise.
    Exactly one element of the returned pair is meaningful: on success the
    second element is ``None``; on failure the first is ``None`` and the
    second is the formatted error message.

    Args:
        prompt: Text prompt; the module-level ``prefix`` is prepended when
            *auto_prefix* is true.
        guidance: Guidance scale forwarded to the pipeline.
        steps: Number of inference steps.
        width: Requested output width.
        height: Requested output height.
        seed: Seed for the random generator; ``0`` means "do not seed".
        img: Optional init image that switches to image-to-image mode.
        strength: Transformation strength for image-to-image mode.
        neg_prompt: Negative prompt.
        auto_prefix: Whether to prepend ``prefix`` to the prompt.
    """
    try:
        generator = None
        if seed != 0:
            # The generator must live on a device that exists: hard-coding
            # 'cuda' fails on CPU-only hosts. The seed is coerced to int
            # because manual_seed rejects non-integer values.
            device = "cuda" if torch.cuda.is_available() else "cpu"
            generator = torch.Generator(device).manual_seed(int(seed))

        if auto_prefix:
            prompt = f"{prefix} {prompt}"

        if img is not None:
            image = img_to_img(prompt, neg_prompt, img, strength, guidance, steps, width, height, generator)
        else:
            image = txt_to_img(prompt, neg_prompt, guidance, steps, width, height, generator)
    except Exception as e:
        # UI boundary: every failure, including generator creation, is
        # surfaced in the error panel instead of propagating.
        return None, error_str(e)
    return image, None
|
52 |
+
|
53 |
+
def txt_to_img(prompt, neg_prompt, guidance, steps, width, height, generator):
    """Run the text-to-image pipeline and return the first resulting image.

    The pipeline output is passed through ``replace_nsfw_images`` before
    being returned.
    """
    call_kwargs = {
        "negative_prompt": neg_prompt,
        "num_inference_steps": int(steps),
        "guidance_scale": guidance,
        "width": width,
        "height": height,
        "generator": generator,
    }
    output = pipe(prompt, **call_kwargs)
    return replace_nsfw_images(output)
|
65 |
+
|
66 |
+
def img_to_img(prompt, neg_prompt, img, strength, guidance, steps, width, height, generator):
    """Run the image-to-image pipeline on *img* and return the first image.

    *img* is first resized, preserving aspect ratio, so that it fits within
    the requested ``width`` x ``height`` box. The pipeline output is passed
    through ``replace_nsfw_images`` before being returned.
    """
    # Use the smaller of the two scale factors so neither side exceeds the box.
    scale = min(height / img.height, width / img.width)
    target_size = (int(img.width * scale), int(img.height * scale))
    resized = img.resize(target_size, Image.LANCZOS)

    call_kwargs = {
        "negative_prompt": neg_prompt,
        "init_image": resized,
        "num_inference_steps": int(steps),
        "strength": strength,
        "guidance_scale": guidance,
        "width": width,
        "height": height,
        "generator": generator,
    }
    output = pipe_i2i(prompt, **call_kwargs)
    return replace_nsfw_images(output)
|
82 |
+
|
83 |
+
def replace_nsfw_images(results):
    """Swap flagged images for the ``nsfw.png`` placeholder; return the first image.

    Args:
        results: Pipeline output exposing ``images`` (a mutable sequence) and
            ``nsfw_content_detected`` (per-image flags, or ``None``).

    Returns:
        ``results.images[0]`` after any flagged entries have been replaced
        in place.
    """
    flags = results.nsfw_content_detected
    # NOTE(review): flags is presumably None when no safety checker ran;
    # in that case there is nothing to replace.
    if flags is not None:
        for index, flagged in zip(range(len(results.images)), flags):
            if flagged:
                results.images[index] = Image.open("nsfw.png")
    return results.images[0]
|
89 |
+
|
90 |
+
css = """.main-div div{display:inline-flex;align-items:center;gap:.8rem;font-size:1.75rem}.main-div div h1{font-weight:900;margin-bottom:7px}.main-div p{margin-bottom:10px;font-size:94%}a{text-decoration:underline}.tabs{margin-top:0;margin-bottom:0}#gallery{min-height:20rem}
|
91 |
+
"""
|
92 |
+
with gr.Blocks(css=css) as demo:
|
93 |
+
gr.HTML(
|
94 |
+
f"""
|
95 |
+
<div class="main-div">
|
96 |
+
<div>
|
97 |
+
<h1>Hassanblend1.5</h1>
|
98 |
+
</div>
|
99 |
+
<p>
|
100 |
+
Demo for <a href="https://huggingface.co/hassanblend/HassanBlend1.5">Hassanblend1.5</a> Stable Diffusion model.<br>
|
101 |
+
Add the following tokens to your prompts for the model to work properly: <b></b>.
|
102 |
+
</p>
|
103 |
+
Running on <b>{"GPU 🔥" if torch.cuda.is_available() else "CPU 🥶"}</b>
|
104 |
+
</div>
|
105 |
+
"""
|
106 |
+
)
|
107 |
+
with gr.Row():
|
108 |
+
|
109 |
+
with gr.Column(scale=55):
|
110 |
+
with gr.Group():
|
111 |
+
with gr.Row():
|
112 |
+
prompt = gr.Textbox(label="Prompt", show_label=False, max_lines=2,placeholder=f"{prefix} [your prompt]").style(container=False)
|
113 |
+
generate = gr.Button(value="Generate").style(rounded=(False, True, True, False))
|
114 |
+
|
115 |
+
image_out = gr.Image(height=512)
|
116 |
+
error_output = gr.Markdown()
|
117 |
+
|
118 |
+
with gr.Column(scale=45):
|
119 |
+
with gr.Tab("Options"):
|
120 |
+
with gr.Group():
|
121 |
+
neg_prompt = gr.Textbox(label="Negative prompt", placeholder="What to exclude from the image")
|
122 |
+
auto_prefix = gr.Checkbox(label="Prefix styling tokens automatically ()", value=True)
|
123 |
+
|
124 |
+
with gr.Row():
|
125 |
+
guidance = gr.Slider(label="Guidance scale", value=7.5, maximum=15)
|
126 |
+
steps = gr.Slider(label="Steps", value=25, minimum=2, maximum=75, step=1)
|
127 |
+
|
128 |
+
with gr.Row():
|
129 |
+
width = gr.Slider(label="Width", value=512, minimum=64, maximum=1024, step=8)
|
130 |
+
height = gr.Slider(label="Height", value=512, minimum=64, maximum=1024, step=8)
|
131 |
+
|
132 |
+
seed = gr.Slider(0, 2147483647, label='Seed (0 = random)', value=0, step=1)
|
133 |
+
|
134 |
+
with gr.Tab("Image to image"):
|
135 |
+
with gr.Group():
|
136 |
+
image = gr.Image(label="Image", height=256, tool="editor", type="pil")
|
137 |
+
strength = gr.Slider(label="Transformation strength", minimum=0, maximum=1, step=0.01, value=0.5)
|
138 |
+
|
139 |
+
auto_prefix.change(lambda x: gr.update(placeholder=f"{prefix} [your prompt]" if x else "[Your prompt]"), inputs=auto_prefix, outputs=prompt, queue=False)
|
140 |
+
|
141 |
+
inputs = [prompt, guidance, steps, width, height, seed, image, strength, neg_prompt, auto_prefix]
|
142 |
+
outputs = [image_out, error_output]
|
143 |
+
prompt.submit(inference, inputs=inputs, outputs=outputs)
|
144 |
+
generate.click(inference, inputs=inputs, outputs=outputs)
|
145 |
+
|
146 |
+
gr.HTML("""
|
147 |
+
<div style="border-top: 1px solid #303030;">
|
148 |
+
<br>
|
149 |
+
<p>This space was created using <a href="https://huggingface.co/spaces/anzorq/sd-space-creator">SD Space Creator</a>.</p>
|
150 |
+
</div>
|
151 |
+
""")
|
152 |
+
|
153 |
+
demo.queue(concurrency_count=1)
|
154 |
+
demo.launch()
|
feature_extractor/preprocessor_config.json
ADDED
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"crop_size": 224,
|
3 |
+
"do_center_crop": true,
|
4 |
+
"do_convert_rgb": true,
|
5 |
+
"do_normalize": true,
|
6 |
+
"do_resize": true,
|
7 |
+
"feature_extractor_type": "CLIPFeatureExtractor",
|
8 |
+
"image_mean": [
|
9 |
+
0.48145466,
|
10 |
+
0.4578275,
|
11 |
+
0.40821073
|
12 |
+
],
|
13 |
+
"image_std": [
|
14 |
+
0.26862954,
|
15 |
+
0.26130258,
|
16 |
+
0.27577711
|
17 |
+
],
|
18 |
+
"resample": 3,
|
19 |
+
"size": 224
|
20 |
+
}
|
model_index.json
ADDED
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_class_name": "StableDiffusionPipeline",
|
3 |
+
"_diffusers_version": "0.7.2",
|
4 |
+
"feature_extractor": [
|
5 |
+
"transformers",
|
6 |
+
"CLIPFeatureExtractor"
|
7 |
+
],
|
8 |
+
"safety_checker": [
|
9 |
+
"stable_diffusion",
|
10 |
+
"StableDiffusionSafetyChecker"
|
11 |
+
],
|
12 |
+
"scheduler": [
|
13 |
+
"diffusers",
|
14 |
+
"PNDMScheduler"
|
15 |
+
],
|
16 |
+
"text_encoder": [
|
17 |
+
"transformers",
|
18 |
+
"CLIPTextModel"
|
19 |
+
],
|
20 |
+
"tokenizer": [
|
21 |
+
"transformers",
|
22 |
+
"CLIPTokenizer"
|
23 |
+
],
|
24 |
+
"unet": [
|
25 |
+
"diffusers",
|
26 |
+
"UNet2DConditionModel"
|
27 |
+
],
|
28 |
+
"vae": [
|
29 |
+
"diffusers",
|
30 |
+
"AutoencoderKL"
|
31 |
+
]
|
32 |
+
}
|
pipeline.py
ADDED
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
class PreTrainedPipeline:
    """Template inference pipeline whose two hooks are not implemented yet."""

    def __init__(self, path=""):
        """Preload everything inference will need.

        IMPLEMENT_THIS: load the model, processors and tokenizer here. This
        function is only called once, so all heavy processing and I/O belongs
        here.
        """
        message = "Please implement PreTrainedPipeline __init__ function"
        raise NotImplementedError(message)

    def __call__(self, inputs: str):
        """
        Args:
            inputs (:obj:`str`):
                a string containing some text
        Return:
            A :obj:`PIL.Image` with the raw image representation as PIL.
        """
        # IMPLEMENT_THIS
        message = "Please implement PreTrainedPipeline __call__ function"
        raise NotImplementedError(message)
|
requirements.txt
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
--extra-index-url https://download.pytorch.org/whl/cu113
|
2 |
+
torch
|
3 |
+
diffusers
|
4 |
+
transformers
|
5 |
+
accelerate
|
6 |
+
ftfy
|
safety_checker/config.json
ADDED
@@ -0,0 +1,179 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_commit_hash": "4bb648a606ef040e7685bde262611766a5fdd67b",
|
3 |
+
"_name_or_path": "CompVis/stable-diffusion-safety-checker",
|
4 |
+
"architectures": [
|
5 |
+
"StableDiffusionSafetyChecker"
|
6 |
+
],
|
7 |
+
"initializer_factor": 1.0,
|
8 |
+
"logit_scale_init_value": 2.6592,
|
9 |
+
"model_type": "clip",
|
10 |
+
"projection_dim": 768,
|
11 |
+
"text_config": {
|
12 |
+
"_name_or_path": "",
|
13 |
+
"add_cross_attention": false,
|
14 |
+
"architectures": null,
|
15 |
+
"attention_dropout": 0.0,
|
16 |
+
"bad_words_ids": null,
|
17 |
+
"begin_suppress_tokens": null,
|
18 |
+
"bos_token_id": 0,
|
19 |
+
"chunk_size_feed_forward": 0,
|
20 |
+
"cross_attention_hidden_size": null,
|
21 |
+
"decoder_start_token_id": null,
|
22 |
+
"diversity_penalty": 0.0,
|
23 |
+
"do_sample": false,
|
24 |
+
"dropout": 0.0,
|
25 |
+
"early_stopping": false,
|
26 |
+
"encoder_no_repeat_ngram_size": 0,
|
27 |
+
"eos_token_id": 2,
|
28 |
+
"exponential_decay_length_penalty": null,
|
29 |
+
"finetuning_task": null,
|
30 |
+
"forced_bos_token_id": null,
|
31 |
+
"forced_eos_token_id": null,
|
32 |
+
"hidden_act": "quick_gelu",
|
33 |
+
"hidden_size": 768,
|
34 |
+
"id2label": {
|
35 |
+
"0": "LABEL_0",
|
36 |
+
"1": "LABEL_1"
|
37 |
+
},
|
38 |
+
"initializer_factor": 1.0,
|
39 |
+
"initializer_range": 0.02,
|
40 |
+
"intermediate_size": 3072,
|
41 |
+
"is_decoder": false,
|
42 |
+
"is_encoder_decoder": false,
|
43 |
+
"label2id": {
|
44 |
+
"LABEL_0": 0,
|
45 |
+
"LABEL_1": 1
|
46 |
+
},
|
47 |
+
"layer_norm_eps": 1e-05,
|
48 |
+
"length_penalty": 1.0,
|
49 |
+
"max_length": 20,
|
50 |
+
"max_position_embeddings": 77,
|
51 |
+
"min_length": 0,
|
52 |
+
"model_type": "clip_text_model",
|
53 |
+
"no_repeat_ngram_size": 0,
|
54 |
+
"num_attention_heads": 12,
|
55 |
+
"num_beam_groups": 1,
|
56 |
+
"num_beams": 1,
|
57 |
+
"num_hidden_layers": 12,
|
58 |
+
"num_return_sequences": 1,
|
59 |
+
"output_attentions": false,
|
60 |
+
"output_hidden_states": false,
|
61 |
+
"output_scores": false,
|
62 |
+
"pad_token_id": 1,
|
63 |
+
"prefix": null,
|
64 |
+
"problem_type": null,
|
65 |
+
"pruned_heads": {},
|
66 |
+
"remove_invalid_values": false,
|
67 |
+
"repetition_penalty": 1.0,
|
68 |
+
"return_dict": true,
|
69 |
+
"return_dict_in_generate": false,
|
70 |
+
"sep_token_id": null,
|
71 |
+
"suppress_tokens": null,
|
72 |
+
"task_specific_params": null,
|
73 |
+
"temperature": 1.0,
|
74 |
+
"tf_legacy_loss": false,
|
75 |
+
"tie_encoder_decoder": false,
|
76 |
+
"tie_word_embeddings": true,
|
77 |
+
"tokenizer_class": null,
|
78 |
+
"top_k": 50,
|
79 |
+
"top_p": 1.0,
|
80 |
+
"torch_dtype": null,
|
81 |
+
"torchscript": false,
|
82 |
+
"transformers_version": "4.24.0",
|
83 |
+
"typical_p": 1.0,
|
84 |
+
"use_bfloat16": false,
|
85 |
+
"vocab_size": 49408
|
86 |
+
},
|
87 |
+
"text_config_dict": {
|
88 |
+
"hidden_size": 768,
|
89 |
+
"intermediate_size": 3072,
|
90 |
+
"num_attention_heads": 12,
|
91 |
+
"num_hidden_layers": 12
|
92 |
+
},
|
93 |
+
"torch_dtype": "float32",
|
94 |
+
"transformers_version": null,
|
95 |
+
"vision_config": {
|
96 |
+
"_name_or_path": "",
|
97 |
+
"add_cross_attention": false,
|
98 |
+
"architectures": null,
|
99 |
+
"attention_dropout": 0.0,
|
100 |
+
"bad_words_ids": null,
|
101 |
+
"begin_suppress_tokens": null,
|
102 |
+
"bos_token_id": null,
|
103 |
+
"chunk_size_feed_forward": 0,
|
104 |
+
"cross_attention_hidden_size": null,
|
105 |
+
"decoder_start_token_id": null,
|
106 |
+
"diversity_penalty": 0.0,
|
107 |
+
"do_sample": false,
|
108 |
+
"dropout": 0.0,
|
109 |
+
"early_stopping": false,
|
110 |
+
"encoder_no_repeat_ngram_size": 0,
|
111 |
+
"eos_token_id": null,
|
112 |
+
"exponential_decay_length_penalty": null,
|
113 |
+
"finetuning_task": null,
|
114 |
+
"forced_bos_token_id": null,
|
115 |
+
"forced_eos_token_id": null,
|
116 |
+
"hidden_act": "quick_gelu",
|
117 |
+
"hidden_size": 1024,
|
118 |
+
"id2label": {
|
119 |
+
"0": "LABEL_0",
|
120 |
+
"1": "LABEL_1"
|
121 |
+
},
|
122 |
+
"image_size": 224,
|
123 |
+
"initializer_factor": 1.0,
|
124 |
+
"initializer_range": 0.02,
|
125 |
+
"intermediate_size": 4096,
|
126 |
+
"is_decoder": false,
|
127 |
+
"is_encoder_decoder": false,
|
128 |
+
"label2id": {
|
129 |
+
"LABEL_0": 0,
|
130 |
+
"LABEL_1": 1
|
131 |
+
},
|
132 |
+
"layer_norm_eps": 1e-05,
|
133 |
+
"length_penalty": 1.0,
|
134 |
+
"max_length": 20,
|
135 |
+
"min_length": 0,
|
136 |
+
"model_type": "clip_vision_model",
|
137 |
+
"no_repeat_ngram_size": 0,
|
138 |
+
"num_attention_heads": 16,
|
139 |
+
"num_beam_groups": 1,
|
140 |
+
"num_beams": 1,
|
141 |
+
"num_channels": 3,
|
142 |
+
"num_hidden_layers": 24,
|
143 |
+
"num_return_sequences": 1,
|
144 |
+
"output_attentions": false,
|
145 |
+
"output_hidden_states": false,
|
146 |
+
"output_scores": false,
|
147 |
+
"pad_token_id": null,
|
148 |
+
"patch_size": 14,
|
149 |
+
"prefix": null,
|
150 |
+
"problem_type": null,
|
151 |
+
"pruned_heads": {},
|
152 |
+
"remove_invalid_values": false,
|
153 |
+
"repetition_penalty": 1.0,
|
154 |
+
"return_dict": true,
|
155 |
+
"return_dict_in_generate": false,
|
156 |
+
"sep_token_id": null,
|
157 |
+
"suppress_tokens": null,
|
158 |
+
"task_specific_params": null,
|
159 |
+
"temperature": 1.0,
|
160 |
+
"tf_legacy_loss": false,
|
161 |
+
"tie_encoder_decoder": false,
|
162 |
+
"tie_word_embeddings": true,
|
163 |
+
"tokenizer_class": null,
|
164 |
+
"top_k": 50,
|
165 |
+
"top_p": 1.0,
|
166 |
+
"torch_dtype": null,
|
167 |
+
"torchscript": false,
|
168 |
+
"transformers_version": "4.24.0",
|
169 |
+
"typical_p": 1.0,
|
170 |
+
"use_bfloat16": false
|
171 |
+
},
|
172 |
+
"vision_config_dict": {
|
173 |
+
"hidden_size": 1024,
|
174 |
+
"intermediate_size": 4096,
|
175 |
+
"num_attention_heads": 16,
|
176 |
+
"num_hidden_layers": 24,
|
177 |
+
"patch_size": 14
|
178 |
+
}
|
179 |
+
}
|
safety_checker/pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:16d28f2b37109f222cdc33620fdd262102ac32112be0352a7f77e9614b35a394
|
3 |
+
size 1216064769
|
scheduler/scheduler_config.json
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_class_name": "PNDMScheduler",
|
3 |
+
"_diffusers_version": "0.7.2",
|
4 |
+
"beta_end": 0.012,
|
5 |
+
"beta_schedule": "scaled_linear",
|
6 |
+
"beta_start": 0.00085,
|
7 |
+
"clip_sample": false,
|
8 |
+
"num_train_timesteps": 1000,
|
9 |
+
"set_alpha_to_one": false,
|
10 |
+
"skip_prk_steps": true,
|
11 |
+
"steps_offset": 1,
|
12 |
+
"trained_betas": null
|
13 |
+
}
|
text_encoder/config.json
ADDED
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "openai/clip-vit-large-patch14",
|
3 |
+
"architectures": [
|
4 |
+
"CLIPTextModel"
|
5 |
+
],
|
6 |
+
"attention_dropout": 0.0,
|
7 |
+
"bos_token_id": 0,
|
8 |
+
"dropout": 0.0,
|
9 |
+
"eos_token_id": 2,
|
10 |
+
"hidden_act": "quick_gelu",
|
11 |
+
"hidden_size": 768,
|
12 |
+
"initializer_factor": 1.0,
|
13 |
+
"initializer_range": 0.02,
|
14 |
+
"intermediate_size": 3072,
|
15 |
+
"layer_norm_eps": 1e-05,
|
16 |
+
"max_position_embeddings": 77,
|
17 |
+
"model_type": "clip_text_model",
|
18 |
+
"num_attention_heads": 12,
|
19 |
+
"num_hidden_layers": 12,
|
20 |
+
"pad_token_id": 1,
|
21 |
+
"projection_dim": 768,
|
22 |
+
"torch_dtype": "float32",
|
23 |
+
"transformers_version": "4.24.0",
|
24 |
+
"vocab_size": 49408
|
25 |
+
}
|
text_encoder/pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:83b9e059a2af92f8bdff755be76dcaa30b7473346541aadee05707a601cc616e
|
3 |
+
size 492307041
|
tokenizer/merges.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
tokenizer/special_tokens_map.json
ADDED
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"bos_token": {
|
3 |
+
"content": "<|startoftext|>",
|
4 |
+
"lstrip": false,
|
5 |
+
"normalized": true,
|
6 |
+
"rstrip": false,
|
7 |
+
"single_word": false
|
8 |
+
},
|
9 |
+
"eos_token": {
|
10 |
+
"content": "<|endoftext|>",
|
11 |
+
"lstrip": false,
|
12 |
+
"normalized": true,
|
13 |
+
"rstrip": false,
|
14 |
+
"single_word": false
|
15 |
+
},
|
16 |
+
"pad_token": "<|endoftext|>",
|
17 |
+
"unk_token": {
|
18 |
+
"content": "<|endoftext|>",
|
19 |
+
"lstrip": false,
|
20 |
+
"normalized": true,
|
21 |
+
"rstrip": false,
|
22 |
+
"single_word": false
|
23 |
+
}
|
24 |
+
}
|
tokenizer/tokenizer_config.json
ADDED
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"add_prefix_space": false,
|
3 |
+
"bos_token": {
|
4 |
+
"__type": "AddedToken",
|
5 |
+
"content": "<|startoftext|>",
|
6 |
+
"lstrip": false,
|
7 |
+
"normalized": true,
|
8 |
+
"rstrip": false,
|
9 |
+
"single_word": false
|
10 |
+
},
|
11 |
+
"do_lower_case": true,
|
12 |
+
"eos_token": {
|
13 |
+
"__type": "AddedToken",
|
14 |
+
"content": "<|endoftext|>",
|
15 |
+
"lstrip": false,
|
16 |
+
"normalized": true,
|
17 |
+
"rstrip": false,
|
18 |
+
"single_word": false
|
19 |
+
},
|
20 |
+
"errors": "replace",
|
21 |
+
"model_max_length": 77,
|
22 |
+
"name_or_path": "openai/clip-vit-large-patch14",
|
23 |
+
"pad_token": "<|endoftext|>",
|
24 |
+
"special_tokens_map_file": "./special_tokens_map.json",
|
25 |
+
"tokenizer_class": "CLIPTokenizer",
|
26 |
+
"unk_token": {
|
27 |
+
"__type": "AddedToken",
|
28 |
+
"content": "<|endoftext|>",
|
29 |
+
"lstrip": false,
|
30 |
+
"normalized": true,
|
31 |
+
"rstrip": false,
|
32 |
+
"single_word": false
|
33 |
+
}
|
34 |
+
}
|
tokenizer/vocab.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
unet/config.json
ADDED
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_class_name": "UNet2DConditionModel",
|
3 |
+
"_diffusers_version": "0.7.2",
|
4 |
+
"act_fn": "silu",
|
5 |
+
"attention_head_dim": 8,
|
6 |
+
"block_out_channels": [
|
7 |
+
320,
|
8 |
+
640,
|
9 |
+
1280,
|
10 |
+
1280
|
11 |
+
],
|
12 |
+
"center_input_sample": false,
|
13 |
+
"cross_attention_dim": 768,
|
14 |
+
"down_block_types": [
|
15 |
+
"CrossAttnDownBlock2D",
|
16 |
+
"CrossAttnDownBlock2D",
|
17 |
+
"CrossAttnDownBlock2D",
|
18 |
+
"DownBlock2D"
|
19 |
+
],
|
20 |
+
"downsample_padding": 1,
|
21 |
+
"flip_sin_to_cos": true,
|
22 |
+
"freq_shift": 0,
|
23 |
+
"in_channels": 4,
|
24 |
+
"layers_per_block": 2,
|
25 |
+
"mid_block_scale_factor": 1,
|
26 |
+
"norm_eps": 1e-05,
|
27 |
+
"norm_num_groups": 32,
|
28 |
+
"out_channels": 4,
|
29 |
+
"sample_size": 64,
|
30 |
+
"up_block_types": [
|
31 |
+
"UpBlock2D",
|
32 |
+
"CrossAttnUpBlock2D",
|
33 |
+
"CrossAttnUpBlock2D",
|
34 |
+
"CrossAttnUpBlock2D"
|
35 |
+
]
|
36 |
+
}
|
unet/diffusion_pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d9579b198d3f04fc72f2b0423f72f581848abf37fb620da0ea5eee214ddfc0b5
|
3 |
+
size 3438366373
|
vae/config.json
ADDED
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_class_name": "AutoencoderKL",
|
3 |
+
"_diffusers_version": "0.7.2",
|
4 |
+
"act_fn": "silu",
|
5 |
+
"block_out_channels": [
|
6 |
+
128,
|
7 |
+
256,
|
8 |
+
512,
|
9 |
+
512
|
10 |
+
],
|
11 |
+
"down_block_types": [
|
12 |
+
"DownEncoderBlock2D",
|
13 |
+
"DownEncoderBlock2D",
|
14 |
+
"DownEncoderBlock2D",
|
15 |
+
"DownEncoderBlock2D"
|
16 |
+
],
|
17 |
+
"in_channels": 3,
|
18 |
+
"latent_channels": 4,
|
19 |
+
"layers_per_block": 2,
|
20 |
+
"norm_num_groups": 32,
|
21 |
+
"out_channels": 3,
|
22 |
+
"sample_size": 256,
|
23 |
+
"up_block_types": [
|
24 |
+
"UpDecoderBlock2D",
|
25 |
+
"UpDecoderBlock2D",
|
26 |
+
"UpDecoderBlock2D",
|
27 |
+
"UpDecoderBlock2D"
|
28 |
+
]
|
29 |
+
}
|
vae/diffusion_pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8b35db0323c11c073d28be039e178331d36efe1fd40f4a9d2dd68b74b62040b4
|
3 |
+
size 334711857
|