minathor commited on
Commit
d4f2a59
·
1 Parent(s): afd06ec

Upload 20 files

Browse files
README.md CHANGED
@@ -1,3 +1,59 @@
1
  ---
2
- license: openrail
 
 
 
 
 
 
 
 
 
 
 
 
3
  ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: Hassanblend1.5
3
+ emoji: 📚
4
+ colorFrom: green
5
+ colorTo: indigo
6
+ sdk: gradio
7
+ sdk_version: 3.11.0
8
+ app_file: app.py
9
+ pinned: false
10
+ thumbnail: https://i.imgur.com/PVThZvk.png
11
+ license: creativeml-openrail-m
12
+ tags:
13
+ - text-to-image
14
+ inference: true
15
  ---
16
+
17
+
18
+ # HassanBlend1.5
19
+ I am Hassan, the creator of HassanBlend; the latest version is currently 1.5. I continue to iterate on and improve this model over time. Feel free to check out our Discord or Rentry page for more examples with prompts and generated outputs.
20
+
21
+ This blend is fine-tuned from SD1.5 on a dataset of thousands of images. In addition, a few minor merges were added to soften it up and increase its creativity.
22
+ I also have some custom-created content, such as enhancement hypernetworks and embeddings, available only to Patreon or Ko-fi subscribers on my pages below.
23
+ <b> Links </b><br>
24
+ <b>Patreon</b>
25
+ <a href="https://www.patreon.com/sd_hassan" target="_blank"><img src="https://i.imgur.com/sR32SqJ.jpg"></img></a>
26
+ <b>KoFi</b>
27
+ <a href="https://ko-fi.com/sdhassan" target="_blank"><img src="https://i.imgur.com/0P7CTN4.png"></img></a>
28
+ <b>Discord</b>
29
+ <a href="https://discord.gg/sdmodelers" target="_blank"><img src="https://i.imgur.com/HC1iHwg.png"></img></a>
30
+ ### Quicklinks:
31
+
32
+ * [Latest Setup](https://rentry.org/sdhassan#current-setup)
33
+ * [HassanBlend Model Finetune Updates](https://rentry.org/sdhassan#hassanblend-finetuning-updates)
34
+ * [Latest Patreon Posts](https://rentry.org/sdhassan#patreon-posts)
35
+ * [Models](https://rentry.org/sdhassan#models)
36
+ * [HassanBlend1.5](https://rentry.org/sdhassan#hassanblend15-downloads)
37
+ * [HassanBlend1.4](https://rentry.org/sdhassan#hassanblend14-downloads)
38
+ * [Prompts](https://rentry.org/sdhassan#prompts)
39
+ * [Photorealistic Tips](https://rentry.org/sdhassan#tips-for-photorealistic-images)
40
+ * [Embeddings](https://rentry.org/sdhassan#embeddings)
41
+ * [Hypernetworks](https://rentry.org/sdhassan#hypernetworks)
42
+ * [Wildcards](https://rentry.org/sdhassan#wildcards-i-made)
43
+ * [MyTools](https://rentry.org/sdhassan#my-tools)
44
+ * [Settings I use](https://rentry.org/sdhassan#settings)
45
+
46
+
47
+
48
+ Model details and examples with sample prompts: https://rentry.org/sdhassan
49
+
50
+
51
+ ## License
52
+
53
+ This model is open access and available to all, with a CreativeML OpenRAIL-M license further specifying rights and usage.
54
+ The CreativeML OpenRAIL-M License specifies:
55
+
56
+ 1. You can't use the model to deliberately produce or share illegal or harmful outputs or content
57
+ 2. The authors claim no rights on the outputs you generate; you are free to use them and are accountable for their use, which must not go against the provisions set in the license
58
+ 3. You may redistribute the weights and use the model commercially and/or as a service. If you do, please be aware that you have to include the same use restrictions as the ones in the license and share a copy of the CreativeML OpenRAIL-M license with all your users (please read the license entirely and carefully)
59
+ [Please read the full license here](https://huggingface.co/spaces/CompVis/stable-diffusion-license)
app.py ADDED
@@ -0,0 +1,154 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from diffusers import StableDiffusionPipeline, StableDiffusionImg2ImgPipeline, DPMSolverMultistepScheduler
2
+ import gradio as gr
3
+ import torch
4
+ from PIL import Image
5
+
6
# Hub repository the Stable Diffusion weights are loaded from.
model_id = 'hassanblend/HassanBlend1.5'
# Styling tokens prepended to prompts when auto-prefixing is enabled; empty here.
prefix = ''

# DPM-Solver++ multistep scheduler. The same instance is handed to both
# pipelines below via the `scheduler=` argument.
scheduler = DPMSolverMultistepScheduler(
    beta_start=0.00085,
    beta_end=0.012,
    beta_schedule="scaled_linear",
    num_train_timesteps=1000,
    trained_betas=None,
    predict_epsilon=True,
    thresholding=False,
    algorithm_type="dpmsolver++",
    solver_type="midpoint",
    lower_order_final=True,
)

# Text-to-image pipeline. Half precision is used only when CUDA is available;
# otherwise the weights are loaded as float32.
pipe = StableDiffusionPipeline.from_pretrained(
    model_id,
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
    scheduler=scheduler)

# Image-to-image pipeline, loaded separately from the same repository with the
# same dtype rule and scheduler.
pipe_i2i = StableDiffusionImg2ImgPipeline.from_pretrained(
    model_id,
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
    scheduler=scheduler)

# Move both pipelines to the GPU when one is present; otherwise they stay where
# from_pretrained put them.
if torch.cuda.is_available():
    pipe = pipe.to("cuda")
    pipe_i2i = pipe_i2i.to("cuda")
35
+
36
def error_str(error, title="Error"):
    """Format *error* as a small Markdown section headed by *title*.

    Returns an empty string when *error* is falsy (``None``, ``""``, ...),
    so the result can be written straight into a Markdown output component.
    """
    if not error:
        return ""
    return f"""#### {title}
{error}"""
39
+
40
def inference(prompt, guidance, steps, width=512, height=512, seed=0, img=None, strength=0.5, neg_prompt="", auto_prefix=True):
    """Handle one generation request coming from the UI.

    Args:
        prompt: Text prompt entered by the user.
        guidance: Guidance scale forwarded to the pipeline.
        steps: Number of inference steps.
        width, height: Requested output size in pixels.
        seed: RNG seed; ``0`` means "do not seed" (no generator is passed).
        img: Optional input image; when given, image-to-image is used.
        strength: Image-to-image transformation strength.
        neg_prompt: Negative prompt.
        auto_prefix: Whether to prepend the module-level ``prefix`` tokens.

    Returns:
        ``(image, None)`` on success, or ``(None, markdown_error)`` when the
        pipeline call raises.
    """
    generator = None
    if seed != 0:
        # Create the generator on the device the pipelines actually run on.
        # A hard-coded 'cuda' device raises on CPU-only hosts.
        device = 'cuda' if torch.cuda.is_available() else 'cpu'
        # manual_seed requires an int; UI sliders may hand over a float.
        generator = torch.Generator(device).manual_seed(int(seed))

    # Only prepend the prefix when there is one, so an empty prefix does not
    # leave a stray leading space in the prompt.
    if auto_prefix and prefix:
        prompt = f"{prefix} {prompt}"

    try:
        if img is not None:
            return img_to_img(prompt, neg_prompt, img, strength, guidance, steps, width, height, generator), None
        return txt_to_img(prompt, neg_prompt, guidance, steps, width, height, generator), None
    except Exception as e:
        # UI boundary: surface the failure in the Markdown error box instead
        # of crashing the request.
        return None, error_str(e)
52
+
53
def txt_to_img(prompt, neg_prompt, guidance, steps, width, height, generator):
    """Generate an image from text with the module-level ``pipe``.

    ``steps`` is coerced to ``int``; every other argument is forwarded
    unchanged. The pipeline output is passed through
    ``replace_nsfw_images`` and the resulting image is returned.
    """
    call_kwargs = dict(
        negative_prompt=neg_prompt,
        num_inference_steps=int(steps),
        guidance_scale=guidance,
        width=width,
        height=height,
        generator=generator,
    )
    output = pipe(prompt, **call_kwargs)
    return replace_nsfw_images(output)
65
+
66
def img_to_img(prompt, neg_prompt, img, strength, guidance, steps, width, height, generator):
    """Transform *img* according to *prompt* with the module-level ``pipe_i2i``.

    The input image is first rescaled, keeping its aspect ratio, by the
    smaller of ``height / img.height`` and ``width / img.width`` so that it
    fits inside the requested ``width`` x ``height`` box. The pipeline output
    is passed through ``replace_nsfw_images`` and the resulting image is
    returned.
    """
    scale = min(height / img.height, width / img.width)
    target_size = (int(img.width * scale), int(img.height * scale))
    resized = img.resize(target_size, Image.LANCZOS)

    call_kwargs = dict(
        negative_prompt=neg_prompt,
        init_image=resized,
        num_inference_steps=int(steps),
        strength=strength,
        guidance_scale=guidance,
        width=width,
        height=height,
        generator=generator,
    )
    output = pipe_i2i(prompt, **call_kwargs)
    return replace_nsfw_images(output)
82
+
83
def replace_nsfw_images(results):
    """Swap flagged images for the ``nsfw.png`` placeholder and return the first image.

    Args:
        results: Pipeline output exposing ``images`` (a mutable sequence) and
            ``nsfw_content_detected`` (a per-image sequence of flags, or
            ``None`` when no safety check was run).

    Returns:
        ``results.images[0]`` after flagged entries have been replaced in place.
    """
    flags = results.nsfw_content_detected
    if flags is None:
        # No safety-checker verdicts available: nothing to replace.
        flags = ()

    # zip() bounds the walk by the shorter of images/flags, so a length
    # mismatch cannot raise IndexError.
    for index, flagged in zip(range(len(results.images)), flags):
        if flagged:
            results.images[index] = Image.open("nsfw.png")
    return results.images[0]
89
+
90
# Custom CSS passed to gr.Blocks: header layout, underlined links, tab margins
# and a minimum height for #gallery.
css = """.main-div div{display:inline-flex;align-items:center;gap:.8rem;font-size:1.75rem}.main-div div h1{font-weight:900;margin-bottom:7px}.main-div p{margin-bottom:10px;font-size:94%}a{text-decoration:underline}.tabs{margin-top:0;margin-bottom:0}#gallery{min-height:20rem}
"""
# NOTE(review): the nesting of the `with` blocks below was reconstructed from a
# flattened diff view in which indentation was lost — confirm against app.py.
with gr.Blocks(css=css) as demo:
    # Page header: title, link to the model card, and whether CUDA is available.
    gr.HTML(
        f"""
<div class="main-div">
<div>
<h1>Hassanblend1.5</h1>
</div>
<p>
Demo for <a href="https://huggingface.co/hassanblend/HassanBlend1.5">Hassanblend1.5</a> Stable Diffusion model.<br>
Add the following tokens to your prompts for the model to work properly: <b></b>.
</p>
Running on <b>{"GPU 🔥" if torch.cuda.is_available() else "CPU 🥶"}</b>
</div>
"""
    )
    with gr.Row():

        # Left column: prompt entry, generate button, result image and error box.
        with gr.Column(scale=55):
            with gr.Group():
                with gr.Row():
                    prompt = gr.Textbox(label="Prompt", show_label=False, max_lines=2,placeholder=f"{prefix} [your prompt]").style(container=False)
                    generate = gr.Button(value="Generate").style(rounded=(False, True, True, False))

                image_out = gr.Image(height=512)
            # Receives the Markdown produced by error_str() when inference fails.
            error_output = gr.Markdown()

        # Right column: generation options and the optional image-to-image input.
        with gr.Column(scale=45):
            with gr.Tab("Options"):
                with gr.Group():
                    neg_prompt = gr.Textbox(label="Negative prompt", placeholder="What to exclude from the image")
                    auto_prefix = gr.Checkbox(label="Prefix styling tokens automatically ()", value=True)

                    with gr.Row():
                        guidance = gr.Slider(label="Guidance scale", value=7.5, maximum=15)
                        steps = gr.Slider(label="Steps", value=25, minimum=2, maximum=75, step=1)

                    with gr.Row():
                        width = gr.Slider(label="Width", value=512, minimum=64, maximum=1024, step=8)
                        height = gr.Slider(label="Height", value=512, minimum=64, maximum=1024, step=8)

                    # Seed 0 is treated by inference() as "no fixed seed".
                    seed = gr.Slider(0, 2147483647, label='Seed (0 = random)', value=0, step=1)

            with gr.Tab("Image to image"):
                with gr.Group():
                    # When an image is supplied here, inference() takes the img2img path.
                    image = gr.Image(label="Image", height=256, tool="editor", type="pil")
                    strength = gr.Slider(label="Transformation strength", minimum=0, maximum=1, step=0.01, value=0.5)

    # Keep the prompt placeholder in sync with the auto-prefix checkbox.
    auto_prefix.change(lambda x: gr.update(placeholder=f"{prefix} [your prompt]" if x else "[Your prompt]"), inputs=auto_prefix, outputs=prompt, queue=False)

    # Component order must match the positional parameters of inference().
    inputs = [prompt, guidance, steps, width, height, seed, image, strength, neg_prompt, auto_prefix]
    # inference() returns (image, error_markdown) in this order.
    outputs = [image_out, error_output]
    # Both pressing Enter in the prompt box and clicking the button run inference.
    prompt.submit(inference, inputs=inputs, outputs=outputs)
    generate.click(inference, inputs=inputs, outputs=outputs)

    # Footer crediting the tool that generated this Space.
    gr.HTML("""
<div style="border-top: 1px solid #303030;">
<br>
<p>This space was created using <a href="https://huggingface.co/spaces/anzorq/sd-space-creator">SD Space Creator</a>.</p>
</div>
""")

# Enable request queuing with concurrency_count=1 (presumably so only one
# generation runs at a time — confirm against the Gradio version in use),
# then start the app.
demo.queue(concurrency_count=1)
demo.launch()
feature_extractor/preprocessor_config.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "crop_size": 224,
3
+ "do_center_crop": true,
4
+ "do_convert_rgb": true,
5
+ "do_normalize": true,
6
+ "do_resize": true,
7
+ "feature_extractor_type": "CLIPFeatureExtractor",
8
+ "image_mean": [
9
+ 0.48145466,
10
+ 0.4578275,
11
+ 0.40821073
12
+ ],
13
+ "image_std": [
14
+ 0.26862954,
15
+ 0.26130258,
16
+ 0.27577711
17
+ ],
18
+ "resample": 3,
19
+ "size": 224
20
+ }
model_index.json ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_class_name": "StableDiffusionPipeline",
3
+ "_diffusers_version": "0.7.2",
4
+ "feature_extractor": [
5
+ "transformers",
6
+ "CLIPFeatureExtractor"
7
+ ],
8
+ "safety_checker": [
9
+ "stable_diffusion",
10
+ "StableDiffusionSafetyChecker"
11
+ ],
12
+ "scheduler": [
13
+ "diffusers",
14
+ "PNDMScheduler"
15
+ ],
16
+ "text_encoder": [
17
+ "transformers",
18
+ "CLIPTextModel"
19
+ ],
20
+ "tokenizer": [
21
+ "transformers",
22
+ "CLIPTokenizer"
23
+ ],
24
+ "unet": [
25
+ "diffusers",
26
+ "UNet2DConditionModel"
27
+ ],
28
+ "vae": [
29
+ "diffusers",
30
+ "AutoencoderKL"
31
+ ]
32
+ }
pipeline.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
class PreTrainedPipeline:
    """Placeholder pipeline template: both entry points still have to be implemented."""

    def __init__(self, path=""):
        # IMPLEMENT_THIS
        # Load everything needed at inference time here (model, processors,
        # tokenizer, ...). This constructor is only called once, so all the
        # heavy processing and I/O belongs in it.
        message = "Please implement PreTrainedPipeline __init__ function"
        raise NotImplementedError(message)

    def __call__(self, inputs: str):
        """
        Args:
            inputs (:obj:`str`):
                a string containing some text
        Return:
            A :obj:`PIL.Image` with the raw image representation as PIL.
        """
        # IMPLEMENT_THIS
        message = "Please implement PreTrainedPipeline __call__ function"
        raise NotImplementedError(message)
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ --extra-index-url https://download.pytorch.org/whl/cu113
2
+ torch
3
+ diffusers
4
+ transformers
5
+ accelerate
6
+ ftfy
safety_checker/config.json ADDED
@@ -0,0 +1,179 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_commit_hash": "4bb648a606ef040e7685bde262611766a5fdd67b",
3
+ "_name_or_path": "CompVis/stable-diffusion-safety-checker",
4
+ "architectures": [
5
+ "StableDiffusionSafetyChecker"
6
+ ],
7
+ "initializer_factor": 1.0,
8
+ "logit_scale_init_value": 2.6592,
9
+ "model_type": "clip",
10
+ "projection_dim": 768,
11
+ "text_config": {
12
+ "_name_or_path": "",
13
+ "add_cross_attention": false,
14
+ "architectures": null,
15
+ "attention_dropout": 0.0,
16
+ "bad_words_ids": null,
17
+ "begin_suppress_tokens": null,
18
+ "bos_token_id": 0,
19
+ "chunk_size_feed_forward": 0,
20
+ "cross_attention_hidden_size": null,
21
+ "decoder_start_token_id": null,
22
+ "diversity_penalty": 0.0,
23
+ "do_sample": false,
24
+ "dropout": 0.0,
25
+ "early_stopping": false,
26
+ "encoder_no_repeat_ngram_size": 0,
27
+ "eos_token_id": 2,
28
+ "exponential_decay_length_penalty": null,
29
+ "finetuning_task": null,
30
+ "forced_bos_token_id": null,
31
+ "forced_eos_token_id": null,
32
+ "hidden_act": "quick_gelu",
33
+ "hidden_size": 768,
34
+ "id2label": {
35
+ "0": "LABEL_0",
36
+ "1": "LABEL_1"
37
+ },
38
+ "initializer_factor": 1.0,
39
+ "initializer_range": 0.02,
40
+ "intermediate_size": 3072,
41
+ "is_decoder": false,
42
+ "is_encoder_decoder": false,
43
+ "label2id": {
44
+ "LABEL_0": 0,
45
+ "LABEL_1": 1
46
+ },
47
+ "layer_norm_eps": 1e-05,
48
+ "length_penalty": 1.0,
49
+ "max_length": 20,
50
+ "max_position_embeddings": 77,
51
+ "min_length": 0,
52
+ "model_type": "clip_text_model",
53
+ "no_repeat_ngram_size": 0,
54
+ "num_attention_heads": 12,
55
+ "num_beam_groups": 1,
56
+ "num_beams": 1,
57
+ "num_hidden_layers": 12,
58
+ "num_return_sequences": 1,
59
+ "output_attentions": false,
60
+ "output_hidden_states": false,
61
+ "output_scores": false,
62
+ "pad_token_id": 1,
63
+ "prefix": null,
64
+ "problem_type": null,
65
+ "pruned_heads": {},
66
+ "remove_invalid_values": false,
67
+ "repetition_penalty": 1.0,
68
+ "return_dict": true,
69
+ "return_dict_in_generate": false,
70
+ "sep_token_id": null,
71
+ "suppress_tokens": null,
72
+ "task_specific_params": null,
73
+ "temperature": 1.0,
74
+ "tf_legacy_loss": false,
75
+ "tie_encoder_decoder": false,
76
+ "tie_word_embeddings": true,
77
+ "tokenizer_class": null,
78
+ "top_k": 50,
79
+ "top_p": 1.0,
80
+ "torch_dtype": null,
81
+ "torchscript": false,
82
+ "transformers_version": "4.24.0",
83
+ "typical_p": 1.0,
84
+ "use_bfloat16": false,
85
+ "vocab_size": 49408
86
+ },
87
+ "text_config_dict": {
88
+ "hidden_size": 768,
89
+ "intermediate_size": 3072,
90
+ "num_attention_heads": 12,
91
+ "num_hidden_layers": 12
92
+ },
93
+ "torch_dtype": "float32",
94
+ "transformers_version": null,
95
+ "vision_config": {
96
+ "_name_or_path": "",
97
+ "add_cross_attention": false,
98
+ "architectures": null,
99
+ "attention_dropout": 0.0,
100
+ "bad_words_ids": null,
101
+ "begin_suppress_tokens": null,
102
+ "bos_token_id": null,
103
+ "chunk_size_feed_forward": 0,
104
+ "cross_attention_hidden_size": null,
105
+ "decoder_start_token_id": null,
106
+ "diversity_penalty": 0.0,
107
+ "do_sample": false,
108
+ "dropout": 0.0,
109
+ "early_stopping": false,
110
+ "encoder_no_repeat_ngram_size": 0,
111
+ "eos_token_id": null,
112
+ "exponential_decay_length_penalty": null,
113
+ "finetuning_task": null,
114
+ "forced_bos_token_id": null,
115
+ "forced_eos_token_id": null,
116
+ "hidden_act": "quick_gelu",
117
+ "hidden_size": 1024,
118
+ "id2label": {
119
+ "0": "LABEL_0",
120
+ "1": "LABEL_1"
121
+ },
122
+ "image_size": 224,
123
+ "initializer_factor": 1.0,
124
+ "initializer_range": 0.02,
125
+ "intermediate_size": 4096,
126
+ "is_decoder": false,
127
+ "is_encoder_decoder": false,
128
+ "label2id": {
129
+ "LABEL_0": 0,
130
+ "LABEL_1": 1
131
+ },
132
+ "layer_norm_eps": 1e-05,
133
+ "length_penalty": 1.0,
134
+ "max_length": 20,
135
+ "min_length": 0,
136
+ "model_type": "clip_vision_model",
137
+ "no_repeat_ngram_size": 0,
138
+ "num_attention_heads": 16,
139
+ "num_beam_groups": 1,
140
+ "num_beams": 1,
141
+ "num_channels": 3,
142
+ "num_hidden_layers": 24,
143
+ "num_return_sequences": 1,
144
+ "output_attentions": false,
145
+ "output_hidden_states": false,
146
+ "output_scores": false,
147
+ "pad_token_id": null,
148
+ "patch_size": 14,
149
+ "prefix": null,
150
+ "problem_type": null,
151
+ "pruned_heads": {},
152
+ "remove_invalid_values": false,
153
+ "repetition_penalty": 1.0,
154
+ "return_dict": true,
155
+ "return_dict_in_generate": false,
156
+ "sep_token_id": null,
157
+ "suppress_tokens": null,
158
+ "task_specific_params": null,
159
+ "temperature": 1.0,
160
+ "tf_legacy_loss": false,
161
+ "tie_encoder_decoder": false,
162
+ "tie_word_embeddings": true,
163
+ "tokenizer_class": null,
164
+ "top_k": 50,
165
+ "top_p": 1.0,
166
+ "torch_dtype": null,
167
+ "torchscript": false,
168
+ "transformers_version": "4.24.0",
169
+ "typical_p": 1.0,
170
+ "use_bfloat16": false
171
+ },
172
+ "vision_config_dict": {
173
+ "hidden_size": 1024,
174
+ "intermediate_size": 4096,
175
+ "num_attention_heads": 16,
176
+ "num_hidden_layers": 24,
177
+ "patch_size": 14
178
+ }
179
+ }
safety_checker/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:16d28f2b37109f222cdc33620fdd262102ac32112be0352a7f77e9614b35a394
3
+ size 1216064769
scheduler/scheduler_config.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_class_name": "PNDMScheduler",
3
+ "_diffusers_version": "0.7.2",
4
+ "beta_end": 0.012,
5
+ "beta_schedule": "scaled_linear",
6
+ "beta_start": 0.00085,
7
+ "clip_sample": false,
8
+ "num_train_timesteps": 1000,
9
+ "set_alpha_to_one": false,
10
+ "skip_prk_steps": true,
11
+ "steps_offset": 1,
12
+ "trained_betas": null
13
+ }
text_encoder/config.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "openai/clip-vit-large-patch14",
3
+ "architectures": [
4
+ "CLIPTextModel"
5
+ ],
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 0,
8
+ "dropout": 0.0,
9
+ "eos_token_id": 2,
10
+ "hidden_act": "quick_gelu",
11
+ "hidden_size": 768,
12
+ "initializer_factor": 1.0,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 3072,
15
+ "layer_norm_eps": 1e-05,
16
+ "max_position_embeddings": 77,
17
+ "model_type": "clip_text_model",
18
+ "num_attention_heads": 12,
19
+ "num_hidden_layers": 12,
20
+ "pad_token_id": 1,
21
+ "projection_dim": 768,
22
+ "torch_dtype": "float32",
23
+ "transformers_version": "4.24.0",
24
+ "vocab_size": 49408
25
+ }
text_encoder/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:83b9e059a2af92f8bdff755be76dcaa30b7473346541aadee05707a601cc616e
3
+ size 492307041
tokenizer/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer/special_tokens_map.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<|startoftext|>",
4
+ "lstrip": false,
5
+ "normalized": true,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "<|endoftext|>",
11
+ "lstrip": false,
12
+ "normalized": true,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": "<|endoftext|>",
17
+ "unk_token": {
18
+ "content": "<|endoftext|>",
19
+ "lstrip": false,
20
+ "normalized": true,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ }
24
+ }
tokenizer/tokenizer_config.json ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "bos_token": {
4
+ "__type": "AddedToken",
5
+ "content": "<|startoftext|>",
6
+ "lstrip": false,
7
+ "normalized": true,
8
+ "rstrip": false,
9
+ "single_word": false
10
+ },
11
+ "do_lower_case": true,
12
+ "eos_token": {
13
+ "__type": "AddedToken",
14
+ "content": "<|endoftext|>",
15
+ "lstrip": false,
16
+ "normalized": true,
17
+ "rstrip": false,
18
+ "single_word": false
19
+ },
20
+ "errors": "replace",
21
+ "model_max_length": 77,
22
+ "name_or_path": "openai/clip-vit-large-patch14",
23
+ "pad_token": "<|endoftext|>",
24
+ "special_tokens_map_file": "./special_tokens_map.json",
25
+ "tokenizer_class": "CLIPTokenizer",
26
+ "unk_token": {
27
+ "__type": "AddedToken",
28
+ "content": "<|endoftext|>",
29
+ "lstrip": false,
30
+ "normalized": true,
31
+ "rstrip": false,
32
+ "single_word": false
33
+ }
34
+ }
tokenizer/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
unet/config.json ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_class_name": "UNet2DConditionModel",
3
+ "_diffusers_version": "0.7.2",
4
+ "act_fn": "silu",
5
+ "attention_head_dim": 8,
6
+ "block_out_channels": [
7
+ 320,
8
+ 640,
9
+ 1280,
10
+ 1280
11
+ ],
12
+ "center_input_sample": false,
13
+ "cross_attention_dim": 768,
14
+ "down_block_types": [
15
+ "CrossAttnDownBlock2D",
16
+ "CrossAttnDownBlock2D",
17
+ "CrossAttnDownBlock2D",
18
+ "DownBlock2D"
19
+ ],
20
+ "downsample_padding": 1,
21
+ "flip_sin_to_cos": true,
22
+ "freq_shift": 0,
23
+ "in_channels": 4,
24
+ "layers_per_block": 2,
25
+ "mid_block_scale_factor": 1,
26
+ "norm_eps": 1e-05,
27
+ "norm_num_groups": 32,
28
+ "out_channels": 4,
29
+ "sample_size": 64,
30
+ "up_block_types": [
31
+ "UpBlock2D",
32
+ "CrossAttnUpBlock2D",
33
+ "CrossAttnUpBlock2D",
34
+ "CrossAttnUpBlock2D"
35
+ ]
36
+ }
unet/diffusion_pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d9579b198d3f04fc72f2b0423f72f581848abf37fb620da0ea5eee214ddfc0b5
3
+ size 3438366373
vae/config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_class_name": "AutoencoderKL",
3
+ "_diffusers_version": "0.7.2",
4
+ "act_fn": "silu",
5
+ "block_out_channels": [
6
+ 128,
7
+ 256,
8
+ 512,
9
+ 512
10
+ ],
11
+ "down_block_types": [
12
+ "DownEncoderBlock2D",
13
+ "DownEncoderBlock2D",
14
+ "DownEncoderBlock2D",
15
+ "DownEncoderBlock2D"
16
+ ],
17
+ "in_channels": 3,
18
+ "latent_channels": 4,
19
+ "layers_per_block": 2,
20
+ "norm_num_groups": 32,
21
+ "out_channels": 3,
22
+ "sample_size": 256,
23
+ "up_block_types": [
24
+ "UpDecoderBlock2D",
25
+ "UpDecoderBlock2D",
26
+ "UpDecoderBlock2D",
27
+ "UpDecoderBlock2D"
28
+ ]
29
+ }
vae/diffusion_pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8b35db0323c11c073d28be039e178331d36efe1fd40f4a9d2dd68b74b62040b4
3
+ size 334711857