Release sd-nano 2.1

Browse files

Files changed (15) hide show

README.md +37 -0
images_0.png +0 -0
images_1.png +0 -0
model_index.json +32 -0
scheduler/scheduler_config.json +14 -0
text_encoder/config.json +25 -0
text_encoder/flax_model.msgpack +3 -0
tokenizer/merges.txt +0 -0
tokenizer/special_tokens_map.json +24 -0
tokenizer/tokenizer_config.json +34 -0
tokenizer/vocab.json +0 -0
unet/config.json +47 -0
unet/diffusion_flax_model.msgpack +3 -0
vae/config.json +30 -0
vae/diffusion_flax_model.msgpack +3 -0

README.md CHANGED Viewed

@@ -1,3 +1,40 @@
 ---
 license: creativeml-openrail-m
 ---

 ---
 license: creativeml-openrail-m
+base_model: stabilityai/stable-diffusion-2-1-base
+tags:
+- stable-diffusion
+- stable-diffusion-diffusers
+- text-to-image
+- diffusers
+- jax-diffusers-event
+inference: true
 ---
+# Stable Diffusion Nano 2.1
+prompt: A watercolor painting of an otter
+![images_0](./images_0.png)
+prompt: Marvel MCU deadpool, red mask, red shirt, red gloves, black shoulders, black elbow pads, black legs, gold buckle, black belt, black mask, white eyes, black boots, fuji low light color 35mm film, downtown Osaka alley at night out of focus in background, neon lights
+![images_1](./images_1.png)
+## Training details
+All parameters were initialized from the [stabilityai/stable-diffusion-2-1-base][sd21] model. The U-Net was fine-tuned as follows:
+U-Net fine-tuning:
+- 200,000 steps, learning rate = 1e-5, batch size = 992 (248 per TPU).
+- 100,000 steps, SNR gamma = 5.0, learning rate = 1e-5, batch size = 992 (248 per TPU).
+- Trained on [LAION Improved Aesthetics 6plus][laion].
+
+[sd21]: https://huggingface.co/stabilityai/stable-diffusion-2-1-base
+[laion]: https://huggingface.co/datasets/ChristophSchuhmann/improved_aesthetics_6plus
+## License
+This model is open access and available to all, with a CreativeML OpenRAIL-M license further specifying rights and usage. The CreativeML OpenRAIL-M license specifies:
+- You can't use the model to deliberately produce or share illegal or harmful outputs or content.
+- The authors claim no rights on the outputs you generate; you are free to use them and are accountable for their use, which must not go against the provisions set in the license.
+- You may re-distribute the weights and use the model commercially and/or as a service. If you do, please be aware you have to include the same use restrictions as the ones in the license and share a copy of the CreativeML OpenRAIL-M to all your users (please read the license entirely and carefully). [Please read the full license here](https://huggingface.co/spaces/CompVis/stable-diffusion-license).

images_0.png ADDED Viewed

images_1.png ADDED Viewed

model_index.json ADDED Viewed

	@@ -0,0 +1,32 @@

+{
+  "_class_name": "FlaxStableDiffusionPipeline",
+  "_diffusers_version": "0.16.0.dev0",
+  "feature_extractor": [
+    null,
+    null
+  ],
+  "safety_checker": [
+    null,
+    null
+  ],
+  "scheduler": [
+    "diffusers",
+    "FlaxPNDMScheduler"
+  ],
+  "text_encoder": [
+    "transformers",
+    "FlaxCLIPTextModel"
+  ],
+  "tokenizer": [
+    "transformers",
+    "CLIPTokenizer"
+  ],
+  "unet": [
+    "diffusers",
+    "FlaxUNet2DConditionModel"
+  ],
+  "vae": [
+    "diffusers",
+    "FlaxAutoencoderKL"
+  ]
+}

scheduler/scheduler_config.json ADDED Viewed

	@@ -0,0 +1,14 @@

+{
+  "_class_name": "FlaxPNDMScheduler",
+  "_diffusers_version": "0.16.0.dev0",
+  "beta_end": 0.012,
+  "beta_schedule": "scaled_linear",
+  "beta_start": 0.00085,
+  "clip_sample": false,
+  "num_train_timesteps": 1000,
+  "prediction_type": "epsilon",
+  "set_alpha_to_one": false,
+  "skip_prk_steps": true,
+  "steps_offset": 1,
+  "trained_betas": null
+}

text_encoder/config.json ADDED Viewed

	@@ -0,0 +1,25 @@

+{
+  "_name_or_path": "stabilityai/stable-diffusion-2",
+  "architectures": [
+    "CLIPTextModel"
+  ],
+  "attention_dropout": 0.0,
+  "bos_token_id": 0,
+  "dropout": 0.0,
+  "eos_token_id": 2,
+  "hidden_act": "gelu",
+  "hidden_size": 1024,
+  "initializer_factor": 1.0,
+  "initializer_range": 0.02,
+  "intermediate_size": 4096,
+  "layer_norm_eps": 1e-05,
+  "max_position_embeddings": 77,
+  "model_type": "clip_text_model",
+  "num_attention_heads": 16,
+  "num_hidden_layers": 23,
+  "pad_token_id": 1,
+  "projection_dim": 512,
+  "torch_dtype": "float32",
+  "transformers_version": "4.28.1",
+  "vocab_size": 49408
+}

text_encoder/flax_model.msgpack ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:60f914bf080600a7d6678c70339679c7997d03faec449d2bd2e5269673b25762
+size 1361564130

tokenizer/merges.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,24 @@

+{
+    "bos_token": {
+      "content": "<|startoftext|>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false
+    },
+    "eos_token": {
+      "content": "<|endoftext|>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false
+    },
+    "pad_token": "!",
+    "unk_token": {
+      "content": "<|endoftext|>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false
+    }
+  }

tokenizer/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,34 @@

+{
+    "add_prefix_space": false,
+    "bos_token": {
+      "__type": "AddedToken",
+      "content": "<|startoftext|>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false
+    },
+    "do_lower_case": true,
+    "eos_token": {
+      "__type": "AddedToken",
+      "content": "<|endoftext|>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false
+    },
+    "errors": "replace",
+    "model_max_length": 77,
+    "name_or_path": "stabilityai/stable-diffusion-2",
+    "pad_token": "<|endoftext|>",
+    "special_tokens_map_file": "./special_tokens_map.json",
+    "tokenizer_class": "CLIPTokenizer",
+    "unk_token": {
+      "__type": "AddedToken",
+      "content": "<|endoftext|>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false
+    }
+  }

tokenizer/vocab.json ADDED Viewed

The diff for this file is too large to render. See raw diff

unet/config.json ADDED Viewed

	@@ -0,0 +1,47 @@

+{
+  "_class_name": "FlaxUNet2DConditionModel",
+  "_diffusers_version": "0.16.0.dev0",
+  "act_fn": "silu",
+  "attention_head_dim": [
+    5,
+    10,
+    20,
+    20
+  ],
+  "block_out_channels": [
+    320,
+    640,
+    1280,
+    1280
+  ],
+  "center_input_sample": false,
+  "cross_attention_dim": 1024,
+  "down_block_types": [
+    "CrossAttnDownBlock2D",
+    "CrossAttnDownBlock2D",
+    "CrossAttnDownBlock2D",
+    "DownBlock2D"
+  ],
+  "downsample_padding": 1,
+  "dropout": 0.0,
+  "dual_cross_attention": false,
+  "flip_sin_to_cos": true,
+  "freq_shift": 0,
+  "in_channels": 4,
+  "layers_per_block": 2,
+  "mid_block_scale_factor": 1,
+  "norm_eps": 1e-05,
+  "norm_num_groups": 32,
+  "num_class_embeds": null,
+  "only_cross_attention": false,
+  "out_channels": 4,
+  "sample_size": 64,
+  "up_block_types": [
+    "UpBlock2D",
+    "CrossAttnUpBlock2D",
+    "CrossAttnUpBlock2D",
+    "CrossAttnUpBlock2D"
+  ],
+  "use_linear_projection": true,
+  "use_memory_efficient_attention": false
+}

unet/diffusion_flax_model.msgpack ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f165b465f6925fe41aa1f6578bf17e5f54974d8be63785d53fb66a25da915c84
+size 3463667343

vae/config.json ADDED Viewed

	@@ -0,0 +1,30 @@

+{
+  "_class_name": "FlaxAutoencoderKL",
+  "_diffusers_version": "0.16.0.dev0",
+  "act_fn": "silu",
+  "block_out_channels": [
+    128,
+    256,
+    512,
+    512
+  ],
+  "down_block_types": [
+    "DownEncoderBlock2D",
+    "DownEncoderBlock2D",
+    "DownEncoderBlock2D",
+    "DownEncoderBlock2D"
+  ],
+  "in_channels": 3,
+  "latent_channels": 4,
+  "layers_per_block": 2,
+  "norm_num_groups": 32,
+  "out_channels": 3,
+  "sample_size": 768,
+  "scaling_factor": 0.18215,
+  "up_block_types": [
+    "UpDecoderBlock2D",
+    "UpDecoderBlock2D",
+    "UpDecoderBlock2D",
+    "UpDecoderBlock2D"
+  ]
+}

vae/diffusion_flax_model.msgpack ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4f45517affbc93f169d2f5022c6c7d6c7912477f571e8011ddddeca115001562
+size 334623853