OrlandoHugBot committed (verified)
Commit f522b19 · Parent: 8e16806

Update README.md

Files changed (1): README.md (+13 / -41)
README.md CHANGED
@@ -70,60 +70,34 @@ import torch
 from PIL import Image
 from unipicv2.pipeline_stable_diffusion_3_kontext import StableDiffusion3KontextPipeline
 from unipicv2.transformer_sd3_kontext import SD3Transformer2DKontextModel
-from diffusers import FlowMatchEulerDiscreteScheduler, AutoencoderKL, BitsAndBytesConfig
+from diffusers import FlowMatchEulerDiscreteScheduler, AutoencoderKL
 from transformers import CLIPTextModelWithProjection, CLIPTokenizer, T5EncoderModel, T5TokenizerFast
 
 # Load model components
-pretrained_model_name_or_path = "/mnt/datasets_vlm/chris/hf_ckpt/Unipic2-t2i"
-# int4 is recommended for inference:lower VRAM with no quality loss {"int4", "fp16"}
-quant = "int4"
-
-# BitsAndBytes config
-bnb4 = BitsAndBytesConfig(
-    load_in_4bit=True,
-    bnb_4bit_use_double_quant=True,
-    bnb_4bit_quant_type="nf4",
-    bnb_4bit_compute_dtype=torch.bfloat16,
-)
-bnb8 = BitsAndBytesConfig(load_in_8bit=True)
-
-if quant == "int4":
-    transformer = SD3Transformer2DKontextModel.from_pretrained(
-        pretrained_model_name_or_path, subfolder="transformer",
-        quantization_config=bnb4, device_map="auto", low_cpu_mem_usage=True
-    ).cuda()
-    text_qconf = bnb8
-    vae_dtype = torch.float16
-else:  # fp16
-    transformer = SD3Transformer2DKontextModel.from_pretrained(
-        pretrained_model_name_or_path, subfolder="transformer",
-        torch_dtype=torch.float16, device_map="auto", low_cpu_mem_usage=True
-    ).cuda()
-    text_qconf = None
-    vae_dtype = torch.float16
+pretrained_model_name_or_path = "Skywork/UniPic2-SD3.5M-Kontext-2B"
+
+transformer = SD3Transformer2DKontextModel.from_pretrained(
+    pretrained_model_name_or_path, subfolder="transformer", torch_dtype=torch.bfloat16).cuda()
 
 vae = AutoencoderKL.from_pretrained(
     pretrained_model_name_or_path, subfolder="vae",
-    torch_dtype=vae_dtype, device_map="auto", low_cpu_mem_usage=True
-)
+    torch_dtype=torch.bfloat16, device_map="auto", low_cpu_mem_usage=True
+).cuda()
 
 # Load text encoders
 text_encoder = CLIPTextModelWithProjection.from_pretrained(
-    pretrained_model_name_or_path, subfolder="text_encoder",
-    quantization_config=text_qconf, torch_dtype=None, device_map="auto", low_cpu_mem_usage=True
-)
+    pretrained_model_name_or_path, subfolder="text_encoder", torch_dtype=torch.bfloat16, device_map="auto", low_cpu_mem_usage=True
+).cuda()
 tokenizer = CLIPTokenizer.from_pretrained(pretrained_model_name_or_path, subfolder="tokenizer")
 
 text_encoder_2 = CLIPTextModelWithProjection.from_pretrained(
-    pretrained_model_name_or_path, subfolder="text_encoder_2",
-    quantization_config=text_qconf, torch_dtype=None, device_map="auto", low_cpu_mem_usage=True
-)
+    pretrained_model_name_or_path, subfolder="text_encoder_2", torch_dtype=torch.bfloat16, device_map="auto", low_cpu_mem_usage=True
+).cuda()
 tokenizer_2 = CLIPTokenizer.from_pretrained(pretrained_model_name_or_path, subfolder="tokenizer_2")
 
 text_encoder_3 = T5EncoderModel.from_pretrained(
-    pretrained_model_name_or_path, subfolder="text_encoder_3",
-    quantization_config=text_qconf, torch_dtype=None, device_map="auto", low_cpu_mem_usage=True
-)
+    pretrained_model_name_or_path, subfolder="text_encoder_3", torch_dtype=torch.bfloat16, device_map="auto", low_cpu_mem_usage=True
+).cuda()
 tokenizer_3 = T5TokenizerFast.from_pretrained(pretrained_model_name_or_path, subfolder="tokenizer_3")
 
 scheduler = FlowMatchEulerDiscreteScheduler.from_pretrained(
@@ -149,7 +123,6 @@ image = pipeline(
 ).images[0]
 
 image.save("text2image.png")
-print(f"Image saved to text2image.png (quant={quant})")
 
 ```
@@ -187,7 +160,6 @@ edited_image = pipeline(
 ).images[0]
 
 edited_image.save("edited_img.png")
-print(f"Edited Image saved to edited_img.png (quant={quant})")
 
 ```
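For reference, below is a minimal end-to-end sketch of the snippet this commit leaves in the README, assembled from the `+` lines above. The scheduler call and the `pipeline(...)` invocations are truncated in the diff, so the `subfolder="scheduler"` argument, the pipeline constructor keywords, the prompts and sampling settings, and the `image=` keyword in the editing step are assumptions in the style of diffusers' SD3 API, not the README's exact code; `device_map="auto"` and `low_cpu_mem_usage=True` are dropped here for brevity.

```python
import torch
from PIL import Image
from unipicv2.pipeline_stable_diffusion_3_kontext import StableDiffusion3KontextPipeline
from unipicv2.transformer_sd3_kontext import SD3Transformer2DKontextModel
from diffusers import FlowMatchEulerDiscreteScheduler, AutoencoderKL
from transformers import CLIPTextModelWithProjection, CLIPTokenizer, T5EncoderModel, T5TokenizerFast

pretrained_model_name_or_path = "Skywork/UniPic2-SD3.5M-Kontext-2B"

# Component loading as in the new README code: everything in bfloat16, on GPU.
transformer = SD3Transformer2DKontextModel.from_pretrained(
    pretrained_model_name_or_path, subfolder="transformer", torch_dtype=torch.bfloat16).cuda()
vae = AutoencoderKL.from_pretrained(
    pretrained_model_name_or_path, subfolder="vae", torch_dtype=torch.bfloat16).cuda()
text_encoder = CLIPTextModelWithProjection.from_pretrained(
    pretrained_model_name_or_path, subfolder="text_encoder", torch_dtype=torch.bfloat16).cuda()
tokenizer = CLIPTokenizer.from_pretrained(pretrained_model_name_or_path, subfolder="tokenizer")
text_encoder_2 = CLIPTextModelWithProjection.from_pretrained(
    pretrained_model_name_or_path, subfolder="text_encoder_2", torch_dtype=torch.bfloat16).cuda()
tokenizer_2 = CLIPTokenizer.from_pretrained(pretrained_model_name_or_path, subfolder="tokenizer_2")
text_encoder_3 = T5EncoderModel.from_pretrained(
    pretrained_model_name_or_path, subfolder="text_encoder_3", torch_dtype=torch.bfloat16).cuda()
tokenizer_3 = T5TokenizerFast.from_pretrained(pretrained_model_name_or_path, subfolder="tokenizer_3")

# Assumption: the scheduler lives in the standard "scheduler" subfolder
# (the diff truncates this call).
scheduler = FlowMatchEulerDiscreteScheduler.from_pretrained(
    pretrained_model_name_or_path, subfolder="scheduler")

# Assumption: the Kontext pipeline takes the same components as
# diffusers' StableDiffusion3Pipeline.
pipeline = StableDiffusion3KontextPipeline(
    transformer=transformer, vae=vae, scheduler=scheduler,
    text_encoder=text_encoder, tokenizer=tokenizer,
    text_encoder_2=text_encoder_2, tokenizer_2=tokenizer_2,
    text_encoder_3=text_encoder_3, tokenizer_3=tokenizer_3)

# Text-to-image (hypothetical prompt and sampling settings).
image = pipeline(
    prompt="a mountain lake at dawn",
    num_inference_steps=28,
    guidance_scale=3.5,
).images[0]
image.save("text2image.png")

# Image editing, as in the README's second example: feed an image back in.
# The `image=` keyword is an assumption about this pipeline's signature.
source = Image.open("text2image.png").convert("RGB")
edited_image = pipeline(
    prompt="make it a snowy winter scene",
    image=source,
    num_inference_steps=28,
    guidance_scale=3.5,
).images[0]
edited_image.save("edited_img.png")
```

With the BitsAndBytes branches removed, the snippet no longer depends on `bitsandbytes` at all: every component loads in plain bfloat16 and runs on a single GPU.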