OrlandoHugBot committed
Commit 4afe7a9 · verified · 1 Parent(s): 2f5e571

Update README.md

Files changed (1)
  1. README.md +10 -7
README.md CHANGED
@@ -81,7 +81,6 @@ from diffusers import FlowMatchEulerDiscreteScheduler, AutoencoderKL,BitsAndByte
 pretrained_model_name_or_path = "/path/to/UniPic2-Metaquery-Flash/UniPic2-Metaquery"
 vlm_path = "/path/to/UniPic2-Metaquery-Flash/Qwen2.5-VL-7B-Instruct-AWQ"
 
-
 quant = "int4" # {"int4", "fp16"}
 
 bnb4 = BitsAndBytesConfig(
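The hunk cuts off before the `BitsAndBytesConfig` arguments, so for readers skimming the diff, a minimal 4-bit setup might look like the sketch below. The specific quant type and compute dtype are assumptions, not the README's actual values.

```python
import torch
from diffusers import BitsAndBytesConfig

# Hypothetical 4-bit config -- the README's real arguments lie outside this hunk.
bnb4 = BitsAndBytesConfig(
    load_in_4bit=True,                     # store linear weights in 4 bit
    bnb_4bit_quant_type="nf4",             # assumed NF4 quantization
    bnb_4bit_compute_dtype=torch.float16,  # assumed compute dtype, matching the fp16 path
)
```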
@@ -93,12 +92,12 @@ bnb4 = BitsAndBytesConfig(
 
 if quant == "int4":
     transformer = SD3Transformer2DKontextModel.from_pretrained(
-        PRETRAINED_DIR, subfolder="transformer",
+        pretrained_model_name_or_path, subfolder="transformer",
         quantization_config=bnb4, device_map="auto", low_cpu_mem_usage=True
     )
 elif quant == "fp16":
     transformer = SD3Transformer2DKontextModel.from_pretrained(
-        PRETRAINED_DIR, subfolder="transformer",
+        pretrained_model_name_or_path, subfolder="transformer",
         torch_dtype=torch.float16, device_map="auto", low_cpu_mem_usage=True
     )
 else:
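After either branch, a quick sanity check (optional, not part of the README) confirms what actually landed on the GPU; it only touches standard `torch.nn.Module` attributes.

```python
# Optional check: report parameter count and dtypes for the loaded transformer.
n_params = sum(p.numel() for p in transformer.parameters())
dtypes = {str(p.dtype) for p in transformer.parameters()}
print(f"quant={quant}: {n_params / 1e9:.2f}B parameters, dtypes={dtypes}")
```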
@@ -107,7 +106,7 @@ else:
 
 vae = AutoencoderKL.from_pretrained(
     pretrained_model_name_or_path, subfolder="vae",
-    torch_dtype=torch.bfloat16, device_map="auto", low_cpu_mem_usage=True)
+    torch_dtype=torch.float16, device_map="auto", low_cpu_mem_usage=True).cuda()
 
 # Load Qwen2.5-VL model
 lmm = Qwen2_5_VLForConditionalGeneration.from_pretrained(
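Because the updated snippet now calls `.cuda()` directly on the VAE (and later on the conditioner), it will fail on a CPU-only machine. A small guard, optional and not in the README, makes that requirement explicit:

```python
import torch

# The .cuda() calls in this README assume a visible CUDA device; fail early otherwise.
assert torch.cuda.is_available(), "UniPic2-Metaquery-Flash quantized setup expects a CUDA GPU"
```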
@@ -120,8 +119,9 @@ processor.chat_template = processor.chat_template.replace(
     "{% if loop.first and message['role'] != 'system' %}<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n{% endif %}",
     "")
 
+# move to CUDA
 conditioner = StableDiffusion3Conditioner.from_pretrained(
-    pretrained_model_name_or_path, subfolder="conditioner",device_map="auto", torch_dtype=torch.bfloat16)
+    pretrained_model_name_or_path, subfolder="conditioner", torch_dtype=torch.float16).cuda()
 
 scheduler = FlowMatchEulerDiscreteScheduler.from_pretrained(pretrained_model_name_or_path, subfolder="scheduler")
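With the VAE and conditioner now explicitly in float16 on CUDA, a short consistency check (optional sketch, not part of the README) can catch device or dtype mismatches before the pipeline is assembled:

```python
# Optional check: the VAE and conditioner should agree on device and dtype.
for name, module in {"vae": vae, "conditioner": conditioner}.items():
    p = next(module.parameters())
    print(f"{name}: device={p.device}, dtype={p.dtype}")
```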
@@ -167,6 +167,7 @@ image = pipeline(
 ).images[0]
 
 image.save("text2image.png")
+print(f"Image saved to text2image.png (quant={quant})")
 ```
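Since the point of the int4 path is a smaller memory footprint, it can be useful to record peak VRAM around the generation call when comparing `quant` settings. This is an optional sketch using only standard `torch.cuda` utilities:

```python
import torch

torch.cuda.reset_peak_memory_stats()
# ... run the text-to-image pipeline call from the README here ...
peak_gib = torch.cuda.max_memory_allocated() / 1024**3
print(f"peak VRAM with quant={quant}: {peak_gib:.1f} GiB")
```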
@@ -189,7 +190,7 @@ min_pixels = max_pixels = int(image.height * 28 / 32 * image.width * 28 / 32)
 inputs = processor(
     text=texts, images=[image]*2,
     min_pixels=min_pixels, max_pixels=max_pixels,
-    videos=None, padding=True, return_tensors="pt")
+    videos=None, padding=True, return_tensors="pt").cuda()
 
 # Process with vision understanding
 input_ids, attention_mask, pixel_values, image_grid_thw = \
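One caveat on the `.cuda()` added above: `processor(...)` returns a `BatchFeature`, and depending on the installed transformers version that object may only expose `.to(...)` rather than `.cuda()`. If the call raises, an equivalent, version-safe form is:

```python
# Move the BatchFeature's tensors to the GPU via .to(), which BatchFeature supports.
inputs = processor(
    text=texts, images=[image] * 2,
    min_pixels=min_pixels, max_pixels=max_pixels,
    videos=None, padding=True, return_tensors="pt",
).to("cuda")
```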
@@ -224,7 +225,9 @@ edited_image = pipeline(
     generator=torch.Generator(device=transformer.device).manual_seed(42)
 ).images[0]
 
-edited_image.save("image_editing.png")
+edited_image.save("edited_image.png")
+print(f"Image saved to edited_image.png (quant={quant})")
+
 ```