Update README.md
README.md (changed)

@@ -81,7 +81,6 @@ from diffusers import FlowMatchEulerDiscreteScheduler, AutoencoderKL, BitsAndBytesConfig
 pretrained_model_name_or_path = "/path/to/UniPic2-Metaquery-Flash/UniPic2-Metaquery"
 vlm_path = "/path/to/UniPic2-Metaquery-Flash/Qwen2.5-VL-7B-Instruct-AWQ"
 
-
 quant = "int4"  # {"int4", "fp16"}
 
 bnb4 = BitsAndBytesConfig(
@@ -93,12 +92,12 @@ bnb4 = BitsAndBytesConfig(
 
 if quant == "int4":
     transformer = SD3Transformer2DKontextModel.from_pretrained(
-
+        pretrained_model_name_or_path, subfolder="transformer",
         quantization_config=bnb4, device_map="auto", low_cpu_mem_usage=True
     )
 elif quant == "fp16":
     transformer = SD3Transformer2DKontextModel.from_pretrained(
-
+        pretrained_model_name_or_path, subfolder="transformer",
         torch_dtype=torch.float16, device_map="auto", low_cpu_mem_usage=True
     )
 else:
@@ -107,7 +106,7 @@ else:
 
 vae = AutoencoderKL.from_pretrained(
     pretrained_model_name_or_path, subfolder="vae",
-    torch_dtype=torch.
+    torch_dtype=torch.float16, device_map="auto", low_cpu_mem_usage=True).cuda()
 
 # Load Qwen2.5-VL model
 lmm = Qwen2_5_VLForConditionalGeneration.from_pretrained(
@@ -120,8 +119,9 @@ processor.chat_template = processor.chat_template.replace(
     "{% if loop.first and message['role'] != 'system' %}<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n{% endif %}",
     "")
 
+# move to CUDA
 conditioner = StableDiffusion3Conditioner.from_pretrained(
-    pretrained_model_name_or_path, subfolder="conditioner",
+    pretrained_model_name_or_path, subfolder="conditioner", torch_dtype=torch.float16).cuda()
 
 scheduler = FlowMatchEulerDiscreteScheduler.from_pretrained(pretrained_model_name_or_path, subfolder="scheduler")
 
@@ -167,6 +167,7 @@ image = pipeline(
 ).images[0]
 
 image.save("text2image.png")
+print(f"Image saved to text2image.png (quant={quant})")
 ```
 
 
@@ -189,7 +190,7 @@ min_pixels = max_pixels = int(image.height * 28 / 32 * image.width * 28 / 32)
 inputs = processor(
     text=texts, images=[image]*2,
     min_pixels=min_pixels, max_pixels=max_pixels,
-    videos=None, padding=True, return_tensors="pt")
+    videos=None, padding=True, return_tensors="pt").cuda()
 
 # Process with vision understanding
 input_ids, attention_mask, pixel_values, image_grid_thw = \
@@ -224,7 +225,9 @@ edited_image = pipeline(
     generator=torch.Generator(device=transformer.device).manual_seed(42)
 ).images[0]
 
-edited_image.save("
+edited_image.save("edited_image.png")
+print(f"Image saved to edited_image.png (quant={quant})")
+
 ```
 
 
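For orientation, the model-loading portion of the README reads roughly as follows once these edits are applied. This is a stitched-together sketch, not the verbatim file: the `bnb4` settings and the `else:` branch sit outside the changed hunks, so the values shown for them here are assumptions, and the import path for the UniPic2-specific classes is not given in this diff.

```python
import torch
from diffusers import AutoencoderKL, BitsAndBytesConfig, FlowMatchEulerDiscreteScheduler
# SD3Transformer2DKontextModel and StableDiffusion3Conditioner come from the UniPic2
# code base; the exact import path is not shown in this diff.

pretrained_model_name_or_path = "/path/to/UniPic2-Metaquery-Flash/UniPic2-Metaquery"

quant = "int4"  # {"int4", "fp16"}

# Assumed NF4 settings -- the real bnb4 block (README lines 87-92) is outside the hunks above.
bnb4 = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
)

if quant == "int4":
    # 4-bit transformer weights, loaded from the "transformer" subfolder
    transformer = SD3Transformer2DKontextModel.from_pretrained(
        pretrained_model_name_or_path, subfolder="transformer",
        quantization_config=bnb4, device_map="auto", low_cpu_mem_usage=True
    )
elif quant == "fp16":
    # Half-precision transformer without quantization
    transformer = SD3Transformer2DKontextModel.from_pretrained(
        pretrained_model_name_or_path, subfolder="transformer",
        torch_dtype=torch.float16, device_map="auto", low_cpu_mem_usage=True
    )
else:
    raise ValueError(f"unsupported quant mode: {quant}")  # assumed; the original else branch is not shown

# VAE and conditioner are loaded in fp16 and moved onto the GPU explicitly
vae = AutoencoderKL.from_pretrained(
    pretrained_model_name_or_path, subfolder="vae",
    torch_dtype=torch.float16, device_map="auto", low_cpu_mem_usage=True).cuda()

conditioner = StableDiffusion3Conditioner.from_pretrained(
    pretrained_model_name_or_path, subfolder="conditioner", torch_dtype=torch.float16).cuda()

scheduler = FlowMatchEulerDiscreteScheduler.from_pretrained(pretrained_model_name_or_path, subfolder="scheduler")
```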