Commit 666a605 • Final features
1 parent: c24dac7

app.py CHANGED
@@ -6,14 +6,14 @@ import shutil
 from train_dreambooth import run_training
 from convertosd import convert
 from PIL import Image
+from slugify import slugify
+import requests
 import torch
-
 css = '''
 .instruction{position: absolute; top: 0;right: 0;margin-top: 0px !important}
 .arrow{position: absolute;top: 0;right: -8px;margin-top: -8px !important}
 #component-4, #component-3, #component-10{min-height: 0}
 '''
-shutil.unpack_archive("mix.zip", "mix")
 model_to_load = "multimodalart/sd-fine-tunable"
 maximum_concepts = 3
 
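The two imports added here serve the new Hub-upload path: `slugify` turns the user's free-form model name into a repo-safe identifier, and `requests` backs a direct HTTP call inside `push`. A quick illustration of what `slugify` does to a model name, using the same example name the UI later suggests:

```python
# python-slugify, as imported in the commit, normalizes a display name
# into the repo-safe slug used for f"{username}/{model_name_slug}".
from slugify import slugify

print(slugify("Tarsila do Amaral Style"))  # tarsila-do-amaral-style
```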
@@ -34,12 +34,13 @@ def swap_text(option):
 
 def count_files(*inputs):
     file_counter = 0
+    concept_counter = 0
     for i, input in enumerate(inputs):
         if(i < maximum_concepts-1):
-
-
-
-
+            files = inputs[i]
+            if(files):
+                concept_counter+=1
+                file_counter+=len(files)
     uses_custom = inputs[-1]
     type_of_thing = inputs[-4]
     if(uses_custom):
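The reworked `count_files` now tallies concepts as well as images: any non-empty file slot counts as one concept, and its images feed the automatic step budget (`file_counter*200` in the default branch below). A standalone sketch of that counting rule, with plain lists standing in for Gradio's file inputs:

```python
# Sketch of the commit's counting rule, outside Gradio. Each entry in
# file_groups is one concept slot: a list of uploads, or None if empty.
def count_concepts_and_files(file_groups):
    concept_counter = 0
    file_counter = 0
    for files in file_groups:
        if files:                       # a non-empty slot is one concept
            concept_counter += 1
            file_counter += len(files)
    return concept_counter, file_counter

concepts, images = count_concepts_and_files([["a.jpg", "b.jpg"], None, ["c.jpg"]])
print(concepts, images, images * 200)   # 2 3 600 -> 200 steps per image
```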
@@ -49,9 +50,13 @@ def count_files(*inputs):
         Training_Steps = file_counter*200*2
     else:
         Training_Steps = file_counter*200
-    return(gr.update(visible=True, value=f"You are going to train {file_counter}…
-def train(*inputs):
+    return(gr.update(visible=True, value=f"You are going to train {concept_counter} {type_of_thing}(s), with {file_counter} images for {Training_Steps} steps. This should take around {round(Training_Steps/1.5, 2)} seconds, or {round((Training_Steps/1.5)/3600, 2)} hours. As a reminder, the T4 GPU costs US$0.60 for 1h. Once training is over, don't forget to swap the hardware back to CPU."))
 
+def train(*inputs):
+    if "IS_SHARED_UI" in os.environ:
+        raise gr.Error("This Space only works in duplicated instances")
+    if os.path.exists("output_model"): shutil.rmtree('output_model')
+    if os.path.exists("instance_images"): shutil.rmtree('instance_images')
     if os.path.exists("diffusers_model.zip"): os.remove("diffusers_model.zip")
     if os.path.exists("model.ckpt"): os.remove("model.ckpt")
     file_counter = 0
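The new summary string prices the run before it starts, assuming roughly 1.5 training steps per second on a T4 billed at US$0.60 per hour (both figures come from the message itself). The same arithmetic, pulled out into a helper:

```python
# The summary message's estimate: steps / 1.5 seconds, converted to hours
# and priced at the T4 rate quoted in the commit (US$0.60/h).
def estimate_run(training_steps, steps_per_second=1.5, usd_per_hour=0.60):
    seconds = training_steps / steps_per_second
    hours = seconds / 3600
    return round(seconds, 2), round(hours, 2), round(hours * usd_per_hour, 2)

print(estimate_run(800))  # (533.33, 0.15, 0.09): seconds, hours, approx. USD
```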
@@ -61,6 +66,8 @@ def train(*inputs):
     os.makedirs('instance_images',exist_ok=True)
     files = inputs[i+(maximum_concepts*2)]
     prompt = inputs[i+maximum_concepts]
+    if(prompt == "" or prompt == None):
+        raise gr.Error("You forgot to define your concept prompt")
     for j, file_temp in enumerate(files):
        file = Image.open(file_temp.name)
        width, height = file.size
@@ -84,64 +91,16 @@ def train(*inputs):
         Train_text_encoder_for = int(inputs[-2])
     else:
         Training_Steps = file_counter*200
-
-
-
-
-
-
-
-
-
-
-        with_prior_preservation=True,
-        prior_loss_weight=1.0,
-        instance_prompt="",
-        seed=42,
-        resolution=512,
-        mixed_precision="fp16",
-        train_batch_size=1,
-        gradient_accumulation_steps=1,
-        gradient_checkpointing=True,
-        use_8bit_adam=True,
-        learning_rate=2e-6,
-        lr_scheduler="polynomial",
-        lr_warmup_steps=0,
-        max_train_steps=Training_Steps,
-        num_class_images=200
-    )
-    args_unet = argparse.Namespace(
-        image_captions_filename = True,
-        train_only_unet=True,
-        Session_dir="output_model",
-        save_starting_step=0,
-        save_n_steps=0,
-        pretrained_model_name_or_path=model_to_load,
-        instance_data_dir="instance_images",
-        output_dir="output_model",
-        instance_prompt="",
-        seed=42,
-        resolution=512,
-        mixed_precision="fp16",
-        train_batch_size=1,
-        gradient_accumulation_steps=1,
-        gradient_checkpointing=False,
-        use_8bit_adam=True,
-        learning_rate=2e-6,
-        lr_scheduler="polynomial",
-        lr_warmup_steps=0,
-        max_train_steps=Training_Steps
-    )
-    run_training(args_txt_encoder)
-    run_training(args_unet)
-    elif(type_of_thing == "object" or type_of_thing == "style"):
-        if(type_of_thing == "object"):
-            Train_text_encoder_for=30
-        elif(type_of_thing == "style"):
-            Train_text_encoder_for=15
-        class_data_dir = None
-        stptxt = int((Training_Steps*Train_text_encoder_for)/100)
-        args_general = argparse.Namespace(
+        if(type_of_thing == "object"):
+            Train_text_encoder_for=30
+        elif(type_of_thing == "person"):
+            Train_text_encoder_for=60
+        elif(type_of_thing == "style"):
+            Train_text_encoder_for=15
+
+    class_data_dir = None
+    stptxt = int((Training_Steps*Train_text_encoder_for)/100)
+    args_general = argparse.Namespace(
         image_captions_filename = True,
         train_text_encoder = True,
         stop_text_encoder_training = stptxt,
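With the per-branch Namespaces collapsed into one `args_general`, the text encoder now trains for a fixed share of the steps per concept type (30% for an object, 60% for a person, 15% for a style), and `stop_text_encoder_training` is that share converted to an absolute step count:

```python
# stop_text_encoder_training as the commit computes it: a per-type
# percentage of the total step budget, truncated to an integer.
PERCENT_BY_TYPE = {"object": 30, "person": 60, "style": 15}

def text_encoder_stop_step(training_steps, type_of_thing):
    return int(training_steps * PERCENT_BY_TYPE[type_of_thing] / 100)

print(text_encoder_stop_step(1200, "person"))  # 720
```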
@@ -161,11 +120,11 @@ def train(*inputs):
         lr_scheduler="polynomial",
         lr_warmup_steps = 0,
         max_train_steps=Training_Steps,
-
-
+    )
+    run_training(args_general)
     torch.cuda.empty_cache()
     #convert("output_model", "model.ckpt")
-    shutil.rmtree('instance_images')
+    #shutil.rmtree('instance_images')
     shutil.make_archive("diffusers_model", 'zip', "output_model")
     torch.cuda.empty_cache()
     return [gr.update(visible=True, value=["diffusers_model.zip"]), gr.update(visible=True), gr.update(visible=True), gr.update(visible=True)]
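Note the calling convention that makes this consolidation possible: `run_training` receives a hand-built `argparse.Namespace` rather than arguments parsed from the CLI. A minimal sketch of the pattern, with a stand-in `train` function in place of the Space's `run_training`:

```python
import argparse

# argparse.Namespace is just an attribute bag, so a script written around
# main(args) can be driven programmatically, without touching sys.argv.
def train(args):  # stand-in for run_training
    print(f"{args.max_train_steps} steps, lr={args.learning_rate}, "
          f"scheduler={args.lr_scheduler}")

args = argparse.Namespace(      # values mirror the commit's hyperparameters
    learning_rate=2e-6,
    lr_scheduler="polynomial",
    lr_warmup_steps=0,
    max_train_steps=1200,
)
train(args)
```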
@@ -178,8 +137,80 @@ def generate(prompt):
     image = pipe(prompt).images[0]
     return(image)
 
-def push(…
-
+def push(model_name, where_to_upload, hf_token):
+    if(not os.path.exists("model.ckpt")):
+        convert("output_model", "model.ckpt")
+    from huggingface_hub import HfApi, HfFolder, CommitOperationAdd
+    from huggingface_hub import create_repo
+    model_name_slug = slugify(model_name)
+    if(where_to_upload == "My personal profile"):
+        api = HfApi()
+        your_username = api.whoami(token=hf_token)["name"]
+        model_id = f"{your_username}/{model_name_slug}"
+    else:
+        model_id = f"sd-dreambooth-library/{model_name_slug}"
+    headers = {"Authorization" : f"Bearer: {hf_token}", "Content-Type": "application/json"}
+    response = requests.post("https://example.com/get-my-account-detail", headers=headers)
+
+    images_upload = os.listdir("instance_images")
+    image_string = ""
+    instance_prompt_list = []
+    previous_instance_prompt = ''
+    for i, image in enumerate(images_upload):
+        instance_prompt = image.split("_")[0]
+        if(instance_prompt != previous_instance_prompt):
+            title_instance_prompt_string = instance_prompt
+            instance_prompt_list.append(instance_prompt)
+        else:
+            title_instance_prompt_string = ''
+        previous_instance_prompt = instance_prompt
+        image_string = f'''
+{title_instance_prompt_string}
+{image_string}![{instance_prompt} {i}](https://huggingface.co/{model_name_slug}/resolve/main/sample_images/{image})
+'''
+    readme_text = f'''---
+license: creativeml-openrail-m
+tags:
+- text-to-image
+---
+### {model_name} Dreambooth model trained by {api.whoami(token=hf_token)["name"]} with [Hugging Face Dreambooth Training Space](https://huggingface.co/spaces/multimodalart/dreambooth-training)
+
+You run your new concept via `diffusers` [Colab Notebook for Inference](https://colab.research.google.com/github/huggingface/notebooks/blob/main/diffusers/sd_dreambooth_inference.ipynb)
+
+Sample pictures of this concept:
+{image_string}
+'''
+    #Save the readme to a file
+    readme_file = open("README.md", "w")
+    readme_file.write(readme_text)
+    readme_file.close()
+    #Save the token identifier to a file
+    text_file = open("token_identifier.txt", "w")
+    text_file.write(', '.join(instance_prompt_list))
+    text_file.close()
+    operations = [
+        CommitOperationAdd(path_in_repo="token_identifier.txt", path_or_fileobj="token_identifier.txt"),
+        CommitOperationAdd(path_in_repo="README.md", path_or_fileobj="README.md"),
+        CommitOperationAdd(path_in_repo=f"model.ckpt",path_or_fileobj="model.ckpt")
+    ]
+    api.create_commit(
+        repo_id=model_id,
+        operations=operations,
+        commit_message=f"Upload the model {model_name}",
+        token=hf_token
+    )
+    api.upload_folder(
+        folder_path="output_model",
+        repo_id=model_id,
+        token=hf_token
+    )
+    api.upload_folder(
+        folder_path="instance_images",
+        path_in_repo="concept_images",
+        repo_id=model_id,
+        token=hf_token
+    )
+    return [gr.update(visible=True, value=f"Successfully uploaded your model. Access it [here](https://huggingface.co/{model_id})"), gr.update(visible=True, value=["diffusers_model.zip", "model.ckpt"])]
 
 def convert_to_ckpt():
     convert("output_model", "model.ckpt")
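The new `push` drives `huggingface_hub` directly: one commit built from `CommitOperationAdd` operations for the small files, then `upload_folder` for the model weights and sample images. A trimmed sketch of that flow under illustrative names, which additionally calls the `create_repo` the commit imports but does not invoke in the lines shown above:

```python
# Trimmed sketch of the commit's upload flow. The repo id, file names and
# token are placeholders; the huggingface_hub calls are the ones the
# commit itself uses (plus create_repo, which it only imports).
from huggingface_hub import HfApi, CommitOperationAdd, create_repo

def push_model(model_id: str, hf_token: str):
    create_repo(model_id, token=hf_token, exist_ok=True)
    api = HfApi()
    api.create_commit(
        repo_id=model_id,
        operations=[CommitOperationAdd(path_in_repo="README.md",
                                       path_or_fileobj="README.md")],
        commit_message="Upload the model",
        token=hf_token,
    )
    api.upload_folder(folder_path="output_model", repo_id=model_id, token=hf_token)

# push_model("my-user/my-dreambooth-model", "hf_...")
```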
@@ -200,7 +231,7 @@ with gr.Blocks(css=css) as demo:
     gr.HTML('''
     <div class="gr-prose" style="max-width: 80%">
     <h2>You have successfully cloned the Dreambooth Training Space</h2>
-    <p…
+    <p>If you haven't already, attribute a T4 GPU to it (via the Settings tab) and run the training below. You will be billed by the minute between when you activate the GPU until when you turn it off.</p>
     </div>
     ''')
     gr.Markdown("# Dreambooth training")
@@ -258,32 +289,38 @@ with gr.Blocks(css=css) as demo:
 
 
 
-    with gr.Accordion("…
-    swap_auto_calculated = gr.Checkbox(label="Use…
-    gr.Markdown("If not checked, the number of steps and % of frozen encoder will be tuned automatically according to the amount of images you upload and whether you are training an `object`, `person` or `style…
+    with gr.Accordion("Custom Settings", open=False):
+        swap_auto_calculated = gr.Checkbox(label="Use custom settings")
+        gr.Markdown("If not checked, the number of steps and % of frozen encoder will be tuned automatically according to the amount of images you upload and whether you are training an `object`, `person` or `style` as follows: The number of steps is calculated by number of images uploaded multiplied by 20. The text-encoder is frozen after 10% of the steps for a style, 30% of the steps for an object and is fully trained for persons.")
     steps = gr.Number(label="How many steps", value=800)
     perc_txt_encoder = gr.Number(label="Percentage of the training steps the text-encoder should be trained as well", value=30)
 
     type_of_thing.change(fn=swap_text, inputs=[type_of_thing], outputs=[thing_description, thing_image_example, things_naming, perc_txt_encoder], queue=False)
     training_summary = gr.Textbox("", visible=False, label="Training Summary")
+    steps.change(fn=count_files, inputs=file_collection+[type_of_thing]+[steps]+[perc_txt_encoder]+[swap_auto_calculated], outputs=[training_summary])
+    perc_txt_encoder.change(fn=count_files, inputs=file_collection+[type_of_thing]+[steps]+[perc_txt_encoder]+[swap_auto_calculated], outputs=[training_summary])
     for file in file_collection:
-        file.change(fn=count_files, inputs=file_collection+[type_of_thing]+[steps]+[perc_txt_encoder]+[swap_auto_calculated], outputs=[training_summary…
+        file.change(fn=count_files, inputs=file_collection+[type_of_thing]+[steps]+[perc_txt_encoder]+[swap_auto_calculated], outputs=[training_summary])
     train_btn = gr.Button("Start Training")
     with gr.Box(visible=False) as try_your_model:
-        gr.Markdown("Try your model")
+        gr.Markdown("## Try your model")
     with gr.Row():
         prompt = gr.Textbox(label="Type your prompt")
-
+        result_image = gr.Image()
     generate_button = gr.Button("Generate Image")
     with gr.Box(visible=False) as push_to_hub:
-        gr.Markdown("Push to Hugging Face Hub")
-
+        gr.Markdown("## Push to Hugging Face Hub")
+        model_name = gr.Textbox(label="Name of your model", placeholder="Tarsila do Amaral Style")
+        where_to_upload = gr.Dropdown(["My personal profile", "Public Library"], label="Upload to")
+        gr.Markdown("[A Hugging Face write access token](https://huggingface.co/settings/tokens), go to \"New token\" -> Role : Write. A regular read token won't work here.")
+        hf_token = gr.Textbox(label="Hugging Face Write Token")
     push_button = gr.Button("Push to the Hub")
     result = gr.File(label="Download the uploaded models in the diffusers format", visible=True)
+    success_message_upload = gr.Markdown(visible=False)
     convert_button = gr.Button("Convert to CKPT", visible=False)
 
     train_btn.click(fn=train, inputs=is_visible+concept_collection+file_collection+[type_of_thing]+[steps]+[perc_txt_encoder]+[swap_auto_calculated], outputs=[result, try_your_model, push_to_hub, convert_button])
-    generate_button.click(fn=generate, inputs=prompt, outputs=…
-    push_button.click(fn=push, inputs=…
+    generate_button.click(fn=generate, inputs=prompt, outputs=result_image)
+    push_button.click(fn=push, inputs=[model_name, where_to_upload, hf_token], outputs=[success_message_upload, result])
    convert_button.click(fn=convert_to_ckpt, inputs=[], outputs=result)
    demo.launch()
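All of the UI changes above follow the same two `gr.Blocks` event idioms: inputs re-run `count_files` on `.change`, and buttons dispatch their handlers on `.click`. A self-contained miniature of that wiring (component names are illustrative):

```python
import gradio as gr

# Miniature of the Space's wiring: a Number recomputes a hidden summary on
# .change, and a Button fires the action on .click. Names are illustrative.
def summarize(steps):
    return gr.update(visible=True, value=f"Will train for {int(steps)} steps")

with gr.Blocks() as demo:
    steps = gr.Number(label="How many steps", value=800)
    summary = gr.Textbox("", visible=False, label="Training Summary")
    run_btn = gr.Button("Start Training")
    steps.change(fn=summarize, inputs=[steps], outputs=[summary])
    run_btn.click(fn=lambda s: f"Trained for {int(s)} steps",
                  inputs=[steps], outputs=[summary])

demo.launch()
```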