Files changed (1)
  1. app.py +320 -4
app.py CHANGED
@@ -1,7 +1,323 @@
  import gradio as gr

- def greet(name):
-     return "Hello " + name + "!!"

- demo = gr.Interface(fn=greet, inputs="text", outputs="text")
- demo.launch()
+ import os
+ import base64
+ import io
+ from typing import TypedDict
+ import requests
  import gradio as gr
+ from PIL import Image
+
+ # Read Baseten configuration from environment variables.
+ BTEN_API_KEY = os.getenv("API_KEY")
+ URL = os.getenv("URL")
+
+ def image_to_base64(image: Image.Image) -> str:
+     """Convert a PIL image to a base64-encoded PNG string."""
+     with io.BytesIO() as buffer:
+         image.save(buffer, format="PNG")
+         return base64.b64encode(buffer.getvalue()).decode("utf-8")
+
+
+ def ensure_image(img) -> Image.Image:
+     """
+     Ensure the input is a PIL Image.
+     If it's already a PIL Image, return it.
+     If it's a string (file path), open it.
+     If it's a dict with a "name" key, open the file at that path.
+     """
+     if isinstance(img, Image.Image):
+         return img
+     elif isinstance(img, str):
+         return Image.open(img)
+     elif isinstance(img, dict) and "name" in img:
+         return Image.open(img["name"])
+     else:
+         raise ValueError("Cannot convert input to a PIL Image.")
+
+
+ def call_baseten_generate(
+     image: Image.Image,
+     prompt: str,
+     steps: int,
+     strength: float,
+     height: int,
+     width: int,
+     lora_name: str,
+     remove_bg: bool,
+ ) -> Image.Image | None:
+     """
+     Call the Baseten /predict endpoint with the provided parameters and return the generated image.
+     """
+     image = ensure_image(image)
+     b64_image = image_to_base64(image)
+     payload = {
+         "image": b64_image,
+         "prompt": prompt,
+         "steps": steps,
+         "strength": strength,
+         "height": height,
+         "width": width,
+         "lora_name": lora_name,
+         "bgrm": remove_bg,
+     }
+     headers = {"Authorization": f"Api-Key {BTEN_API_KEY}"}
+     try:
+         if not URL:
+             raise ValueError("The URL environment variable is not set.")
+
+         response = requests.post(URL, headers=headers, json=payload)
+         if response.status_code == 200:
+             data = response.json()
+             gen_b64 = data.get("generated_image")
+             if gen_b64:
+                 return Image.open(io.BytesIO(base64.b64decode(gen_b64)))
+             else:
+                 return None
+         else:
+             print(f"Error: HTTP {response.status_code}\n{response.text}")
+             return None
+     except Exception as e:
+         print(f"Error: {e}")
+         return None
+
+
+ # Mode defaults for each tab.
+
+ Mode = TypedDict(
+     "Mode",
+     {
+         "model": str,
+         "prompt": str,
+         "default_strength": float,
+         "default_height": int,
+         "default_width": int,
+         "models": list[str],
+     },
+ )
+
+ MODE_DEFAULTS: dict[str, Mode] = {
+     "Subject Generation": {
+         "model": "subject_99000_512",
+         "prompt": "A detailed portrait with soft lighting",
+         "default_strength": 1.2,
+         "default_height": 512,
+         "default_width": 512,
+         "models": [
+             "zendsd_512_146000",
+             "subject_99000_512",
+             "zen_variation_10000",
+             "zen_pers_11000",
+             "zen_26000_512",
+             "zen_22000_1280",
+             "zen_20000_1360",
+             "zen_14000_512",
+             "zen_1360_31000",
+         ],
+     },
+     "Background Generation": {
+         "model": "gen_back_3000_1024",
+         "prompt": "A vibrant background with dynamic lighting and textures",
+         "default_strength": 1.2,
+         "default_height": 1024,
+         "default_width": 1024,
+         "models": [
+             "bgwlight_15000_1024",
+             "rmgb_12000_1024",
+             "bg_canny_58000_1024",
+             "gen_back_3000_1024",
+             "gen_back_7000_1024",
+             "gen_bckgnd_18000_512",
+             "loose_25000_512",
+             "looser_23000_1024",
+             "looser_bg_gen_21000_1280",
+             "old_looser_46000_1024",
+             "relight_bg_gen_31000_1024",
+             "rmbg_loose_19000_1024",
+         ],
+     },
+     "Canny": {
+         "model": "canny_21000_1024",
+         "prompt": "A futuristic cityscape with neon lights",
+         "default_strength": 1.2,
+         "default_height": 1024,
+         "default_width": 1024,
+         "models": ["canny_21000_1024"],
+     },
+     "Depth": {
+         "model": "depth_9800_1024",
+         "prompt": "A scene with pronounced depth and perspective",
+         "default_strength": 1.2,
+         "default_height": 1024,
+         "default_width": 1024,
+         "models": ["depth_9800_1024"],
+     },
+     "Deblurring": {
+         "model": "slight_deblurr_18000",
+         "prompt": "A sharp, high-detail photograph",
+         "default_strength": 1.2,
+         "default_height": 1024,
+         "default_width": 1024,
+         "models": ["slight_deblurr_18000", "deblurr_1024_10000"],
+     },
+ }
+
+
+ header = """
+ # 🌍 ZenCtrl / FLUX
+ <div style="text-align: center; display: flex; justify-content: left; gap: 5px;">
+     <a href="https://huggingface.co/fotographerai/zenctrl_tools">
+         <img src="https://img.shields.io/badge/🤗-Model-ffbd45.svg" alt="Weights">
+     </a>
+     <a href="https://github.com/FotographerAI/ZenCtrl">
+         <img src="https://img.shields.io/badge/GitHub-Code-blue.svg?logo=github" alt="GitHub">
+     </a>
+     <a href="https://fotographer.ai/">
+         <img src="https://img.shields.io/badge/LP-Visit-9cf" alt="LP">
+     </a>
+     <a href="https://x.com/fotographerait">
+         <img src="https://img.shields.io/twitter/follow/FotographerAI?style=social" alt="Twitter">
+     </a>
+     <a href="https://discord.com/invite/b9RuYQ3F8k">
+         <img src="https://img.shields.io/badge/Discord-Join-7289da.svg?logo=discord" alt="Discord">
+     </a>
+ </div>
+ """
+
+ defaults = MODE_DEFAULTS["Subject Generation"]
+
+
+ with gr.Blocks(title="🌍 ZenCtrl") as demo:
+     gr.Markdown(header)
+     gr.Markdown(
+         """
+         # ZenCtrl Demo
+         [WIP] One agent to generate multi-view, diverse-scene, and task-specific high-resolution images from a single subject image, without fine-tuning.
+         We are releasing some of the task-specific weights first and will release the code soon.
+         The goal is to unify all of the visual content generation tasks with a single LLM...
+
+         **Modes:**
+         - **Subject Generation:** Focuses on generating detailed subject portraits.
+         - **Background Generation:** Creates dynamic, vibrant backgrounds;
+           you can generate part of the image from a sketch while keeping the rest as it is.
+         - **Canny:** Emphasizes strong edge detection.
+         - **Depth:** Produces images with realistic depth and perspective.
+
+         For more details, shoot us a message on Discord.
+         """
+     )
+     with gr.Tabs():
+         for mode in MODE_DEFAULTS:
+             with gr.Tab(mode):
+                 defaults = MODE_DEFAULTS[mode]
+                 gr.Markdown(f"### {mode} Mode")
+                 gr.Markdown(f"**Default Model:** {defaults['model']}")
+
+                 with gr.Row():
+                     with gr.Column(scale=2, min_width=370):
+                         input_image = gr.Image(
+                             label="Upload Image",
+                             type="pil",
+                             scale=3,
+                             height=370,
+                             min_width=100,
+                         )
+                         generate_button = gr.Button("Generate")
+                         with gr.Group():  # container for the model options
+                             model_dropdown = gr.Dropdown(
+                                 label="Model",
+                                 choices=defaults["models"],
+                                 value=defaults["model"],
+                                 interactive=True,
+                             )
+                             remove_bg_checkbox = gr.Checkbox(
+                                 label="Remove Background", value=False
+                             )
+
+                     with gr.Column(scale=2):
+                         output_image = gr.Image(
+                             label="Generated Image",
+                             type="pil",
+                             height=573,
+                             scale=4,
+                             min_width=100,
+                         )
+
+                 gr.Markdown("#### Prompt")
+                 prompt_box = gr.Textbox(
+                     label="Prompt", value=defaults["prompt"], lines=2
+                 )
+
+                 # Wrap generation parameters in an Accordion for a collapsible view.
+                 with gr.Accordion("Generation Parameters", open=False):
+                     with gr.Row():
+                         step_slider = gr.Slider(
+                             minimum=2, maximum=28, value=2, step=2, label="Steps"
+                         )
+                         strength_slider = gr.Slider(
+                             minimum=0.5,
+                             maximum=2.0,
+                             value=defaults["default_strength"],
+                             step=0.1,
+                             label="Strength",
+                         )
+                     with gr.Row():
+                         height_slider = gr.Slider(
+                             minimum=512,
+                             maximum=1360,
+                             value=defaults["default_height"],
+                             step=1,
+                             label="Height",
+                         )
+                         width_slider = gr.Slider(
+                             minimum=512,
+                             maximum=1360,
+                             value=defaults["default_width"],
+                             step=1,
+                             label="Width",
+                         )
+
+                 def on_generate_click(
+                     model_name,
+                     prompt,
+                     steps,
+                     strength,
+                     height,
+                     width,
+                     remove_bg,
+                     image,
+                 ):
+                     return call_baseten_generate(
+                         image,
+                         prompt,
+                         steps,
+                         strength,
+                         height,
+                         width,
+                         model_name,
+                         remove_bg,
+                     )
+
+                 generate_button.click(
+                     fn=on_generate_click,
+                     inputs=[
+                         model_dropdown,
+                         prompt_box,
+                         step_slider,
+                         strength_slider,
+                         height_slider,
+                         width_slider,
+                         remove_bg_checkbox,
+                         input_image,
+                     ],
+                     outputs=[output_image],
+                 )
+
+
+ if __name__ == "__main__":
+     demo.launch()
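
A quick way to sanity-check the Baseten deployment independently of the Gradio UI is a standalone script that sends the same payload `call_baseten_generate` builds. Below is a minimal sketch, assuming the same `URL` and `API_KEY` environment variables and the `{"generated_image": <base64 PNG>}` response shape used in this diff; the step count, model name, timeout, and output path are illustrative, not part of this PR.

```python
# smoke_test.py -- exercise the Baseten /predict endpoint outside Gradio.
import base64
import io
import os

import requests
from PIL import Image

url = os.environ["URL"]          # raises KeyError if the env var is unset
api_key = os.environ["API_KEY"]

# Build a tiny in-memory test image so the script has no file dependencies.
img = Image.new("RGB", (512, 512), color="white")
buf = io.BytesIO()
img.save(buf, format="PNG")

# Same payload keys as call_baseten_generate in app.py.
payload = {
    "image": base64.b64encode(buf.getvalue()).decode("utf-8"),
    "prompt": "A detailed portrait with soft lighting",
    "steps": 28,
    "strength": 1.2,
    "height": 512,
    "width": 512,
    "lora_name": "subject_99000_512",
    "bgrm": False,
}

resp = requests.post(
    url,
    headers={"Authorization": f"Api-Key {api_key}"},
    json=payload,
    timeout=300,
)
resp.raise_for_status()

gen_b64 = resp.json().get("generated_image")
if gen_b64:
    Image.open(io.BytesIO(base64.b64decode(gen_b64))).save("out.png")
    print("Saved out.png")
else:
    print("No image in response:", resp.json())
```

If this round-trips successfully, any failure seen in the Space is likely in the UI wiring rather than the endpoint or credentials.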