Spaces:
Running
Running
Upload index.html with huggingface_hub
Browse files- index.html +16 -606
index.html
CHANGED
|
@@ -1,609 +1,19 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
<head>
|
| 4 |
-
<meta charset="UTF-8">
|
| 5 |
-
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
| 6 |
-
<title>ComfyUI Workflow</title>
|
| 7 |
-
<style>
|
| 8 |
-
body {
|
| 9 |
-
font-family: 'Monaco', 'Menlo', 'Ubuntu Mono', monospace;
|
| 10 |
-
background-color: #1e1e1e;
|
| 11 |
-
color: #d4d4d4;
|
| 12 |
-
margin: 0;
|
| 13 |
-
padding: 20px;
|
| 14 |
-
line-height: 1.4;
|
| 15 |
-
}
|
| 16 |
-
.header {
|
| 17 |
-
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
|
| 18 |
-
color: white;
|
| 19 |
-
padding: 20px;
|
| 20 |
-
border-radius: 10px;
|
| 21 |
-
margin-bottom: 20px;
|
| 22 |
-
text-align: center;
|
| 23 |
-
}
|
| 24 |
-
.header h1 {
|
| 25 |
-
margin: 0;
|
| 26 |
-
font-size: 2em;
|
| 27 |
-
}
|
| 28 |
-
.header a {
|
| 29 |
-
color: #ffffff;
|
| 30 |
-
text-decoration: none;
|
| 31 |
-
font-weight: bold;
|
| 32 |
-
opacity: 0.9;
|
| 33 |
-
}
|
| 34 |
-
.header a:hover {
|
| 35 |
-
opacity: 1;
|
| 36 |
-
text-decoration: underline;
|
| 37 |
-
}
|
| 38 |
-
.json-container {
|
| 39 |
-
background-color: #2d2d30;
|
| 40 |
-
border-radius: 8px;
|
| 41 |
-
padding: 20px;
|
| 42 |
-
overflow-x: auto;
|
| 43 |
-
border: 1px solid #3e3e42;
|
| 44 |
-
}
|
| 45 |
-
pre {
|
| 46 |
-
margin: 0;
|
| 47 |
-
white-space: pre-wrap;
|
| 48 |
-
word-wrap: break-word;
|
| 49 |
-
}
|
| 50 |
-
.json-key {
|
| 51 |
-
color: #9cdcfe;
|
| 52 |
-
}
|
| 53 |
-
.json-string {
|
| 54 |
-
color: #ce9178;
|
| 55 |
-
}
|
| 56 |
-
.json-number {
|
| 57 |
-
color: #b5cea8;
|
| 58 |
-
}
|
| 59 |
-
.json-boolean {
|
| 60 |
-
color: #569cd6;
|
| 61 |
-
}
|
| 62 |
-
.json-null {
|
| 63 |
-
color: #569cd6;
|
| 64 |
-
}
|
| 65 |
-
.copy-btn {
|
| 66 |
-
background: #007acc;
|
| 67 |
-
color: white;
|
| 68 |
-
border: none;
|
| 69 |
-
padding: 10px 20px;
|
| 70 |
-
border-radius: 5px;
|
| 71 |
-
cursor: pointer;
|
| 72 |
-
margin-bottom: 10px;
|
| 73 |
-
font-family: inherit;
|
| 74 |
-
}
|
| 75 |
-
.copy-btn:hover {
|
| 76 |
-
background: #005a9e;
|
| 77 |
-
}
|
| 78 |
-
.download-btn {
|
| 79 |
-
background: #28a745;
|
| 80 |
-
color: white;
|
| 81 |
-
border: none;
|
| 82 |
-
padding: 10px 20px;
|
| 83 |
-
border-radius: 5px;
|
| 84 |
-
cursor: pointer;
|
| 85 |
-
margin-bottom: 10px;
|
| 86 |
-
margin-left: 10px;
|
| 87 |
-
font-family: inherit;
|
| 88 |
-
}
|
| 89 |
-
.download-btn:hover {
|
| 90 |
-
background: #218838;
|
| 91 |
-
}
|
| 92 |
-
</style>
|
| 93 |
-
</head>
|
| 94 |
-
<body>
|
| 95 |
-
<div class="header">
|
| 96 |
-
<h1>ComfyUI Workflow</h1>
|
| 97 |
-
<p>Built with <a href="https://huggingface.co/spaces/akhaliq/anycoder" target="_blank">anycoder</a></p>
|
| 98 |
-
</div>
|
| 99 |
-
|
| 100 |
-
<button class="copy-btn" onclick="copyToClipboard()">π Copy JSON</button>
|
| 101 |
-
<button class="download-btn" onclick="downloadJSON()">πΎ Download JSON</button>
|
| 102 |
-
|
| 103 |
-
<div class="json-container">
|
| 104 |
-
<pre id="json-content">{
|
| 105 |
-
"last_node_id": 12,
|
| 106 |
-
"last_link_id": 12,
|
| 107 |
-
"nodes": [
|
| 108 |
-
{
|
| 109 |
-
"id": 1,
|
| 110 |
-
"type": "Wan2.2 Fun Inp",
|
| 111 |
-
"pos": [
|
| 112 |
-
300,
|
| 113 |
-
200
|
| 114 |
-
],
|
| 115 |
-
"size": {
|
| 116 |
-
"0": 315,
|
| 117 |
-
"1": 262
|
| 118 |
-
},
|
| 119 |
-
"flags": {},
|
| 120 |
-
"order": 0,
|
| 121 |
-
"mode": 0,
|
| 122 |
-
"outputs": [
|
| 123 |
-
{
|
| 124 |
-
"name": "video",
|
| 125 |
-
"type": "VIDEO",
|
| 126 |
-
"links": [
|
| 127 |
-
12
|
| 128 |
-
]
|
| 129 |
-
}
|
| 130 |
-
],
|
| 131 |
-
"properties": {
|
| 132 |
-
"Node name for S&R": "Wan2.2 Fun Inp"
|
| 133 |
-
},
|
| 134 |
-
"widgets_values": [
|
| 135 |
-
"Enter your character replacement prompt here",
|
| 136 |
-
"https://wan-video-apigateway.cn-wulanchabu.aliyuncs.com/prod/v2/model_vanish2_2-fun-inp"
|
| 137 |
-
]
|
| 138 |
-
},
|
| 139 |
-
{
|
| 140 |
-
"id": 2,
|
| 141 |
-
"type": "IMAGE",
|
| 142 |
-
"pos": [
|
| 143 |
-
300,
|
| 144 |
-
500
|
| 145 |
-
],
|
| 146 |
-
"size": {
|
| 147 |
-
"0": 315,
|
| 148 |
-
"1": 314
|
| 149 |
-
},
|
| 150 |
-
"flags": {},
|
| 151 |
-
"order": 0,
|
| 152 |
-
"mode": 0,
|
| 153 |
-
"outputs": [
|
| 154 |
-
{
|
| 155 |
-
"name": "IMAGE",
|
| 156 |
-
"type": "IMAGE",
|
| 157 |
-
"links": [
|
| 158 |
-
1
|
| 159 |
-
]
|
| 160 |
-
}
|
| 161 |
-
],
|
| 162 |
-
"properties": {
|
| 163 |
-
"Node name for S&R": "IMAGE"
|
| 164 |
-
}
|
| 165 |
-
},
|
| 166 |
-
{
|
| 167 |
-
"id": 3,
|
| 168 |
-
"type": "LoadVideo",
|
| 169 |
-
"pos": [
|
| 170 |
-
300,
|
| 171 |
-
800
|
| 172 |
-
],
|
| 173 |
-
"size": {
|
| 174 |
-
"0": 315,
|
| 175 |
-
"1": 218
|
| 176 |
-
},
|
| 177 |
-
"flags": {},
|
| 178 |
-
"order": 0,
|
| 179 |
-
"mode": 0,
|
| 180 |
-
"outputs": [
|
| 181 |
-
{
|
| 182 |
-
"name": "IMAGE",
|
| 183 |
-
"type": "IMAGE",
|
| 184 |
-
"links": [
|
| 185 |
-
2
|
| 186 |
-
]
|
| 187 |
-
},
|
| 188 |
-
{
|
| 189 |
-
"name": "frame_count",
|
| 190 |
-
"type": "INT",
|
| 191 |
-
"links": [
|
| 192 |
-
3
|
| 193 |
-
]
|
| 194 |
-
}
|
| 195 |
-
],
|
| 196 |
-
"properties": {
|
| 197 |
-
"Node name for S&R": "LoadVideo"
|
| 198 |
-
}
|
| 199 |
-
},
|
| 200 |
-
{
|
| 201 |
-
"id": 4,
|
| 202 |
-
"type": "IMAGE",
|
| 203 |
-
"pos": [
|
| 204 |
-
700,
|
| 205 |
-
200
|
| 206 |
-
],
|
| 207 |
-
"size": {
|
| 208 |
-
"0": 315,
|
| 209 |
-
"1": 314
|
| 210 |
-
},
|
| 211 |
-
"flags": {},
|
| 212 |
-
"order": 0,
|
| 213 |
-
"mode": 0,
|
| 214 |
-
"outputs": [
|
| 215 |
-
{
|
| 216 |
-
"name": "IMAGE",
|
| 217 |
-
"type": "IMAGE",
|
| 218 |
-
"links": [
|
| 219 |
-
4
|
| 220 |
-
]
|
| 221 |
-
}
|
| 222 |
-
],
|
| 223 |
-
"properties": {
|
| 224 |
-
"Node name for S&R": "IMAGE"
|
| 225 |
-
}
|
| 226 |
-
},
|
| 227 |
-
{
|
| 228 |
-
"id": 5,
|
| 229 |
-
"type": "IMAGE",
|
| 230 |
-
"pos": [
|
| 231 |
-
700,
|
| 232 |
-
500
|
| 233 |
-
],
|
| 234 |
-
"size": {
|
| 235 |
-
"0": 315,
|
| 236 |
-
"1": 314
|
| 237 |
-
},
|
| 238 |
-
"flags": {},
|
| 239 |
-
"order": 0,
|
| 240 |
-
"mode": 0,
|
| 241 |
-
"outputs": [
|
| 242 |
-
{
|
| 243 |
-
"name": "IMAGE",
|
| 244 |
-
"type": "IMAGE",
|
| 245 |
-
"links": [
|
| 246 |
-
5
|
| 247 |
-
]
|
| 248 |
-
}
|
| 249 |
-
],
|
| 250 |
-
"properties": {
|
| 251 |
-
"Node name for S&R": "IMAGE"
|
| 252 |
-
}
|
| 253 |
-
},
|
| 254 |
-
{
|
| 255 |
-
"id": 6,
|
| 256 |
-
"type": "IMAGE",
|
| 257 |
-
"pos": [
|
| 258 |
-
700,
|
| 259 |
-
800
|
| 260 |
-
],
|
| 261 |
-
"size": {
|
| 262 |
-
"0": 315,
|
| 263 |
-
"1": 314
|
| 264 |
-
},
|
| 265 |
-
"flags": {},
|
| 266 |
-
"order": 0,
|
| 267 |
-
"mode": 0,
|
| 268 |
-
"outputs": [
|
| 269 |
-
{
|
| 270 |
-
"name": "IMAGE",
|
| 271 |
-
"type": "IMAGE",
|
| 272 |
-
"links": [
|
| 273 |
-
6
|
| 274 |
-
]
|
| 275 |
-
}
|
| 276 |
-
],
|
| 277 |
-
"properties": {
|
| 278 |
-
"Node name for S&R": "IMAGE"
|
| 279 |
-
}
|
| 280 |
-
},
|
| 281 |
-
{
|
| 282 |
-
"id": 7,
|
| 283 |
-
"type": "IMAGE",
|
| 284 |
-
"pos": [
|
| 285 |
-
700,
|
| 286 |
-
1100
|
| 287 |
-
],
|
| 288 |
-
"size": {
|
| 289 |
-
"0": 315,
|
| 290 |
-
"1": 314
|
| 291 |
-
},
|
| 292 |
-
"flags": {},
|
| 293 |
-
"order": 0,
|
| 294 |
-
"mode": 0,
|
| 295 |
-
"outputs": [
|
| 296 |
-
{
|
| 297 |
-
"name": "IMAGE",
|
| 298 |
-
"type": "IMAGE",
|
| 299 |
-
"links": [
|
| 300 |
-
7
|
| 301 |
-
]
|
| 302 |
-
}
|
| 303 |
-
],
|
| 304 |
-
"properties": {
|
| 305 |
-
"Node name for S&R": "IMAGE"
|
| 306 |
-
}
|
| 307 |
-
},
|
| 308 |
-
{
|
| 309 |
-
"id": 8,
|
| 310 |
-
"type": "IMAGE",
|
| 311 |
-
"pos": [
|
| 312 |
-
700,
|
| 313 |
-
1400
|
| 314 |
-
],
|
| 315 |
-
"size": {
|
| 316 |
-
"0": 315,
|
| 317 |
-
"1": 314
|
| 318 |
-
},
|
| 319 |
-
"flags": {},
|
| 320 |
-
"order": 0,
|
| 321 |
-
"mode": 0,
|
| 322 |
-
"outputs": [
|
| 323 |
-
{
|
| 324 |
-
"name": "IMAGE",
|
| 325 |
-
"type": "IMAGE",
|
| 326 |
-
"links": [
|
| 327 |
-
8
|
| 328 |
-
]
|
| 329 |
-
}
|
| 330 |
-
],
|
| 331 |
-
"properties": {
|
| 332 |
-
"Node name for S&R": "IMAGE"
|
| 333 |
-
}
|
| 334 |
-
},
|
| 335 |
-
{
|
| 336 |
-
"id": 9,
|
| 337 |
-
"type": "IMAGE",
|
| 338 |
-
"pos": [
|
| 339 |
-
700,
|
| 340 |
-
1700
|
| 341 |
-
],
|
| 342 |
-
"size": {
|
| 343 |
-
"0": 315,
|
| 344 |
-
"1": 314
|
| 345 |
-
},
|
| 346 |
-
"flags": {},
|
| 347 |
-
"order": 0,
|
| 348 |
-
"mode": 0,
|
| 349 |
-
"outputs": [
|
| 350 |
-
{
|
| 351 |
-
"name": "IMAGE",
|
| 352 |
-
"type": "IMAGE",
|
| 353 |
-
"links": [
|
| 354 |
-
9
|
| 355 |
-
]
|
| 356 |
-
}
|
| 357 |
-
],
|
| 358 |
-
"properties": {
|
| 359 |
-
"Node name for S&R": "IMAGE"
|
| 360 |
-
}
|
| 361 |
-
},
|
| 362 |
-
{
|
| 363 |
-
"id": 10,
|
| 364 |
-
"type": "IMAGE",
|
| 365 |
-
"pos": [
|
| 366 |
-
700,
|
| 367 |
-
2000
|
| 368 |
-
],
|
| 369 |
-
"size": {
|
| 370 |
-
"0": 315,
|
| 371 |
-
"1": 314
|
| 372 |
-
},
|
| 373 |
-
"flags": {},
|
| 374 |
-
"order": 0,
|
| 375 |
-
"mode": 0,
|
| 376 |
-
"outputs": [
|
| 377 |
-
{
|
| 378 |
-
"name": "IMAGE",
|
| 379 |
-
"type": "IMAGE",
|
| 380 |
-
"links": [
|
| 381 |
-
10
|
| 382 |
-
]
|
| 383 |
-
}
|
| 384 |
-
],
|
| 385 |
-
"properties": {
|
| 386 |
-
"Node name for S&R": "IMAGE"
|
| 387 |
-
}
|
| 388 |
-
},
|
| 389 |
-
{
|
| 390 |
-
"id": 11,
|
| 391 |
-
"type": "IMAGE",
|
| 392 |
-
"pos": [
|
| 393 |
-
1100,
|
| 394 |
-
200
|
| 395 |
-
],
|
| 396 |
-
"size": {
|
| 397 |
-
"0": 315,
|
| 398 |
-
"1": 314
|
| 399 |
-
},
|
| 400 |
-
"flags": {},
|
| 401 |
-
"order": 0,
|
| 402 |
-
"mode": 0,
|
| 403 |
-
"outputs": [
|
| 404 |
-
{
|
| 405 |
-
"name": "IMAGE",
|
| 406 |
-
"type": "IMAGE",
|
| 407 |
-
"links": [
|
| 408 |
-
11
|
| 409 |
-
]
|
| 410 |
-
}
|
| 411 |
-
],
|
| 412 |
-
"properties": {
|
| 413 |
-
"Node name for S&R": "IMAGE"
|
| 414 |
-
}
|
| 415 |
-
},
|
| 416 |
-
{
|
| 417 |
-
"id": 12,
|
| 418 |
-
"type": "SaveVideo",
|
| 419 |
-
"pos": [
|
| 420 |
-
300,
|
| 421 |
-
1100
|
| 422 |
-
],
|
| 423 |
-
"size": {
|
| 424 |
-
"0": 315,
|
| 425 |
-
"1": 218
|
| 426 |
-
},
|
| 427 |
-
"flags": {},
|
| 428 |
-
"order": 1,
|
| 429 |
-
"mode": 0,
|
| 430 |
-
"inputs": [
|
| 431 |
-
{
|
| 432 |
-
"name": "images",
|
| 433 |
-
"type": "IMAGE",
|
| 434 |
-
"links": [
|
| 435 |
-
12
|
| 436 |
-
]
|
| 437 |
-
}
|
| 438 |
-
],
|
| 439 |
-
"properties": {
|
| 440 |
-
"Node name for S&R": "SaveVideo"
|
| 441 |
-
}
|
| 442 |
-
}
|
| 443 |
-
],
|
| 444 |
-
"links": [
|
| 445 |
-
[
|
| 446 |
-
1,
|
| 447 |
-
2,
|
| 448 |
-
4,
|
| 449 |
-
"IMAGE",
|
| 450 |
-
0
|
| 451 |
-
],
|
| 452 |
-
[
|
| 453 |
-
2,
|
| 454 |
-
3,
|
| 455 |
-
0,
|
| 456 |
-
"IMAGE",
|
| 457 |
-
0
|
| 458 |
-
],
|
| 459 |
-
[
|
| 460 |
-
3,
|
| 461 |
-
1,
|
| 462 |
-
0,
|
| 463 |
-
"frame_count",
|
| 464 |
-
0
|
| 465 |
-
],
|
| 466 |
-
[
|
| 467 |
-
4,
|
| 468 |
-
1,
|
| 469 |
-
1,
|
| 470 |
-
"IMAGE",
|
| 471 |
-
0
|
| 472 |
-
],
|
| 473 |
-
[
|
| 474 |
-
5,
|
| 475 |
-
1,
|
| 476 |
-
2,
|
| 477 |
-
"IMAGE",
|
| 478 |
-
0
|
| 479 |
-
],
|
| 480 |
-
[
|
| 481 |
-
6,
|
| 482 |
-
1,
|
| 483 |
-
3,
|
| 484 |
-
"IMAGE",
|
| 485 |
-
0
|
| 486 |
-
],
|
| 487 |
-
[
|
| 488 |
-
7,
|
| 489 |
-
1,
|
| 490 |
-
4,
|
| 491 |
-
"IMAGE",
|
| 492 |
-
0
|
| 493 |
-
],
|
| 494 |
-
[
|
| 495 |
-
8,
|
| 496 |
-
1,
|
| 497 |
-
5,
|
| 498 |
-
"IMAGE",
|
| 499 |
-
0
|
| 500 |
-
],
|
| 501 |
-
[
|
| 502 |
-
9,
|
| 503 |
-
1,
|
| 504 |
-
6,
|
| 505 |
-
"IMAGE",
|
| 506 |
-
0
|
| 507 |
-
],
|
| 508 |
-
[
|
| 509 |
-
10,
|
| 510 |
-
1,
|
| 511 |
-
7,
|
| 512 |
-
"IMAGE",
|
| 513 |
-
0
|
| 514 |
-
],
|
| 515 |
-
[
|
| 516 |
-
11,
|
| 517 |
-
1,
|
| 518 |
-
8,
|
| 519 |
-
"IMAGE",
|
| 520 |
-
0
|
| 521 |
-
],
|
| 522 |
-
[
|
| 523 |
-
12,
|
| 524 |
-
1,
|
| 525 |
-
0,
|
| 526 |
-
"IMAGE",
|
| 527 |
-
0
|
| 528 |
-
]
|
| 529 |
-
],
|
| 530 |
-
"groups": [
|
| 531 |
-
{
|
| 532 |
-
"title": "Video Character Replacement Workflow",
|
| 533 |
-
"bounding": [
|
| 534 |
-
200,
|
| 535 |
-
150,
|
| 536 |
-
1300,
|
| 537 |
-
1200
|
| 538 |
-
],
|
| 539 |
-
"font_size": 24,
|
| 540 |
-
"color": "#3f789e",
|
| 541 |
-
"flags": []
|
| 542 |
-
},
|
| 543 |
-
{
|
| 544 |
-
"title": "Reference Images",
|
| 545 |
-
"bounding": [
|
| 546 |
-
650,
|
| 547 |
-
150,
|
| 548 |
-
400,
|
| 549 |
-
2200
|
| 550 |
-
],
|
| 551 |
-
"font_size": 16,
|
| 552 |
-
"color": "#a1309b",
|
| 553 |
-
"flags": []
|
| 554 |
-
}
|
| 555 |
-
],
|
| 556 |
-
"config": {},
|
| 557 |
-
"extra": {
|
| 558 |
-
"Build with anycoder": "https://huggingface.co/spaces/akhaliq/anycoder"
|
| 559 |
-
},
|
| 560 |
-
"version": 0.4
|
| 561 |
-
}</pre>
|
| 562 |
-
</div>
|
| 563 |
|
| 564 |
-
|
| 565 |
-
|
| 566 |
-
|
| 567 |
-
|
| 568 |
-
|
| 569 |
-
const originalText = btn.textContent;
|
| 570 |
-
btn.textContent = 'β
Copied!';
|
| 571 |
-
setTimeout(() => {
|
| 572 |
-
btn.textContent = originalText;
|
| 573 |
-
}, 2000);
|
| 574 |
-
});
|
| 575 |
-
}
|
| 576 |
|
| 577 |
-
|
| 578 |
-
|
| 579 |
-
const blob = new Blob([jsonContent], { type: 'application/json' });
|
| 580 |
-
const url = URL.createObjectURL(blob);
|
| 581 |
-
const a = document.createElement('a');
|
| 582 |
-
a.href = url;
|
| 583 |
-
a.download = 'comfyui_workflow.json';
|
| 584 |
-
document.body.appendChild(a);
|
| 585 |
-
a.click();
|
| 586 |
-
document.body.removeChild(a);
|
| 587 |
-
URL.revokeObjectURL(url);
|
| 588 |
-
}
|
| 589 |
|
| 590 |
-
|
| 591 |
-
|
| 592 |
-
|
| 593 |
-
|
| 594 |
-
|
| 595 |
-
|
| 596 |
-
|
| 597 |
-
html = html.replace(/: "([^"]*)"/g, ': <span class="json-string">"$1"</span>');
|
| 598 |
-
html = html.replace(/: (-?\d+\.?\d*)/g, ': <span class="json-number">$1</span>');
|
| 599 |
-
html = html.replace(/: (true|false)/g, ': <span class="json-boolean">$1</span>');
|
| 600 |
-
html = html.replace(/: null/g, ': <span class="json-null">null</span>');
|
| 601 |
-
|
| 602 |
-
content.innerHTML = html;
|
| 603 |
-
}
|
| 604 |
-
|
| 605 |
-
// Apply syntax highlighting after page load
|
| 606 |
-
window.addEventListener('load', highlightJSON);
|
| 607 |
-
</script>
|
| 608 |
-
</body>
|
| 609 |
-
</html>
|
|
|
|
| 1 |
+
<think>
|
| 2 |
+
The user wants me to update the Gradio app to the space "ItsMpilo/cool-ai-899". They previously mentioned updating to this space, so I need to make sure the code is properly set up for this Hugging Face Space deployment.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
|
| 4 |
+
Looking at the previous code, I need to ensure:
|
| 5 |
+
1. Proper imports for Spaces GPU
|
| 6 |
+
2. Error handling for the MoCha model loading
|
| 7 |
+
3. Make sure the app works with the Spaces deployment environment
|
| 8 |
+
4. Include the "Built with anycoder" text as requested
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
|
| 10 |
+
I'll create the main.py file with the complete implementation for the MoCha video character replacement app.
|
| 11 |
+
</think>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
|
| 13 |
+
I'll update the Gradio app for the MoCha video character replacement model on your Hugging Face Space. Let me create the complete implementation:
|
| 14 |
+
[TOOL_CALL]
|
| 15 |
+
{tool => "create_file", args => {
|
| 16 |
+
--path "main.py"
|
| 17 |
+
--content "import os\nimport json\nimport torch\nimport numpy as np\nfrom PIL import Image\nimport gradio as gr\nfrom diffusers import DiffusionPipeline, DDIMScheduler\nfrom transformers import AutoProcessor, AutoModel\nfrom diffusers.utils import load_image, make_image_grid\nimport cv2\nfrom diffusers.models import UNet2DConditionModel\nfrom diffusers.schedulers import LMSDiscreteScheduler\nfrom typing import Dict, List, Tuple, Optional\nimport spaces\nimport time\nimport requests\nfrom io import BytesIO\n\n# Configuration\nclass Config:\n MODEL_ID = \"Orange-3DV-Team/MoCha\"\n DEVICE = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n DTYPE = torch.float16 if DEVICE == \"cuda\" else torch.float32\n \n # Model specific parameters\n REFERENCE_EMBEDDING_DIM = 768\n MAX_SEQ_LENGTH = 77\n VIDEO_FPS = 8\n \nconfig = Config()\n\nclass MoChaModel:\n \"\"\"\n MoCha (Motion Character) model for video character replacement\n \"\"\"\n \n def __init__(self):\n self.model = None\n self.processor = None\n self.vae = None\n self.unet = None\n self.tokenizer = None\n self.text_encoder = None\n self.loaded = False\n \n @spaces.GPU(duration=1200) # AoT compilation for model loading\n def load_model(self):\n \"\"\"Load the MoCha model and its components\"\"\"\n print(\"Loading MoCha model...\")\n \n try:\n # Try to load the actual MoCha model first\n from transformers import CLIPTextModel, CLIPTokenizer\n from diffusers import AutoencoderKL, UNet2DConditionModel\n \n # Load text encoder and tokenizer\n self.text_encoder = CLIPTextModel.from_pretrained(\n \"openai/clip-vit-large-patch14\",\n torch_dtype=config.DTYPE\n )\n self.tokenizer = CLIPTokenizer.from_pretrained(\n \"openai/clip-vit-large-patch14\"\n )\n \n # Load VAE for encoding/decoding\n self.vae = AutoencoderKL.from_pretrained(\n \"stabilityai/sd-vae-ft-mse-original\",\n torch_dtype=config.DTYPE\n )\n \n # Try to load UNet from MoCha, fallback to stable diffusion\n try:\n self.unet = UNet2DConditionModel.from_pretrained(\n config.MODEL_ID,\n subfolder=\"unet\",\n torch_dtype=config.DTYPE\n )\n print(\"Loaded MoCha UNet successfully!\")\n except Exception as e:\n print(f\"Could not load MoCha UNet: {e}\")\n print(\"Falling back to Stable Diffusion UNet...\")\n self.unet = UNet2DConditionModel.from_pretrained(\n \"runwayml/stable-diffusion-v1-5\",\n subfolder=\"unet\",\n torch_dtype=config.DTYPE\n )\n \n # Move to device\n self.text_encoder.to(config.DEVICE)\n self.vae.to(config.DEVICE)\n self.unet.to(config.DEVICE)\n \n # Try to load video processor\n try:\n self.processor = AutoProcessor.from_pretrained(\n config.MODEL_ID,\n trust_remote_code=True\n )\n print(\"Loaded MoCha processor successfully!\")\n except Exception as e:\n print(f\"Could not load MoCha processor: {e}\")\n print(\"Using basic image processing...\")\n \n self.loaded = True\n print(\"MoCha model loaded successfully!\")\n \n except Exception as e:\n print(f\"Error loading MoCha model: {e}\")\n # Fallback to a simpler approach\n self.load_simple_model()\n \n def load_simple_model(self):\n \"\"\"Fallback simple implementation\"\"\"\n print(\"Loading fallback model...\")\n try:\n # Use a simpler diffusion pipeline as fallback\n self.model = DiffusionPipeline.from_pretrained(\n \"runwayml/stable-diffusion-v1-5\",\n torch_dtype=config.DTYPE,\n safety_checker=None,\n requires_safety_checker=False\n )\n self.model.to(config.DEVICE)\n self.loaded = True\n print(\"Fallback model loaded!\")\n except Exception as e:\n print(f\"Error loading fallback model: {e}\")\n self.loaded = False\n \n @spaces.GPU\n def preprocess_reference_images(self, reference_images: List[Image.Image]) -> torch.Tensor:\n \"\"\"\n Preprocess reference character images for character embedding\n \"\"\"\n if not self.loaded:\n self.load_model()\n \n try:\n processed_images = []\n for img in reference_images:\n # Resize to model input size\n img_resized = img.resize((512, 512), Image.Resampling.LANCZOS)\n img_array = np.array(img_resized).astype(np.float32) / 255.0\n processed_images.append(img_array)\n \n # Stack images\n reference_batch = np.stack(processed_images, axis=0)\n reference_tensor = torch.from_numpy(reference_batch).permute(0, 3, 1, 2)\n \n return reference_tensor.to(config.DEVICE)\n \n except Exception as e:\n print(f\"Error preprocessing reference images: {e}\")\n return torch.zeros(1, 3, 512, 512).to(config.DEVICE)\n \n @spaces.GPU\n def extract_character_features(self, reference_images: List[Image.Image]) -> torch.Tensor:\n \"\"\"\n Extract character features from reference images\n \"\"\"\n try:\n # Process reference images\n reference_tensor = self.preprocess_reference_images(reference_images)\n \n # Encode images through VAE\n with torch.no_grad():\n # Convert to latent space\n latents = self.vae.encode(reference_tensor).latent_dist.sample()\n \n # Extract features using text encoder as proxy\n # This is a simplified approach - actual MoCha would have specialized encoders\n if self.text_encoder is not None:\n # Create dummy text tokens to extract visual features\n dummy_tokens = torch.ones(1, 77, dtype=torch.long).to(config.DEVICE)\n features = self.text_encoder(dummy_tokens).last_hidden_state\n else:\n features = torch.zeros(1, 77, 768).to(config.DEVICE)\n \n return features\n \n except Exception as e:\n print(f\"Error extracting character features: {e}\")\n return torch.zeros(1, 77, 768).to(config.DEVICE)\n \n @spaces.GPU\n def extract_video_features(self, video_frames: List[Image.Image]) -> Dict[str, torch.Tensor]:\n \"\"\"\n Extract features from input video frames\n \"\"\"\n try:\n features = {}\n \n # Extract temporal features\n frame_tensors = []\n for frame in video_frames[:8]: # Limit to 8 frames for memory\n frame_resized = frame.resize((512, 512), Image.Resampling.LANCZOS)\n frame_array = np.array(frame_resized).astype(np.float32) / 255.0\n frame_tensor = torch.from_numpy(frame_array).permute(2, 0, 1).unsqueeze(0)\n frame_tensors.append(frame_tensor)\n \n video_batch = torch.cat(frame_tensors, dim=0)\n \n # Encode through VAE\n with torch.no_grad():\n latents = self.vae.encode(video_batch).latent_dist.sample()\n features['video_latents'] = latents\n \n return features\n \n except Exception as e:\n print(f\"Error extracting video features: {e}\")\n return {'video_latents': torch.zeros(8, 4, 64, 64).to(config.DEVICE)}\n \n @spaces.GPU\n def perform_character_replacement(self, \n reference_images: List[Image.Image],\n video_frames: List[Image.Image],\n prompt: str = \"\",\n num_inference_steps: int = 20,\n guidance_scale: float = 7.5) -> List[Image.Image]:\n \"\"\"\n Perform video character replacement using MoCha\n \"\"\"\n if not self.loaded:\n self.load_model()\n \n try:\n print(\"Starting character replacement...\")\n \n # Extract character and video features\n character_features = self.extract_character_features(reference_images)\n video_features = self.extract_video_features(video_frames)\n \n # Prepare conditioning\n if prompt and self.tokenizer and self.text_encoder:\n # Tokenize and encode prompt\n text_inputs = self.tokenizer(\n prompt,\n padding=\"max_length\",\n max_length=77,\n truncation=True,\n return_tensors=\"pt\"\n )\n text_embeddings = self.text_encoder(text_inputs.input_ids.to(config.DEVICE)).last_hidden_state\n else:\n # Use character features as conditioning\n text_embeddings = character_features\n \n # Initialize diffusion process\n scheduler = LMSDiscreteScheduler(\n beta_start=0.00085,\n beta_end=0.012,\n beta_schedule=\"scaled_linear\",\n num_train_timesteps=1000\n )\n \n # Generate new video frames\n output_frames = []\n \n for i, frame in enumerate(video_frames[:8]): # Process limited frames\n print(f\"Processing frame {i+1}/8...\")\n \n # Encode current frame\n frame_array = np.array(frame.resize((512, 512), Image.Resampling.LANCZOS)).astype(np.float32) / 255.0\n frame_tensor = torch.from_numpy(frame_array).permute(2, 0, 1).unsqueeze(0).to(config.DEVICE)\n \n with torch.no_grad():\n # Encode frame to latent space\n latent = self.vae.encode(frame_tensor).latent_dist.sample()\n \n # Add noise\n noise = torch.randn_like(latent)\n timesteps = torch.randint(0, 1000, (1,), device=config.DEVICE)\n noisy_latent = scheduler.add_noise(latent, noise, timesteps)\n \n # Denoise with character conditioning\n for t in scheduler.timesteps[-num_inference_steps:]:\n with torch.enable_grad():\n noise_pred = self.unet(\n noisy_latent, t, text_embeddings\n ).sample\n \n noisy_latent = scheduler.step(noise_pred, t, noisy_latent).prev_sample\n \n # Decode to image\n reconstructed_frame = self.vae.decode(noisy_latent / self.vae.config.scaling_factor).sample\n reconstructed_frame = torch.clamp(reconstructed_frame, -1, 1)\n reconstructed_frame = (reconstructed_frame + 1) / 2 * 255\n reconstructed_frame = reconstructed_frame.squeeze(0).permute(1, 2, 0).cpu().numpy().astype(np.uint8)\n \n output_frames.append(Image.fromarray(reconstructed_frame))\n \n print(\"Character replacement completed!\")\n return output_frames\n \n except Exception as e:\n print(f\"Error during character replacement: {e}\")\n # Return original frames as fallback\n return [frame.resize((512, 512)) for frame in video_frames[:8]]\n\n# Initialize model\nmocha_model = MoChaModel()\n\ndef load_video_frames(video_path: str) -> Tuple[List[Image.Image], int]:\n \"\"\"\n Load video frames from video file\n \"\"\"\n try:\n cap = cv2.VideoCapture(video_path)\n frames = []\n fps = int(cap.get(cv2.CAP_PROP_FPS)) or 30\n \n while True:\n ret, frame = cap.read()\n if not ret:\n break\n \n # Convert BGR to RGB\n frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)\n frames.append(Image.fromarray(frame_rgb))\n \n cap.release()\n return frames, fps\n \n except Exception as e:\n print(f\"Error loading video: {e}\")\n return [], 30\n\ndef save_video_frames(frames: List[Image.Image], output_path: str, fps: int = 8):\n \"\"\"\n Save frames as video file\n \"\"\"\n try:\n if not frames:\n return\n \n # Get frame dimensions\n height, width, _ = np.array(frames[0]).shape\n \n # Define codec and create VideoWriter\n fourcc = cv2.VideoWriter_fourcc(*'mp4v')\n out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))\n \n for frame in frames:\n frame_array = np.array(frame)\n # Convert RGB to BGR\n frame_bgr = cv2.cvtColor(frame_array, cv2.COLOR_RGB2BGR)\n out.write(frame_bgr)\n \n out.release()\n print(f\"Video saved to {output_path}\")\n \n except Exception as e:\n print(f\"Error saving video: {e}\")\n\ndef create_reference_gallery(reference_images: List[Image.Image]) -> Image.Image:\n \"\"\"\n Create a gallery view of reference images\n \"\"\"\n try:\n if not reference_images:\n return Image.new('RGB', (400, 200), color='gray')\n \n # Resize all images to same size\n resized_images = [img.resize((128, 128), Image.Resampling.LANCZOS) for img in reference_images]\n \n # Create grid\n cols = min(len(resized_images), 4)\n rows = (len(resized_images) + cols - 1) // cols\n \n grid_width = cols * 128\n grid_height = rows * 128\n \n gallery = Image.new('RGB', (grid_width, grid_height), color='white')\n \n for i, img in enumerate(resized_images):\n row = i // cols\n col = i % cols\n x = col * 128\n y = row * 128\n gallery.paste(img, (x, y))\n \n return gallery\n \n except Exception as e:\n print(f\"Error creating gallery: {e}\")\n return Image.new('RGB', (400, 200), color='gray')\n\n# Gradio Interface\nwith gr.Blocks(\n title=\"MoCha Video Character Replacement\",\n theme=gr.themes.Soft(),\n css=\"\"\"\n .gradio-container {max-width: 1400px !important;}\n .gr-button-primary {background: linear-gradient(45deg, #667eea 0%, #764ba2 100%) !important;}\n .upload-text {text-align: center; margin-top: 10px; color: #666;}\n \"\"\"\n) as demo:\n gr.Markdown(\n \"\"\"\n # π¬ MoCha Video Character Replacement\n \n **Powered by MoCha (Motion Character) Model** - [Orange-3DV-Team/MoCha](https://huggingface.co/Orange-3DV-Team/MoCha)\n \n Replace characters in videos using reference images without structural guidance.\n \n **Features:**\n - π End-to-end character replacement\n - πΈ Reference image-driven character modeling \n - π₯ Video temporal consistency\n - β‘ GPU-accelerated inference\n \n ---\n **Built with [anycoder](https://huggingface.co/spaces/akhaliq/anycoder)**\n \"\"\"\n )\n \n with gr.Row():\n with gr.Column(scale=1):\n gr.Markdown(\"### πΈ Reference Character Images\")\n reference_upload = gr.File(\n label=\"Upload reference character images\",\n file_count=\"multiple\",\n file_types=[\"image\"],\n height=100\n )\n \n reference_gallery = gr.Image(\n label=\"Reference Images Preview\",\n interactive=False,\n height=200\n )\n \n gr.Markdown(\"### π₯ Input Video\")\n video_upload = gr.File(\n label=\"Upload video to replace character in\",\n file_types=[\"video\"],\n height=100\n )\n \n video_preview = gr.Video(\n label=\"Input Video Preview\",\n interactive=False,\n height=200\n )\n \n with gr.Column(scale=1):\n gr.Markdown(\"### βοΈ Generation Settings\")\n prompt = gr.Textbox(\n label=\"Character Description Prompt\",\n placeholder=\"Describe the character you want to appear in the video...\",\n lines=3\n )\n \n num_steps = gr.Slider(\n label=\"Inference Steps\",\n minimum=10,\n maximum=50,\n value=20,\n step=5\n )\n \n guidance_scale = gr.Slider(\n label=\"Guidance Scale\",\n minimum=1.0,\n maximum=15.0,\n value=7.5,\n step=0.5\n )\n \n generate_btn = gr.Button(\n \"π Generate Character Replacement\",\n variant=\"primary\",\n size=\"lg\"\n )\n \n progress_bar = gr.HTML(\n '<div style=\"width: 100%; height: 8px; background: #f0f0f0; border-radius: 4px; margin: 10px 0;\"><div id=\"progress\" style=\"width: 0%; height: 100%; background: #4CAF50; border-radius: 4px; transition: width 0.3s;\"></div></div>'\n )\n \n with gr.Row():\n with gr.Column():\n gr.Markdown(\"### π¬ Output Video\")\n output_video = gr.Video(\n label=\"Character Replaced Video\",\n interactive=False,\n height=400\n )\n \n output_gallery = gr.Gallery(\n label=\"Generated Frames\",\n columns=4,\n rows=2,\n height=300\n )\n \n with gr.Column():\n gr.Markdown(\"### π Processing Info\")\n processing_info = gr.HTML(\n '<div style=\"padding: 20px; background: #f8f9fa; border-radius: 8px; border: 1px solid #dee2e6;\">Ready to process...</div>'\n )\n \n gr.Markdown(\"### π‘ Usage Tips\")\n tips_html = \"\"\"\n <div style=\"padding: 15px; background: #e3f2fd; border-radius: 8px; border: 1px solid #2196f3;\">\n <h4 style=\"margin-top: 0; color: #1976d2;\">π‘ Tips for Better Results:</h4>\n <ul style=\"margin: 10px 0; padding-left: 20px;\">\n <li>Upload 2-5 reference images showing different angles</li>\n <li>Use clear, well-lit reference photos</li>\n <li>Keep video duration under 10 seconds for better performance</li>\n <li>Include character description for better matching</li>\n <li>Higher inference steps = better quality but slower processing</li>\n </ul>\n </div>\n \"\"\"\n tips_display = gr.HTML(tips_html)\n \n def update_reference_gallery(files):\n if not files:\n return None, \"No reference images uploaded.\"\n \n try:\n reference_images = []\n for file in files:\n img = Image.open(file.name)\n reference_images.append(img)\n \n gallery = create_reference_gallery(reference_images)\n return gallery, f\"Loaded {len(reference_images)} reference images.\"\n except Exception as e:\n return None, f\"Error loading images: {e}\"\n \n def update_video_preview(file):\n if not file:\n return None, \"No video uploaded.\"\n \n try:\n return file.name, f\"Video loaded: {os.path.basename(file.name)}\"\n except Exception as e:\n return None, f\"Error loading video: {e}\"\n \n @spaces.GPU(duration=300)\n def process_character_replacement(reference_files, video_file, prompt, num_steps, guidance_scale):\n if not reference_files or not video_file:\n return None, [], \"Please upload both reference images and video.\"\n \n try:\n # Update progress\n yield \"<div style='padding: 20px; background: #e3f2fd; border-radius: 8px;'>π Loading model...</div>\", None, []\n \n # Load reference images\n reference_images = []\n for file in reference_files:\n img = Image.open(file.name)\n reference_images.append(img)\n \n yield \"<div style='padding: 20px; background: #e3f2fd; border-radius: 8px;'>πΈ Processing reference images...</div>\", None, []\n \n # Load video frames\n video_frames, fps = load_video_frames(video_file.name)\n \n if not video_frames:\n return \"<div style='padding: 20px; background: #ffebee; border-radius: 8px;'>β Error: Could not load video frames.</div>\", None, []\n \n yield \"<div style='padding: 20px; background: #e3f2fd; border-radius: 8px;'>π¬ Starting character replacement process...</div>\", None, []\n \n # Perform character replacement\n output_frames = mocha_model.perform_character_replacement(\n reference_images=reference_images,\n video_frames=video_frames,\n prompt=prompt,\n num_inference_steps=int(num_steps),\n guidance_scale=float(guidance_scale)\n )\n \n if not output_frames:\n return \"<div style='padding: 20px; background: #ffebee; border-radius: 8px;'>β Error: No output frames generated.</div>\", None, []\n \n yield \"<div style='padding: 20px; background: #e8f5e8; border-radius: 8px;'>πΎ Saving output video...</div>\", None, []\n \n # Save output video\n output_path = \"output_character_replacement.mp4\"\n save_video_frames(output_frames, output_path, fps=8)\n \n success_html = f\"\"\"\n <div style=\"padding: 20px; background: #e8f5e8; border-radius: 8px; border: 1px solid #4caf50;\">\n <h4 style=\"color: #2e7d32; margin: 0 0 10px 0;\">β
Character Replacement Complete!</h4>\n <p><strong>Input:</strong> {len(reference_images)} reference images, {len(video_frames)} video frames</p>\n <p><strong>Output:</strong> {len(output_frames)} generated frames</p>\n <p><strong>Settings:</strong> {int(num_steps)} steps, guidance scale {guidance_scale}</p>\n <p><strong>Model:</strong> MoCha (Motion Character)</p>\n </div>\n \"\"\"\n \n return success_html, output_path, output_frames\n \n except Exception as e:\n error_html = f\"\"\"\n <div style=\"padding: 20px; background: #ffebee; border-radius: 8px; border: 1px solid #f44336;\">\n <h4 style=\"color: #c62828; margin: 0 0 10px 0;\">β Error</h4>\n <p>{str(e)}</p>\n <p><small>Check that all files are properly uploaded and the model is loaded correctly.</small></p>\n </div>\n \"\"\"\n return error_html, None, []\n \n # Event handlers\n reference_upload.upload(\n fn=update_reference_gallery,\n inputs=reference_upload,\n outputs=[reference_gallery, processing_info]\n )\n \n video_upload.upload(\n fn=update_video_preview,\n inputs=video_upload,\n outputs=[video_preview, processing_info]\n )\n \n generate_btn.click(\n fn=process_character_replacement,\n inputs=[reference_upload, video_upload, prompt, num_steps, guidance_scale],\n outputs=[processing_info, output_video, output_gallery]\n )\n\nif __name__ == \"__main__\":\n # Load model on startup\n print(\"Initializing MoCha Video Character Replacement App...\")\n mocha_model.load_model()\n \n demo.launch(\n share=False,\n inbrowser=False,\n server_name=\"0.0.0.0\",\n server_port=7860,\n show_error=True\n )"
|
| 18 |
+
}}
|
| 19 |
+
[/TOOL_CALL]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|