Spaces:
Running
Running
Upload 9 files
Browse files- LICENSE +22 -0
- README.md +6 -7
- app.py +870 -0
- config.py +45 -0
- config.toml +186 -0
- prompt_generator.py +348 -0
- requirements.txt +14 -0
- style.css +224 -0
- utils.py +226 -0
LICENSE
ADDED
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
MIT License
|
2 |
+
|
3 |
+
Copyright (c) 2023 hysts
|
4 |
+
|
5 |
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6 |
+
of this software and associated documentation files (the "Software"), to deal
|
7 |
+
in the Software without restriction, including without limitation the rights
|
8 |
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9 |
+
copies of the Software, and to permit persons to whom the Software is
|
10 |
+
furnished to do so, subject to the following conditions:
|
11 |
+
|
12 |
+
The above copyright notice and this permission notice shall be included in all
|
13 |
+
copies or substantial portions of the Software.
|
14 |
+
|
15 |
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16 |
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17 |
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18 |
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19 |
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20 |
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21 |
+
SOFTWARE.
|
22 |
+
|
README.md
CHANGED
@@ -1,13 +1,12 @@
|
|
1 |
---
|
2 |
-
title: FanFic Illustrator
|
3 |
-
emoji:
|
4 |
-
colorFrom:
|
5 |
colorTo: blue
|
6 |
sdk: gradio
|
7 |
-
sdk_version:
|
8 |
app_file: app.py
|
9 |
-
|
|
|
10 |
short_description: Turn your fan stories into beautiful illustrations with AI
|
11 |
---
|
12 |
-
|
13 |
-
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
1 |
---
|
2 |
+
title: FanFic Illustrator
|
3 |
+
emoji: 🌍
|
4 |
+
colorFrom: green
|
5 |
colorTo: blue
|
6 |
sdk: gradio
|
7 |
+
sdk_version: 4.44.1
|
8 |
app_file: app.py
|
9 |
+
license: mit
|
10 |
+
pinned: true
|
11 |
short_description: Turn your fan stories into beautiful illustrations with AI
|
12 |
---
|
|
|
|
app.py
ADDED
@@ -0,0 +1,870 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import gc
|
3 |
+
import gradio as gr
|
4 |
+
import numpy as np
|
5 |
+
import torch
|
6 |
+
import json
|
7 |
+
import spaces
|
8 |
+
import random
|
9 |
+
import config
|
10 |
+
import utils
|
11 |
+
import logging
|
12 |
+
from PIL import Image, PngImagePlugin
|
13 |
+
from datetime import datetime
|
14 |
+
from diffusers.models import AutoencoderKL
|
15 |
+
from diffusers import StableDiffusionXLPipeline, StableDiffusionXLImg2ImgPipeline
|
16 |
+
from config import (
|
17 |
+
MODEL,
|
18 |
+
MIN_IMAGE_SIZE,
|
19 |
+
MAX_IMAGE_SIZE,
|
20 |
+
USE_TORCH_COMPILE,
|
21 |
+
ENABLE_CPU_OFFLOAD,
|
22 |
+
OUTPUT_DIR,
|
23 |
+
DEFAULT_NEGATIVE_PROMPT,
|
24 |
+
DEFAULT_ASPECT_RATIO,
|
25 |
+
sampler_list,
|
26 |
+
aspect_ratios,
|
27 |
+
style_list,
|
28 |
+
# 設定
|
29 |
+
TEXT_TO_PROMPT_ENABLED,
|
30 |
+
DEFAULT_CATEGORY,
|
31 |
+
DEFAULT_SERIES,
|
32 |
+
DEFAULT_CHARACTER,
|
33 |
+
series_list,
|
34 |
+
character_list,
|
35 |
+
category_list,
|
36 |
+
)
|
37 |
+
import time
|
38 |
+
from typing import List, Dict, Tuple, Optional
|
39 |
+
|
40 |
+
# Import the prompt-generation module only when the feature is enabled
# (it pulls in a large LLM backend).
if TEXT_TO_PROMPT_ENABLED:
    import prompt_generator

# Enhanced logging configuration
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S'
)
logger = logging.getLogger(__name__)

# Constants
IS_COLAB = utils.is_google_colab() or os.getenv("IS_COLAB") == "1"
HF_TOKEN = os.getenv("HF_TOKEN")
CACHE_EXAMPLES = torch.cuda.is_available() and os.getenv("CACHE_EXAMPLES") == "1"

# PyTorch settings for better performance and determinism
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
torch.backends.cuda.matmul.allow_tf32 = True

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
logger.info(f"Using device: {device}")

# Global pipeline handles; populated lazily by load_image_model().
pipe = None
vae = None

# Extract only the preset names from the configured style list.
style_names = [style["name"] for style in style_list]
def cleanup_old_images(output_dir, max_age_hours=1):
    """Delete old PNG files from a directory.

    Args:
        output_dir: Path of the directory holding generated images.
        max_age_hours: Files whose mtime is older than this many hours
            are removed.

    Returns:
        The number of files deleted (0 when the directory did not exist
        and was just created).
    """
    # Fix: removed the redundant function-local `import os`, `import time`
    # and the unused `from datetime import datetime` — os and time are
    # already imported at module level.
    logger.info(f"Cleaning up images older than {max_age_hours} hours in {output_dir}")
    current_time = time.time()
    max_age_seconds = max_age_hours * 60 * 60
    deleted_count = 0

    # Create the directory if it does not exist yet; nothing to clean then.
    if not os.path.exists(output_dir):
        os.makedirs(output_dir, exist_ok=True)
        return 0

    # Remove every PNG older than the cutoff; deletion failures are logged
    # but do not abort the sweep.
    for filename in os.listdir(output_dir):
        if filename.lower().endswith('.png'):
            file_path = os.path.join(output_dir, filename)
            file_age = current_time - os.path.getmtime(file_path)
            if file_age > max_age_seconds:
                try:
                    os.remove(file_path)
                    deleted_count += 1
                except Exception as e:
                    logger.error(f"Failed to delete {file_path}: {str(e)}")

    if deleted_count > 0:
        logger.info(f"Deleted {deleted_count} old image files from {output_dir}")
    return deleted_count
# Resolve the series-to-character linkage from the app configuration.
def get_character_series_mapping():
    """Return the series -> character-codes mapping from config.

    Falls back to an empty dict when the config section is missing or
    unreadable.
    """
    try:
        return config.config.get('text_to_prompt', {}).get('character_series_mapping', {})
    except Exception as e:
        logger.error(f"Failed to get character-series mapping: {str(e)}")
        return {}
# Split each configured series entry into an internal code and a display label.
def parse_series_list():
    """Parse `series_list` entries of the form "code|label".

    Returns:
        (code_to_label, display_entries) where display_entries are formatted
        as "code / label"; entries without a separator use the raw string for
        both code and label.
    """
    code_to_label = {}
    display_entries = []
    for entry in series_list:
        code, sep, label = entry.partition('|')
        if sep:
            code_to_label[code] = label
            display_entries.append(f"{code} / {label}")
        else:
            code_to_label[entry] = entry
            display_entries.append(entry)
    return code_to_label, display_entries
# Split each configured character entry into an internal code and a display label.
def parse_character_list():
    """Parse `character_list` entries of the form "code|label".

    Returns:
        (code_to_label, display_entries) where display_entries are formatted
        as "code / label"; entries without a separator use the raw string for
        both code and label.
    """
    code_to_label = {}
    display_entries = []
    for entry in character_list:
        code, sep, label = entry.partition('|')
        if sep:
            code_to_label[code] = label
            display_entries.append(f"{code} / {label}")
        else:
            code_to_label[entry] = entry
            display_entries.append(entry)
    return code_to_label, display_entries
# Categories are displayed in English only, so code and label coincide.
def parse_category_list():
    """Return (identity mapping, display list) for the configured categories."""
    category_lookup = {entry: entry for entry in category_list}
    return category_lookup, list(category_list)
# Build a reverse-lookup dictionary (value -> key).
def create_reverse_dict(original_dict):
    """Invert a mapping; later duplicate values overwrite earlier keys."""
    inverted = {}
    for key, value in original_dict.items():
        inverted[value] = key
    return inverted
# Map a UI display name back to its internal code.
def get_code_from_display(display_name, reverse_dict):
    """Resolve *display_name* to an internal code.

    Names of the form "code / label" yield the code before the separator;
    anything else is looked up in *reverse_dict*, falling back to the input
    unchanged.
    """
    code, sep, _label = display_name.partition(" / ")
    if sep:
        return code
    return reverse_dict.get(display_name, display_name)
# Build the lookup dictionaries and mappings once at import time; the UI
# callbacks below read these module-level tables.
series_dict, display_series_list = parse_series_list()
character_dict, display_character_list = parse_character_list()
category_dict, display_category_list = parse_category_list()
reverse_series_dict = create_reverse_dict(series_dict)
reverse_character_dict = create_reverse_dict(character_dict)
character_series_mapping = get_character_series_mapping()
# Fetch the character choices that belong to one series.
def get_characters_for_series(series_display_name):
    """Return display-formatted character entries for a series.

    Falls back to the full character list whenever the series cannot be
    resolved, has no mapped characters, or an unexpected error occurs.
    """
    try:
        # Translate the UI display name into the internal series code.
        code = get_code_from_display(series_display_name, reverse_series_dict)
        if not code:
            logger.warning(f"Unknown series: {series_display_name}")
            return display_character_list

        mapped_codes = character_series_mapping.get(code, [])
        if not mapped_codes:
            logger.warning(f"No characters found for series: {code}")
            return display_character_list

        # Format each character code as "code / label" for the dropdown.
        return [f"{c} / {character_dict.get(c, c)}" for c in mapped_codes]
    except Exception as e:
        logger.error(f"Error getting characters for series: {str(e)}")
        return display_character_list
class GenerationError(Exception):
    """Raised when prompt validation or image generation fails."""
def validate_prompt(prompt: str) -> str:
    """Validate and normalize the input prompt.

    Raises:
        GenerationError: if the input is not a string, cannot round-trip
            through UTF-8, or is empty/whitespace-only.

    Returns:
        The cleaned, stripped prompt text.
    """
    if not isinstance(prompt, str):
        raise GenerationError("Prompt must be a string")

    try:
        # Round-trip through UTF-8 to surface invalid characters early,
        # then insert a space between "!" and "," for downstream parsing.
        cleaned = prompt.encode('utf-8').decode('utf-8').replace("!,", "! ,")
    except UnicodeError:
        raise GenerationError("Invalid characters in prompt")

    # Reject prompts that carry no visible content at all.
    if not cleaned or cleaned.isspace():
        raise GenerationError("Prompt cannot be empty")
    return cleaned.strip()
def validate_dimensions(width: int, height: int) -> None:
    """Ensure both dimensions lie within [MIN_IMAGE_SIZE, MAX_IMAGE_SIZE].

    Raises:
        GenerationError: naming the offending dimension.
    """
    for label, value in (("Width", width), ("Height", height)):
        if not MIN_IMAGE_SIZE <= value <= MAX_IMAGE_SIZE:
            raise GenerationError(f"{label} must be between {MIN_IMAGE_SIZE} and {MAX_IMAGE_SIZE}")
def convert_text_to_prompt(
    novel_text: str,
    series_display_name: str = series_dict.get(DEFAULT_SERIES, DEFAULT_SERIES),
    character_display_name: str = character_dict.get(DEFAULT_CHARACTER, DEFAULT_CHARACTER),
    category: str = DEFAULT_CATEGORY,
) -> Tuple[str, str]:
    """Turn novel text into an image prompt via the LLM backend.

    Returns:
        (thinking, prompt): the model's reasoning trace and the generated
        prompt. When the feature is disabled or generation fails, the first
        element is a user-facing message and the original text is echoed back.
    """
    if not TEXT_TO_PROMPT_ENABLED:
        return "Text to Prompt機能は無効になっています", novel_text

    # Translate UI display names back into internal codes.
    series_code = get_code_from_display(series_display_name, reverse_series_dict)
    character_code = get_code_from_display(character_display_name, reverse_character_dict)

    try:
        thinking, prompt = prompt_generator.generate_prompt(
            novel_text, series_code, character_code, category
        )
        return thinking, prompt
    except Exception as e:
        logger.error(f"Error in convert_text_to_prompt: {str(e)}")
        return f"エラーが発生しました: {str(e)}", novel_text
def load_image_model():
    """Load the SDXL image-generation pipeline into the global handles.

    Frees GPU memory and unloads the prompt-generation LLM first, so the
    LLM and the diffusion model never occupy the device simultaneously.

    Returns:
        A human-readable status string.
    """
    global pipe, vae

    # Reclaim as much memory as possible before loading the large model.
    torch.cuda.empty_cache()
    gc.collect()

    # Release the LLM if the text-to-prompt feature had loaded it.
    if TEXT_TO_PROMPT_ENABLED:
        prompt_generator.unload_model()

    logger.info("Loading image generation model...")
    # Fix: removed the unused local `styles` dict that was built from
    # style_list and never referenced in this function.

    # Load the VAE explicitly so its dtype is controlled here.
    vae = AutoencoderKL.from_pretrained(
        f"{MODEL}/vae",
        torch_dtype=torch.float16
    )

    # Hand the VAE to the pipeline loader.
    pipe = utils.load_pipeline(MODEL, device, HF_TOKEN, vae=vae)

    if USE_TORCH_COMPILE:
        pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True)
        logger.info("Model compiled with torch.compile")

    return "Image generation model loaded successfully"
@spaces.GPU
def generate(
    prompt: str,
    negative_prompt: str = DEFAULT_NEGATIVE_PROMPT,
    seed: int = -1,
    custom_width: int = 1024,
    custom_height: int = 1024,
    guidance_scale: float = 5.0,
    num_inference_steps: int = 28,
    sampler: str = "Euler a",
    aspect_ratio_selector: str = DEFAULT_ASPECT_RATIO,
    style_selector: str = "(None)",
    use_upscaler: bool = False,
    upscaler_strength: float = 0.55,
    upscale_by: float = 1.5,
    add_quality_tags: bool = True,
    progress: gr.Progress = gr.Progress(track_tqdm=True),
) -> Tuple[List[str], Dict]:
    """Generate images based on the given parameters.

    Lazily loads the diffusion pipeline on first use, validates inputs,
    optionally runs a latent-upscale second pass, saves every image to
    OUTPUT_DIR, and returns (saved image paths, generation metadata).

    Raises:
        gr.Error: wrapping either a GenerationError (validation) or any
            unexpected failure.
    """
    global pipe

    # Lazy pipeline initialization on the first call.
    if pipe is None:
        load_image_model()

    start_time = time.time()
    upscaler_pipe = None
    backup_scheduler = None
    # Map style name -> (positive template, negative template).
    styles = {style["name"]: (style["prompt"], style.get("negative_prompt", "")) for style in style_list}

    try:
        # Memory management: drop stale outputs and free GPU/host memory.
        cleanup_old_images(OUTPUT_DIR)
        torch.cuda.empty_cache()
        gc.collect()

        # Input validation
        prompt = validate_prompt(prompt)
        if negative_prompt:
            negative_prompt = negative_prompt.encode('utf-8').decode('utf-8')

        validate_dimensions(custom_width, custom_height)

        # Set up generation.
        # NOTE(review): only seed == 0 triggers randomization, but the
        # parameter default is -1 — confirm whether -1 should also randomize.
        if seed == 0:  # generate a random seed when 0 is entered
            seed = random.randint(0, utils.MAX_SEED)
        generator = utils.seed_everything(seed)

        width, height = utils.aspect_ratio_handler(
            aspect_ratio_selector,
            custom_width,
            custom_height,
        )

        # Process prompts: append quality tags, then apply the style preset.
        if add_quality_tags:
            prompt = "{prompt}, masterpiece, high score, great score, absurdres".format(prompt=prompt)

        prompt, negative_prompt = utils.preprocess_prompt(
            styles, style_selector, prompt, negative_prompt
        )

        width, height = utils.preprocess_image_dimensions(width, height)

        # Set up pipeline: swap in the requested sampler, restoring the
        # original scheduler in the finally block.
        backup_scheduler = pipe.scheduler
        pipe.scheduler = utils.get_scheduler(pipe.scheduler.config, sampler)

        if use_upscaler:
            # Img2img pipeline sharing the base pipeline's components.
            upscaler_pipe = StableDiffusionXLImg2ImgPipeline(**pipe.components)

        # Prepare metadata embedded alongside each saved image.
        metadata = {
            "prompt": prompt,
            "negative_prompt": negative_prompt,
            "resolution": f"{width} x {height}",
            "guidance_scale": guidance_scale,
            "num_inference_steps": num_inference_steps,
            "style_preset": style_selector,
            "seed": seed,
            "sampler": sampler,
            "Model": "Animagine XL 4.0 Opt",
            "Model hash": "6327eca98b",
        }

        if use_upscaler:
            new_width = int(width * upscale_by)
            new_height = int(height * upscale_by)
            metadata["use_upscaler"] = {
                "upscale_method": "nearest-exact",
                "upscaler_strength": upscaler_strength,
                "upscale_by": upscale_by,
                "new_resolution": f"{new_width} x {new_height}",
            }
        else:
            metadata["use_upscaler"] = None

        logger.info(f"Starting generation with parameters: {json.dumps(metadata, indent=4)}")

        # Generate images: two-pass (latent -> upscale -> img2img) or single pass.
        if use_upscaler:
            latents = pipe(
                prompt=prompt,
                negative_prompt=negative_prompt,
                width=width,
                height=height,
                guidance_scale=guidance_scale,
                num_inference_steps=num_inference_steps,
                generator=generator,
                output_type="latent",
            ).images
            upscaled_latents = utils.upscale(latents, "nearest-exact", upscale_by)
            images = upscaler_pipe(
                prompt=prompt,
                negative_prompt=negative_prompt,
                image=upscaled_latents,
                guidance_scale=guidance_scale,
                num_inference_steps=num_inference_steps,
                strength=upscaler_strength,
                generator=generator,
                output_type="pil",
            ).images
        else:
            images = pipe(
                prompt=prompt,
                negative_prompt=negative_prompt,
                width=width,
                height=height,
                guidance_scale=guidance_scale,
                num_inference_steps=num_inference_steps,
                generator=generator,
                output_type="pil",
            ).images

        # Save images.
        # NOTE(review): if the pipeline ever returns an empty/falsy `images`,
        # `image_paths` below is unbound — confirm the pipeline contract.
        if images:
            total = len(images)
            image_paths = []
            for idx, image in enumerate(images, 1):
                progress(idx/total, desc="Saving images...")
                path = utils.save_image(image, metadata, OUTPUT_DIR, IS_COLAB)
                image_paths.append(path)
                logger.info(f"Image {idx}/{total} saved as {path}")

        generation_time = time.time() - start_time
        logger.info(f"Generation completed successfully in {generation_time:.2f} seconds")
        metadata["generation_time"] = f"{generation_time:.2f}s"

        return image_paths, metadata

    except GenerationError as e:
        logger.warning(f"Generation validation error: {str(e)}")
        raise gr.Error(str(e))
    except Exception as e:
        logger.exception("Unexpected error during generation")
        raise gr.Error(f"Generation failed: {str(e)}")
    finally:
        # Cleanup: free memory and restore the original scheduler.
        torch.cuda.empty_cache()
        gc.collect()

        if upscaler_pipe is not None:
            del upscaler_pipe

        if backup_scheduler is not None and pipe is not None:
            pipe.scheduler = backup_scheduler

        utils.free_memory()
# Refresh the character dropdown when the series selection changes.
def update_character_list(series_display_name):
    """Return a Gradio update replacing the character choices for a series."""
    characters = get_characters_for_series(series_display_name)
    # Select the first filtered entry, falling back to the global default.
    default_character = characters[0] if characters else display_character_list[0]
    return gr.update(choices=characters, value=default_character)
# Generate a prompt from novel text and surface it in the UI.
@spaces.GPU
def process_text_to_prompt(
    novel_text: str,
    series_display_name: str = series_dict.get(DEFAULT_SERIES, DEFAULT_SERIES),
    character_display_name: str = character_dict.get(DEFAULT_CHARACTER, DEFAULT_CHARACTER),
    category: str = DEFAULT_CATEGORY,
) -> Tuple[str, str, Dict]:
    """Generate an image prompt from novel text for display in the UI.

    Returns:
        (thinking, prompt_text, metadata). On failure the first element is a
        user-facing error message, the prompt is empty, and metadata carries
        the error string.
    """
    try:
        # Lazily load the LLM when the feature is on and no model is resident.
        # Fix: the original condition
        #     TEXT_TO_PROMPT_ENABLED and not hasattr(...) or prompt_generator._model is None
        # parsed as (A and B) or C, so `prompt_generator._model` was evaluated
        # even when the feature was disabled — a NameError, because
        # prompt_generator is only imported when TEXT_TO_PROMPT_ENABLED is set.
        if TEXT_TO_PROMPT_ENABLED and (
            not hasattr(prompt_generator, "_model") or prompt_generator._model is None
        ):
            prompt_generator.load_model()

        thinking, prompt_text = convert_text_to_prompt(novel_text, series_display_name, character_display_name, category)

        # Metadata describing this prompt-generation run (text is truncated
        # to 100 chars for readability).
        metadata = {
            "novel_text": novel_text[:100] + "..." if len(novel_text) > 100 else novel_text,
            "series": series_display_name,
            "character": character_display_name,
            "category": category,
            "generated_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        }

        return thinking, prompt_text, metadata

    except Exception as e:
        logger.exception("Error in process_text_to_prompt")
        error_message = f"プロンプト生成中にエラーが発生しました: {str(e)}"
        return error_message, "", {"error": str(e)}
# Copy the generated prompt into the image-generation parameters.
def copy_prompt_to_generation(prompt_text):
    # Returns the text unchanged plus a Gradio update hiding whichever
    # component this output is wired to in the Blocks layout.
    return prompt_text, gr.update(visible=False)
# Rewrite the prompt text when the style preset selection changes.
def update_prompt_with_style(prompt_text, current_style, new_style):
    """Swap the old style's prompt fragment for the new style's.

    Strips the fragment contributed by *current_style* (if present as a
    ", fragment" suffix/infix) and appends *new_style*'s fragment, leaving
    the user's own prompt text intact. Empty prompts pass through unchanged.
    """
    if prompt_text.strip() == "":
        return prompt_text

    # Map style name -> (positive template, negative template).
    styles = {style["name"]: (style["prompt"], style.get("negative_prompt", "")) for style in style_list}

    # Extract the current style's fragment by removing the "{prompt}"
    # placeholder from its template and trimming a leading comma.
    current_style_prompt = ""
    if current_style != "(None)":
        current_style_template = styles.get(current_style, ("", ""))[0]
        if "{prompt}" in current_style_template:
            current_style_prompt = current_style_template.replace("{prompt}", "").strip()
            if current_style_prompt.startswith(","):
                current_style_prompt = current_style_prompt[1:].strip()

    # Same extraction for the newly selected style.
    new_style_prompt = ""
    if new_style != "(None)":
        new_style_template = styles.get(new_style, ("", ""))[0]
        if "{prompt}" in new_style_template:
            new_style_prompt = new_style_template.replace("{prompt}", "").strip()
            if new_style_prompt.startswith(","):
                new_style_prompt = new_style_prompt[1:].strip()

    # Remove the old style fragment from the prompt (first occurrence of
    # ", fragment" — NOTE(review): replace() removes all occurrences).
    base_prompt = prompt_text
    if current_style_prompt:
        style_part = f", {current_style_prompt}"
        if style_part in base_prompt:
            base_prompt = base_prompt.replace(style_part, "")

    # Append the new style fragment.
    if new_style_prompt:
        if base_prompt.strip():
            base_prompt = f"{base_prompt.strip()}, {new_style_prompt}"
        else:
            base_prompt = new_style_prompt

    return base_prompt
# Load the LLM first so prompt generation is ready before the UI starts.
if TEXT_TO_PROMPT_ENABLED:
    logger.info("Loading LLM for prompt generation first...")
    prompt_generator.load_model()

# Create CSS with improved buttons and styling
# (injected into the Gradio Blocks app below; this is a runtime string and
# must be kept verbatim).
custom_css = """
.header {
    text-align: center;
    margin-bottom: 2rem;
    background: linear-gradient(to right, #4a69bd, #6a89cc);
    padding: 1.5rem;
    border-radius: 10px;
    color: white;
    box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
}

.title {
    margin: 0;
    font-size: 2.5rem;
    font-weight: 700;
}

.subtitle {
    font-size: 1.1rem;
    margin-top: 0.5rem;
    opacity: 0.9;
}

.subtitle-inline {
    font-size: 1.3rem;
    font-weight: 400;
    opacity: 0.9;
}

.section {
    background: white;
    border-radius: 10px;
    padding: 1.5rem;
    margin-bottom: 1.5rem;
    box-shadow: 0 2px 5px rgba(0, 0, 0, 0.05);
    border: 1px solid #e1e4e8;
}

.section-title {
    font-size: 1.3rem;
    margin-top: 0;
    margin-bottom: 1.2rem;
    color: #4a69bd;
    border-bottom: 2px solid #e1e4e8;
    padding-bottom: 0.5rem;
}

/* Improved button styling */
.primary-button {
    background-color: #4a69bd !important;
    color: white !important;
    font-weight: 600 !important;
    padding: 0.7rem 1.2rem !important;
    border-radius: 8px !important;
    border: none !important;
    cursor: pointer !important;
    transition: all 0.2s ease !important;
    box-shadow: 0 2px 5px rgba(0, 0, 0, 0.1) !important;
    text-transform: uppercase !important;
    letter-spacing: 0.5px !important;
}

.primary-button:hover {
    background-color: #3a539b !important;
    box-shadow: 0 4px 8px rgba(0, 0, 0, 0.15) !important;
    transform: translateY(-1px) !important;
}

/* 思考プロセスとプロンプト出力のスタイルを改善 */
.thinking-output-label {
    font-weight: 600 !important;
    color: #4285f4 !important;
    background-color: transparent !important;
    margin-bottom: 4px !important;
}

.thinking-output {
    background-color: #f0f7ff !important;
    border-left: 4px solid #4285f4 !important;
    padding: 12px !important;
    border-radius: 6px !important;
    font-size: 0.95rem !important;
    color: #333 !important;
}

.generated-prompt-label {
    font-weight: 600 !important;
    color: #34a853 !important;
    background-color: transparent !important;
    margin-bottom: 4px !important;
    margin-top: 12px !important;
}

.generated-prompt {
    background-color: #f0fff4 !important;
    border-left: 4px solid #34a853 !important;
    padding: 12px !important;
    border-radius: 6px !important;
    font-weight: 500 !important;
    font-size: 0.95rem !important;
    color: #333 !important;
}

.text-input-area {
    border: 1px solid #d0d7de;
    border-radius: 8px;
}

/* Add animation for loading states */
@keyframes pulse {
    0% { opacity: 1; }
    50% { opacity: 0.7; }
    100% { opacity: 1; }
}

.loading {
    animation: pulse 1.5s infinite;
}

/* Gallery improvements */
.gallery-item {
    border-radius: 8px;
    overflow: hidden;
    box-shadow: 0 3px 10px rgba(0, 0, 0, 0.1);
    transition: transform 0.2s ease;
}

.gallery-item:hover {
    transform: scale(1.02);
}
"""
def _generate_with_defaults(p, neg, style):
    """Run image generation with the app-wide default settings.

    Shared by the "use this prompt" and "regenerate" event handlers, which
    previously duplicated the same 15-argument lambda. Only the prompt,
    negative prompt and style vary per call; everything else (seed 0,
    832x1216, Euler a, 28 steps, CFG 5.0, no upscaler, quality tags on)
    is fixed.
    """
    return generate(
        prompt=p,
        negative_prompt=neg,
        seed=0,
        custom_width=832,
        custom_height=1216,
        guidance_scale=5.0,
        num_inference_steps=28,
        sampler="Euler a",
        aspect_ratio_selector=DEFAULT_ASPECT_RATIO,
        style_selector=style,
        use_upscaler=False,
        upscaler_strength=0.55,
        upscale_by=1.5,
        add_quality_tags=True,
    )


# Create the Gradio interface
with gr.Blocks(css=custom_css) as demo:
    gr.HTML("<div class='header'><h1 class='title'>FanFic Illustrator <span class='subtitle-inline'>with Animagine XL 4.0 Opt</span></h1><p class='subtitle'>Illustrate your fan stories with beautiful AI-generated art<br>二次創作ファン小説にAIで魅力的な挿絵を</p></div>")

    with gr.Column():
        # 1. Text input section
        with gr.Group(elem_classes=["section"]):
            gr.HTML("<h3 class='section-title'>1. Your Narrative / あなたの創作した物語</h3>")
            novel_text = gr.Textbox(
                label="",
                placeholder="Enter your fan story or narrative here... / ここにファンストーリーや物語を入力してください...",
                lines=10,
                elem_classes=["text-input-area"],
            )

            with gr.Row():
                with gr.Column(scale=1):
                    series_selector = gr.Dropdown(
                        choices=display_series_list,
                        value=display_series_list[0] if display_series_list else "",
                        label="Series / シリーズ",
                    )
                with gr.Column(scale=1):
                    character_selector = gr.Dropdown(
                        choices=get_characters_for_series(display_series_list[0] if display_series_list else ""),
                        value=display_character_list[0] if display_character_list else "",
                        label="Character / キャラクター",
                    )

            with gr.Row():
                category_selector = gr.Dropdown(
                    choices=display_category_list,
                    value=display_category_list[0] if display_category_list else "",
                    label="Illustration Type / イラストタイプ",
                )

            convert_btn = gr.Button("Generate Prompt / プロンプト生成", elem_classes=["primary-button"])

        # 2. AI thinking process & generated prompt section
        with gr.Group(elem_classes=["section"]):
            gr.HTML("<h3 class='section-title'>2. AI Interpretation / AIの解釈結果</h3>")

            gr.HTML("<div class='thinking-output-label'>AI Thought Process / AIの思考過程</div>")
            thinking_output = gr.Textbox(
                label="",
                lines=6,
                elem_classes=["thinking-output"],
                visible=True
            )

            gr.HTML("<div class='generated-prompt-label'>Generated Prompt / 生成されたプロンプト</div>")
            prompt_output = gr.Textbox(
                label="",
                lines=3,
                elem_classes=["generated-prompt"],
            )

            use_prompt_btn = gr.Button("Create Illustration with This Prompt / このプロンプトでイラスト作成", elem_classes=["primary-button"])

        # 3. Image generation section
        with gr.Group(elem_classes=["section"]):
            gr.HTML("<h3 class='section-title'>3. Illustration Generation / イラスト生成</h3>")

            # Generated illustrations are shown at the top of the section
            output_gallery = gr.Gallery(label="Generated Illustrations / 生成されたイラスト", show_label=True)

            # Editable prompt field used for (re)generation
            prompt = gr.Textbox(
                label="Prompt / プロンプト",
                placeholder="Enter your prompt here... / ここにプロンプトを入力してください...",
                lines=3,
            )

            # Advanced settings accordion, collapsed by default
            with gr.Accordion("Advanced Options / 詳細設定", open=False):

                # State holding the currently applied style, plus its picker
                current_style = gr.State("(None)")
                style_selector = gr.Dropdown(
                    choices=style_names,
                    value="(None)",
                    label="Style / スタイル",
                    info="Select a style to apply to your prompt / プロンプトに適用するスタイルを選択",
                )

                negative_prompt = gr.Textbox(
                    label="Negative Prompt / ネガティブプロンプト",
                    placeholder="What you don't want to see in the image / 画像に含めたくない要素",
                    value=DEFAULT_NEGATIVE_PROMPT,
                    lines=3,
                )

            # Labelled "Regenerate" because the first generation is normally
            # triggered from the "use this prompt" button above
            generate_btn = gr.Button("Regenerate Illustration / イラスト再生成", elem_classes=["primary-button"])

    # --- Event wiring ---

    # Refresh the character dropdown whenever the series changes
    series_selector.change(
        fn=update_character_list,
        inputs=[series_selector],
        outputs=[character_selector],
    )

    # Re-apply the style to the prompt when it changes, then remember it
    style_selector.change(
        fn=update_prompt_with_style,
        inputs=[prompt, current_style, style_selector],
        outputs=[prompt],
    ).then(
        fn=lambda x: x,
        inputs=[style_selector],
        outputs=[current_style],
    )

    # Generate the illustration prompt from the novel text
    convert_btn.click(
        fn=process_text_to_prompt,
        inputs=[
            novel_text,
            series_selector,
            character_selector,
            category_selector,
        ],
        outputs=[thinking_output, prompt_output, gr.JSON(visible=False)],
    )

    # Copy the generated prompt into the generation field, load the image
    # model, then generate with the shared default settings
    use_prompt_btn.click(
        fn=copy_prompt_to_generation,
        inputs=[prompt_output],
        outputs=[prompt, gr.Textbox(visible=False)],
    ).then(
        fn=load_image_model,
        inputs=[],
        outputs=[gr.Textbox(visible=False)],
    ).then(
        fn=_generate_with_defaults,
        inputs=[prompt, negative_prompt, style_selector],
        outputs=[output_gallery, gr.JSON(visible=False)],
    )

    # Regenerate using the current prompt and the shared default settings
    generate_btn.click(
        fn=_generate_with_defaults,
        inputs=[prompt, negative_prompt, style_selector],
        outputs=[output_gallery, gr.JSON(visible=False)],
    )


# Launch the app
if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", share=IS_COLAB)
|
config.py
ADDED
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import tomli
|
3 |
+
from typing import Dict, Any, List
|
4 |
+
|
5 |
+
def fix_escaping(text: str) -> str:
    """Return *text* unchanged.

    Kept as an explicit hook for escape-sequence normalization: the loader
    already resolves backslash escapes while parsing, so no further
    transformation is required here.
    """
    return text
|
9 |
+
|
10 |
+
def load_config() -> Dict[str, Any]:
    """Parse ``config.toml`` (located next to this module) into a dict."""
    toml_path = os.path.join(os.path.dirname(__file__), 'config.toml')
    # tomli requires binary mode
    with open(toml_path, 'rb') as fh:
        return tomli.load(fh)
|
15 |
+
|
16 |
+
# Parse config.toml once at import time
config = load_config()

# Flat module-level exports kept for backward compatibility with callers
# that import these names directly; environment variables take precedence
# over the file values.
MODEL = os.getenv("MODEL", config['model'].get('path', 'cagliostrolab/animagine-xl-4.0'))

MIN_IMAGE_SIZE = int(os.getenv("MIN_IMAGE_SIZE", config['model']['min_image_size']))
MAX_IMAGE_SIZE = int(os.getenv("MAX_IMAGE_SIZE", config['model']['max_image_size']))
USE_TORCH_COMPILE = os.getenv("USE_TORCH_COMPILE", str(config['model']['use_torch_compile'])).lower() == "true"
ENABLE_CPU_OFFLOAD = os.getenv("ENABLE_CPU_OFFLOAD", str(config['model']['enable_cpu_offload'])).lower() == "true"
OUTPUT_DIR = os.getenv("OUTPUT_DIR", config['model']['output_dir'])

DEFAULT_NEGATIVE_PROMPT = config['prompts']['default_negative']
DEFAULT_ASPECT_RATIO = config['prompts']['default_aspect_ratio']

sampler_list = config['samplers']['list']
aspect_ratios = config['aspect_ratios']['list']
style_list = config['styles']

# Text-to-prompt settings; the whole [text_to_prompt] table is optional,
# so every lookup falls back to a sensible default.
_t2p = config.get('text_to_prompt', {})
TEXT_TO_PROMPT_ENABLED = _t2p.get('enabled', False)
DEFAULT_CATEGORY = _t2p.get('default_category', 'sfw')
DEFAULT_SERIES = _t2p.get('default_series', 'original')
DEFAULT_CHARACTER = _t2p.get('default_character', 'original character')

# Series / character / category lists used to populate the UI dropdowns
series_list = _t2p.get('series', {}).get('list', [])
character_list = _t2p.get('characters', {}).get('list', [])
category_list = _t2p.get('categories', {}).get('list', [])
|
45 |
+
|
config.toml
ADDED
@@ -0,0 +1,186 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Image-generation model settings
[model]
path = "cagliostrolab/animagine-xl-4.0"
min_image_size = 512
max_image_size = 2048
use_torch_compile = false
enable_cpu_offload = false
output_dir = "./outputs"

# Default prompt settings applied when the user does not override them
[prompts]
default_negative = "lowres, bad anatomy, bad hands, text, error, missing finger, extra digits, fewer digits, cropped, worst quality, low quality, low score, bad score, average score, signature, watermark, username, blurry"
default_aspect_ratio = "832 x 1216"

# Settings for the novel-text -> image-prompt feature
[text_to_prompt]
enabled = true
default_category = "general"
default_series = "original"
default_character = "original character"

# Entries use "english tag|Japanese display label"
[text_to_prompt.series]
list = [
    "original|特定の原作なし",
    "touhou|東方Project",
    "vocaloid|ボーカロイド",
    "kantai collection|艦隊これくしょん",
    "touken ranbu|刀剣乱舞"
]

[text_to_prompt.characters]
list = [
    "original character|オリジナルキャラクター",
    "kaga (kancolle)|加賀(艦これ)",
    "shimakaze (kancolle)|島風(艦これ)",
    "hibiki (kancolle)|響(艦これ)",
    "shigure (kancolle)|時雨(艦これ)",
    "kongou (kancolle)|金剛(艦これ)",
    "hatsune miku|初音ミク",
    "kagamine rin|鏡音リン",
    "kagamine len|鏡音レン",
    "megurine luka|巡音ルカ",
    "yuzuki yukari|結月ゆかり",
    "hakurei reimu|博麗霊夢",
    "kirisame marisa|霧雨魔理沙",
    "izayoi sakuya|十六夜咲夜",
    "remilia scarlet|レミリア・スカーレット",
    "flandre scarlet|フランドール・スカーレット",
    "mikazuki munechika|三日月宗近",
    "kashuu kiyomitsu|加州清光",
    "yamato-no-kami yasusada|大和守安定",
    "tsurumaru kuninaga|鶴丸国永",
    "namazuo toushirou|鯰尾藤四郎"
]

# Mapping from each series to the characters that belong to it
[text_to_prompt.character_series_mapping]
"original" = [
    "original character"
]
"touhou" = [
    "hakurei reimu",
    "kirisame marisa",
    "izayoi sakuya",
    "remilia scarlet",
    "flandre scarlet"
]
"vocaloid" = [
    "hatsune miku",
    "kagamine rin",
    "kagamine len",
    "megurine luka",
    "yuzuki yukari"
]
"kantai collection" = [
    "kaga (kancolle)",
    "shimakaze (kancolle)",
    "hibiki (kancolle)",
    "shigure (kancolle)",
    "kongou (kancolle)"
]
"touken ranbu" = [
    "mikazuki munechika",
    "kashuu kiyomitsu",
    "yamato-no-kami yasusada",
    "tsurumaru kuninaga",
    "namazuo toushirou"
]

[text_to_prompt.categories]
list = [
    "general",
    "sensitive"
]

[samplers]
list = [
    "DPM++ 2M Karras",
    "DPM++ SDE Karras",
    "DPM++ 2M SDE Karras",
    "Euler",
    "Euler a",
    "DDIM"
]

[aspect_ratios]
list = [
    "1024 x 1024",
    "1152 x 896",
    "896 x 1152",
    "1216 x 832",
    "832 x 1216",
    "1344 x 768",
    "768 x 1344",
    "1536 x 640",
    "640 x 1536",
    "Custom"
]

# Style presets; "{prompt}" is replaced with the user's prompt
[[styles]]
name = "(None)"
prompt = "{prompt}"
negative_prompt = ""

[[styles]]
name = "Anim4gine"
prompt = "{prompt}, depth of field, faux traditional media, painterly, impressionism, photo background"
negative_prompt = ""

[[styles]]
name = "Painting"
prompt = "{prompt}, painterly, painting (medium)"
negative_prompt = ""

[[styles]]
name = "Pixel art"
prompt = "{prompt}, pixel art"
negative_prompt = ""

[[styles]]
name = "1980s"
prompt = "{prompt}, 1980s (style), retro artstyle"
negative_prompt = ""

[[styles]]
name = "1990s"
prompt = "{prompt}, 1990s (style), retro artstyle"
negative_prompt = ""

[[styles]]
name = "2000s"
prompt = "{prompt}, 2000s (style), retro artstyle"
negative_prompt = ""

[[styles]]
name = "Toon"
prompt = "{prompt}, toon (style)"
negative_prompt = ""

[[styles]]
name = "Lineart"
prompt = "{prompt}, lineart, thick lineart"
negative_prompt = ""

[[styles]]
name = "Art Nouveau"
prompt = "{prompt}, art nouveau"
negative_prompt = ""

[[styles]]
name = "Western Comics"
prompt = "{prompt}, western comics (style)"
negative_prompt = ""

[[styles]]
name = "3D"
prompt = "{prompt}, 3d"
negative_prompt = ""

[[styles]]
name = "Realistic"
prompt = "{prompt}, realistic, photorealistic"
negative_prompt = ""

[[styles]]
name = "Neonpunk"
prompt = "{prompt}, neonpunk"
negative_prompt = ""
|
186 |
+
|
prompt_generator.py
ADDED
@@ -0,0 +1,348 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import logging
|
3 |
+
import torch
|
4 |
+
from typing import Tuple, Optional, Dict, Any
|
5 |
+
import gc
|
6 |
+
|
7 |
+
# Application-wide logging configuration
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S',
)
logger = logging.getLogger(__name__)
|
14 |
+
|
15 |
+
def setup_prompt_logger():
    """Set up a dedicated rotating-file logger for prompt-generation I/O.

    Returns the ``prompt_generator.io`` logger. Repeated calls reuse the
    already-attached handler instead of stacking duplicates.

    Note: the redundant in-function ``import os`` / ``import logging`` of
    the original were removed — both are already module-level imports.
    """
    from logging.handlers import RotatingFileHandler

    prompt_logger = logging.getLogger('prompt_generator.io')
    prompt_logger.setLevel(logging.INFO)

    # Log directory lives one level above this module: <repo>/logs
    log_dir = os.path.join(os.path.dirname(os.path.dirname(__file__)), 'logs')
    os.makedirs(log_dir, exist_ok=True)

    log_file = os.path.join(log_dir, 'prompt_generation.log')

    # Rotate at 5 MB, keep at most 5 backup files
    file_handler = RotatingFileHandler(
        log_file,
        maxBytes=5 * 1024 * 1024,
        backupCount=5,
        encoding='utf-8',
    )
    file_handler.setFormatter(logging.Formatter('%(asctime)s - %(message)s'))

    # Attach only once; guards against duplicate handlers on re-import
    if not prompt_logger.handlers:
        prompt_logger.addHandler(file_handler)

    return prompt_logger
|
49 |
+
|
50 |
+
# ロガーのセットアップ
|
51 |
+
prompt_io_logger = setup_prompt_logger()
|
52 |
+
|
53 |
+
|
54 |
+
def log_prompt_io(novel_text, series_name, character_name, category, thinking, prompt_text):
    """Record one prompt-generation request/response pair in the I/O log."""
    # Cap very long inputs so individual log entries stay readable
    if len(novel_text) > 500:
        logged_text = novel_text[:500] + "...(truncated)"
    else:
        logged_text = novel_text

    separator = '=' * 80
    log_entry = (
        f"\n{separator}\n"
        f"INPUT:\n"
        f"Series: {series_name}\n"
        f"Character: {character_name}\n"
        f"Category: {category}\n"
        f"Text: {logged_text}\n\n"
        f"OUTPUT:\n"
        f"Thinking: {thinking}\n\n"
        f"Prompt: {prompt_text}\n"
        f"{separator}\n"
    )

    prompt_io_logger.info(log_entry)
|
76 |
+
|
77 |
+
# グローバル変数
|
78 |
+
_model = None
|
79 |
+
_tokenizer = None
|
80 |
+
|
81 |
+
|
82 |
+
def load_model():
    """Lazily load the prompt-generation LLM and its tokenizer.

    The pair is cached in the module globals ``_model`` / ``_tokenizer``,
    so repeated calls are cheap. Re-raises any loading failure after
    logging it.
    """
    global _model, _tokenizer

    # Fast path: reuse the cached pair when both are already in memory
    if _model is not None and _tokenizer is not None:
        return _model, _tokenizer

    try:
        from transformers import AutoModelForCausalLM, AutoTokenizer

        logger.info("Loading prompt generation model...")

        # Model id: env override, else the published FanFic-Illustrator
        # weights, loaded directly from the Hugging Face hub
        model_path = os.getenv("PROMPT_MODEL_NAME", "webbigdata/FanFic-Illustrator")

        # Pick the compute device
        device_map = "auto" if torch.cuda.is_available() else "cpu"
        logger.info(f"Using device: {device_map} for prompt generation model")

        # bfloat16 on CUDA builds that support it, float16 otherwise
        if torch.cuda.is_available() and hasattr(torch, 'bfloat16'):
            dtype = torch.bfloat16
        else:
            dtype = torch.float16

        _model = AutoModelForCausalLM.from_pretrained(
            model_path,
            torch_dtype=dtype,
            device_map=device_map,
            use_cache=True,
            low_cpu_mem_usage=True,
        )

        _tokenizer = AutoTokenizer.from_pretrained(model_path)

        # Some checkpoints ship without a pad token; fall back to EOS
        if _tokenizer.pad_token is None:
            _tokenizer.pad_token = _tokenizer.eos_token

        logger.info("Prompt generation model loaded successfully!")
        return _model, _tokenizer

    except Exception as e:
        logger.error(f"Failed to load prompt generation model: {str(e)}")
        raise
|
127 |
+
|
128 |
+
|
129 |
+
|
130 |
+
def unload_model():
    """Drop the cached prompt-generation model/tokenizer and free memory."""
    global _model, _tokenizer

    # Rebinding to None releases the last strong reference held here; the
    # `del` immediately followed by reassignment in the original code was
    # redundant (the reassignment alone drops the old object).
    _model = None
    _tokenizer = None

    # Return freed CUDA blocks to the driver, then collect reference cycles
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
    gc.collect()

    logger.info("Prompt generation model unloaded")
|
148 |
+
|
149 |
+
def generate_prompt(
|
150 |
+
novel_text: str,
|
151 |
+
series_name: str = "original",
|
152 |
+
character_name: str = "original character",
|
153 |
+
category: str = "general"
|
154 |
+
) -> Tuple[str, str]:
|
155 |
+
try:
|
156 |
+
# モデルとトークナイザーの読み込み
|
157 |
+
model, tokenizer = load_model()
|
158 |
+
|
159 |
+
# 入力の検証
|
160 |
+
if not novel_text or novel_text.isspace():
|
161 |
+
return "入力テキストが空です", "1girl, original character, anime style, highres"
|
162 |
+
|
163 |
+
# 長すぎる入力のトリミング(トークン数の制限)
|
164 |
+
max_input_length = 5072 #1024
|
165 |
+
#if len(novel_text) > max_input_length * 4: # 大まかな文字数の制限
|
166 |
+
# novel_text = novel_text[:max_input_length * 4]
|
167 |
+
# logger.warning(f"Input text was too long and has been truncated")
|
168 |
+
|
169 |
+
# システムプロンプト
|
170 |
+
system = "あなたは文章の一説を指定ジャンル・キャラクターが登場するシーンに書き換え、そのシーンに合った挿絵を作成するために画像生成AI用プロンプトを作成する優秀なプロンプトエンジニアです"
|
171 |
+
|
172 |
+
# ユーザープロンプト
|
173 |
+
prompt = f"""### 小説のコンテキストを補足する情報
|
174 |
+
content category: {category}
|
175 |
+
series name: {series_name}
|
176 |
+
series description: {series_name} series
|
177 |
+
character name: {character_name}
|
178 |
+
character description: {character_name} from {series_name}
|
179 |
+
|
180 |
+
### 小説データ
|
181 |
+
{novel_text}
|
182 |
+
|
183 |
+
まず<think>内で以下のように思考を整理します。
|
184 |
+
|
185 |
+
<think>
|
186 |
+
concept: イラストのコンセプトを考えます。小説の内容から主題、設定、雰囲気を理解し、どのようなイラストが最も適切か、全体の構成を考えます
|
187 |
+
- 人数: 挿絵の中に登場させる人数を考えます。作品に登場する人物の数や重要性を考慮し、メインで描くべき人物やサブキャラクターについても検討してください
|
188 |
+
- キャラクター名/シリーズ名: 既存作品のキャラクター/シリーズか、オリジナル作品かを考えます。既存作品の場合は、原作の設定や特徴を尊重した表現方法も考慮してください
|
189 |
+
- ポーズ/構図: ポーズ/構図指定に使うタグを考えます。物語の場面において、キャラクターがどのような体勢/状況にあるのか、どのアングルから描くと効果
|
190 |
+
的かを検討してください
|
191 |
+
- 背景/環境: 背景/環境指定に使うタグを考えます。物語の舞台設定や時間帯、天候など、雰囲気を表現するために必要な背景要素を詳しく考えてください
|
192 |
+
- 描画スタイル/テクニック: 描画スタイル/テクニックに使うタグを考えます。物語のジャンルや雰囲気に合わせて、どのような画風や技法が適しているかを検討してください
|
193 |
+
- 身体的特徴/画面上の物体: 身体的特徴/画面上の物体に関連するタグを考えます。キャラクターの外見的特徴や、シーンに必要な小道具、アイテムなどを詳細に考えてください
|
194 |
+
</think>
|
195 |
+
|
196 |
+
改行の場所も含めて、この順序と書式を厳密に守ってください。
|
197 |
+
各項目は上記の順序と書式で記述してください。具体的かつ詳細に説明し、十分な長さで考察してください(<think>タグ全体で600-800文字程度が望ましいです)
|
198 |
+
|
199 |
+
その後、思考結果に基づき<prompt>内に英単語を18単語ほどカンマで区切って出力してください。日本語は使用しないでください。
|
200 |
+
最も重要で適切なタグを選び、有効なプロンプトとなるよう考慮してください
|
201 |
+
|
202 |
+
### 使用可能な英単語
|
203 |
+
出力時には以下のタグを優先して使用し、足りない場合は一般的な英単語で補足します
|
204 |
+
masterpiece, best quality, highresなどの品質に関連するタグは後工程で付与するのでつけてはいけません
|
205 |
+
|
206 |
+
**人数/性別**:
|
207 |
+
- 風景や動物を中心に描画する時: no_human
|
208 |
+
- 女性の人数: 1girl, 2girls, 3girls, multiple girls
|
209 |
+
- 男性の人数: 1boy, 2boys, 3boys, multiple boys
|
210 |
+
- 1girlや1boy指定時にキャラクター中心の構図にするために追加で指定: solo
|
211 |
+
|
212 |
+
**ポーズ/構図**:
|
213 |
+
- 視点: from above, from behind, from below, looking at viewer, straight-on, looking at another, looking back, out of frame, on back, from side, looking to the side, feet out of frame, sideways, three quarter view, looking up, looking down, looking ahead, dutch angle, high up, from outside, pov, vanishing point
|
214 |
+
- 姿勢/行動: battle, chasing, fighting, leaning, running, sitting, squatting, standing, walking, arm up, arms up, against wall, against tree, holding, spread legs, lying, straddling, flying, holding weapon, clothes lift, hand on own cheek, scar on cheek, hand on another's cheek, kissing cheek, cheek-to-cheek, bandaid on cheek, finger to cheek, hands on another's cheeks, hand on own hip, hand over face, v, kneeling, arabesque (pose), body roll, indian style, standing on one leg, hugging own legs, seiza, nuzzle, unsheathing, holding weapon, holding sword, holding gun
|
215 |
+
|
216 |
+
**背景/環境**:
|
217 |
+
- 構図/芸術ジャンル: landscape, portrait, still life, group shot, cowboy shot, upper body, full body, detailed face, depth of field, intricate details, cinematic lighting, detailed background, detailed, extremely detailed, perfect composition, detailed face, solo focus, detailed face and body, character focus, intricate, sharp focus, male focus
|
218 |
+
- 色彩/装飾: greyscale, sepia, blue theme, flat color, high contrast, limited palette, border, cinematic, scenery, rendered, contrast, rich contrast, volumetric lighting, high contrast, glowing
|
219 |
+
- 背景/風景: checkered background, simple background, indoors, outdoors, jungle, mountain, beach, forest, city, school, cafe, white background, sky
|
220 |
+
- 時間帯: day, night, twilight, morning, sunset, dawn, dusk
|
221 |
+
- 天気: sunny, rain, snow, cloud, storm, wind, fogg
|
222 |
+
|
223 |
+
**描画スタイル/テクニック**:
|
224 |
+
- 技法: 3D, oekaki, pixel art, sketch, watercolor, oil painting, digital art, illustration, photorealistic, anime, monochrome, retro color, source anime, cg, realistic
|
225 |
+
- 表現手法: animalization, personification, science fiction, cyberpunk, steampunk, fantasy, dark novel style, anime style, realistic style, graphic novel style, comic, concept art
|
226 |
+
- 媒体/伝統的技法: traditional media, marker (medium), watercolor (medium), graphite (medium), official art, sketch, artbook, cover
|
227 |
+
- 絵柄の年代(指定された時のみ利用): newest, year 1980, year 2000, year 2010, year 1990, year 2020
|
228 |
+
|
229 |
+
**身体的特徴/画面上の物体**:
|
230 |
+
- キャラクター属性/職業/クラス: student, teacher, soldier, knight, wizard, ninja, doctor, artist, musician, athlete, virtual youtuber, chibi, maid
|
231 |
+
- 表情: angry, blush stickers, drunk, grin, aroused, happy, sad, smile, laugh, crying, surprised, worried, nervous, serious, drunk, blush, aroused, :d, tongue out, sweatdrop, tongue out, :o, tears, tearing up
|
232 |
+
|
233 |
+
- 身体的特徴: {{'髪型/髪色': ['long hair', 'short hair', 'twintails', 'ponytail', 'braid', 'bun', 'curly hair', 'straight hair', 'messy hair', 'blonde hair', 'black hair', 'brown hair', 'red hair', 'blue hair', 'green hair', 'white hair', 'purple hair', 'grey hair', 'ahoge', 'sidelocks', 'side ponytail', 'perfect hair', 'tail', 'multicolored hair', 'wavy hair', 'bangs', 'blunt bangs', 'twintails', 'hair between eyes', 'very long hair', 'braid', 'curly hair', 'braided ponytail', 'hand in own hair', 'hair over one eye', 'hair flower', 'two-tone hair', 'streaked hair', 'two side up'], '目の色': ['blue eyes', 'brown eyes', 'green eyes', 'red eyes', 'black eyes', 'purple eyes', 'yellow eyes', 'heterochromia', 'detailed eyes', 'glowing eyes', 'beatiful eyes', 'closed eyes', 'one eye closed'], '身体部位': ['bare shoulders', 'bare arms', 'bare legs', 'barefoot', 'abs', 'flat chest', 'small breasts', 'medium breasts', 'asymmetrical breasts', 'pointy breasts', 'sagging breasts', 'clenched teeth', 'pointy ears', 'perfect anatomy', 'closed mouth', 'long sleeves', 'open mouth', 'pale skin', 'collarbone', 'midriff', 'perfect anatomy', 'bare arms', 'thighs', 'parted lips', 'tongue', 'tanlines', 'dot nose', 'goggles on head', 'armpits', 'nail polish', 'mole', 'feet', 'lips', 'dark-skinned female', 'zettai ryouiki', 'shiny skin'], '身体部位(獣人、擬人化時のみ使用)': ['animal ears', 'cat ears', 'horse ears', 'horse girl', 'fang', 'teeth', 'horns', 'tail'], '服装/装飾品': ['uniform', 'suit', 'dress', 'casual wear', 'formal wear', 'belt', 'detached sleeves', 'swimsuit', 'kimono', 'armor', 'hat', 'glasses', 'white shirt', 'shirt', 'jewelry', 'necklace', 'earrings', 'bracelet', 'watch', 'ribbon', 'hair ribbon', 'scarf', 'gloves', 'boots', 'high heels', 'hair ornament', 'jacket', 'glasses', 'skirt', 'long sleeves', 'short sleeves', 'thighhighs', 'underwear', 'school uniform', 'swimsuit', 'panties', 'hair bow', 'bikini', 'miniskirt', 'fingerless gloves', 'bowtie', 'serafuku', 'japanese clothes', 'choker', 'pants', 'wings', 
'open clothes', 'pantyhose', 'pleated skirt', 'frills', 'necktie', 'shorts', 'collared shirt', 'leather armor', 'hairband', 'shoes', 'sleeveless', 'alternate costume', 'socks', 'fingering', 'denim shorts', 'epaulettes', 'santa costume', 'ribbon-trimmed sleeves', 'black bowtie', 'gym uniform', 'white bra', 'angel wings', 'crossdressing', 'cuffs', 'halo', 'high heels', 'apron', 'red bow', 'vest', 'open jacket', 'white panties', 'leotard', 'coat', 'black jacket', 'high heels', 'black pantyhose', 'see-through', 'miniskirt', 'elbow gloves', 'wide sleeves', 'white thighhighs', 'fur trim', 'plaid', 'one-piece swimsuit', 'maid headdress', 'ascot', 'high-waist skirt']}}
|
234 |
+
- 体液: blood, saliva, sweat, tears
|
235 |
+
- 前景/持ち物/操作物: sword, katana, sheath, gun, book, phone, bag, umbrella, instrument, vehicle, food, drink, guitar, piano, violin, drums, flute, car, bicycle, motorcycle, airplane, ship, flower, weapon, heart, speech bubble, carriage, locomotive
|
236 |
+
- 生物: dog, cat, horse, bird, fish, dragon, unicorn, monster, fox, wolf, bear, tiger, lion, dragon, fairy, ghost, zombie, vampire
|
237 |
+
|
238 |
+
**性的表現(sensitive, nsfw, explicitのいずれかを指定した時のみ使用可)**:
|
239 |
+
- 身体部位女性専用: cleavage, backboob, sideboob, underboob, navel, huge breasts, large breasts
|
240 |
+
- 身体部位男性専用: topless male, necktie between pectorals, loose necktie, bare pectorals, male underwear, fundoshi
|
241 |
+
- 身体部位共通: open shirt, unbuttoned shirt, seductive smile, bare back, groin, groin tendon, midriff
|
242 |
+
|
243 |
+
### 出力
|
244 |
+
"""
|
245 |
+
|
246 |
+
# メッセージ形式に整形
|
247 |
+
messages = [
|
248 |
+
{"role": "system", "content": system},
|
249 |
+
{"role": "user", "content": prompt},
|
250 |
+
]
|
251 |
+
|
252 |
+
# トークナイゼーション
|
253 |
+
inputs = tokenizer.apply_chat_template(
|
254 |
+
messages,
|
255 |
+
tokenize=True,
|
256 |
+
add_generation_prompt=True,
|
257 |
+
return_tensors="pt",
|
258 |
+
).to(model.device)
|
259 |
+
|
260 |
+
# 長すぎる入力のトリミング
|
261 |
+
if inputs.shape[1] > max_input_length:
|
262 |
+
inputs = inputs[:, :max_input_length]
|
263 |
+
logger.warning(f"Input tokens were too many and have been truncated to {max_input_length}")
|
264 |
+
|
265 |
+
# 生成
|
266 |
+
with torch.no_grad():
|
267 |
+
generated_ids = model.generate(
|
268 |
+
input_ids=inputs,
|
269 |
+
num_beams=3,
|
270 |
+
max_new_tokens=400,
|
271 |
+
do_sample=True,
|
272 |
+
temperature=0.5,
|
273 |
+
top_p=0.95,
|
274 |
+
repetition_penalty=1.0,
|
275 |
+
#dry_multiplier=0.5,
|
276 |
+
top_k = 40,
|
277 |
+
min_p = 0.00,
|
278 |
+
pad_token_id=tokenizer.pad_token_id,
|
279 |
+
)
|
280 |
+
|
281 |
+
# デコード
|
282 |
+
full_outputs = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
|
283 |
+
|
284 |
+
# モデルが生成したメッセージ部分だけを抽出
|
285 |
+
model_markers = ["assistant\n", "assistant:\n", "assitant\n"]
|
286 |
+
model_response = full_outputs[0]
|
287 |
+
|
288 |
+
for marker in model_markers:
|
289 |
+
if marker in model_response:
|
290 |
+
model_response = model_response.split(marker)[-1].strip()
|
291 |
+
break
|
292 |
+
|
293 |
+
# 思考過程とプロンプトの抽出
|
294 |
+
thinking = ""
|
295 |
+
prompt_text = ""
|
296 |
+
|
297 |
+
print(model_response)
|
298 |
+
if "<think>" in model_response and "</think>" in model_response:
|
299 |
+
thinking = model_response.split("<think>")[1].split("</think>")[0].strip()
|
300 |
+
|
301 |
+
def clean_prompt_text(text):
|
302 |
+
# 削除するタグのリスト
|
303 |
+
tags_to_remove = [
|
304 |
+
"masterpiece", "high score", "great score", "absurdres",
|
305 |
+
"highres", "original character", "original series",
|
306 |
+
"general", "sensitive", "nsfw", "explicit"
|
307 |
+
]
|
308 |
+
|
309 |
+
# テキストを単語に分割して処理
|
310 |
+
words = []
|
311 |
+
current_words = text.split(',')
|
312 |
+
|
313 |
+
# 各単語をトリムして処理
|
314 |
+
for word in current_words:
|
315 |
+
word = word.strip()
|
316 |
+
# 空の単語はスキップ
|
317 |
+
if not word:
|
318 |
+
continue
|
319 |
+
# 削除対象のタグかチェック
|
320 |
+
if any(tag == word.lower() for tag in tags_to_remove):
|
321 |
+
continue
|
322 |
+
# まだ追加されていない単語のみ追加(重複排除)
|
323 |
+
if word not in words:
|
324 |
+
words.append(word)
|
325 |
+
|
326 |
+
# カンマで結合して返す
|
327 |
+
return ', '.join(words)
|
328 |
+
|
329 |
+
if "<prompt>" in model_response:
|
330 |
+
if "</prompt>" in model_response:
|
331 |
+
prompt_text = model_response.split("<prompt>")[1].split("</prompt>")[0].strip()
|
332 |
+
else:
|
333 |
+
prompt_text = model_response.split("<prompt>")[1].strip()
|
334 |
+
|
335 |
+
prompt_text = clean_prompt_text(prompt_text)
|
336 |
+
else:
|
337 |
+
prompt_text = f"1girl, {character_name}, {series_name}, anime style, highres"
|
338 |
+
|
339 |
+
prompt_text = prompt_text + f", {category}"
|
340 |
+
|
341 |
+
log_prompt_io(novel_text, series_name, character_name, category, thinking, prompt_text)
|
342 |
+
logger.info(f"Successfully generated prompt from text")
|
343 |
+
return thinking, prompt_text
|
344 |
+
|
345 |
+
except Exception as e:
|
346 |
+
logger.error(f"Error generating prompt: {str(e)}")
|
347 |
+
# エラー時のフォールバック
|
348 |
+
return f"エラーが発生しました: {str(e)}", f"1girl, {character_name}, {series_name}, anime style, highres"
|
requirements.txt
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
accelerate==1.5.2
|
2 |
+
diffusers==0.32.2
|
3 |
+
gradio==4.44.1
|
4 |
+
hf-transfer==0.1.9
|
5 |
+
spaces==0.32.0
|
6 |
+
--extra-index-url https://download.pytorch.org/whl/cu124
torch==2.5.1
torchvision==0.20.1
torchaudio==2.5.1
|
7 |
+
transformers==4.49.0
|
8 |
+
tomli==2.2.1
|
9 |
+
sentencepiece==0.2.0
|
10 |
+
peft==0.14.0
|
11 |
+
bitsandbytes==0.45.2
|
12 |
+
packaging==23.1
|
13 |
+
argcomplete==1.9.4
|
14 |
+
xformers==0.0.28.post3
|
style.css
ADDED
@@ -0,0 +1,224 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
/* Enhanced CSS for FanFic Illustrator */
|
2 |
+
|
3 |
+
.header {
|
4 |
+
text-align: center;
|
5 |
+
margin-bottom: 2rem;
|
6 |
+
background: linear-gradient(to right, #4a69bd, #6a89cc);
|
7 |
+
padding: 1.5rem;
|
8 |
+
border-radius: 10px;
|
9 |
+
color: white;
|
10 |
+
box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
|
11 |
+
}
|
12 |
+
|
13 |
+
.title {
|
14 |
+
margin: 0;
|
15 |
+
font-size: 2.5rem;
|
16 |
+
font-weight: 700;
|
17 |
+
}
|
18 |
+
|
19 |
+
.subtitle {
|
20 |
+
font-size: 1.1rem;
|
21 |
+
margin-top: 0.5rem;
|
22 |
+
opacity: 0.9;
|
23 |
+
line-height: 1.5;
|
24 |
+
}
|
25 |
+
|
26 |
+
.subtitle-inline {
|
27 |
+
font-size: 1.3rem;
|
28 |
+
font-weight: 400;
|
29 |
+
opacity: 0.9;
|
30 |
+
}
|
31 |
+
|
32 |
+
.section {
|
33 |
+
background: white;
|
34 |
+
border-radius: 10px;
|
35 |
+
padding: 1.5rem;
|
36 |
+
margin-bottom: 1.5rem;
|
37 |
+
box-shadow: 0 2px 5px rgba(0, 0, 0, 0.05);
|
38 |
+
border: 1px solid #e1e4e8;
|
39 |
+
}
|
40 |
+
|
41 |
+
.section-title {
|
42 |
+
font-size: 1.3rem;
|
43 |
+
margin-top: 0;
|
44 |
+
margin-bottom: 1.2rem;
|
45 |
+
color: #4a69bd;
|
46 |
+
border-bottom: 2px solid #e1e4e8;
|
47 |
+
padding-bottom: 0.5rem;
|
48 |
+
}
|
49 |
+
|
50 |
+
/* Improved button styling */
|
51 |
+
.primary-button {
|
52 |
+
background-color: #4a69bd !important;
|
53 |
+
color: white !important;
|
54 |
+
font-weight: 600 !important;
|
55 |
+
padding: 0.7rem 1.2rem !important;
|
56 |
+
border-radius: 8px !important;
|
57 |
+
border: none !important;
|
58 |
+
cursor: pointer !important;
|
59 |
+
transition: all 0.2s ease !important;
|
60 |
+
box-shadow: 0 2px 5px rgba(0, 0, 0, 0.1) !important;
|
61 |
+
text-transform: uppercase !important;
|
62 |
+
letter-spacing: 0.5px !important;
|
63 |
+
}
|
64 |
+
|
65 |
+
.primary-button:hover {
|
66 |
+
background-color: #3a539b !important;
|
67 |
+
box-shadow: 0 4px 8px rgba(0, 0, 0, 0.15) !important;
|
68 |
+
transform: translateY(-1px) !important;
|
69 |
+
}
|
70 |
+
|
71 |
+
/* Improved AI output styling with better contrast */
|
72 |
+
.thinking-output-label {
|
73 |
+
font-weight: 600 !important;
|
74 |
+
color: #4285f4 !important;
|
75 |
+
background-color: transparent !important;
|
76 |
+
margin-bottom: 4px !important;
|
77 |
+
display: block !important;
|
78 |
+
margin-top: 0.5rem !important;
|
79 |
+
}
|
80 |
+
|
81 |
+
.thinking-output {
|
82 |
+
background-color: #f0f7ff !important;
|
83 |
+
border-left: 4px solid #4285f4 !important;
|
84 |
+
padding: 12px !important;
|
85 |
+
border-radius: 6px !important;
|
86 |
+
font-size: 0.95rem !important;
|
87 |
+
color: #333 !important;
|
88 |
+
margin-bottom: 10px !important;
|
89 |
+
}
|
90 |
+
|
91 |
+
.generated-prompt-label {
|
92 |
+
font-weight: 600 !important;
|
93 |
+
color: #34a853 !important;
|
94 |
+
background-color: transparent !important;
|
95 |
+
margin-bottom: 4px !important;
|
96 |
+
margin-top: 16px !important;
|
97 |
+
display: block !important;
|
98 |
+
}
|
99 |
+
|
100 |
+
.generated-prompt {
|
101 |
+
background-color: #f0fff4 !important;
|
102 |
+
border-left: 4px solid #34a853 !important;
|
103 |
+
padding: 12px !important;
|
104 |
+
border-radius: 6px !important;
|
105 |
+
font-weight: 500 !important;
|
106 |
+
font-size: 0.95rem !important;
|
107 |
+
color: #333 !important;
|
108 |
+
}
|
109 |
+
|
110 |
+
.text-input-area {
|
111 |
+
border: 1px solid #d0d7de;
|
112 |
+
border-radius: 8px;
|
113 |
+
}
|
114 |
+
|
115 |
+
/* Add animation for loading states */
|
116 |
+
@keyframes pulse {
|
117 |
+
0% { opacity: 1; }
|
118 |
+
50% { opacity: 0.7; }
|
119 |
+
100% { opacity: 1; }
|
120 |
+
}
|
121 |
+
|
122 |
+
.loading {
|
123 |
+
animation: pulse 1.5s infinite;
|
124 |
+
}
|
125 |
+
|
126 |
+
/* Gallery improvements */
|
127 |
+
.gallery-item {
|
128 |
+
border-radius: 8px;
|
129 |
+
overflow: hidden;
|
130 |
+
box-shadow: 0 3px 10px rgba(0, 0, 0, 0.1);
|
131 |
+
transition: transform 0.2s ease;
|
132 |
+
}
|
133 |
+
|
134 |
+
.gallery-item:hover {
|
135 |
+
transform: scale(1.02);
|
136 |
+
}
|
137 |
+
|
138 |
+
/* Form element styling */
|
139 |
+
input, select, textarea {
|
140 |
+
border-radius: 6px !important;
|
141 |
+
border: 1px solid #d0d7de !important;
|
142 |
+
padding: 8px 12px !important;
|
143 |
+
transition: border-color 0.2s ease !important;
|
144 |
+
}
|
145 |
+
|
146 |
+
input:focus, select:focus, textarea:focus {
|
147 |
+
border-color: #4a69bd !important;
|
148 |
+
box-shadow: 0 0 0 3px rgba(74, 105, 189, 0.1) !important;
|
149 |
+
}
|
150 |
+
|
151 |
+
/* Dropdown styling */
|
152 |
+
.gr-dropdown {
|
153 |
+
border-radius: 6px !important;
|
154 |
+
transition: all 0.2s ease !important;
|
155 |
+
}
|
156 |
+
|
157 |
+
.gr-dropdown:hover {
|
158 |
+
border-color: #4a69bd !important;
|
159 |
+
}
|
160 |
+
|
161 |
+
/* Accordion styling */
|
162 |
+
.gr-accordion {
|
163 |
+
border: 1px solid #e1e4e8 !important;
|
164 |
+
border-radius: 8px !important;
|
165 |
+
overflow: hidden !important;
|
166 |
+
margin-top: 1rem !important;
|
167 |
+
margin-bottom: 1rem !important;
|
168 |
+
}
|
169 |
+
|
170 |
+
.gr-accordion-title {
|
171 |
+
font-weight: 600 !important;
|
172 |
+
color: #4a69bd !important;
|
173 |
+
padding: 10px 15px !important;
|
174 |
+
}
|
175 |
+
|
176 |
+
/* Label styling to improve readability */
|
177 |
+
.gr-label {
|
178 |
+
font-weight: 600 !important;
|
179 |
+
margin-bottom: 4px !important;
|
180 |
+
color: #444 !important;
|
181 |
+
}
|
182 |
+
|
183 |
+
/* Responsive adjustments */
|
184 |
+
@media (max-width: 768px) {
|
185 |
+
.title {
|
186 |
+
font-size: 2rem;
|
187 |
+
}
|
188 |
+
|
189 |
+
.subtitle-inline {
|
190 |
+
font-size: 1.1rem;
|
191 |
+
}
|
192 |
+
|
193 |
+
.section {
|
194 |
+
padding: 1rem;
|
195 |
+
}
|
196 |
+
|
197 |
+
.primary-button {
|
198 |
+
padding: 0.6rem 1rem !important;
|
199 |
+
font-size: 0.9rem !important;
|
200 |
+
}
|
201 |
+
}
|
202 |
+
|
203 |
+
/* Textbox focus state improvement */
|
204 |
+
.gr-textbox:focus-within {
|
205 |
+
border-color: #4a69bd !important;
|
206 |
+
box-shadow: 0 0 0 3px rgba(74, 105, 189, 0.1) !important;
|
207 |
+
}
|
208 |
+
|
209 |
+
/* Gallery container styling */
|
210 |
+
.gr-gallery {
|
211 |
+
background-color: #f8f9fa !important;
|
212 |
+
border-radius: 8px !important;
|
213 |
+
padding: 4px !important;
|
214 |
+
margin-bottom: 16px !important;
|
215 |
+
}
|
216 |
+
|
217 |
+
/* Gallery item caption styling */
|
218 |
+
.gr-gallery-item-caption {
|
219 |
+
background-color: rgba(255, 255, 255, 0.9) !important;
|
220 |
+
color: #333 !important;
|
221 |
+
border-radius: 0 0 8px 8px !important;
|
222 |
+
padding: 8px !important;
|
223 |
+
}
|
224 |
+
|
utils.py
ADDED
@@ -0,0 +1,226 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gc
|
2 |
+
import os
|
3 |
+
import random
|
4 |
+
import numpy as np
|
5 |
+
import json
|
6 |
+
import torch
|
7 |
+
import uuid
|
8 |
+
from PIL import Image, PngImagePlugin
|
9 |
+
from datetime import datetime
|
10 |
+
from dataclasses import dataclass
|
11 |
+
from typing import Callable, Dict, Optional, Tuple, Any, List
|
12 |
+
from diffusers import (
|
13 |
+
DDIMScheduler,
|
14 |
+
DPMSolverMultistepScheduler,
|
15 |
+
DPMSolverSinglestepScheduler,
|
16 |
+
EulerAncestralDiscreteScheduler,
|
17 |
+
EulerDiscreteScheduler,
|
18 |
+
AutoencoderKL,
|
19 |
+
StableDiffusionXLPipeline,
|
20 |
+
)
|
21 |
+
import logging
|
22 |
+
|
23 |
+
MAX_SEED = np.iinfo(np.int32).max  # largest signed 32-bit int; upper bound for randomized seeds
|
24 |
+
|
25 |
+
|
26 |
+
@dataclass
class StyleConfig:
    """Prompt template pair for one named style preset."""

    # Positive prompt text for the style (presumably a template used with
    # preprocess_prompt's {prompt} placeholder — confirm against callers).
    prompt: str
    # Negative prompt text appended for the style.
    negative_prompt: str
|
30 |
+
|
31 |
+
|
32 |
+
def randomize_seed_fn(seed: int, randomize_seed: bool) -> int:
    """Return a fresh random seed in [0, MAX_SEED] when requested, else the given seed."""
    return random.randint(0, MAX_SEED) if randomize_seed else seed
|
36 |
+
|
37 |
+
|
38 |
+
def seed_everything(seed: int) -> torch.Generator:
    """Seed torch (CPU and all CUDA devices) and NumPy, and return a CPU generator seeded identically."""
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    np.random.seed(seed)
    # Generator.manual_seed returns the generator itself, so this can be chained.
    return torch.Generator().manual_seed(seed)
|
45 |
+
|
46 |
+
|
47 |
+
def parse_aspect_ratio(aspect_ratio: str) -> Optional[Tuple[int, int]]:
    """Parse a "W x H" preset string into an (int, int) pair; the "Custom" preset yields None."""
    if aspect_ratio == "Custom":
        return None
    w_str, h_str = aspect_ratio.split(" x ")
    return int(w_str), int(h_str)
|
52 |
+
|
53 |
+
|
54 |
+
def aspect_ratio_handler(
    aspect_ratio: str, custom_width: int, custom_height: int
) -> Tuple[int, int]:
    """Resolve a (width, height) pair from either a "W x H" preset or the custom fields."""
    if aspect_ratio != "Custom":
        # Presets always contain " x ", so parse_aspect_ratio never returns None here.
        return parse_aspect_ratio(aspect_ratio)
    return custom_width, custom_height
|
62 |
+
|
63 |
+
|
64 |
+
def get_scheduler(scheduler_config: Dict, name: str) -> Optional[Callable]:
    """Build the diffusers scheduler matching a UI name from a pipeline's scheduler config.

    Unknown names yield None so the caller can keep the pipeline's default.
    """
    if name == "DPM++ 2M Karras":
        return DPMSolverMultistepScheduler.from_config(
            scheduler_config, use_karras_sigmas=True
        )
    if name == "DPM++ SDE Karras":
        return DPMSolverSinglestepScheduler.from_config(
            scheduler_config, use_karras_sigmas=True
        )
    if name == "DPM++ 2M SDE Karras":
        return DPMSolverMultistepScheduler.from_config(
            scheduler_config, use_karras_sigmas=True, algorithm_type="sde-dpmsolver++"
        )
    if name == "Euler":
        return EulerDiscreteScheduler.from_config(scheduler_config)
    if name == "Euler a":
        return EulerAncestralDiscreteScheduler.from_config(scheduler_config)
    if name == "DDIM":
        return DDIMScheduler.from_config(scheduler_config)
    return None
|
82 |
+
|
83 |
+
|
84 |
+
def free_memory() -> None:
    """Free up GPU and system memory."""
    cuda = torch.cuda
    if cuda.is_available():
        cuda.empty_cache()
        cuda.ipc_collect()
    gc.collect()
|
90 |
+
|
91 |
+
|
92 |
+
def preprocess_prompt(
    style_dict,
    style_name: str,
    positive: str,
    negative: str = "",
    add_style: bool = True,
) -> Tuple[str, str]:
    """Apply a named style template to the prompts.

    Looks up (positive_template, negative_fragment) in style_dict, falling back
    to the "(None)" entry for unknown style names. The positive template is
    formatted with the user prompt only when add_style is on and the prompt is
    non-blank; the user's negative prompt is appended to the style's one.
    """
    style_positive, style_negative = style_dict.get(style_name, style_dict["(None)"])

    if add_style and positive.strip():
        final_positive = style_positive.format(prompt=positive)
    else:
        final_positive = positive

    final_negative = style_negative
    if negative.strip():
        # Keep the style fragment first; only join with a comma when it is non-empty.
        final_negative = f"{final_negative}, {negative}" if final_negative else negative

    return final_positive, final_negative
|
114 |
+
|
115 |
+
|
116 |
+
def common_upscale(
    samples: torch.Tensor,
    width: int,
    height: int,
    upscale_method: str,
) -> torch.Tensor:
    """Resize an NCHW tensor to (height, width) with the given torch interpolation mode."""
    target_size = (height, width)
    return torch.nn.functional.interpolate(samples, size=target_size, mode=upscale_method)
|
125 |
+
|
126 |
+
|
127 |
+
def upscale(
    samples: torch.Tensor, upscale_method: str, scale_by: float
) -> torch.Tensor:
    """Scale an NCHW tensor by a factor, rounding the target width/height to integers."""
    new_height = round(samples.shape[2] * scale_by)
    new_width = round(samples.shape[3] * scale_by)
    return torch.nn.functional.interpolate(
        samples, size=(new_height, new_width), mode=upscale_method
    )
|
133 |
+
|
134 |
+
|
135 |
+
def preprocess_image_dimensions(width, height):
    """Round width and height down to the nearest multiple of 8."""
    # Subtracting the remainder is a no-op when the value is already aligned.
    return width - width % 8, height - height % 8
|
141 |
+
|
142 |
+
|
143 |
+
def save_image(image, metadata, output_dir, is_colab):
    """Write a PIL image to output_dir as PNG with metadata embedded in a `parameters` chunk.

    Colab runs get a timestamped filename for easy browsing; elsewhere a UUID
    avoids collisions. Returns the full path of the saved file.
    """
    if is_colab:
        stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        filename = f"image_{stamp}.png"
    else:
        filename = f"{uuid.uuid4()}.png"

    os.makedirs(output_dir, exist_ok=True)
    filepath = os.path.join(output_dir, filename)

    png_info = PngImagePlugin.PngInfo()
    png_info.add_text("parameters", json.dumps(metadata))
    image.save(filepath, "PNG", pnginfo=png_info)
    return filepath
|
156 |
+
|
157 |
+
|
158 |
+
def is_google_colab() -> bool:
    """Return True when running inside Google Colab (i.e. `google.colab` is importable)."""
    try:
        import google.colab  # noqa: F401 -- mere importability is the signal
        return True
    except ImportError:
        # The original bare `except:` swallowed every exception, including
        # SystemExit/KeyboardInterrupt; only a failed import means "not Colab".
        return False
|
164 |
+
|
165 |
+
|
166 |
+
def load_pipeline(model_name: str, device: torch.device, hf_token: Optional[str] = None, vae: Optional[AutoencoderKL] = None) -> Any:
    """Load the Stable Diffusion XL pipeline from a directory, single file, or hub ID.

    Args:
        model_name: Local directory path, path to a ``.safetensors`` file, or a
            Hugging Face model ID.
        device: Device the pipeline is moved to after loading.
        hf_token: Auth token; only passed in the Hugging Face hub branch.
        vae: Optional pre-loaded VAE. NOTE(review): it is only honored by the
            ``from_pretrained`` branches; both ``from_single_file`` branches
            ignore it -- confirm whether that is intentional.

    Returns:
        The loaded pipeline (with the "lpw_stable_diffusion_xl" long-prompt
        custom pipeline applied), already moved to ``device``.

    Raises:
        Exception: any loading failure is logged with traceback and re-raised.
    """
    try:
        logging.info(f"Loading pipeline from {model_name}...")

        # Choose the right loading method based on file path or model ID
        if os.path.exists(model_name) and os.path.isdir(model_name):
            # It's a local directory path
            if os.path.exists(os.path.join(model_name, "animagine-xl-4.0.safetensors")):
                # Load from single file if it exists
                pipe = StableDiffusionXLPipeline.from_single_file(
                    os.path.join(model_name, "animagine-xl-4.0.safetensors"),
                    torch_dtype=torch.float16,
                    use_safetensors=True,
                    custom_pipeline="lpw_stable_diffusion_xl",
                    add_watermarker=False
                )
            else:
                # Load the VAE first to ensure it's not None
                vae_path = os.path.join(model_name, "vae")
                if vae is None and os.path.exists(vae_path):
                    logging.info(f"Loading VAE from {vae_path}...")
                    vae = AutoencoderKL.from_pretrained(vae_path, torch_dtype=torch.float16)

                # Load pipeline from directory
                pipe = StableDiffusionXLPipeline.from_pretrained(
                    model_name,
                    vae=vae,
                    torch_dtype=torch.float16,
                    use_safetensors=True,
                    custom_pipeline="lpw_stable_diffusion_xl",
                    add_watermarker=False
                )
        elif model_name.endswith(".safetensors"):
            # It's a single file
            pipe = StableDiffusionXLPipeline.from_single_file(
                model_name,
                torch_dtype=torch.float16,
                use_safetensors=True,
                custom_pipeline="lpw_stable_diffusion_xl",
                add_watermarker=False
            )
        else:
            # It's a Hugging Face model ID
            pipe = StableDiffusionXLPipeline.from_pretrained(
                model_name,
                vae=vae,
                token=hf_token,
                torch_dtype=torch.float16,
                use_safetensors=True,
                custom_pipeline="lpw_stable_diffusion_xl",
                add_watermarker=False
            )

        pipe.to(device)
        logging.info("Pipeline loaded successfully!")
        return pipe
    except Exception as e:
        logging.error(f"Failed to load pipeline: {str(e)}", exc_info=True)
        raise
|
226 |
+
|