Spaces: Running on Zero
Update app.py
app.py CHANGED
@@ -24,12 +24,40 @@ from transformers import (
 from transformers.image_utils import load_image
 from diffusers import StableDiffusionXLPipeline, EulerAncestralDiscreteScheduler
 
+# Constants
 MAX_MAX_NEW_TOKENS = 2048
 DEFAULT_MAX_NEW_TOKENS = 1024
 MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
+MAX_SEED = np.iinfo(np.int32).max
 
 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 
+# -----------------------
+# PROGRESS BAR HELPER
+# -----------------------
+def progress_bar_html(label: str) -> str:
+    """
+    Returns an HTML snippet for a thin progress bar with a label.
+    The progress bar is styled as a dark red animated bar.
+    """
+    return f'''
+<div style="display: flex; align-items: center;">
+    <span style="margin-right: 10px; font-size: 14px;">{label}</span>
+    <div style="width: 110px; height: 5px; background-color: #f0f0f0; border-radius: 2px; overflow: hidden;">
+        <div style="width: 100%; height: 100%; background-color: #ff0000; animation: loading 1.5s linear infinite;"></div>
+    </div>
+</div>
+<style>
+@keyframes loading {{
+    0% {{ transform: translateX(-100%); }}
+    100% {{ transform: translateX(100%); }}
+}}
+</style>
+'''
+
+# -----------------------
+# TEXT & TTS MODELS
+# -----------------------
 model_id = "prithivMLmods/FastThink-0.5B-Tiny"
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 model = AutoModelForCausalLM.from_pretrained(
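Note: the new progress_bar_html helper returns a self-contained HTML/CSS string that generate() yields as an intermediate chat message while the models run. A minimal sketch of that pattern, assuming Gradio renders HTML strings inside chat messages (which this Space relies on); fake_generate and the sleep are illustrative stand-ins:

import time
import gradio as gr

def fake_generate(message, history):
    # Uses progress_bar_html() as defined in the hunk above.
    yield progress_bar_html("Processing")  # animated placeholder shown first
    time.sleep(1)                          # stand-in for real model work
    yield "Done!"                          # final yield replaces the bar

gr.ChatInterface(fake_generate).launch()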
@@ -43,6 +71,10 @@ TTS_VOICES = [
     "en-US-JennyNeural",  # @tts1
     "en-US-GuyNeural",    # @tts2
 ]
+
+# -----------------------
+# MULTIMODAL (OCR) MODELS
+# -----------------------
 MODEL_ID_VL = "prithivMLmods/Qwen2-VL-OCR-2B-Instruct"
 processor = AutoProcessor.from_pretrained(MODEL_ID_VL, trust_remote_code=True)
 model_m = Qwen2VLForConditionalGeneration.from_pretrained(
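Note: the model_m load is cut off at the hunk boundary. A sketch of how such a Qwen2-VL load typically completes; only MODEL_ID_VL and the two visible lines come from the diff, the dtype, device, and eval() calls are assumptions:

import torch
from transformers import AutoProcessor, Qwen2VLForConditionalGeneration

MODEL_ID_VL = "prithivMLmods/Qwen2-VL-OCR-2B-Instruct"
processor = AutoProcessor.from_pretrained(MODEL_ID_VL, trust_remote_code=True)
model_m = Qwen2VLForConditionalGeneration.from_pretrained(
    MODEL_ID_VL,
    trust_remote_code=True,
    torch_dtype=torch.float16,  # assumed kwargs; the real call continues past the hunk
).to("cuda").eval()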
@@ -81,7 +113,6 @@ def randomize_seed_fn(seed: int, randomize_seed: bool) -> int:
         seed = random.randint(0, MAX_SEED)
     return seed
 
-MAX_SEED = np.iinfo(np.int32).max
 CACHE_EXAMPLES = torch.cuda.is_available() and os.getenv("CACHE_EXAMPLES", "0") == "1"
 MAX_IMAGE_SIZE = int(os.getenv("MAX_IMAGE_SIZE", "2048"))
 USE_TORCH_COMPILE = os.getenv("USE_TORCH_COMPILE", "0") == "1"
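Note: this hunk deletes MAX_SEED here because the commit moves it up to the constants block (new line 31), ahead of its first use. For reference, the seed helper reconstructed from the visible signature and context lines; only the if-guard is inferred from the indentation:

import random
import numpy as np

MAX_SEED = np.iinfo(np.int32).max  # 2_147_483_647

def randomize_seed_fn(seed: int, randomize_seed: bool) -> int:
    if randomize_seed:  # inferred guard; the two lines below appear in the hunk
        seed = random.randint(0, MAX_SEED)
    return seed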
@@ -89,6 +120,9 @@ ENABLE_CPU_OFFLOAD = os.getenv("ENABLE_CPU_OFFLOAD", "0") == "1"
 
 dtype = torch.float16 if device.type == "cuda" else torch.float32
 
+# -----------------------
+# STABLE DIFFUSION IMAGE GENERATION MODELS
+# -----------------------
 if torch.cuda.is_available():
     # Lightning 5 model
     pipe = StableDiffusionXLPipeline.from_pretrained(
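Note: the pipeline construction continues past the hunk. For readers unfamiliar with the diffusers API, an illustrative sketch of such a load; the checkpoint and kwargs below are stand-ins, not the Space's actual Lightning 5 values:

import torch
from diffusers import StableDiffusionXLPipeline, EulerAncestralDiscreteScheduler

# Illustrative checkpoint only; the Space's real weights are cut off by the hunk.
pipe = StableDiffusionXLPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    torch_dtype=torch.float16,
).to("cuda")
pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(pipe.scheduler.config)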
@@ -174,6 +208,9 @@ def save_image(img: Image.Image) -> str:
     img.save(unique_name)
     return unique_name
 
+# -----------------------
+# MAIN GENERATION FUNCTION
+# -----------------------
 @spaces.GPU
 def generate(
     input_dict: dict,
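Note: the save_image context suggests a unique-filename helper. A sketch in which the last two lines match the hunk, while the uuid-based naming is an assumption:

import uuid
from PIL import Image

def save_image(img: Image.Image) -> str:
    unique_name = f"{uuid.uuid4()}.png"  # assumed naming scheme
    img.save(unique_name)
    return unique_name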
@@ -188,7 +225,7 @@ def generate(
     files = input_dict.get("files", [])
 
     lower_text = text.lower().strip()
-    #
+    # If the prompt is an image generation command (using model flags)
     if (lower_text.startswith("@lightningv5") or
         lower_text.startswith("@lightningv4") or
         lower_text.startswith("@turbov3")):
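Note: routing keys on a lowercased prefix check against the three image-generation tags. A minimal sketch of separating the tag from the rest of the prompt (split_tag is a hypothetical helper, not part of the Space):

IMAGE_TAGS = ("@lightningv5", "@lightningv4", "@turbov3")

def split_tag(text: str):
    # Hypothetical helper: returns (tag, prompt), or (None, text) for plain chat.
    lower = text.lower().strip()
    for tag in IMAGE_TAGS:
        if lower.startswith(tag):
            return tag, text.strip()[len(tag):].strip()
    return None, text

print(split_tag("@turbov3 a red fox in snow"))  # ('@turbov3', 'a red fox in snow')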
@@ -234,7 +271,7 @@ def generate(
         torch.cuda.empty_cache()
 
         selected_pipe = models.get(model_choice, pipe)
-        yield "
+        yield progress_bar_html("Processing Image Generation")
         images = selected_pipe(**options).images
         image_path = save_image(images[0])
         yield gr.Image(image_path)
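Note: models.get(model_choice, pipe) falls back to the default pipeline when the choice is unrecognized. The dict itself is outside this diff, so the mapping below is purely an assumption about its shape:

# Hypothetical shape of the lookup table; only the .get() fallback is visible.
models = {
    "Lightning 5": pipe,       # the pipeline loaded in the CUDA branch above
    # "Lightning 4": pipe_v4,  # further pipelines would slot in here
}
selected_pipe = models.get(model_choice, pipe)  # unknown choices fall back to pipe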
@@ -272,7 +309,7 @@ def generate(
         thread.start()
 
         buffer = ""
-        yield "
+        yield progress_bar_html("Processing with Qwen2VL Ocr")
         for new_text in streamer:
             buffer += new_text
             buffer = buffer.replace("<|im_end|>", "")
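Note: the thread.start() / streamer pairing is the standard transformers streaming pattern, with generate() running on a background thread while the main loop drains tokens. A self-contained sketch using a small stand-in model:

from threading import Thread
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

tok = AutoTokenizer.from_pretrained("gpt2")  # stand-in model for illustration
mdl = AutoModelForCausalLM.from_pretrained("gpt2")

inputs = tok("Hello", return_tensors="pt")
streamer = TextIteratorStreamer(tok, skip_prompt=True, skip_special_tokens=True)
thread = Thread(target=mdl.generate, kwargs=dict(**inputs, streamer=streamer, max_new_tokens=32))
thread.start()

buffer = ""
for new_text in streamer:  # tokens arrive while generate() runs on the thread
    buffer += new_text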
@@ -311,6 +348,9 @@ def generate(
         output_file = asyncio.run(text_to_speech(final_response, voice))
         yield gr.Audio(output_file, autoplay=True)
 
+# -----------------------
+# GRADIO INTERFACE
+# -----------------------
 DESCRIPTION = """
 # IMAGINEO CHAT ⚡
 """
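Note: the voice names ("en-US-JennyNeural", "en-US-GuyNeural") are Microsoft Edge neural voices, so text_to_speech is most plausibly an edge-tts coroutine. A sketch under that assumption; the real body is not in this diff:

import edge_tts

async def text_to_speech(text: str, voice: str, out: str = "output.mp3") -> str:
    # Assumed implementation: synthesize with Edge TTS and return the file path.
    await edge_tts.Communicate(text, voice).save(out)
    return out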
@@ -354,7 +394,6 @@ demo = gr.ChatInterface(
     textbox=gr.MultimodalTextbox(label="Query Input", file_types=["image"], file_count="multiple", placeholder="use the tags @lightningv5 @lightningv4 @turbov3 for image gen !"),
     stop_btn="Stop Generation",
     multimodal=True,
-
 )
 
 if __name__ == "__main__":
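Note: a runnable skeleton of the interface wiring in this final hunk; only the textbox, stop_btn, and multimodal arguments are visible in the diff, so fn=generate and the launch call are assumptions:

import gradio as gr

demo = gr.ChatInterface(
    fn=generate,  # the @spaces.GPU generator defined earlier (assumed wiring)
    textbox=gr.MultimodalTextbox(
        label="Query Input",
        file_types=["image"],
        file_count="multiple",
        placeholder="use the tags @lightningv5 @lightningv4 @turbov3 for image gen !",
    ),
    stop_btn="Stop Generation",
    multimodal=True,
)

if __name__ == "__main__":
    demo.queue().launch()  # assumed; the diff ends at the __main__ guard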