TRELLIS-TextoImagen3D

Running on Zero

App Files Files Community

cavargas10 committed on Mar 13

Commit

33b5608

verified ·

1 Parent(s): 5422690

Update app.py

Browse files

Files changed (1) hide show

app.py +27 -54

app.py CHANGED Viewed

@@ -7,7 +7,6 @@ import random
 import uuid
 from datetime import datetime
 from diffusers import DiffusionPipeline
 os.environ['SPCONV_ALGO'] = 'native'
 from typing import *
 import torch
@@ -18,16 +17,14 @@ from PIL import Image
 from trellis.pipelines import TrellisImageTo3DPipeline
 from trellis.representations import Gaussian, MeshExtractResult
 from trellis.utils import render_utils, postprocessing_utils
 NUM_INFERENCE_STEPS = 8
 huggingface_token = os.getenv("HUGGINGFACE_TOKEN")
 # Constants
 MAX_SEED = np.iinfo(np.int32).max
 TMP_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'tmp')
 os.makedirs(TMP_DIR, exist_ok=True)
 def start_session(req: gr.Request):
     user_dir = os.path.join(TMP_DIR, str(req.session_hash))
     os.makedirs(user_dir, exist_ok=True)
@@ -70,12 +67,10 @@ def unpack_state(state: dict) -> Tuple[Gaussian, edict]:
     gs._scaling = torch.tensor(state['gaussian']['_scaling'], device='cuda')
     gs._rotation = torch.tensor(state['gaussian']['_rotation'], device='cuda')
     gs._opacity = torch.tensor(state['gaussian']['_opacity'], device='cuda')
     mesh = edict(
         vertices=torch.tensor(state['mesh']['vertices'], device='cuda'),
         faces=torch.tensor(state['mesh']['faces'], device='cuda'),
     )
     return gs, mesh
 def get_seed(randomize_seed: bool, seed: int) -> int:
@@ -106,16 +101,13 @@ def generate_flux_image(
         generator=generator,
     ).images[0]
-    # Guardar la imagen en el directorio temporal
     user_dir = os.path.join(TMP_DIR, str(req.session_hash))
     os.makedirs(user_dir, exist_ok=True)
     timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
     unique_id = str(uuid.uuid4())[:8]
     filename = f"{timestamp}_{unique_id}.png"
     filepath = os.path.join(user_dir, filename)
     image.save(filepath)
     return image
 @spaces.GPU
@@ -167,23 +159,16 @@ def extract_glb(
     torch.cuda.empty_cache()
     return glb_path, glb_path
-@spaces.GPU
-def extract_gaussian(state: dict, req: gr.Request) -> Tuple[str, str]:
-    user_dir = os.path.join(TMP_DIR, str(req.session_hash))
-    gs, _ = unpack_state(state)
-    gaussian_path = os.path.join(user_dir, 'sample.ply')
-    gs.save_ply(gaussian_path)
-    torch.cuda.empty_cache()
-    return gaussian_path, gaussian_path
-# Gradio Interface
 with gr.Blocks() as demo:
     gr.Markdown("""
-    ## Game Asset Generation to 3D with FLUX and TRELLIS
-    * Enter a prompt to generate a game asset image, then convert it to 3D
-    * If you find the generated 3D asset satisfactory, click "Extract GLB" to extract the GLB file and download it.
     """)
     with gr.Row():
         with gr.Column():
             # Flux image generation inputs
@@ -196,25 +181,20 @@ with gr.Blocks() as demo:
                     height = gr.Slider(512, 1024, label="Height", value=1024, step=16)
                 with gr.Row():
                     guidance_scale = gr.Slider(0.0, 10.0, label="Guidance Scale", value=3.5, step=0.1)
             # Botones separados
             generate_image_btn = gr.Button("Generar Imagen")
             generate_video_btn = gr.Button("Generar Video", interactive=False)
         with gr.Column():
             generated_image = gr.Image(label="Generated Asset", type="pil")
             video_output = gr.Video(label="Generated 3D Asset", autoplay=True, loop=True)
-        model_output = LitModel3D(label="Extracted GLB/Gaussian", exposure=8.0, height=400)
     with gr.Row():
         extract_glb_btn = gr.Button("Extract GLB", interactive=False)
-        extract_gs_btn = gr.Button("Extract Gaussian", interactive=False)
     with gr.Row():
         download_glb = gr.DownloadButton(label="Download GLB", interactive=False)
-        download_gs = gr.DownloadButton(label="Download Gaussian", interactive=False)
     # Variables adicionales para la generación 3D
     with gr.Accordion("3D Generation Settings", open=False):
         gr.Markdown("Stage 1: Sparse Structure Generation")
@@ -225,18 +205,18 @@ with gr.Blocks() as demo:
         with gr.Row():
             slat_guidance_strength = gr.Slider(0.0, 10.0, label="Guidance Strength", value=3.0, step=0.1)
             slat_sampling_steps = gr.Slider(1, 50, label="Sampling Steps", value=12, step=1)
     # Variables para la extracción de GLB
     with gr.Accordion("GLB Extraction Settings", open=False):
         mesh_simplify = gr.Slider(0.9, 0.98, label="Simplify", value=0.95, step=0.01)
         texture_size = gr.Slider(512, 2048, label="Texture Size", value=1024, step=512)
     output_buf = gr.State()
     # Event handlers
     demo.load(start_session)
     demo.unload(end_session)
     # Generar imagen
     generate_image_btn.click(
         generate_flux_image,
@@ -246,7 +226,7 @@ with gr.Blocks() as demo:
         lambda: gr.Button(interactive=True),
         outputs=[generate_video_btn],
     )
     # Generar video
     generate_video_btn.click(
         get_seed,
@@ -268,13 +248,13 @@ with gr.Blocks() as demo:
         ],
         outputs=[output_buf, video_output],
     ).then(
-        lambda: tuple([gr.Button(interactive=True), gr.Button(interactive=True)]),
-        outputs=[extract_glb_btn, extract_gs_btn],
     )
     video_output.clear(
-        lambda: tuple([gr.Button(interactive=False), gr.Button(interactive=False)]),
-        outputs=[extract_glb_btn, extract_gs_btn],
     )
     # Extraer GLB
@@ -286,22 +266,12 @@ with gr.Blocks() as demo:
         lambda: gr.Button(interactive=True),
         outputs=[download_glb],
     )
-    # Extraer Gaussian
-    extract_gs_btn.click(
-        extract_gaussian,
-        inputs=[output_buf],
-        outputs=[model_output, download_gs],
-    ).then(
-        lambda: gr.Button(interactive=True),
-        outputs=[download_gs],
-    )
     model_output.clear(
         lambda: gr.Button(interactive=False),
         outputs=[download_glb],
     )
 # Initialize both pipelines
 if __name__ == "__main__":
     from diffusers import FluxTransformer2DModel, FluxPipeline, BitsAndBytesConfig, GGUFQuantizationConfig
@@ -310,23 +280,26 @@ if __name__ == "__main__":
     # Initialize Flux pipeline
     device = "cuda" if torch.cuda.is_available() else "cpu"
     huggingface_token = os.getenv("HUGGINGFACE_TOKEN")
     dtype = torch.bfloat16
     file_url = "https://huggingface.co/gokaygokay/flux-game/blob/main/hyperflux_00001_.q8_0.gguf"
     file_url = file_url.replace("/resolve/main/", "/blob/main/").replace("?download=true", "")
     single_file_base_model = "camenduru/FLUX.1-dev-diffusers"
     quantization_config_tf = BitsAndBytesConfigTF(load_in_8bit=True, bnb_8bit_compute_dtype=torch.bfloat16)
     text_encoder_2 = T5EncoderModel.from_pretrained(single_file_base_model, subfolder="text_encoder_2", torch_dtype=dtype, config=single_file_base_model, quantization_config=quantization_config_tf, token=huggingface_token)
     if ".gguf" in file_url:
         transformer = FluxTransformer2DModel.from_single_file(file_url, subfolder="transformer", quantization_config=GGUFQuantizationConfig(compute_dtype=dtype), torch_dtype=dtype, config=single_file_base_model)
     else:
         quantization_config = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_quant_type="nf4", bnb_4bit_use_double_quant=True, bnb_4bit_compute_dtype=torch.bfloat16, token=huggingface_token)
         transformer = FluxTransformer2DModel.from_single_file(file_url, subfolder="transformer", torch_dtype=dtype, config=single_file_base_model, quantization_config=quantization_config, token=huggingface_token)
     flux_pipeline = FluxPipeline.from_pretrained(single_file_base_model, transformer=transformer, text_encoder_2=text_encoder_2, torch_dtype=dtype, token=huggingface_token)
     flux_pipeline.to("cuda")
     # Initialize Trellis pipeline
     trellis_pipeline = TrellisImageTo3DPipeline.from_pretrained("cavargas10/TRELLIS")
     trellis_pipeline.cuda()
     try:
         trellis_pipeline.preprocess_image(Image.fromarray(np.zeros((512, 512, 3), dtype=np.uint8)))
     except:

 import uuid
 from datetime import datetime
 from diffusers import DiffusionPipeline
 os.environ['SPCONV_ALGO'] = 'native'
 from typing import *
 import torch
 from trellis.pipelines import TrellisImageTo3DPipeline
 from trellis.representations import Gaussian, MeshExtractResult
 from trellis.utils import render_utils, postprocessing_utils
 NUM_INFERENCE_STEPS = 8
 huggingface_token = os.getenv("HUGGINGFACE_TOKEN")
 # Constants
 MAX_SEED = np.iinfo(np.int32).max
 TMP_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'tmp')
 os.makedirs(TMP_DIR, exist_ok=True)
+# Funciones auxiliares
 def start_session(req: gr.Request):
     user_dir = os.path.join(TMP_DIR, str(req.session_hash))
     os.makedirs(user_dir, exist_ok=True)
     gs._scaling = torch.tensor(state['gaussian']['_scaling'], device='cuda')
     gs._rotation = torch.tensor(state['gaussian']['_rotation'], device='cuda')
     gs._opacity = torch.tensor(state['gaussian']['_opacity'], device='cuda')
     mesh = edict(
         vertices=torch.tensor(state['mesh']['vertices'], device='cuda'),
         faces=torch.tensor(state['mesh']['faces'], device='cuda'),
     )
     return gs, mesh
 def get_seed(randomize_seed: bool, seed: int) -> int:
         generator=generator,
     ).images[0]
     user_dir = os.path.join(TMP_DIR, str(req.session_hash))
     os.makedirs(user_dir, exist_ok=True)
     timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
     unique_id = str(uuid.uuid4())[:8]
     filename = f"{timestamp}_{unique_id}.png"
     filepath = os.path.join(user_dir, filename)
     image.save(filepath)
     return image
 @spaces.GPU
     torch.cuda.empty_cache()
     return glb_path, glb_path
+# Interfaz Gradio
 with gr.Blocks() as demo:
     gr.Markdown("""
+    # UTPL - Conversión de Texto a Imagen a objetos 3D usando IA
+    ### Tesis: *"Objetos tridimensionales creados por IA: Innovación en entornos virtuales"*
+    **Autor:** Carlos Vargas
+    **Base técnica:** Adaptación de [TRELLIS](https://trellis3d.github.io/) y Flux
+    **Propósito educativo:** Demostraciones académicas e Investigación en modelado 3D automático
     """)
     with gr.Row():
         with gr.Column():
             # Flux image generation inputs
                     height = gr.Slider(512, 1024, label="Height", value=1024, step=16)
                 with gr.Row():
                     guidance_scale = gr.Slider(0.0, 10.0, label="Guidance Scale", value=3.5, step=0.1)
             # Botones separados
             generate_image_btn = gr.Button("Generar Imagen")
             generate_video_btn = gr.Button("Generar Video", interactive=False)
         with gr.Column():
             generated_image = gr.Image(label="Generated Asset", type="pil")
             video_output = gr.Video(label="Generated 3D Asset", autoplay=True, loop=True)
+        model_output = LitModel3D(label="Extracted GLB", exposure=8.0, height=400)
     with gr.Row():
         extract_glb_btn = gr.Button("Extract GLB", interactive=False)
     with gr.Row():
         download_glb = gr.DownloadButton(label="Download GLB", interactive=False)
     # Variables adicionales para la generación 3D
     with gr.Accordion("3D Generation Settings", open=False):
         gr.Markdown("Stage 1: Sparse Structure Generation")
         with gr.Row():
             slat_guidance_strength = gr.Slider(0.0, 10.0, label="Guidance Strength", value=3.0, step=0.1)
             slat_sampling_steps = gr.Slider(1, 50, label="Sampling Steps", value=12, step=1)
     # Variables para la extracción de GLB
     with gr.Accordion("GLB Extraction Settings", open=False):
         mesh_simplify = gr.Slider(0.9, 0.98, label="Simplify", value=0.95, step=0.01)
         texture_size = gr.Slider(512, 2048, label="Texture Size", value=1024, step=512)
     output_buf = gr.State()
     # Event handlers
     demo.load(start_session)
     demo.unload(end_session)
     # Generar imagen
     generate_image_btn.click(
         generate_flux_image,
         lambda: gr.Button(interactive=True),
         outputs=[generate_video_btn],
     )
     # Generar video
     generate_video_btn.click(
         get_seed,
         ],
         outputs=[output_buf, video_output],
     ).then(
+        lambda: gr.Button(interactive=True),
+        outputs=[extract_glb_btn],
     )
     video_output.clear(
+        lambda: gr.Button(interactive=False),
+        outputs=[extract_glb_btn],
     )
     # Extraer GLB
         lambda: gr.Button(interactive=True),
         outputs=[download_glb],
     )
     model_output.clear(
         lambda: gr.Button(interactive=False),
         outputs=[download_glb],
     )
 # Initialize both pipelines
 if __name__ == "__main__":
     from diffusers import FluxTransformer2DModel, FluxPipeline, BitsAndBytesConfig, GGUFQuantizationConfig
     # Initialize Flux pipeline
     device = "cuda" if torch.cuda.is_available() else "cpu"
     huggingface_token = os.getenv("HUGGINGFACE_TOKEN")
     dtype = torch.bfloat16
     file_url = "https://huggingface.co/gokaygokay/flux-game/blob/main/hyperflux_00001_.q8_0.gguf"
     file_url = file_url.replace("/resolve/main/", "/blob/main/").replace("?download=true", "")
     single_file_base_model = "camenduru/FLUX.1-dev-diffusers"
     quantization_config_tf = BitsAndBytesConfigTF(load_in_8bit=True, bnb_8bit_compute_dtype=torch.bfloat16)
     text_encoder_2 = T5EncoderModel.from_pretrained(single_file_base_model, subfolder="text_encoder_2", torch_dtype=dtype, config=single_file_base_model, quantization_config=quantization_config_tf, token=huggingface_token)
     if ".gguf" in file_url:
         transformer = FluxTransformer2DModel.from_single_file(file_url, subfolder="transformer", quantization_config=GGUFQuantizationConfig(compute_dtype=dtype), torch_dtype=dtype, config=single_file_base_model)
     else:
         quantization_config = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_quant_type="nf4", bnb_4bit_use_double_quant=True, bnb_4bit_compute_dtype=torch.bfloat16, token=huggingface_token)
         transformer = FluxTransformer2DModel.from_single_file(file_url, subfolder="transformer", torch_dtype=dtype, config=single_file_base_model, quantization_config=quantization_config, token=huggingface_token)
     flux_pipeline = FluxPipeline.from_pretrained(single_file_base_model, transformer=transformer, text_encoder_2=text_encoder_2, torch_dtype=dtype, token=huggingface_token)
     flux_pipeline.to("cuda")
     # Initialize Trellis pipeline
     trellis_pipeline = TrellisImageTo3DPipeline.from_pretrained("cavargas10/TRELLIS")
     trellis_pipeline.cuda()
     try:
         trellis_pipeline.preprocess_image(Image.fromarray(np.zeros((512, 512, 3), dtype=np.uint8)))
     except: