import modal
from pathlib import Path

# Creating our Modal App
app = modal.App("mochi-finetune")

# Creating volumes for data, intermediate data, and produced weights
videos_volume = modal.Volume.from_name("mochi-tune-videos", create_if_missing=True)
videos_prepared_volume = modal.Volume.from_name("mochi-tune-videos-prepared", create_if_missing=True)
weights_volume = modal.Volume.from_name("mochi-tune-weights", create_if_missing=True)
finetunes_volume = modal.Volume.from_name("mochi-tune-finetunes", create_if_missing=True)
outputs_volume = modal.Volume.from_name("mochi-tune-outputs", create_if_missing=True)

USERNAME = "genmoai"
REPOSITORY = "mochi"
CLONE_CMD = f"git clone https://github.com/{USERNAME}/{REPOSITORY}.git"

# Building our container image
base_img = (
    modal.Image.debian_slim()
    .apt_install("git", "ffmpeg", "bc", "zlib1g-dev", "libjpeg-dev", "wget")
    .run_commands(CLONE_CMD)
    .workdir(REPOSITORY)
    .pip_install("gdown", "setuptools", "wheel")
    .run_commands('pip install -e . --no-build-isolation')
)

MINUTES = 60
HOURS = 60 * MINUTES

# Remote function for downloading a labeled video dataset from Google Drive
# Run it with:
#   modal run main::download_videos
@app.function(image=base_img,
    volumes={
        "/videos": videos_volume,
    }
)
def download_videos():
    '''Downloads videos from google drive into our volume'''
    import gdown
    import zipfile

    name = "dissolve"
    url = "https://drive.google.com/uc?id=1ldoBppcsv5Ueoikh0zCmNviojRCrGXQN"
    output = f"{name}.zip"
    gdown.download(url, output, quiet=False)
    with zipfile.ZipFile(output, "r") as zip_ref:
        zip_ref.extractall("/videos")

# Remote function for downloading the model weights from Hugging Face
# Run it with:
#   modal run main::download_weights
@app.function(image=base_img, 
    volumes={
        "/weights": weights_volume,
    },
    timeout=1*HOURS,
)
def download_weights():
    # HF-transfer and snapshot download tend to hang on the large model, so we download it manually with wget
    import subprocess
    print("🍡 Downloading weights from Hugging Face. This may take 30 minutes.")
    # ~30 min
    subprocess.run(["wget", "https://huggingface.co/genmo/mochi-1-preview/resolve/main/dit.safetensors", "-O", "/weights/dit.safetensors"])
    # ~1 min
    subprocess.run(["wget", "https://huggingface.co/genmo/mochi-1-preview/resolve/main/decoder.safetensors", "-O", "/weights/decoder.safetensors"])
    # ~20 sec
    subprocess.run(["wget", "https://huggingface.co/genmo/mochi-1-preview/resolve/main/encoder.safetensors", "-O", "/weights/encoder.safetensors"])

# Remote function for preprocessing the video dataset
# Run it with:
#   modal run main::preprocess
@app.function(
    image=base_img, 
    volumes={
        "/videos": videos_volume,
        "/videos_prepared": videos_prepared_volume,
        "/weights": weights_volume,
    },
    timeout=30*MINUTES,
    gpu="H100"
)
def preprocess():
    import subprocess
    print("🍡 Preprocessing videos. This may take 2-3 minutes.")
    video_dir = "videos_dissolve"
    subprocess.run([
        "bash", "demos/fine_tuner/preprocess.bash", 
        "-v", f"/videos/{video_dir}/",
        "-o", "/videos_prepared/", 
        "-w", "/weights/", 
        "-n", "37"
    ])

# Remote function for finetuning the model using the prepared dataset
# Configure the run in lora.yaml
# Run it with:
#   modal run main::finetune
@app.function(
    image=base_img, 
    volumes={
        "/videos": videos_volume,
        "/videos_prepared": videos_prepared_volume,
        "/weights": weights_volume,
        "/finetunes": finetunes_volume,
    },
    mounts=[modal.Mount.from_local_file("lora.yaml", remote_path=f"{REPOSITORY}/lora.yaml")],
    timeout=4*HOURS,
    gpu="H100"
)
def finetune():
    import subprocess
    print("🍡 Finetuning Mochi. This may take 3 hours.")
    print("🍡 See your mochi-tune-finetunes volume for intermediate checkpoints and samples.")
    subprocess.run([
        "bash", "demos/fine_tuner/run.bash", 
        "-c", "lora.yaml", # from our locally mounted yaml file
        "-n", "1", 
    ])

# Remote function (Modal @cls) for running inference on one or multiple videos
# Run it with the @local_entrypoint below
@app.cls(
    image = base_img,
    volumes={
        "/weights": weights_volume,
        "/finetunes": finetunes_volume,
        "/outputs": outputs_volume,
    },
    timeout=30*MINUTES,
    gpu="H100"
)
class MochiLora():
    def __init__(self, model_dir: str = "/weights", lora_path: str = None, cpu_offload: bool = False):
        self.model_dir = model_dir
        self.lora_path = lora_path
        self.cpu_offload = cpu_offload

    @modal.enter()
    def start(self):
        from genmo.mochi_preview.pipelines import (
            DecoderModelFactory,
            DitModelFactory,
            MochiMultiGPUPipeline,
            MochiSingleGPUPipeline,
            T5ModelFactory,
        )
        import torch

        """Initialize the model - this runs once when the container starts"""
        print("🍡 Loading Mochi model.")

        self.num_gpus = torch.cuda.device_count()
        
        # Configure pipeline based on GPU count
        klass = MochiSingleGPUPipeline if self.num_gpus == 1 else MochiMultiGPUPipeline
        
        kwargs = dict(
            text_encoder_factory=T5ModelFactory(),
            dit_factory=DitModelFactory(
                model_path=f"{self.model_dir}/dit.safetensors",
                lora_path=self.lora_path,
                model_dtype="bf16",
            ),
            decoder_factory=DecoderModelFactory(
                model_path=f"{self.model_dir}/decoder.safetensors",
            ),
        )

        if self.num_gpus > 1:
            assert not self.lora_path, f"Lora not supported in multi-GPU mode"
            assert not self.cpu_offload, "CPU offload not supported in multi-GPU mode"
            kwargs["world_size"] = self.num_gpus
        else:
            kwargs["cpu_offload"] = self.cpu_offload
            kwargs["decode_type"] = "tiled_spatial"
            kwargs["fast_init"] = not self.lora_path
            kwargs["strict_load"] = not self.lora_path
            kwargs["decode_args"] = dict(overlap=8)

        self.pipeline = klass(**kwargs)
        print(f"🍡 Model loaded successfully with {self.num_gpus} GPUs")

    @modal.method()
    def generate(self, 
                prompt: str,
                negative_prompt: str = "",
                width: int = 848,
                height: int = 480,
                num_frames: int = 163,
                seed: int = 1710977262,
                cfg_scale: float = 6.0,
                num_inference_steps: int = 64) -> str:
        """Generate video based on the prompt and parameters"""
        
        print("🍡 Generating video.")

        import json
        import os
        import time

        import numpy as np

        from genmo.lib.progress import progress_bar
        from genmo.lib.utils import save_video
        from genmo.mochi_preview.pipelines import linear_quadratic_schedule

        
        # Create sigma schedule
        sigma_schedule = linear_quadratic_schedule(num_inference_steps, 0.025)
        cfg_schedule = [cfg_scale] * num_inference_steps

        args = {
            "height": height,
            "width": width,
            "num_frames": num_frames,
            "sigma_schedule": sigma_schedule,
            "cfg_schedule": cfg_schedule,
            "num_inference_steps": num_inference_steps,
            "batch_cfg": False,
            "prompt": prompt,
            "negative_prompt": negative_prompt,
            "seed": seed,
        }

        with progress_bar(type="tqdm"):
            final_frames = self.pipeline(**args)
            final_frames = final_frames[0]

            assert isinstance(final_frames, np.ndarray)
            assert final_frames.dtype == np.float32

            # Save to mounted volume
            output_dir = "/outputs"  # Assuming this path exists in the mounted volume
            os.makedirs(output_dir, exist_ok=True)
            output_path = os.path.join(output_dir, f"output_{int(time.time())}.mp4")

            save_video(final_frames, output_path)
            
            # Save generation parameters
            json_path = os.path.splitext(output_path)[0] + ".json"
            json.dump(args, open(json_path, "w"), indent=4)

        print(f"🍡 Video saved to {output_path}")
        outputs_volume.commit()
        return output_path.split("/")[-1]

# Local entrypoint for using the MochiLora class
# Select the lora_path you'd want to use from the finetunes volume
# Then it with:
#   modal run main
@app.local_entrypoint()
def main(
    prompt="A pristine snowglobe featuring a winter scene sits peacefully. The glass begins to crumble into fine powder, as the entire sphere deteriorates into sparkling dust that drifts outward. The fake snow mingles with the crystalline particles, creating a glittering cloud captured in high-speed photography.",
    negative_prompt="blurry, low quality",
    width=848,
    height=480,
    num_frames=49, # (num_frames - 1) must be divisible by 6
    seed=1710977262,
    cfg_scale=6.0,
    num_inference_steps=64,
    lora_path="/finetunes/my_mochi_lora/model_2000.lora.safetensors",
    cpu_offload=True,
):
    lora = MochiLora(
        lora_path=lora_path, # your lora path
        cpu_offload=cpu_offload,
    )
    output_path = lora.generate.remote(
        prompt=prompt,
        negative_prompt=negative_prompt,
        width=width,
        height=height,
        num_frames=num_frames,
        seed=seed,
        cfg_scale=cfg_scale,
        num_inference_steps=num_inference_steps,
    )

    local_dir = Path("/tmp/mochi")
    local_dir.mkdir(exist_ok=True, parents=True)
    local_path = local_dir / output_path
    local_path.write_bytes(b"".join(outputs_volume.read_file(output_path)))
    print(f"🍡 video saved locally at {local_path}")