|
<!DOCTYPE html> |
|
<html lang="en"> |
|
<head> |
|
<meta charset="UTF-8"> |
|
<meta name="viewport" content="width=device-width, initial-scale=1.0"> |
|
<title>The MLOps Engineer's Interactive Architecture Builder</title> |
|
<link rel="preconnect" href="https://fonts.googleapis.com"> |
|
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin> |
|
<link href="https://fonts.googleapis.com/css2?family=Roboto:wght@300;400;500;700&display=swap" rel="stylesheet"> |
|
<link href="https://fonts.googleapis.com/icon?family=Material+Icons" rel="stylesheet"> |
|
<style> |
|
|
|
:root { |
|
--primary-color: #1E88E5; |
|
--primary-dark: #1565C0; |
|
--secondary-color: #004d40; |
|
--genai-color: #6A1B9A; |
|
--background-color: #f4f6f8; |
|
--card-bg-color: #ffffff; |
|
--text-color: #333; |
|
--heading-color: #212121; |
|
--subtle-text-color: #555; |
|
--border-color: #e0e0e0; |
|
--code-bg-color: #282c34; |
|
--code-text-color: #abb2bf; |
|
--shadow: 0 4px 12px rgba(0,0,0,0.1); |
|
--tile-hover-shadow: 0 6px 16px rgba(0,0,0,0.15); |
|
} |
|
|
|
body { |
|
font-family: 'Roboto', sans-serif; |
|
background-color: var(--background-color); |
|
color: var(--text-color); |
|
margin: 0; |
|
padding: 0; |
|
line-height: 1.6; |
|
} |
|
|
|
|
|
.container { max-width: 1200px; margin: 0 auto; padding: 2rem; } |
|
header { text-align: center; margin-bottom: 2rem; } |
|
header h1 { color: var(--heading-color); font-weight: 700; font-size: 2.8rem; margin-bottom: 0.5rem; } |
|
header p { font-size: 1.1rem; color: var(--subtle-text-color); max-width: 800px; margin: 0 auto; } |
|
|
|
.main-section-title { |
|
font-size: 2.2rem; color: var(--heading-color); border-bottom: 3px solid var(--primary-color); |
|
padding-bottom: 0.75rem; margin-top: 3rem; margin-bottom: 2rem; display: flex; align-items: center; |
|
} |
|
.main-section-title .material-icons { font-size: 2.8rem; margin-right: 1rem; } |
|
|
|
|
|
#architecture-builder { background-color: var(--card-bg-color); padding: 2rem; border-radius: 8px; box-shadow: var(--shadow); } |
|
.arch-type-selector { display: flex; gap: 1rem; margin-bottom: 2rem; border-bottom: 1px solid var(--border-color); padding-bottom: 1.5rem; } |
|
.arch-type-chip { padding: 0.8rem 1.5rem; border-radius: 8px; cursor: pointer; font-weight: 500; font-size: 1.1rem; border: 2px solid transparent; transition: all 0.2s ease; } |
|
.arch-type-chip.active.classic { background-color: #e3f2fd; border-color: var(--primary-color); color: var(--primary-dark); } |
|
.arch-type-chip.active.gen-ai { background-color: #f3e5f5; border-color: var(--genai-color); color: var(--genai-color); } |
|
|
|
.builder-fields { display: none; } |
|
.builder-fields.active { display: block; } |
|
|
|
.selection-group { margin-bottom: 1.5rem; transition: opacity 0.3s ease; } |
|
.selection-group.disabled { opacity: 0.5; pointer-events: none; } |
|
.selection-group h4 { margin-top: 0; margin-bottom: 1rem; font-size: 1.2rem; color: var(--secondary-color); } |
|
.selection-chips { display: flex; flex-wrap: wrap; gap: 0.75rem; } |
|
.chip { |
|
padding: 0.6rem 1.2rem; border: 2px solid var(--border-color); border-radius: 20px; |
|
cursor: pointer; transition: all 0.2s ease; font-weight: 500; background-color: #f9f9f9; |
|
} |
|
.chip:not(.disabled):hover { border-color: var(--primary-dark); background-color: #e3f2fd; } |
|
.chip.active { background-color: var(--primary-color); color: white; border-color: var(--primary-color); } |
|
.chip.disabled { opacity: 0.6; cursor: not-allowed; background-color: #f0f0f0; border-color: var(--border-color); color: #999; } |
|
|
|
#generate-btn { |
|
background-color: var(--secondary-color); color: white; border: none; padding: 0.8rem 2rem; font-size: 1.1rem; |
|
font-weight: 500; border-radius: 6px; cursor: pointer; transition: background-color 0.2s; |
|
display: block; margin-top: 2rem; width: 100%; |
|
} |
|
#generate-btn:hover { background-color: #00695C; } |
|
|
|
|
|
#architecture-diagram-output { |
|
display: none; margin-top: 2rem; background-color: #fdfdfd; border: 1px solid var(--border-color); |
|
padding: 2rem; border-radius: 8px; text-align: center; |
|
} |
|
.diagram-title { font-size: 1.5rem; font-weight: 500; margin-bottom: 2rem; } |
|
.diagram-stack { display: flex; flex-direction: column; align-items: center; gap: 0.5rem; } |
|
.diagram-layer { |
|
background-color: var(--card-bg-color); border: 2px solid var(--primary-color); border-radius: 8px; |
|
padding: 1.5rem 2.5rem; width: 80%; max-width: 500px; box-shadow: 0 2px 4px rgba(0,0,0,0.05); text-align: center; |
|
} |
|
.diagram-layer.gen-ai-layer { border-color: var(--genai-color); } |
|
.diagram-layer.gen-ai-layer h5 { color: var(--genai-color); } |
|
.diagram-layer h5 { margin: 0 0 0.5rem 0; color: var(--primary-dark); font-size: 1.2rem; font-weight: 700; } |
|
.diagram-layer p { margin: 0; font-size: 1rem; color: var(--subtle-text-color); } |
|
.diagram-arrow { font-family: 'Material Icons'; font-size: 2.5rem; color: var(--primary-color); line-height: 1; } |
|
.diagram-arrow.gen-ai-arrow { color: var(--genai-color); } |
|
.icon-img-placeholder { |
|
height: 32px; |
|
max-width: 120px; |
|
width: auto; |
|
margin-top: 10px; |
|
} |
|
|
|
|
|
.tile-container { display: grid; grid-template-columns: repeat(auto-fit, minmax(180px, 1fr)); gap: 1.5rem; margin-bottom: 2.5rem; } |
|
.tile { background-color: var(--card-bg-color); border: 2px solid var(--border-color); border-radius: 8px; padding: 1.5rem; text-align: center; cursor: pointer; transition: all 0.2s ease; display: flex; flex-direction: column; align-items: center; justify-content: center; min-height: 150px; } |
|
.tile:hover { transform: translateY(-5px); box-shadow: var(--tile-hover-shadow); border-color: var(--primary-color); } |
|
.tile.active { border-color: var(--primary-color); box-shadow: var(--tile-hover-shadow); background-color: #f0f7ff; } |
|
.tile-icon-img { |
|
height: 48px; |
|
width: auto; |
|
max-width: 100%; |
|
margin-bottom: 1rem; |
|
} |
|
.tile h4 { margin: 0; font-size: 1.2rem; color: var(--heading-color); } |
|
.content-panel { display: none; background-color: var(--card-bg-color); border-radius: 8px; box-shadow: var(--shadow); padding: 2.5rem; margin-top: 1rem; } |
|
.content-panel.active { display: block; } |
|
.stack-layer { margin-bottom: 2.5rem; padding-bottom: 1.5rem; border-bottom: 1px solid var(--border-color); } |
|
.stack-layer:last-child { border-bottom: none; margin-bottom: 0; } |
|
.stack-layer h3 { font-size: 1.6rem; color: var(--secondary-color); margin-top: 0; display: flex; align-items: center; } |
|
.stack-layer h3 .material-icons { margin-right: 12px; font-size: 2rem; } |
|
details { border: 1px solid var(--border-color); border-radius: 6px; margin-bottom: 1rem; background-color: #f9fafb; } |
|
summary { cursor: pointer; padding: 1rem; font-weight: 500; font-size: 1.1rem; list-style: none; display: flex; align-items: center; justify-content: space-between; } |
|
pre { background-color: var(--code-bg-color); color: var(--code-text-color); padding: 1.5rem 1rem 1rem 1rem; border-radius: 6px; overflow-x: auto; font-size: 0.9em; position: relative; } |
|
code { font-family: 'Courier New', Courier, monospace; } |
|
.copy-btn { position: absolute; top: 10px; right: 10px; background-color: #4a505c; color: #fff; border: none; padding: 6px 10px; border-radius: 4px; cursor: pointer; opacity: 0.7; } |
|
pre:hover .copy-btn { opacity: 1; } |
|
.copy-btn.copied { background-color: var(--primary-dark); } |
|
.code-block-header { font-weight: bold; color: var(--subtle-text-color); margin-bottom: -0.5rem; margin-top: 1rem; } |
|
</style> |
|
</head> |
|
<body> |
|
|
|
<div class="container"> |
|
<header> |
|
<h1>MLOps Architecture Builder & Cheatsheet</h1> |
|
<p>Design your custom model serving stack using the builder below, or explore detailed deployment guides for common frameworks.</p> |
|
</header> |
|
|
|
<main> |
|
|
|
<h2 class="main-section-title"><i class="material-icons">architecture</i>My Architecture</h2> |
|
<div id="architecture-builder"> |
|
<div class="arch-type-selector"> |
|
<div class="arch-type-chip active classic" data-type="classic">Classic ML</div> |
|
<div class="arch-type-chip gen-ai" data-type="gen-ai">Generative AI</div> |
|
</div> |
|
|
|
|
|
<div id="classic-builder-fields" class="builder-fields active"> |
|
<div class="selection-group" data-group="framework"> |
|
<h4>1. ML Framework</h4> |
|
<div class="selection-chips"> |
|
<div class="chip" data-id="scikit-learn">Scikit-learn</div> |
|
<div class="chip" data-id="xgboost">XGBoost</div> |
|
<div class="chip" data-id="pytorch">PyTorch</div> |
|
<div class="chip" data-id="tensorflow">TensorFlow</div> |
|
<div class="chip" data-id="jax">JAX</div> |
|
<div class="chip" data-id="keras">Keras</div> |
|
</div> |
|
</div> |
|
<div class="selection-group" data-group="serving"> |
|
<h4>2. Serving Container</h4> |
|
<div class="selection-chips"> |
|
<div class="chip" data-id="kserve">Kubeflow KServe</div> |
|
<div class="chip" data-id="ray-serve">Ray Serve</div> |
|
<div class="chip" data-id="torchserve">TorchServe</div> |
|
<div class="chip" data-id="tf-serving">TF Serving</div> |
|
<div class="chip" data-id="triton">NVIDIA Triton</div> |
|
<div class="chip" data-id="custom">Custom Container (FastAPI)</div> |
|
</div> |
|
</div> |
|
<div class="selection-group" data-group="orchestration"> |
|
<h4>3. Orchestration / Platform</h4> |
|
<div class="selection-chips"> |
|
<div class="chip active" data-id="kubernetes">Kubernetes</div> |
|
<div class="chip" data-id="vertex-ai">Managed: Vertex AI</div> |
|
<div class="chip" data-id="sagemaker">Managed: SageMaker</div> |
|
</div> |
|
</div> |
|
<div class="selection-group" data-group="hardware"> |
|
<h4>4. Hardware</h4> |
|
<div class="selection-chips"> |
|
<div class="chip" data-id="vm">VMs (CPU)</div> |
|
<div class="chip" data-id="gpu">GPU</div> |
|
<div class="chip" data-id="tpu">TPU</div> |
|
</div> |
|
</div> |
|
</div> |
|
|
|
|
|
<div id="genai-builder-fields" class="builder-fields"> |
|
<div class="selection-group" data-group="model-type"> |
|
<h4>0. Model Type</h4> |
|
<div class="selection-chips"> |
|
<div class="chip" data-id="llm">LLM</div> |
|
<div class="chip" data-id="vlm">Multimodal LLM (VLM)</div> |
|
<div class="chip" data-id="diffusion">Diffusion</div> |
|
</div> |
|
</div> |
|
<div class="selection-group" data-group="framework"> |
|
<h4>1. ML Framework</h4> |
|
<div class="selection-chips"> |
|
<div class="chip" data-id="pytorch">PyTorch</div> |
|
<div class="chip" data-id="tensorflow">TensorFlow</div> |
|
<div class="chip" data-id="jax">JAX</div> |
|
<div class="chip" data-id="keras">Keras</div> |
|
</div> |
|
</div> |
|
<div class="selection-group" data-group="serving"> |
|
<h4>2. Serving Container</h4> |
|
<div class="selection-chips"> |
|
<div class="chip" data-id="vllm">vLLM</div> |
|
<div class="chip" data-id="sglang">SGLang</div> |
|
<div class="chip" data-id="triton-trt-llm">NVIDIA Triton (TensorRT-LLM)</div> |
|
<div class="chip" data-id="custom">Custom Container (Diffusers, etc.)</div> |
|
</div> |
|
</div> |
|
<div class="selection-group" data-group="orchestration"> |
|
<h4>3. Orchestration / Platform</h4> |
|
<div class="selection-chips"> |
|
<div class="chip active" data-id="k8s-ray-kf">Kubernetes (KubeRay/Kubeflow)</div> |
|
<div class="chip" data-id="vertex-ai">Managed: Vertex AI</div> |
|
<div class="chip" data-id="sagemaker">Managed: SageMaker</div> |
|
</div> |
|
</div> |
|
<div class="selection-group" data-group="hardware"> |
|
<h4>4. Hardware</h4> |
|
<div class="selection-chips"> |
|
<div class="chip" data-id="gpu">GPU</div> |
|
<div class="chip" data-id="tpu">TPU</div> |
|
</div> |
|
</div> |
|
</div> |
|
|
|
<button id="generate-btn">Generate Architecture Diagram</button> |
|
</div> |
|
|
|
<div id="architecture-diagram-output"></div> |
|
|
|
<h2 class="main-section-title"><i class="material-icons">menu_book</i>Reference Guides</h2> |
|
|
|
<h3 class="main-section-title" style="font-size: 1.8rem; border-color: var(--primary-color);"><i class="material-icons" style="color: var(--primary-color);">model_training</i>Classic ML</h3> |
|
<div class="tile-container"> |
|
<div class="tile" data-target="classic-pytorch"><img src="pytorch.png" class="tile-icon-img" alt="PyTorch Icon"><h4>PyTorch</h4></div> |
|
<div class="tile" data-target="classic-tensorflow"><img src="tensorflow.png" class="tile-icon-img" alt="TensorFlow Icon"><h4>TensorFlow</h4></div> |
|
<div class="tile" data-target="classic-sklearn"><img src="scikit-learn.png" class="tile-icon-img" alt="Scikit-learn Icon"><h4>Scikit-learn</h4></div> |
|
<div class="tile" data-target="classic-xgboost"><img src="xgboost.png" class="tile-icon-img" alt="XGBoost Icon"><h4>XGBoost</h4></div> |
|
<div class="tile" data-target="classic-jax"><img src="jax.png" class="tile-icon-img" alt="JAX Icon"><h4>JAX</h4></div> |
|
</div> |
|
|
|
<h3 class="main-section-title" style="font-size: 1.8rem; border-color: var(--genai-color);"><i class="material-icons" style="color: var(--genai-color);">auto_awesome</i>Generative AI</h3> |
|
<div class="tile-container"> |
|
<div class="tile" data-target="genai-llm"><img src="llm.png" class="tile-icon-img" alt="LLM Icon"><h4>LLMs</h4></div> |
|
<div class="tile" data-target="genai-vlm"><img src="vlm.png" class="tile-icon-img" alt="VLM Icon"><h4>Multimodal (VLMs)</h4></div> |
|
<div class="tile" data-target="genai-diffusion"><img src="diffusion.png" class="tile-icon-img" alt="Diffusion Icon"><h4>Diffusion Models</h4></div> |
|
</div> |
|
|
|
<div class="content-container"> |
|
|
|
<div id="classic-pytorch" class="content-panel"> |
|
<div class="stack-layer"><h3><i class="material-icons">psychology</i>Model Layer</h3> |
|
<p>A simple feed-forward network defined in PyTorch. The model's <code>state_dict</code> is saved for deployment.</p>
|
<p class="code-block-header">model_setup.py</p> |
|
<pre><code>import torch
import torch.nn as nn

class SimpleNet(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(10, 1)

    def forward(self, x):
        return self.linear(x)

model = SimpleNet()
# Persist only the learned weights; the class definition is needed again at load time.
torch.save(model.state_dict(), "pytorch_model.pth")</code></pre>
|
</div> |
|
<div class="stack-layer"><h3><i class="material-icons">layers</i>Serving Stack Layer</h3> |
|
<p>Use a high-performance framework like FastAPI for a custom server. For dedicated solutions, TorchServe is the native choice, while Kubeflow KServe, Ray Serve, and NVIDIA Triton offer powerful, higher-level abstractions.</p>
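<p>A minimal custom-server sketch (file name, port, and request schema are illustrative), reusing <code>SimpleNet</code> and <code>pytorch_model.pth</code> from the Model Layer above:</p>
<p class="code-block-header">serve_fastapi.py</p>
<pre><code>import torch
from fastapi import FastAPI
from pydantic import BaseModel

from model_setup import SimpleNet  # the class defined in the Model Layer above

app = FastAPI()
model = SimpleNet()
model.load_state_dict(torch.load("pytorch_model.pth", map_location="cpu"))
model.eval()

class PredictRequest(BaseModel):
    features: list[float]  # expects 10 values

@app.post("/predict")
def predict(req: PredictRequest):
    with torch.no_grad():
        x = torch.tensor(req.features).reshape(1, -1)
        y = model(x)
    return {"prediction": y.item()}

# Run locally with: uvicorn serve_fastapi:app --host 0.0.0.0 --port 8080</code></pre>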
|
</div> |
|
<div class="stack-layer"><h3><i class="material-icons">cloud_queue</i>Orchestration Layer</h3> |
|
<p>Package the application with a multi-stage Dockerfile and define its runtime with Kubernetes Deployment, Service, and HPA objects. Managed platforms like Vertex AI abstract this away.</p> |
|
</div> |
|
<div class="stack-layer"><h3><i class="material-icons">memory</i>Hardware Layer</h3> |
|
<p><strong>CPUs:</strong> Suitable for small networks. <strong>GPUs:</strong> Essential for larger deep learning models. <strong>TPUs:</strong> Best for massive-scale inference on GCP.</p>
|
</div> |
|
</div> |
|
<div id="classic-tensorflow" class="content-panel"> |
|
<div class="stack-layer"><h3><i class="material-icons">psychology</i>Model Layer</h3> |
|
<p>A simple Keras model saved in TensorFlow's <code>SavedModel</code> format, which bundles the architecture and weights.</p>
|
<p class="code-block-header">model_setup.py</p> |
|
<pre><code>import tensorflow as tf

model = tf.keras.Sequential([
    tf.keras.Input(shape=(10,)),
    tf.keras.layers.Dense(10, activation='relu'),
    tf.keras.layers.Dense(1),
])
# With TF 2.x / Keras 2, passing a directory saves in SavedModel format;
# with Keras 3, use model.export("tf_saved_model") instead.
model.save("tf_saved_model")</code></pre>
|
</div> |
|
<div class="stack-layer"><h3><i class="material-icons">layers</i>Serving Stack Layer</h3> |
|
<p>TF Serving and Kubeflow KServe offer native, high-performance support for the <code>SavedModel</code> format. NVIDIA Triton is also highly optimized for TF models. A custom FastAPI server is another flexible option.</p>
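<p>Once the <code>SavedModel</code> above is loaded by TF Serving, it can be queried over the REST API; a minimal sketch (the model name <code>simple_dense</code> and port are illustrative):</p>
<p class="code-block-header">query_tf_serving.py</p>
<pre><code>import requests

# TF Serving's REST API expects {"instances": [...]} and answers {"predictions": [...]}.
payload = {"instances": [[0.1] * 10]}  # one row with 10 features
resp = requests.post(
    "http://localhost:8501/v1/models/simple_dense:predict",
    json=payload,
    timeout=10,
)
print(resp.json())</code></pre>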
|
</div> |
|
<div class="stack-layer"><h3><i class="material-icons">cloud_queue</i>Orchestration Layer</h3> |
|
<p>The Kubernetes configuration is very similar to other frameworks. Ensure your Dockerfile copies the entire <code>tf_saved_model</code> directory.</p>
|
</div> |
|
<div class="stack-layer"><h3><i class="material-icons">memory</i>Hardware Layer</h3> |
|
<p><strong>CPUs:</strong> Good for smaller Keras models. <strong>GPUs:</strong> Highly recommended for deep learning models. <strong>TPUs:</strong> The premier choice for running TensorFlow models at scale on GCP.</p> |
|
</div> |
|
</div> |
|
<div id="classic-sklearn" class="content-panel"> |
|
<div class="stack-layer"><h3><i class="material-icons">psychology</i>Model Layer</h3> |
|
<p>A classic logistic regression model. Serialization is typically done with <code>joblib</code>, which handles NumPy arrays efficiently.</p>
|
<p class="code-block-header">model_setup.py</p> |
|
<pre><code>import joblib
from sklearn.linear_model import LogisticRegression
from sklearn.datasets import make_classification

# Fit on a small synthetic dataset, then serialize the fitted estimator.
X, y = make_classification(n_features=4)
model = LogisticRegression().fit(X, y)
joblib.dump(model, "sklearn_model.joblib")</code></pre>
|
</div> |
|
<div class="stack-layer"><h3><i class="material-icons">layers</i>Serving Stack Layer</h3> |
|
<p>FastAPI provides a simple and fast web server. Kubeflow KServe and Ray Serve also have native support for scikit-learn models. NVIDIA Triton can serve tree-based models through its FIL backend, or arbitrary Python inference code through its Python backend.</p>
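<p>A minimal FastAPI sketch (file name and request schema are illustrative) that loads <code>sklearn_model.joblib</code> from the Model Layer above:</p>
<p class="code-block-header">serve_sklearn.py</p>
<pre><code>import joblib
from fastapi import FastAPI
from pydantic import BaseModel

app = FastAPI()
model = joblib.load("sklearn_model.joblib")

class PredictRequest(BaseModel):
    features: list[float]  # expects 4 values

@app.post("/predict")
def predict(req: PredictRequest):
    proba = model.predict_proba([req.features])[0, 1]
    return {"positive_class_probability": float(proba)}</code></pre>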
|
</div> |
|
<div class="stack-layer"><h3><i class="material-icons">cloud_queue</i>Orchestration Layer</h3> |
|
<p>Standard Kubernetes setup. The Docker container will be lightweight as it only needs <code>scikit-learn</code>, <code>joblib</code>, and <code>fastapi</code> for a custom server.</p>
|
</div> |
|
<div class="stack-layer"><h3><i class="material-icons">memory</i>Hardware Layer</h3> |
|
<p><strong>CPUs:</strong> Almost always sufficient. There is no GPU acceleration for standard scikit-learn algorithms.</p> |
|
</div> |
|
</div> |
|
<div id="classic-xgboost" class="content-panel"> |
|
<div class="stack-layer"><h3><i class="material-icons">psychology</i>Model Layer</h3><p>An XGBoost model saved in its native JSON or UBJ format, which is portable and efficient.</p></div> |
|
<div class="stack-layer"><h3><i class="material-icons">layers</i>Serving Stack Layer</h3><p>Kubeflow KServe, Ray Serve, NVIDIA Triton (with FIL backend), and custom FastAPI servers are all excellent choices.</p></div> |
|
<div class="stack-layer"><h3><i class="material-icons">cloud_queue</i>Orchestration Layer</h3><p>Standard Kubernetes setup. The Dockerfile should include the `xgboost` library.</p></div> |
|
<div class="stack-layer"><h3><i class="material-icons">memory</i>Hardware Layer</h3><p><strong>CPUs:</strong> Excellent performance. <strong>GPUs:</strong> XGBoost has optional GPU acceleration which can provide a significant speedup.</p></div> |
|
</div> |
|
<div id="classic-jax" class="content-panel"> |
|
<div class="stack-layer"><h3><i class="material-icons">psychology</i>Model Layer</h3><p>JAX models are often defined as pure functions with parameters handled separately. We save the parameters using a standard serialization library like Flax's `msgpack`.</p></div> |
|
<div class="stack-layer"><h3><i class="material-icons">layers</i>Serving Stack Layer</h3><p>Ray Serve is an excellent fit for JAX's functional paradigm. A custom FastAPI server is also straightforward. Kubeflow KServe and NVIDIA Triton require a custom container approach wrapping the JAX logic.</p></div> |
|
<div class="stack-layer"><h3><i class="material-icons">cloud_queue</i>Orchestration Layer</h3><p>The Dockerfile needs to install `jax` and `jaxlib` corresponding to the target hardware (CPU, GPU, or TPU).</p></div> |
|
<div class="stack-layer"><h3><i class="material-icons">memory</i>Hardware Layer</h3><p><strong>CPUs/GPUs/TPUs:</strong> JAX was designed for accelerators and excels on all of them due to its XLA-based compilation.</p></div> |
|
</div> |
|
|
|
|
|
<div id="genai-llm" class="content-panel"> |
|
<div class="stack-layer"><h3><i class="material-icons">psychology</i>Model Layer</h3><p>Large Language Models (e.g., Llama, Mistral) are based on the Transformer architecture. The key inference challenge is managing the <strong>KV Cache</strong>.</p></div> |
|
<div class="stack-layer"><h3><i class="material-icons">layers</i>Serving Stack Layer</h3><p>Specialized serving toolkits like <strong>vLLM</strong>, <strong>SGLang</strong>, or <strong>NVIDIA Triton</strong> with its TensorRT-LLM backend are required for efficient inference, handling complexities like continuous batching and paged attention.</p></div> |
|
<div class="stack-layer"><h3><i class="material-icons">cloud_queue</i>Orchestration Layer</h3><p>Kubernetes (often with KubeRay) is used to manage GPU resources and schedule serving pods. Managed services like Vertex AI and SageMaker also provide optimized runtimes for popular LLMs.</p></div> |
|
<div class="stack-layer"><h3><i class="material-icons">memory</i>Hardware Layer</h3><p><strong>GPUs:</strong> Essential. High-VRAM GPUs like NVIDIA A100 or H100 are required to fit the model weights and KV cache. <strong>TPUs:</strong> Viable for specific models, especially on GCP.</p></div> |
|
</div> |
|
<div id="genai-vlm" class="content-panel"> |
|
<div class="stack-layer"><h3><i class="material-icons">psychology</i>Model Layer</h3><p>Visual Large Models (e.g., LLaVA, IDEFICS) combine a vision encoder (like ViT) with an LLM to process images and text.</p></div> |
|
<div class="stack-layer"><h3><i class="material-icons">layers</i>Serving Stack Layer</h3><p>The stack must handle multi-modal inputs. Frameworks like <strong>vLLM</strong> and <strong>SGLang</strong> are adding native support for VLMs. A custom container is often needed to handle the specific image preprocessing logic.</p></div> |
|
<div class="stack-layer"><h3><i class="material-icons">cloud_queue</i>Orchestration Layer</h3><p>Similar to LLMs, requires robust orchestration to manage high-resource GPU pods and potentially large input payloads.</p></div> |
|
<div class="stack-layer"><h3><i class="material-icons">memory</i>Hardware Layer</h3><p><strong>GPUs:</strong> High-VRAM GPUs are mandatory due to the combined size of the vision encoder, LLM, and KV cache.</p></div> |
|
</div> |
|
<div id="genai-diffusion" class="content-panel"> |
|
<div class="stack-layer"><h3><i class="material-icons">psychology</i>Model Layer</h3><p>Diffusion models (e.g., Stable Diffusion) generate images through an iterative denoising process, making latency a key challenge.</p></div> |
|
<div class="stack-layer"><h3><i class="material-icons">layers</i>Serving Stack Layer</h3><p>Optimizations focus on reducing latency. Key tools include model compilers like <strong>TensorRT</strong> (often used with NVIDIA Triton), techniques like <strong>Latent Consistency Models (LCMs)</strong>, and libraries like <strong>Diffusers</strong>, typically wrapped in a custom FastAPI container.</p></div> |
|
<div class="stack-layer"><h3><i class="material-icons">cloud_queue</i>Orchestration Layer</h3><p>Kubernetes or managed platforms are used to serve the GPU-intensive workload. Autoscaling is critical to handle bursty traffic patterns.</p></div> |
|
<div class="stack-layer"><h3><i class="material-icons">memory</i>Hardware Layer</h3><p><strong>GPUs:</strong> High-end consumer or datacenter GPUs are needed for acceptable generation speeds. VRAM is the most critical resource, dictating max resolution and batch size.</p></div> |
|
</div> |
|
</div> |
|
</main> |
|
</div> |
|
|
|
<script> |
|
document.addEventListener('DOMContentLoaded', function() { |
|
const builder = document.getElementById('architecture-builder'); |
|
const generateBtn = document.getElementById('generate-btn'); |
|
const diagramOutput = document.getElementById('architecture-diagram-output'); |
|
|
|
const archTypeSelector = builder.querySelector('.arch-type-selector'); |
|
const classicFields = document.getElementById('classic-builder-fields'); |
|
const genaiFields = document.getElementById('genai-builder-fields'); |
|
|
|
function updateChipStates() { |
|
const activeArchType = archTypeSelector.querySelector('.active').dataset.type; |
|
const activeBuilderFields = (activeArchType === 'classic') ? classicFields : genaiFields; |
|
|
|
if (activeArchType === 'classic') { |
|
const activeFramework = activeBuilderFields.querySelector('.selection-group[data-group="framework"] .chip.active'); |
|
const torchserveChip = activeBuilderFields.querySelector('.chip[data-id="torchserve"]'); |
|
const tfservingChip = activeBuilderFields.querySelector('.chip[data-id="tf-serving"]'); |
|
|
|
[torchserveChip, tfservingChip].forEach(c => c.classList.remove('disabled')); |
|
|
|
if (activeFramework) { |
|
const frameworkId = activeFramework.dataset.id; |
|
const nonTfTsFrameworks = ['scikit-learn', 'xgboost', 'jax']; |
|
if (frameworkId === 'pytorch') { |
|
tfservingChip.classList.add('disabled'); |
|
if(tfservingChip.classList.contains('active')) tfservingChip.classList.remove('active'); |
|
} else if (frameworkId === 'tensorflow') { |
|
torchserveChip.classList.add('disabled'); |
|
if(torchserveChip.classList.contains('active')) torchserveChip.classList.remove('active'); |
|
} else if (nonTfTsFrameworks.includes(frameworkId)) { |
|
[torchserveChip, tfservingChip].forEach(c => { |
|
c.classList.add('disabled'); |
|
if(c.classList.contains('active')) c.classList.remove('active'); |
|
}); |
|
} |
|
} |
|
} else { |
|
const activeModelType = activeBuilderFields.querySelector('.selection-group[data-group="model-type"] .chip.active'); |
|
const vllmChip = activeBuilderFields.querySelector('.chip[data-id="vllm"]'); |
|
const sglangChip = activeBuilderFields.querySelector('.chip[data-id="sglang"]'); |
|
|
|
[vllmChip, sglangChip].forEach(c => c.classList.remove('disabled')); |
|
|
|
if (activeModelType && activeModelType.dataset.id === 'diffusion') { |
|
[vllmChip, sglangChip].forEach(c => { |
|
c.classList.add('disabled'); |
|
if(c.classList.contains('active')) c.classList.remove('active'); |
|
}); |
|
} |
|
} |
|
|
|
const activeOrchestration = activeBuilderFields.querySelector('.selection-group[data-group="orchestration"] .chip.active'); |
|
const servingGroup = activeBuilderFields.querySelector('.selection-group[data-group="serving"]'); |
|
|
|
if (activeOrchestration && (activeOrchestration.dataset.id === 'vertex-ai' || activeOrchestration.dataset.id === 'sagemaker')) { |
|
servingGroup.classList.add('disabled'); |
|
servingGroup.querySelector('.chip.active')?.classList.remove('active'); |
|
} else { |
|
servingGroup.classList.remove('disabled'); |
|
} |
|
} |
|
|
|
archTypeSelector.addEventListener('click', function(e){ |
|
if (!e.target.classList.contains('arch-type-chip')) return; |
|
archTypeSelector.querySelectorAll('.arch-type-chip').forEach(c => c.classList.remove('active')); |
|
e.target.classList.add('active'); |
|
const type = e.target.dataset.type; |
|
classicFields.classList.toggle('active', type === 'classic'); |
|
genaiFields.classList.toggle('active', type === 'gen-ai'); |
|
diagramOutput.style.display = 'none'; |
|
updateChipStates(); |
|
}); |
|
|
|
builder.addEventListener('click', function(e) { |
|
if (!e.target.classList.contains('chip') || e.target.classList.contains('disabled')) return; |
|
const chip = e.target; |
|
const group = chip.closest('.selection-group'); |
|
if (group.classList.contains('disabled')) return; |
|
group.querySelectorAll('.chip').forEach(c => c.classList.remove('active')); |
|
chip.classList.add('active'); |
|
updateChipStates(); |
|
}); |
|
|
|
generateBtn.addEventListener('click', function() { |
|
const activeArchType = archTypeSelector.querySelector('.active').dataset.type; |
|
const activeBuilderFields = document.querySelector('.builder-fields.active'); |
|
const selections = {}; |
|
let allSelected = true; |
|
|
|
const isManaged = activeBuilderFields.querySelector('.selection-group[data-group="orchestration"] .chip.active')?.dataset.id.includes('vertex') || |
|
activeBuilderFields.querySelector('.selection-group[data-group="orchestration"] .chip.active')?.dataset.id.includes('sagemaker'); |
|
|
|
activeBuilderFields.querySelectorAll('.selection-group').forEach(group => { |
|
const groupKey = group.dataset.group; |
|
if (isManaged && groupKey === 'serving') return; |
|
|
|
const activeChip = group.querySelector('.chip.active'); |
|
if (activeChip) { |
|
selections[groupKey] = { name: activeChip.innerText, id: activeChip.dataset.id }; |
|
} else { |
|
allSelected = false; |
|
} |
|
}); |
|
|
|
if (!allSelected) { |
|
alert('Please make a selection for each required layer.'); |
|
return; |
|
} |
|
|
|
let diagramHtml = `<h3 class="diagram-title">Your Custom ${activeArchType === 'gen-ai' ? 'Generative AI' : 'Classic ML'} Architecture</h3><div class="diagram-stack">`; |
|
const arrowClass = activeArchType === 'gen-ai' ? 'gen-ai-arrow' : ''; |
|
const layerClass = activeArchType === 'gen-ai' ? 'gen-ai-layer' : ''; |
|
|
|
function createImageTag(selection) { |
|
return `<img src="${selection.id}.png" alt="${selection.name} Icon" class="icon-img-placeholder">`; |
|
} |
|
|
|
if (activeArchType === 'gen-ai') { |
|
diagramHtml += `<div class="diagram-layer ${layerClass}"><h5>${selections['model-type'].name}</h5><p>Model Type</p>${createImageTag(selections['model-type'])}</div><div class="diagram-arrow ${arrowClass}">south</div>`; |
|
} |
|
|
|
diagramHtml += `<div class="diagram-layer ${layerClass}"><h5>${selections.framework.name}</h5><p>ML Framework</p>${createImageTag(selections.framework)}</div><div class="diagram-arrow ${arrowClass}">south</div>`; |
|
|
|
if (isManaged) { |
|
diagramHtml += `<div class="diagram-layer ${layerClass}"><h5>${selections.orchestration.name}</h5><p>Managed Platform</p>${createImageTag(selections.orchestration)}</div><div class="diagram-arrow ${arrowClass}">south</div>`; |
|
} else { |
|
diagramHtml += `<div class="diagram-layer ${layerClass}"><h5>${selections.serving.name}</h5><p>Serving Container</p>${createImageTag(selections.serving)}</div><div class="diagram-arrow ${arrowClass}">south</div>`; |
|
diagramHtml += `<div class="diagram-layer ${layerClass}"><h5>${selections.orchestration.name}</h5><p>Orchestration</p>${createImageTag(selections.orchestration)}</div><div class="diagram-arrow ${arrowClass}">south</div>`; |
|
} |
|
|
|
diagramHtml += `<div class="diagram-layer ${layerClass}"><h5>${selections.hardware.name}</h5><p>Hardware</p>${createImageTag(selections.hardware)}</div>`; |
|
diagramHtml += `</div>`; |
|
|
|
diagramOutput.innerHTML = diagramHtml; |
|
diagramOutput.style.display = 'block'; |
|
diagramOutput.scrollIntoView({ behavior: 'smooth', block: 'center' }); |
|
}); |
|
|
|
const tiles = document.querySelectorAll('.tile'); |
|
const contentPanels = document.querySelectorAll('.content-panel'); |
|
tiles.forEach(tile => tile.addEventListener('click', (e) => { |
|
const targetId = e.currentTarget.dataset.target; |
|
tiles.forEach(t => t.classList.remove('active')); |
|
e.currentTarget.classList.add('active'); |
|
contentPanels.forEach(p => p.classList.remove('active')); |
|
const panel = document.getElementById(targetId); |
|
if (panel) { |
|
panel.classList.add('active'); |
|
panel.scrollIntoView({ behavior: 'smooth', block: 'start' }); |
|
} |
|
})); |
|
|
|
document.querySelectorAll('pre code').forEach(codeBlock => { |
|
const pre = codeBlock.parentElement; |
|
if (!pre.querySelector('.copy-btn')) { |
|
const copyButton = document.createElement('button'); |
|
copyButton.innerText = 'Copy'; |
|
copyButton.className = 'copy-btn'; |
|
pre.appendChild(copyButton); |
|
copyButton.addEventListener('click', (e) => { |
|
e.stopPropagation(); |
|
navigator.clipboard.writeText(codeBlock.innerText).then(() => { |
|
copyButton.innerText = 'Copied!'; |
|
copyButton.classList.add('copied'); |
|
setTimeout(() => { copyButton.innerText = 'Copy'; copyButton.classList.remove('copied'); }, 2000); |
|
}); |
|
}); |
|
} |
|
}); |
|
updateChipStates(); |
|
}); |
|
</script> |
|
</body> |
|
</html> |