Unable to connect the correct model yaml and checkpoint
#1 by Steven8686 - opened
Hi there,
Thanks for your great work! However, when I run the inference demo presented in the README, I encounter this error:
  File ".../lib/python3.11/site-packages/torch/nn/modules/module.py", line 1931, in __getattr__
    raise AttributeError(
AttributeError: 'AAttn' object has no attribute 'qk'. Did you mean: 'qkv'?
I've checked the issues in the official YOLOv12 repo (https://github.com/sunsmarterjie/yolov12/issues/109); they say this error means the .pt you provide is likely a turbo model. What is the exact yaml used for the model in your repo?
Thanks a lot!
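For reference, the architecture yaml can be read straight out of the checkpoint without running inference, which sidesteps the AAttn error. A minimal sketch, assuming a standard Ultralytics-format .pt (the filename is a placeholder):

import torch

# Placeholder path; unpickling needs ultralytics installed so the model class resolves.
ckpt = torch.load("path/to/checkpoint.pt", map_location="cpu", weights_only=False)

# Ultralytics checkpoints carry the model object; its .yaml dict is the
# architecture config the weights were built from.
print(ckpt["model"].yaml)                    # full architecture dict
print(ckpt["model"].yaml.get("yaml_file"))   # original yaml filename, if recorded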
I am definitely using the "l" variant.
Are you running this with "cuda" or something else?
Here is the code I am using to run it with CUDA on an NVIDIA GPU:
.....truncated
from ultralytics import YOLO
.....truncated

model = None


def setup_model(**kwargs):
    """Initialize YOLO model with optimizations."""
    global model

    # Define persistent model path for container volume
    persistent_model_path = "/data/models/yolo12l-person-seg-extended.pt"

    # Check if model exists in persistent storage
    if os.path.exists(persistent_model_path):
        logger.info(
            f"Loading YOLO12 segmentation model from persistent storage: {persistent_model_path}"
        )
        try:
            model = YOLO(persistent_model_path)
            logger.info("Segmentation model loaded from persistent volume successfully")
        except Exception as e:
            logger.error(
                "Failed to load custom segmentation model",
                error=str(e),
                model_path=persistent_model_path,
                traceback=True,
            )
            raise RuntimeError(
                f"Could not load required model from {persistent_model_path}: {str(e)}"
            )
    else:
        # Custom model not found - critical error as we have no fallback
        error_msg = f"CRITICAL: Required model not found at {persistent_model_path}"
        logger.critical(
            error_msg,
            required_model="yolo12l-person-seg.pt",
            check_paths=["/data/models/", "/app/", "./"],
            container_name=os.getenv("HOSTNAME", "unknown"),
        )
        raise FileNotFoundError(error_msg)

    # Check for available GPU acceleration
    if torch.cuda.is_available():
        logger.info(
            "CUDA is available, using NVIDIA GPU acceleration",
            cuda_version=torch.version.cuda,
            device_name=torch.cuda.get_device_name(0),
        )

        # Default to False so the flag is defined even if the setup block below fails
        flash_attn_available = False

        # Configure CUDA optimizations
        try:
            # This optimizes matrix multiplications
            torch.set_float32_matmul_precision("high")
            logger.info("Set float32 matmul precision to high")

            # FlashAttention should be pre-installed in the container image
            try:
                import flash_attn

                logger.info(
                    "FlashAttention is available and will be used automatically",
                    version=flash_attn.__version__,
                )
                flash_attn_available = True
            except ImportError:
                logger.warning(
                    "FlashAttention not found despite being in the base image - check build process",
                    warning="This is unexpected and may indicate an issue with the Docker image build",
                )
                flash_attn_available = False
        except Exception as e:
            logger.warning("Error setting up CUDA optimizations", error=str(e))

        try:
            # Move model to CUDA
            model.to("cuda")
            logger.info("Model successfully moved to CUDA device")

            # Test inference with FP16 to verify it works
            dummy_input = torch.zeros((1, 3, 640, 640), device="cuda")

            # Log if using FlashAttention
            if flash_attn_available:
                logger.info("Running inference with FlashAttention and FP16 support")
            else:
                logger.info(
                    "Running inference with standard attention and FP16 support"
                )

            test_result = model.predict(dummy_input, device="cuda", half=True)
            logger.info("Successfully tested FP16 inference")
        except Exception as e:
            logger.warning(
                "Failed to test FP16 precision, will still try at runtime", error=str(e)
            )
            # Ensure model is still on CUDA even if test fails
            model.to("cuda")
    else:
        logger.info("No GPU acceleration available, falling back to CPU")

    logger.info("YOLO12 model initialized successfully")