File size: 3,323 Bytes
253b29a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 |
---
language:
- en
metrics:
- accuracy
- f1
- precision
- recall
pipeline_tag: video-classification
tags:
- i3d
- pytorch
- crime-detection
---
# Smart Surveillance System
We leveraged a pre-trained I3D model and fine-tuned it using two strategies:

- **Block-level tuning:** adjusting and retraining groups of layers (blocks) to adapt the model to the new dataset.
- **Layer-level tuning:** fine-tuning specific layers for more granular control over feature learning.

The final classification layer of the I3D model was removed and replaced with a custom output layer tailored to our binary classification task: predicting whether an activity represents a crime (1) or non-crime (0).
## How to Run
```python
import torch
import torch.nn as nn
class UCFModel(nn.Module):
    """I3D video backbone whose last projection layer is swapped for a 2-logit head.

    The backbone is obtained through ``torch.hub`` from the
    ``facebookresearch/pytorchvideo`` repository with ``pretrained=True``.
    The ``proj`` layer of its final block is then replaced by a new
    ``nn.Linear`` with the same input width and two outputs.
    """

    def __init__(self, model_name="i3d_r50"):
        """Create the hub model called *model_name* and attach the new head."""
        super().__init__()
        self.model_name = model_name

        backbone = torch.hub.load(
            "facebookresearch/pytorchvideo", model_name, pretrained=True
        )
        # Reuse the existing projection's input width so the new layer fits
        # the features produced by the rest of the network.
        final_block = backbone.blocks[-1]
        final_block.proj = nn.Linear(final_block.proj.in_features, 2)
        self.model = backbone

    def forward(self, frames):
        """Pass *frames* to the wrapped network and return its raw output."""
        return self.model(frames)
```
```python
import torch
from PIL import Image
from huggingface_hub import hf_hub_download
from torchvision import transforms
# Per-frame preprocessing applied before frames are stacked into a clip:
# resize to 224x224, convert the PIL image to a tensor, then normalise each
# channel with the mean/std values listed below.
inference_transform = transforms.Compose([
    transforms.Resize(size=(224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    ),
])
class UCFInferenceByFrames:
    """Load fine-tuned ``UCFModel`` weights from the Hugging Face Hub and classify clips.

    Attributes:
        repo_id: Hub repository that hosts ``ucf_model.pth``.
        device: CUDA when available, otherwise CPU.
        model: The loaded ``UCFModel`` in evaluation mode on ``device``.
    """

    def __init__(self, repo_id):
        """Select the device and load the checkpoint stored in *repo_id*."""
        self.repo_id = repo_id
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model = self.load_model()

    def load_model(self):
        """Download ``ucf_model.pth`` and return a ready-to-use model.

        Returns:
            A ``UCFModel`` on ``self.device`` with the downloaded state dict
            loaded and ``eval()`` applied.
        """
        model_path = hf_hub_download(repo_id=self.repo_id, filename="ucf_model.pth")
        # map_location remaps the stored tensors onto the device chosen above,
        # so a checkpoint saved from a GPU still loads on a CPU-only host.
        # NOTE(review): torch.load unpickles the downloaded file; on PyTorch
        # versions that support it, consider passing weights_only=True.
        state_dict = torch.load(model_path, map_location=self.device)
        model = UCFModel().to(device=self.device)
        model.load_state_dict(state_dict)
        model.eval()
        return model

    def inference(self, frames):
        """Predict the class index for one clip.

        Args:
            frames: Iterable of image arrays accepted by ``PIL.Image.fromarray``.

        Returns:
            The tensor produced by ``argmax(1)`` over the model output.
        """
        per_frame = [inference_transform(Image.fromarray(frame)) for frame in frames]
        video_tensor = torch.stack(per_frame)
        # Swap the first two axes (frame index <-> channel) and add a leading
        # batch axis of size 1 before handing the clip to the network.
        video_tensor = video_tensor.permute(1, 0, 2, 3).unsqueeze(0).float()
        video_tensor = video_tensor.to(self.device)
        with torch.no_grad():
            output = self.model(video_tensor)
        return output.argmax(1)
```
```python
import cv2 as cv
import numpy as np
# Build the inference wrapper once; constructing it loads the model weights.
ucf = UCFInferenceByFrames(repo_id="amjad-awad/ucf-i3d-model-by-block-lr-0.01")
def inference(ucf_model, video_path, max_frames=16):
    """Classify a video file as ``"Crime"`` or ``"No-Crime"``.

    Every frame of the video is decoded, then ``max_frames`` indices evenly
    spaced over the whole clip are selected and passed to
    ``ucf_model.inference``. When the clip has fewer than ``max_frames``
    frames, some frames are repeated.

    Args:
        ucf_model: Object exposing ``inference(frames)`` whose result converts
            to ``int`` (1 means crime).
        video_path: Path handed to ``cv.VideoCapture``.
        max_frames: Number of frames sampled from the clip.

    Returns:
        ``"Crime"`` or ``"No-Crime"``, or ``None`` (after printing a message)
        when the video cannot be opened or contains no decodable frame.
    """
    cap = cv.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            print("No video")
            return None
        frames = []
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            frames.append(frame)
    finally:
        # Always free the capture handle, including on the early return above.
        cap.release()

    if not frames:
        # Without this guard the index lookup below would raise IndexError.
        print("No frames")
        return None

    # NOTE(review): OpenCV decodes frames in BGR order while the transform
    # normalises with ImageNet RGB statistics -- confirm which channel order
    # the model was trained with before adding a BGR->RGB conversion here.
    indices = np.linspace(0, len(frames) - 1, max_frames, dtype=int)
    sampled = [frames[i] for i in indices]
    predict = ucf_model.inference(sampled)
    return "Crime" if int(predict) == 1 else "No-Crime"
```
```python
# Classify a single video file and show the resulting label.
predict = inference(video_path="YOUR_VIDEO_PATH.mp4", ucf_model=ucf)
print(predict)
``` |