Sebastien De Greef committed on
Commit d5c3317 · 1 Parent(s): 7d64a6c

Create index.html for AI portfolio and add projects.json


Add object detection page with YOLO implementation
Add support for PNG images in Git LFS

.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+*.png filter=lfs diff=lfs merge=lfs -text
images/object-detection-image1.PNG ADDED

Git LFS Details

  • SHA256: e8c2da55b4e78250e5a920923a4ed309c1578d6439e0c47d7bd1e87237a7c21e
  • Pointer size: 132 Bytes
  • Size of remote file: 1.27 MB
images/object-detection-segmentation-1.PNG ADDED

Git LFS Details

  • SHA256: 1ede2c0ae778484c7525be932c4b67daa412d7261421ba947807533e5c79f2c8
  • Pointer size: 131 Bytes
  • Size of remote file: 673 kB
images/object-detection-segmentation-2.PNG ADDED

Git LFS Details

  • SHA256: 7c6696726ab92cc5752161c60e348e313df2e1a44b59c46b6d50b57287749fcb
  • Pointer size: 131 Bytes
  • Size of remote file: 758 kB
images/object-detection-segmentation-3.PNG ADDED

Git LFS Details

  • SHA256: a07db8ff0e66458a7a9e1a8cebe3bb28b3afe44a6ab6b0cf0aae7432dca89346
  • Pointer size: 131 Bytes
  • Size of remote file: 412 kB
images/object-detection-training-results.png ADDED

Git LFS Details

  • SHA256: 2d0ee75675c980f89dfceaf783703bd54df813ff9fdf4bdd8f46518daf3a9ca1
  • Pointer size: 131 Bytes
  • Size of remote file: 265 kB
index.html CHANGED
@@ -3,17 +3,51 @@
   <head>
     <meta charset="utf-8" />
     <meta name="viewport" content="width=device-width" />
-    <title>My static Space</title>
+    <title>AI Portfolio on Huggingface</title>
     <link rel="stylesheet" href="style.css" />
+    <script>
+      document.addEventListener("DOMContentLoaded", function () {
+        fetch("projects.json")
+          .then((response) => response.json())
+          .then((data) => {
+            const projects = data;
+            const projectsContainer = document.getElementById("projects");
+            projects.forEach((project) => {
+              const projectCard = document.createElement("section");
+              projectCard.classList.add("project-card");
+              const title = document.createElement("h2");
+              title.textContent = project.title;
+              const description = document.createElement("p");
+              description.textContent = project.description;
+              const link = document.createElement("a");
+              link.href = project.link;
+              link.target = "_blank";
+              link.textContent = "View Project";
+
+              projectCard.appendChild(title);
+              projectCard.appendChild(description);
+              projectCard.appendChild(link);
+
+              projectsContainer.appendChild(projectCard);
+            });
+
+          });
+      });
+
+
+    </script>
   </head>
   <body>
-    <div class="card">
-      <h1>Welcome to your static Space!</h1>
-      <p>You can modify this app directly by editing <i>index.html</i> in the Files and versions tab.</p>
-      <p>
-        Also don't forget to check the
-        <a href="https://huggingface.co/docs/hub/spaces" target="_blank">Spaces documentation</a>.
-      </p>
-    </div>
+    <header>
+      <h1>Welcome to My AI Portfolio</h1>
+      <p>Discover my projects and experiments with Artificial Intelligence on Huggingface.</p>
+    </header>
+    <article id="projects">
+    </article>
+    <footer>
+      <a href="https://www.linkedin.com/in/sebdg/" target="_blank">
+        <img src="linkedin.png" width="24" alt="LinkedIn Icon" />&nbsp; My LinkedIn Profile
+      </a>
+    </footer>
   </body>
-  </html>
+  </html>
linkedin.png ADDED

Git LFS Details

  • SHA256: d4defd51b5002c29c0ab618064cac2553e71a50f3e74f0e0e3780addc7094e11
  • Pointer size: 129 Bytes
  • Size of remote file: 3.06 kB
object-detection.html ADDED
@@ -0,0 +1,71 @@
1
+ <!DOCTYPE html>
2
+ <html>
3
+ <head>
4
+ <meta charset="utf-8" />
5
+ <meta name="viewport" content="width=device-width" />
6
+ <title>Object Detection with YOLO</title>
7
+ <script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script>
8
+ <script src="https://code.jquery.com/jquery-3.6.0.min.js"></script>
9
+ <link rel="stylesheet" href="style.css" />
10
+ <link
11
+ rel="stylesheet"
12
+ href="https://cdn.jsdelivr.net/gh/highlightjs/cdn-release/build/styles/default.min.css"
13
+ />
14
+ <script src="https://cdn.jsdelivr.net/gh/highlightjs/cdn-release/build/highlight.min.js"></script>
15
+ <script>
16
+ class BC {
17
+ constructor(elementId) {
18
+ this.container = document.getElementById(elementId);
19
+ this.headings = document.querySelectorAll("h1, h2, h3, h4");
20
+ this.currentHeading = null;
21
+ }
22
+ set_breadcrumb() {
23
+ const headings = document.querySelectorAll("h1, h2, h3, h4"); // Select all heading elements
24
+ let currentHeading = null;
25
+
26
+ // Iterate through headings to see which is currently viewable
27
+ for (let i = 0; i < headings.length; i++) {
28
+ const heading = headings[i];
29
+ if (
30
+ heading.getBoundingClientRect().top <
31
+ window.innerHeight * 0.1
32
+ ) {
33
+ // Heading is at the top of the page
34
+ currentHeading = heading;
35
+ } else {
36
+ break; // Once a heading below the top is found, stop the search
37
+ }
38
+ }
39
+
40
+ // Update the breadcrumb div with the current heading information
41
+ const breadcrumb = document.getElementById("breadcrumb");
42
+ if (currentHeading) {
43
+ breadcrumb.textContent = currentHeading.textContent; // Set text or build a more complex breadcrumb
44
+ }
45
+ }
46
+ }
47
+ console.log(marked);
48
+ document.addEventListener("DOMContentLoaded", function () {
49
+ fetch("object-detection.md")
50
+ .then((response) => response.text())
51
+ .then((text) => {
52
+ const html = marked.marked(text);
53
+ document.getElementById("markdown-container").innerHTML = html;
54
+ document.querySelectorAll("pre code").forEach((block) => {
55
+ hljs.highlightBlock(block);
56
+ });
57
+ const bc = new BC("markdown-container");
58
+ bc.set_breadcrumb();
59
+ document.addEventListener("scroll", bc.set_breadcrumb);
60
+ })
61
+ .catch((error) =>
62
+ console.error("Error loading the Markdown file:", error)
63
+ );
64
+ });
65
+ </script>
66
+ </head>
67
+ <body>
68
+ <div id="breadcrumb"></div>
69
+ <div id="markdown-container"></div>
70
+ </body>
71
+ </html>
object-detection.md ADDED
@@ -0,0 +1,248 @@
1
+ # Object Detection with YOLO
2
+
3
+ ## Introduction
4
+
5
+ In the realm of AI, the rapid advancement of computer vision has paved the way for innovations that could significantly enhance automated systems
6
+ and improve public safety, especially in public transportation.
7
+
8
+ Among the myriad of object detection systems and models, [Yolo](https://docs.ultralytics.com/) (You Only Look Once) stands out due to its unique ability to detect objects in real time with remarkable accuracy and low computing requirements. The small size of the models makes them well suited for deployment on on-board equipment with limited resources, while remaining performant enough for dynamic environments where rapid decision-making is crucial, such as public transportation and autonomous driving.
9
+
10
+ This experiment, prompted by a job opening I was applying for, delves into the application of the YOLO object detection model to cab ride videos from trains and trams. These videos, captured directly from the front-facing cameras of public transit vehicles and posted on YouTube, offer a rich dataset that reflects the diverse and unpredictable urban environment through which these vehicles travel.
11
+
12
+ The primary goal of utilizing such a model in this context is to enhance transportation safety and operational efficiency by identifying potential hazards and improving route management based on real-world data. Other applications include line optimization, passenger counting, track condition analysis and, of course, fully autonomous driving.
13
+
14
+ While exploring the process of training such a YOLO model on these specific videos, I wanted to document the experiment to highlight the challenges of adapting the model to the peculiarities of railway and tram systems. By keeping the dataset and effort rather limited I want to demonstrate my amazing skills, humm... lol, not really, but rather how such complex tasks can be accomplished with a limited set of human annotations and interventions, using foundational models to train very specialized ones.
15
+
16
+ ## Methodology
17
+
18
+ The methodology used to train the vision model on cab ride videos encompasses several critical steps, from data collection to model training and validation.
19
+ Each step is vital, and its quality will influence the model's accuracy and functionality in real-world scenarios.
20
+ This is _just_ an experiment; a real-world model would require more data, stronger quality assurance, and intermediate or task-specific models for sub-tasks.
21
+
22
+ ### Data Collection
23
+
24
+ The primary data for this project consists of YouTube cab ride videos recorded in trains and trams. These videos are typically captured through front-facing cameras mounted on the vehicles, providing a driver's-eye view of the route. The footage includes diverse scenes from urban and rural settings under various weather and lighting conditions, making it a good source of input data.
25
+
26
+ ### Data Characteristics
27
+
28
+ The videos are characterized by high-resolution imagery that captures details necessary for accurate object detection, such as obstacles on tracks, signals, and other vehicles. The collection spans a couple of hours of footage, ensuring a comprehensive dataset that includes a wide range of scenarios and anomalies. These videos are often very lengthy and include stops, tunnels or long stretches of very similar frames. To select which segments are interesting and offer a variety of situations, the first step is to split the videos into fixed-length segments.
29
+
30
+ ```python
31
+ import os
+ import math
+ from moviepy.editor import VideoFileClip
+
+ def split_video(video_path, output_folder, segment_length=120):
32
+ """Split a video file into segments of a fixed length and save them as separate files."""
33
+ # Load the video
34
+ video = VideoFileClip(video_path)
35
+ duration = video.duration
36
+
37
+ # create the output folder
38
+ if not os.path.exists(output_folder):
39
+ os.makedirs(output_folder)
40
+
41
+ # Calculate the number of segments needed
42
+ num_segments = math.ceil(duration / segment_length)
43
+ print(f"Splitting the video into {num_segments} segments.")
44
+ # Loop through all segments
45
+ for i in range(num_segments):
46
+ # Calculate start and end times
47
+ start_time = i * segment_length
48
+ end_time = min((i + 1) * segment_length, duration)
49
+
50
+ # Cut the segment
51
+ segment = video.subclip(start_time, end_time)
52
+
53
+ video_name = os.path.basename(video_path)
54
+
55
+ # Define the output file name
56
+ output_filename = f"{output_folder}/{video_name[:-4]}_{i+1}.mp4"
57
+
58
+ # Write the segment to a file
59
+ segment.write_videofile(output_filename, codec='libx264', audio_codec='aac')
60
+
61
+ print(f"Segment {i+1} is done.")
62
+ ```
63
+
64
+ ### Preparation and Preprocessing
65
+
66
+ Now that we have a set of two-minute videos, let's focus only on those with relevant content. A couple of techniques to identify frame similarity came to mind, but they would not highlight sequences containing different objects or situations (a sketch of such a check is shown below for illustration). So I quickly shuffled through the videos manually and kept about 15 minutes of footage to analyse out of the 7 hours of initial footage, covering urban and rural settings, trams and trains, with or without other vehicles or pedestrians.
67
+
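+ For illustration, here is a minimal sketch of the kind of histogram-based similarity check that was considered (and set aside in favour of manual selection); the bin count and threshold are assumptions, not values used in this experiment.
+
+ ```python
+ import cv2
+
+ def frames_are_similar(frame_a, frame_b, threshold=0.95):
+     """Rough check: correlate the grayscale histograms of two frames."""
+     hists = []
+     for frame in (frame_a, frame_b):
+         gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
+         hist = cv2.calcHist([gray], [0], None, [64], [0, 256])
+         cv2.normalize(hist, hist)
+         hists.append(hist)
+     # A correlation close to 1.0 means the two frames look very much alike
+     return cv2.compareHist(hists[0], hists[1], cv2.HISTCMP_CORREL) > threshold
+ ```
+
+ As noted above, such a check only drops near-duplicate frames and says nothing about whether a sequence contains new objects, which is why manual selection was preferred here.
+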
68
+ #### Frame extraction
69
+
70
+ Due to the continuous nature of video files, the next step involves extracting _some_ frames at a fixed interval (strides). This
71
+ process reduces the volume of data to a manageable size for annotation and training.
72
+
73
+ ```python
74
+
75
+ import os
+ import cv2
+
+ def extract_frames(video_path, output_folder, stride=12):
76
+ """Extract frames from a video file and save them as PNG images."""
77
+ # load the video
78
+ cap = cv2.VideoCapture(video_path)
79
+ if not cap.isOpened():
80
+ print("Error: could not open the video.")
81
+ return
82
+
83
+ # create the output folder
84
+ if not os.path.exists(output_folder):
85
+ os.makedirs(output_folder)
86
+
87
+ # extract frames
88
+ frame_count = 0
89
+ while True:
90
+ ret, frame = cap.read()
91
+ if not ret:
92
+ break
93
+ frame_count += 1
94
+ if frame_count % stride != 0:
95
+ continue
96
+ frame_path = os.path.join(output_folder, f"{frame_count:06d}.png")
97
+ cv2.imwrite(frame_path, frame)
98
+
99
+ print(f"Extracted {frame_count} frames to {output_folder}")
100
+
101
+ ```
102
+
103
+ #### Annotation Process
104
+
105
+ ![Label Studio Annotations](images/object-detection-image1.PNG "Labelling interface in Label Studio")
106
+
107
+ From those 15 minutes I selected as few as 170 frames, again keeping the set of situations and conditions to be labelled small but comprehensive.
108
+ Each of these frames is then manually annotated by a team of trained annotators (Me, Myself and I) using Label Studio. This involves identifying and labeling various objects of interest, such as pedestrians, vehicles, signals, signs, rails, etc. The annotations are exported in the YOLO format, which includes bounding boxes and object class labels.
109
+
110
+ ```html
111
+ <View>
112
+ <Image name="image" value="$image" />
113
+ <RectangleLabels name="label" toName="image">
114
+ <Label value="TrafficLight" background="#FFA39E" />
115
+ <Label value="Train" background="#FFC069" />
116
+ <Label value="CargoWagon" background="#AD8B00" />
117
+ <Label value="Sign" background="#a09eff" />
118
+ <Label value="Pedestrian" background="#cf0c02" />
119
+ <Label value="Car" background="#0dd311" />
120
+ <Label value="Bike" background="#fb8313" />
121
+ <Label value="Tram" background="#FFA39E" />
122
+ <Label value="Bus" background="#D4380D" />
123
+ </RectangleLabels>
124
+ </View>
125
+ ```
126
+
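+ For reference, each exported YOLO label file contains one line per box in the form `class x_center y_center width height`, with all coordinates normalised to the image size and class indices following the dataset configuration shown in the next section. A minimal reader (the file path below is hypothetical):
+
+ ```python
+ CLASS_NAMES = ["Bike", "Bus", "Car", "CargoWagon", "Pedestrian",
+                "Sign", "TrafficLight", "Train", "Tram"]
+
+ def read_yolo_labels(label_path):
+     """Parse a YOLO-format label file into (class_name, xc, yc, w, h) tuples."""
+     boxes = []
+     with open(label_path) as f:
+         for line in f:
+             cls, xc, yc, w, h = line.split()
+             boxes.append((CLASS_NAMES[int(cls)], float(xc), float(yc), float(w), float(h)))
+     return boxes
+
+ print(read_yolo_labels("datasets/detection/labels/000012.txt"))  # hypothetical path
+ ```
+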
127
+ ## Model Training
128
+
129
+ The YOLOv8n (nano) model is selected for this project due to its balance of speed and accuracy, making it suitable for real-time detection tasks.
130
+ YOLOv8 is known for its improved performance over previous versions through enhancements in architecture and training techniques.
131
+
132
+ ### Dataset Configuration
133
+
134
+ ```yaml
135
+ path: ../object-detection/datasets
136
+ train: detection
137
+ val: detection
138
+ names:
139
+ 0: Bike
140
+ 1: Bus
141
+ 2: Car
142
+ 3: CargoWagon
143
+ 4: Pedestrian
144
+ 5: Sign
145
+ 6: TrafficLight
146
+ 7: Train
147
+ 8: Tram
148
+ ```
149
+
150
+ ### Training Process
151
+
152
+ The training process involves feeding the annotated frames into the YOLO model. Data augmentation techniques such as rotation, scaling, and color
153
+ adjustment are employed to improve the model’s robustness by simulating various operational scenarios. The model undergoes several iterations of
154
+ training and validation cycles to minimize overfitting and enhance its generalization capabilities.
155
+
156
+ ```bash
157
+ yolo.exe train detect data=trainz.detect.yaml model=yolov8n.pt
158
+ ```
159
+
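+ The same run can also be launched from the ultralytics Python API; a minimal sketch, where the epoch count, image size and augmentation overrides are illustrative values rather than the exact settings used here:
+
+ ```python
+ from ultralytics import YOLO
+
+ model = YOLO("yolov8n.pt")  # pretrained nano checkpoint, as in the CLI call above
+ model.train(
+     data="trainz.detect.yaml",           # dataset configuration from the previous section
+     epochs=100,                          # assumed value
+     imgsz=640,                           # assumed value
+     degrees=10.0,                        # rotation augmentation
+     scale=0.5,                           # scaling augmentation
+     hsv_h=0.015, hsv_s=0.7, hsv_v=0.4,   # colour-space augmentation
+ )
+ ```
+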
160
+ ### Validation and Testing
161
+
162
+ Post-training, the model is validated using a separate set of video frames that were not included in the training set. This step is crucial to evaluate the model's performance and accuracy in detecting objects under different conditions.
163
+
164
+ ```bash
165
+ Validating runs\detection\weights\best.pt...
166
+ Ultralytics YOLOv8.1.47 🚀 Python-3.11.9 torch-2.2.2+cpu CPU
167
+ Model summary (fused): 168 layers, 3007403 parameters, 0 gradients, 8.1 GFLOPs
168
+ Class Images Instances Box(P R mAP50 mAP50-95): 100%|██| 6/6 [2.24s/it]
169
+ all 177 1011 0.955 0.876 0.931 0.755
170
+ Bike 177 4 1 0.644 0.764 0.705
171
+ Bus 177 11 1 0.886 0.995 0.895
172
+ Car 177 389 0.943 0.925 0.975 0.761
173
+ CargoWagon 177 33 0.869 0.848 0.865 0.677
174
+ Pedestrian 177 149 0.901 0.94 0.963 0.734
175
+ Sign 177 212 0.949 0.789 0.898 0.604
176
+ TrafficLight 177 157 0.964 0.924 0.969 0.701
177
+ Train 177 34 0.992 0.971 0.975 0.856
178
+ Tram 177 22 0.976 0.955 0.978 0.862
179
+ Speed: 0.9ms preprocess, 55.5ms inference, 0.0ms loss, 0.7ms postprocess per image
180
+ Results saved to runs\detection\
181
+ ```
182
+
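+ For completeness, the validation pass reported above can be reproduced from Python roughly as follows; the weights path mirrors the run layout in the log and should be treated as an assumption:
+
+ ```python
+ from ultralytics import YOLO
+
+ metrics = YOLO("runs/detection/weights/best.pt").val(data="trainz.detect.yaml")
+ print(metrics.box.map50, metrics.box.map)  # mAP50 and mAP50-95 across all classes
+ ```
+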
183
+ ### Performance Metrics
184
+
185
+ The effectiveness of the trained model is measured using standard metrics such as precision, recall, and Intersection over Union (IoU).
186
+ In the validation and testing phase of training a YOLO vision model, it is essential to measure its performance to ensure it can reliably identify objects under various conditions. Two critical metrics used for this purpose are precision and recall. These metrics provide insight into the model's accuracy and its ability to detect all relevant objects within the video frames. Note the performance of the model without any further optimization or attention: 55 ms of inference time, reaching ~20 frames per second without any GPU assistance, is very good, and further parameter fine-tuning or resolution reduction could make a significant difference, but that is beyond the scope of this POC.
187
+
188
+ ![Training Results](images/object-detection-training-results.png "Training result plots")
189
+
190
+ **Precision** (or positive predictive value) measures the accuracy of the detections made by the model. In the context of the YOLO vision model for cab ride videos, precision reflects the proportion of correct positive detections out of all positive detections made. For example, if the model identifies 100 objects as vehicles and 90 of these identifications are correct, the precision is 90%. High precision is crucial in transportation settings to minimize false alarms, which can lead to unnecessary disruptions or desensitization to alerts.
191
+
192
+ **Recall** (or sensitivity) measures the model's ability to find all the relevant cases (or objects) within the dataset. In terms of the project, recall assesses the proportion of actual objects in the video frames that the model successfully detects. For instance, if there are 100 actual vehicles in the video and the model correctly identifies 85 of them, the recall is 85%. High recall is particularly important in safety-critical applications like transportation to ensure that potential hazards are not overlooked.
193
+
194
+ Both metrics are especially important because they help balance the model's performance. A high precision rate with a low recall rate might indicate that the model is too conservative, missing potential hazards. Conversely, a high recall rate with low precision might mean the model generates many false positives, which could reduce the trust in or efficiency of the system. Therefore, tuning the model to achieve a balanced trade-off between precision and recall is vital for practical deployment in public transportation monitoring systems.
195
+
196
+ **Intersection over Union** (IoU) is another metric used alongside precision and recall. It measures the overlap between the predicted bounding box and the actual bounding box, providing a direct measurement of localization accuracy, which is essential for precise object detection in dynamic environments like those captured in train and tram cab ride videos.
197
+
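+ As a concrete illustration, IoU for two axis-aligned boxes comes down to a few lines; this is a generic sketch rather than code from the training pipeline:
+
+ ```python
+ def iou(box_a, box_b):
+     """Intersection over Union of two (x1, y1, x2, y2) boxes in pixel coordinates."""
+     x1, y1 = max(box_a[0], box_b[0]), max(box_a[1], box_b[1])
+     x2, y2 = min(box_a[2], box_b[2]), min(box_a[3], box_b[3])
+     inter = max(0, x2 - x1) * max(0, y2 - y1)
+     area_a = (box_a[2] - box_a[0]) * (box_a[3] - box_a[1])
+     area_b = (box_b[2] - box_b[0]) * (box_b[3] - box_b[1])
+     return inter / (area_a + area_b - inter)
+
+ print(iou((50, 50, 150, 150), (100, 100, 200, 200)))  # ~0.14
+ ```
+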
198
+ ## From Object Detection to Segmentation
199
+
200
+ Meta's SAM (Segment-Anything Model) provides a powerful tool for generating segmentation datasets using an initial set of detection data. This is particularly useful for situations where you have a dataset labeled for object detection and you want to extend it to include segmentation labels, which are typically more detailed and involve classifying each pixel of the object.
201
+
202
+ ![Segmentation Results](images/object-detection-segmentation-3.PNG "Segmentation of bounding boxes")
203
+
204
+ ### Extending Detection Models to Generate a Segmentation Dataset
205
+
206
+ Building upon the foundation laid by the initial object detection model, this project took a significant step forward by employing Meta's Segment-Anything Model (SAM) to enhance our dataset with segmentation labels. The integration of SAM into our methodology allowed us to transform standard detection outputs—specifically, bounding boxes—into detailed pixel-level segmentation maps. This process bridged the gap between detection and segmentation, providing a comprehensive understanding of each object's precise contours and boundaries within the urban transit environment captured in our cab ride videos.
207
+
208
+ ![Segmentation Results](images/object-detection-segmentation-2.PNG "Segmentation of bounding boxes")
209
+
210
+ ### Integration of SAM with Detection Outputs
211
+
212
+ Initially, our project utilized a robust detection model trained to identify various objects, such as vehicles, pedestrians, and other significant elements, within the urban landscape. The detection model efficiently located these objects and outlined them with bounding boxes. The transition from detection to segmentation began by feeding these bounding box coordinates into SAM. SAM's sophisticated algorithms were then applied to precisely delineate the shapes enclosed within these boxes, focusing on the texture, color, and form contrasts between the objects and their backgrounds.
213
+
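+ The underlying idea can be sketched with the `segment_anything` package: feed the detector's box as a prompt and get a pixel mask back. The frame path, checkpoint file and box coordinates below are assumptions; the project itself relied on the `auto_annotate` helper shown further down.
+
+ ```python
+ import cv2
+ import numpy as np
+ from segment_anything import sam_model_registry, SamPredictor
+
+ # Hypothetical frame taken from the extracted dataset
+ frame = cv2.cvtColor(cv2.imread("datasets/track-detection/000012.png"), cv2.COLOR_BGR2RGB)
+
+ sam = sam_model_registry["vit_b"](checkpoint="sam_vit_b_01ec64.pth")
+ predictor = SamPredictor(sam)
+ predictor.set_image(frame)
+ masks, scores, _ = predictor.predict(
+     box=np.array([410, 220, 640, 480]),  # x1, y1, x2, y2 produced by the detection model
+     multimask_output=False,
+ )
+ ```
+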
214
+ ![Segmentation Results](images/object-detection-segmentation-1.PNG "Segmentation of bounding boxes")
215
+
216
+ ### Creating a Rich Segmentation Dataset
217
+
218
+ The result of this integration was a series of high-quality segmentation masks that corresponded to each detected object. These masks detailed the objects at a pixel level, thus providing a far more nuanced dataset than was originally available with mere detection labels. To compile this enriched dataset, each original image was paired with its newly generated segmentation mask. This pairing formed a comprehensive set of data that included both the original detection information and the advanced segmentation details.
219
+
220
+ ```python
221
+ from ultralytics.data.annotator import auto_annotate
222
+ auto_annotate(
223
+ data="datasets\\track-detection",
224
+ det_model="runs\\detection\\weights\\best.pt",
225
+ sam_model='sam_b.pt',
226
+ output_dir="datasets\\autosegment")
227
+ ```
228
+
229
+ ### Quality Assurance and Dataset Refinement
230
+
231
+ Critical to this methodology is the quality assurance phase. Each generated segmentation mask should undergo a thorough review to ensure that it meets the project's standards for accuracy and consistency. This step is essential but far less time- and resource-consuming than manual annotation. The precision of the segmentation masks will directly influence the effectiveness of subsequent models trained on this data. Where discrepancies or inaccuracies are noted, adjustments should be made through manual corrections to the masks, ensuring that the dataset upholds the integrity required for advanced computer vision applications.
232
+
233
+ ### Utilization for Advanced Model Training
234
+
235
+ The enriched segmentation dataset prepared in this manner is not merely an exercise but a practical toolkit for further research and development. With these detailed segmentation maps, we could train more sophisticated models capable of performing complex tasks that rely on an intricate understanding of the spatial and textural context of objects within an image. These annotated masks can now be used to help annotate further data, or to crop and hide parts of the frames for different sub-task processing. Such tasks include object tracking, distance estimation, obstacle detection, sign reading and signal interpretation. All of these tasks might require different specialized models with varying performance requirements, so being able to generate segmentation masks from the live images at low cost is essential.
236
+
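+ With the auto-generated masks in place, a segmentation variant of the same model family can be trained in a single call; a minimal sketch, where `trainz.segment.yaml` is a hypothetical dataset configuration pointing at the `datasets/autosegment` output:
+
+ ```python
+ from ultralytics import YOLO
+
+ # Illustrative only: epochs and image size are assumptions, not settings used here.
+ YOLO("yolov8n-seg.pt").train(data="trainz.segment.yaml", epochs=100, imgsz=640)
+ ```
+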
237
+ ## Conclusion
238
+
239
+ This exploration into the use of YOLO for object detection on cab ride videos has revealed the significant potential of AI in public transportation. The successful application of YOLOv8n demonstrates not just a technological triumph but also a blueprint for future innovations in autonomous navigation and safety enhancements. By creatively leveraging YouTube videos as a data source and employing Meta's SAM for segmentation, I have shown that even with constrained resources and a very limited amount of annotated data, one can generate a dataset rich enough to train a sophisticated model.
240
+
241
+ My takeaways from this experience include:
242
+
243
+ * The feasibility of applying advanced AI models like YOLO to real-world situations with limited data.
244
+ * The importance of precision and recall balance in model performance, particularly in safety-critical applications.
245
+ * The versatility of YOLO, which extends beyond detection to enable comprehensive scene understanding through segmentation.
246
+ * The power of leveraging large, compute- and resource-intensive models to train small, lightweight, specialized models.
247
+
248
+ This work paves the way for more intricate applications and sets the stage for further refinement and application of AI in public transportation, promising a future where safety and efficiency are greatly enhanced by intelligent systems.
projects.json ADDED
@@ -0,0 +1,11 @@
1
+ [
2
+ {
3
+ "id": "1",
4
+ "title": "Object Detection with YOLO",
5
+ "description": ["This project is about object detection using YOLO algorithm.",
6
+ "YOLO is a state-of-the-art, real-time object detection system."],
7
+ "skills": ["Python", "OpenCV", "YOLO"],
8
+ "tasks": ["object-detection", "image-segmentation", "real-time-processing", "computer-vision"],
9
+ "link": "object-detection.html"
10
+ }
11
+ ]
style.css CHANGED
@@ -1,28 +1,182 @@
1
  body {
2
- padding: 2rem;
3
- font-family: -apple-system, BlinkMacSystemFont, "Arial", sans-serif;
4
  }
5
 
6
- h1 {
7
- font-size: 16px;
8
- margin-top: 0;
9
  }
10
 
11
- p {
12
- color: rgb(107, 114, 128);
13
- font-size: 15px;
14
- margin-bottom: 10px;
15
- margin-top: 5px;
16
  }
17
 
18
- .card {
19
- max-width: 620px;
20
- margin: 0 auto;
21
- padding: 16px;
22
- border: 1px solid lightgray;
23
- border-radius: 16px;
24
  }
25
 
26
- .card p:last-child {
27
- margin-bottom: 0;
28
  }
 
1
+
2
+ /* HTML5 display-role reset for older browsers */
3
+ article, aside, details, figcaption, figure,
4
+ footer, header, hgroup, menu, nav, section {
5
+ display: block;
6
+ }
7
+
8
  body {
9
+ margin: 0; /* Remove default margin */
10
+ padding: 0; /* Remove default padding */
11
+ line-height: 1;
12
+ font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif;
13
+ background-color: #f4f4f4; /* Light grey background */
14
+ color: #333; /* Main text color */
15
+ }
16
+
17
+ a {
18
+ color: #067df7; /* Huggingface blue for links */
19
+ text-decoration: none; /* No underlines on links */
20
+ }
21
+
22
+ a:hover {
23
+ text-decoration: underline; /* Underline on hover for links */
24
+ }
25
+
26
+ header {
27
+ background-color: #fff; /* White background for the header */
28
+ padding: 2em; /* Padding around header content */
29
+ text-align: center; /* Centered header text */
30
+ border-bottom: 2px solid #eaeaea; /* Light grey border at the bottom */
31
+ }
32
+
33
+ header h1 {
34
+ color: #333; /* Dark grey color for the main title */
35
+ font-size: 2.5rem; /* Larger font size for the main title */
36
+ margin-bottom: 0.5em; /* Spacing below the main title */
37
+ }
38
+
39
+ header p {
40
+ color: #666; /* Medium grey for the subtitle */
41
+ font-size: 1.2rem; /* Subtitle size */
42
+ margin-top: 0; /* Align top of subtitle with title's bottom */
43
+ }
44
+
45
+ footer {
46
+ background-color: #fff; /* White background for the footer */
47
+ padding: 1em; /* Padding around footer content */
48
+ text-align: center; /* Centered footer text */
49
+ border-top: 2px solid #eaeaea; /* Light grey border at the top */
50
+ font-size: 0.9rem; /* Smaller font size for footer */
51
+ }
52
+
53
+ footer img {
54
+ vertical-align: middle; /* Align images with text */
55
+ margin-right: 0.5em; /* Space between icon and text */
56
+ }
57
+
58
+ footer a {
59
+ color: #067df7; /* Consistent link color */
60
+ }
61
+
62
+ .project-card {
63
+ background-color: #fff; /* White background for projects */
64
+ margin: 1em; /* Margin around cards */
65
+ padding: 2em; /* Padding inside cards */
66
+ box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1); /* Subtle shadow for depth */
67
+ border-radius: 10px; /* Rounded corners for cards */
68
+ transition: transform 0.3s ease, box-shadow 0.3s ease; /* Smooth transitions for hover effects */
69
  }
70
 
71
+ .project-card:hover {
72
+ transform: translateY(-5px); /* Slight lift effect on hover */
73
+ box-shadow: 0 10px 15px rgba(0, 0, 0, 0.15); /* Enhanced shadow on hover */
74
  }
75
 
76
+ .project-card h2 {
77
+ color: #333; /* Dark grey for titles */
78
+ margin-bottom: 0.8em; /* Space below title */
79
  }
80
 
81
+ .project-card p {
82
+ color: #666; /* Medium grey for descriptions */
83
+ margin-bottom: 1em; /* Space below description */
84
  }
85
 
86
+ .project-card a {
87
+ font-weight: bold; /* Make "View Project" links bold */
88
+ color: #067df7; /* Use blue for call to action */
89
+ padding: 0.5em 1em; /* Padding for clickable area */
90
+ border: 2px solid #067df7; /* Border matching the text color */
91
+ border-radius: 5px; /* Rounded corners for buttons */
92
+ display: inline-block; /* Allow for padding and border */
93
+ }
94
+
95
+ .project-card a:hover {
96
+ background-color: #067df7; /* Background color on hover */
97
+ color: #fff; /* Text color on hover */
98
+ }
99
+ /* Breadcrumb styling */
100
+ #breadcrumb {
101
+ padding: 0.5em 1em;
102
+ background-color: #f9f9f9; /* Light grey background to stand out */
103
+ border-left: 3px solid #fae901; /* Blue accent line */
104
+ color: #333; /* Dark text for readability */
105
+ font-size: 0.9rem; /* Smaller font size for the breadcrumb */
106
+ margin-bottom: 1em; /* Space before the main content */
107
+ position: sticky; /* Stick to the top when scrolling */
108
+ top: 0;
109
+ z-index: 10; /* Ensure it's above other content */
110
+ }
111
+
112
+ /* Markdown container styling for better reading experience */
113
+ #markdown-container {
114
+ padding: 1em; /* Padding around the text */
115
+ background-color: #fff; /* White background for reading */
116
+ border-radius: 5px; /* Slightly rounded corners */
117
+ box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1); /* Subtle shadow for depth */
118
+ max-width: 800px; /* Max width to maintain optimal line length for reading */
119
+ margin: 1em auto; /* Center the container and add margin around it */
120
+ word-wrap: break-word; /* Ensure long words don't overflow */
121
+ }
122
+
123
+ /* Style the Table of Contents */
124
+ #toc {
125
+ padding: 1em; /* Padding around the content */
126
+ background-color: #fff; /* White background */
127
+ border-radius: 5px; /* Slightly rounded corners */
128
+ box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1); /* Subtle shadow for depth */
129
+ max-width: 800px; /* Max width to maintain optimal line length for reading */
130
+ margin: 1em auto; /* Center the container and add margin around it */
131
+ list-style: none; /* Remove default list styling */
132
+ }
133
+
134
+ #toc li a {
135
+ color: #067df7; /* Blue color for links to match the theme */
136
+ text-decoration: none; /* No underline */
137
+ display: block; /* Block level to add padding */
138
+ padding: 0.2em 0; /* Padding for each link */
139
+ }
140
+
141
+ #toc li a:hover {
142
+ background-color: #f0f0f0; /* Light grey background on hover */
143
+ border-radius: 3px; /* Slight rounding on hover */
144
+ }
145
+
146
+ /* Enhance the appearance of code blocks */
147
+ pre code {
148
+ display: block;
149
+ padding: 1em; /* Padding inside code blocks */
150
+ background-color: #f0f0f0; /* Light grey background for code blocks */
151
+ border-radius: 5px; /* Rounded corners for code blocks */
152
+ overflow-x: auto; /* Enable horizontal scrolling if the code is too wide */
153
+ }
154
+
155
+ /* Make images responsive */
156
+ img {
157
+ max-width: 100%; /* Make images responsive */
158
+ height: auto; /* Adjust height automatically */
159
+ display: block; /* Images are block level to apply max-width */
160
+ margin: 1em 0; /* Margin above and below images */
161
+ }
162
+
163
+ /* Responsive design for smaller screens */
164
+ @media (max-width: 768px) {
165
+ #markdown-container,
166
+ #toc {
167
+ padding: 0.5em; /* Smaller padding on small screens */
168
+ margin: 0.5em; /* Smaller margin on small screens */
169
+ }
170
+
171
+ header h1 {
172
+ font-size: 2rem; /* Slightly smaller font for smaller screens */
173
+ }
174
+
175
+ header p {
176
+ font-size: 1rem; /* Smaller subtitle on smaller screens */
177
+ }
178
+
179
+ .project-card {
180
+ margin: 0.5em;
181
+ }
182
  }