testing-roboflow / dino.txt
muhammadsalmanalfaridzi's picture
Update dino.txt
c440f41 verified
raw
history blame contribute delete
11.2 kB
import gradio as gr
import cv2
import numpy as np
import tempfile
import os
import requests
from dds_cloudapi_sdk import Config, Client
from dds_cloudapi_sdk.tasks.dinox import DinoxTask
from dds_cloudapi_sdk import TextPrompt
from dds_cloudapi_sdk.tasks.types import DetectionTarget
from roboflow import Roboflow
from dotenv import load_dotenv
# ========== Configuration ==========
# Populate the environment (via python-dotenv) before the os.getenv reads below.
load_dotenv()
# Roboflow config
rf_api_key = os.getenv("ROBOFLOW_API_KEY")
workspace = os.getenv("ROBOFLOW_WORKSPACE")
project_name = os.getenv("ROBOFLOW_PROJECT")
# NOTE(review): os.getenv returns None when the variable is unset, and
# int(None) raises TypeError at import time - confirm it is always provided.
model_version = int(os.getenv("ROBOFLOW_MODEL_VERSION"))
# DINO-X config
DINOX_API_KEY = os.getenv("DINO_X_API_KEY")
# Text prompt sent to DINO-X with every detection task.
DINOX_PROMPT = "beverage . bottle . cans . boxed milk . milk"
# Initialise the YOLO model (Roboflow)
rf = Roboflow(api_key=rf_api_key)
project = rf.workspace(workspace).project(project_name)
yolo_model = project.version(model_version).model
# Initialise the DINO-X API client
dinox_config = Config(DINOX_API_KEY)
dinox_client = Client(dinox_config)
# Entry point that routes an input to the image or the video pipeline
def detect_combined(image_path_or_video_path, is_video=False):
    """Run object detection on an image or a video.

    Args:
        image_path_or_video_path: Input forwarded unchanged to the selected
            pipeline.
        is_video: When truthy the input is handled by
            ``detect_objects_in_video``; otherwise by
            ``detect_objects_in_image``.

    Returns:
        Whatever the selected pipeline function returns.
    """
    handler = detect_objects_in_video if is_video else detect_objects_in_image
    return handler(image_path_or_video_path)
def detect_objects_in_image(image_path):
    """Detect Nestlé (YOLO) and unclassified (DINO-X) products in one image.

    The image is sent to the Roboflow model and to DINO-X. DINO-X detections
    that ``is_overlap`` reports as overlapping a YOLO detection are dropped;
    the remaining ones are counted as unclassified products. Both sets of
    boxes are drawn on the image, which is written to a fixed path in /tmp.

    Args:
        image_path: Path of the image file to analyse.

    Returns:
        A ``(path, text)`` tuple. On success ``path`` is the annotated image
        and ``text`` the per-class counts and totals. On any failure the
        tuple is ``(image_path, "Error: <message>")``.
    """
    try:
        # Read the image
        img = cv2.imread(image_path)
        if img is None:
            # cv2.imread reports an unreadable file by returning None rather
            # than raising; fail here instead of after two remote API calls.
            raise ValueError(f"Could not read image: {image_path}")

        # --- Detection with YOLO (Nestlé) ---
        yolo_pred = yolo_model.predict(image_path, confidence=50, overlap=80).json()
        predictions = yolo_pred['predictions']

        # Count Nestlé products per class
        nestle_class_count = {}
        for pred in predictions:
            class_name = pred['class']
            nestle_class_count[class_name] = nestle_class_count.get(class_name, 0) + 1
        # Boxes are kept as (center_x, center_y, width, height)
        nestle_boxes = [
            (pred['x'], pred['y'], pred['width'], pred['height'])
            for pred in predictions
        ]

        # --- Detection with DINO-X (unclassified products) ---
        image_url = dinox_client.upload_file(image_path)
        task = DinoxTask(
            image_url=image_url,
            prompts=[TextPrompt(text=DINOX_PROMPT)],
            bbox_threshold=0.25,
            targets=[DetectionTarget.BBox]
        )
        dinox_client.run_task(task)
        dinox_pred = task.result.objects

        # Count competitor products that do not overlap a YOLO detection
        competitor_class_count = {}
        competitor_boxes = []
        for obj in dinox_pred:
            dinox_box = obj.bbox
            if is_overlap(dinox_box, nestle_boxes):
                # Already detected by YOLO: ignore
                continue
            class_name = obj.category.strip().lower()
            competitor_class_count[class_name] = competitor_class_count.get(class_name, 0) + 1
            competitor_boxes.append({
                "class": class_name,
                "box": dinox_box,
                "confidence": obj.score
            })

        # --- Summary text with per-class counts and totals ---
        total_nestle = sum(nestle_class_count.values())
        nestle_count_text = "".join(
            f"{class_name}: {count}\n"
            for class_name, count in nestle_class_count.items()
        )
        nestle_count_text += f"\nTotal Nestlé Products: {total_nestle}"

        total_unclassified = sum(competitor_class_count.values())
        unclassified_count_text = "".join(
            f"{class_name}: {count}\n"
            for class_name, count in competitor_class_count.items()
        )
        unclassified_count_text += f"\nTotal Unclassified Products: {total_unclassified}"

        # --- Draw YOLO detections (Nestlé), converting center/size to corners ---
        for pred in predictions:
            x, y, w, h = pred['x'], pred['y'], pred['width'], pred['height']
            cv2.rectangle(img, (int(x-w/2), int(y-h/2)), (int(x+w/2), int(y+h/2)), (0,255,0), 2)
            cv2.putText(img, pred['class'], (int(x-w/2), int(y-h/2-10)), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0,255,0), 2)

        # --- Draw DINO-X detections (unclassified), already corner-based ---
        for comp in competitor_boxes:
            x1, y1, x2, y2 = comp['box']
            display_name = "unclassified"
            cv2.rectangle(img, (int(x1), int(y1)), (int(x2), int(y2)), (0, 0, 255), 2)
            cv2.putText(img, f"{display_name} {comp['confidence']:.2f}",
                        (int(x1), int(y1-10)), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2)

        # Save the output image; cv2.imwrite returns False when it cannot write
        output_path = "/tmp/combined_output_image.jpg"
        if not cv2.imwrite(output_path, img):
            raise IOError(f"Could not write output image: {output_path}")

        return output_path, nestle_count_text + "\n" + unclassified_count_text
    except Exception as e:
        # UI boundary: report the failure as text instead of crashing the app
        return image_path, f"Error: {str(e)}"
def detect_objects_in_video(video_path):
    """Annotate every frame of a video with YOLO and DINO-X detections.

    Each frame is written to a temporary JPEG, sent to the Roboflow model and
    to DINO-X, and annotated with the per-class counts plus both sets of
    bounding boxes. DINO-X detections that ``is_overlap`` reports as
    overlapping a YOLO detection are dropped. Annotated frames are written to
    a fixed output file in /tmp.

    Args:
        video_path: Path of the input video.

    Returns:
        Path of the annotated output video.

    Raises:
        Any exception raised by OpenCV or the detection services; the capture,
        the writer and the temporary frame directory are released first.
    """
    temp_output_path = "/tmp/output_video.mp4"
    frame_count = 0

    # Open the video
    video = cv2.VideoCapture(video_path)
    # Keep the reported frame rate as-is: truncating it to an int
    # (e.g. 29.97 -> 29) would change the playback speed of the output.
    frame_rate = video.get(cv2.CAP_PROP_FPS)
    frame_width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
    frame_size = (frame_width, frame_height)

    # VideoWriter that stores the annotated video
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    output_video = cv2.VideoWriter(temp_output_path, fourcc, frame_rate, frame_size)

    try:
        # The directory and every frame JPEG in it are removed on exit
        with tempfile.TemporaryDirectory() as temp_frames_dir:
            while True:
                ret, frame = video.read()
                if not ret:
                    break

                # Save the frame to disk because both services take a file path
                frame_path = os.path.join(temp_frames_dir, f"frame_{frame_count}.jpg")
                cv2.imwrite(frame_path, frame)

                # --- Detection with YOLO (Nestlé) ---
                yolo_pred = yolo_model.predict(frame_path, confidence=50, overlap=80).json()
                predictions = yolo_pred['predictions']

                # Count Nestlé products per class
                nestle_class_count = {}
                for pred in predictions:
                    class_name = pred['class']
                    nestle_class_count[class_name] = nestle_class_count.get(class_name, 0) + 1
                # Boxes are kept as (center_x, center_y, width, height)
                nestle_boxes = [
                    (pred['x'], pred['y'], pred['width'], pred['height'])
                    for pred in predictions
                ]

                # --- Detection with DINO-X (unclassified products) ---
                image_url = dinox_client.upload_file(frame_path)
                task = DinoxTask(
                    image_url=image_url,
                    prompts=[TextPrompt(text=DINOX_PROMPT)],
                    bbox_threshold=0.25,
                    targets=[DetectionTarget.BBox]
                )
                dinox_client.run_task(task)
                dinox_pred = task.result.objects

                # Count competitor products that do not overlap a YOLO detection
                competitor_class_count = {}
                competitor_boxes = []
                for obj in dinox_pred:
                    dinox_box = obj.bbox
                    if is_overlap(dinox_box, nestle_boxes):
                        # Already detected by YOLO: ignore
                        continue
                    class_name = obj.category.strip().lower()
                    competitor_class_count[class_name] = competitor_class_count.get(class_name, 0) + 1
                    competitor_boxes.append({
                        "class": class_name,
                        "box": dinox_box,
                        "confidence": obj.score
                    })

                # --- Summary text with per-class counts and totals ---
                total_nestle = sum(nestle_class_count.values())
                nestle_count_text = "".join(
                    f"{class_name}: {count}\n"
                    for class_name, count in nestle_class_count.items()
                )
                nestle_count_text += f"\nTotal Nestlé Products: {total_nestle}"

                total_unclassified = sum(competitor_class_count.values())
                unclassified_count_text = "".join(
                    f"{class_name}: {count}\n"
                    for class_name, count in competitor_class_count.items()
                )
                unclassified_count_text += f"\nTotal Unclassified Products: {total_unclassified}"

                # Overlay the summary text on the frame, one line every 30 px
                y_offset = 20
                for line in nestle_count_text.split("\n"):
                    cv2.putText(frame, line, (10, y_offset), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)
                    y_offset += 30
                y_offset += 30  # Slight gap between sections
                for line in unclassified_count_text.split("\n"):
                    cv2.putText(frame, line, (10, y_offset), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)
                    y_offset += 30

                # --- Draw YOLO detections (Nestlé), converting center/size to corners ---
                for pred in predictions:
                    x, y, w, h = pred['x'], pred['y'], pred['width'], pred['height']
                    cv2.rectangle(frame, (int(x-w/2), int(y-h/2)), (int(x+w/2), int(y+h/2)), (0,255,0), 2)
                    cv2.putText(frame, pred['class'], (int(x-w/2), int(y-h/2-10)), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0,255,0), 2)

                # --- Draw DINO-X detections (unclassified), already corner-based ---
                for comp in competitor_boxes:
                    x1, y1, x2, y2 = comp['box']
                    display_name = "unclassified"
                    cv2.rectangle(frame, (int(x1), int(y1)), (int(x2), int(y2)), (0, 0, 255), 2)
                    cv2.putText(frame, f"{display_name} {comp['confidence']:.2f}",
                                (int(x1), int(y1-10)), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2)

                # Write the annotated frame to the output video
                output_video.write(frame)
                frame_count += 1
    finally:
        # Release the capture and finalise the output file even when a frame fails
        video.release()
        output_video.release()

    return temp_output_path
def is_overlap(box1, boxes2, threshold=0.3):
    """Tell whether ``box1`` is substantially covered by any box in ``boxes2``.

    Args:
        box1: Corner-format box ``(x_min, y_min, x_max, y_max)``.
        boxes2: Iterable of center-format boxes
            ``(center_x, center_y, width, height)``.
        threshold: Fraction of ``box1``'s area that the intersection with a
            single box must exceed for the boxes to count as overlapping.

    Returns:
        True if the intersection with at least one box of ``boxes2`` covers
        more than ``threshold`` of ``box1``'s area, otherwise False. A
        ``box1`` with no positive area never overlaps.
    """
    x1_min, y1_min, x1_max, y1_max = box1
    area_box1 = (x1_max - x1_min) * (y1_max - y1_min)
    if area_box1 <= 0:
        # Degenerate box: the coverage ratio is undefined and dividing by the
        # area would raise ZeroDivisionError.
        return False

    for x2, y2, w2, h2 in boxes2:
        # Convert the center-format box to corner coordinates
        x2_min = x2 - w2 / 2
        x2_max = x2 + w2 / 2
        y2_min = y2 - h2 / 2
        y2_max = y2 + h2 / 2

        # Width and height of the intersection; negative when the boxes are disjoint
        dx = min(x1_max, x2_max) - max(x1_min, x2_min)
        dy = min(y1_max, y2_max) - max(y1_min, y2_min)
        if dx >= 0 and dy >= 0 and (dx * dy) / area_box1 > threshold:
            return True
    return False
# ========== Gradio Interface ==========
# Two-column UI: image detection on the left, video detection on the right.
with gr.Blocks(theme=gr.themes.Base(primary_hue="teal", secondary_hue="teal", neutral_hue="slate")) as iface:
    gr.Markdown("""<div style="text-align: center;"><h1>NESTLE - STOCK COUNTING</h1></div>""")
    with gr.Row():
        with gr.Column():
            # type="filepath" hands the handler a path on disk. The detection
            # code passes its input to cv2.imread and to the upload/predict
            # calls as a file path, so a PIL image (type="pil") cannot be used.
            input_image = gr.Image(type="filepath", label="Input Image")
            detect_image_button = gr.Button("Detect Image")
            output_image = gr.Image(label="Detect Object")
            output_text = gr.Textbox(label="Counting Object")
            detect_image_button.click(fn=detect_combined, inputs=input_image, outputs=[output_image, output_text])
        with gr.Column():
            input_video = gr.Video(label="Input Video")
            detect_video_button = gr.Button("Detect Video")
            output_video = gr.Video(label="Output Video")
            detect_video_button.click(fn=detect_objects_in_video, inputs=input_video, outputs=[output_video])
iface.launch()