crop_desease_detection / crop_desease_detection.py
IsmatS's picture
Upload folder using huggingface_hub
ddf9b49 verified
# -*- coding: utf-8 -*-
"""crop_desease_detection.ipynb
Automatically generated by Colab.
Original file is located at
https://colab.research.google.com/drive/1PCO8YxMl3tqzsbMVP1iiSylwED-u_VfW
"""
# Complete Pipeline for Tree Disease Detection with PDT Dataset
# Cell 1: Install required packages
# NOTE: the "!pip ..." lines are notebook shell escapes kept from the Colab
# export; they run in a notebook cell but are not valid syntax for a plain
# Python interpreter.
!pip install ultralytics torch torchvision opencv-python matplotlib
!pip install huggingface_hub
import os
import shutil
import zipfile
from ultralytics import YOLO
import torch
import cv2
import matplotlib.pyplot as plt
import numpy as np
from huggingface_hub import snapshot_download
from IPython.display import Image, display
# Cell 2: Download the PDT dataset from HuggingFace
print("Downloading PDT dataset from HuggingFace...")

# Defined up front so later cells can test it instead of hitting a NameError
# when the download fails.
dataset_path = None
try:
    # `resume_download` is intentionally not passed: it is deprecated in
    # huggingface_hub, where interrupted downloads resume automatically.
    dataset_path = snapshot_download(
        repo_id='qwer0213/PDT_dataset',
        repo_type='dataset',
        local_dir='/content/PDT_dataset',
    )
    print(f"Dataset downloaded to: {dataset_path}")
except Exception as e:
    # Best-effort cell: report the problem and let the following cells
    # emit their own "not found" diagnostics.
    print(f"Error downloading dataset: {e}")
# Cell 3: Find and extract the zip file
print("\nLooking for zip file in downloaded dataset...")

# Lazily scan the download tree; only the first archive encountered is used.
zip_candidates = (
    os.path.join(folder, entry)
    for folder, _subdirs, entries in os.walk('/content/PDT_dataset')
    for entry in entries
    if entry.endswith('.zip')
)
zip_file_path = next(zip_candidates, None)

if zip_file_path:
    print(f"Found zip file: {zip_file_path}")
    # Unpack next to the download so the raw snapshot stays untouched
    extract_path = '/content/PDT_dataset_extracted'
    os.makedirs(extract_path, exist_ok=True)
    print(f"Extracting {zip_file_path} to {extract_path}")
    with zipfile.ZipFile(zip_file_path, 'r') as archive:
        archive.extractall(extract_path)
    print("Extraction completed!")
else:
    print("No zip file found in the downloaded dataset!")
# Cell 4: Explore the extracted dataset structure
print("\nExploring dataset structure...")


def explore_dataset_structure(base_path):
    """Walk *base_path* and locate the YOLO/VOC folders and the data splits.

    Args:
        base_path: Directory to scan recursively with ``os.walk``.

    Returns:
        A dict with the keys ``yolo_txt_path``, ``voc_xml_path``,
        ``train_path``, ``val_path`` and ``test_path``. Each value is the
        matching directory path, or ``None`` when nothing was found.
    """
    dataset_info = {
        'yolo_txt_path': None,
        'voc_xml_path': None,
        'train_path': None,
        'val_path': None,
        'test_path': None
    }
    for root, _dirs, _files in os.walk(base_path):
        # Look for YOLO_txt directory
        if 'YOLO_txt' in root:
            # The substring test also matches every directory *below*
            # YOLO_txt. os.walk is top-down, so keeping only the first hit
            # records the folder itself rather than a nested subfolder.
            if dataset_info['yolo_txt_path'] is None:
                dataset_info['yolo_txt_path'] = root
                print(f"Found YOLO_txt at: {root}")
            # Check for train/val/test directly under the current directory
            for split in ['train', 'val', 'test']:
                split_path = os.path.join(root, split)
                if os.path.exists(split_path):
                    dataset_info[f'{split}_path'] = split_path
                    print(f"Found {split} at: {split_path}")
        # Look for VOC_xml directory (same first-hit rule as above)
        if 'VOC_xml' in root and dataset_info['voc_xml_path'] is None:
            dataset_info['voc_xml_path'] = root
            print(f"Found VOC_xml at: {root}")
    return dataset_info


dataset_info = explore_dataset_structure('/content/PDT_dataset_extracted')
# Cell 5: Setup YOLO dataset from the PDT dataset
_IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png')
_SPLITS = ('train', 'val', 'test')


def _list_images(directory):
    """Return image file names in *directory* (extension match ignores case)."""
    return [
        name for name in os.listdir(directory)
        if name.lower().endswith(_IMAGE_EXTENSIONS)
    ]


def setup_yolo_dataset(dataset_info, output_dir='/content/PDT_yolo'):
    """Copy the PDT splits into a YOLO-style tree and write its data.yaml.

    The result is ``output_dir/images/<split>`` and
    ``output_dir/labels/<split>`` for train/val/test, plus
    ``output_dir/data.yaml`` describing a single class, ``unhealthy``.

    Args:
        dataset_info: Mapping with optional ``train_path``/``val_path``/
            ``test_path`` entries. Each points at a directory that contains
            ``images`` and ``labels`` subfolders. Missing or ``None``
            entries are skipped with a warning.
        output_dir: Destination root. WARNING: it is deleted and recreated
            if it already exists.

    Returns:
        Path of the generated ``data.yaml``.
    """
    print(f"\nSetting up YOLO dataset to {output_dir}")
    # Clean output directory so stale files from a previous run cannot leak in
    if os.path.exists(output_dir):
        shutil.rmtree(output_dir)
    os.makedirs(output_dir, exist_ok=True)
    # Create directory structure (also for splits that turn out to be absent,
    # so the verification loop below can list them safely)
    for split in _SPLITS:
        os.makedirs(os.path.join(output_dir, 'images', split), exist_ok=True)
        os.makedirs(os.path.join(output_dir, 'labels', split), exist_ok=True)

    total_copied = 0
    # Process each split
    for split in _SPLITS:
        # .get(): a partially filled dataset_info must not raise KeyError
        split_path = dataset_info.get(f'{split}_path')
        if not split_path or not os.path.exists(split_path):
            print(f"Warning: {split} split not found")
            continue
        print(f"\nProcessing {split} from: {split_path}")
        # Find images and labels directories
        img_dir = os.path.join(split_path, 'images')
        lbl_dir = os.path.join(split_path, 'labels')
        if not os.path.exists(img_dir) or not os.path.exists(lbl_dir):
            print(f"Warning: Could not find images or labels for {split}")
            continue
        # Copy images and labels
        img_files = _list_images(img_dir)
        print(f"Found {len(img_files)} images in {split}")
        for img_file in img_files:
            # Copy image
            shutil.copy2(
                os.path.join(img_dir, img_file),
                os.path.join(output_dir, 'images', split, img_file),
            )
            # Copy the label sharing the image's stem, when one exists;
            # an image without a label file is still copied.
            txt_file = os.path.splitext(img_file)[0] + '.txt'
            src_txt = os.path.join(lbl_dir, txt_file)
            if os.path.exists(src_txt):
                shutil.copy2(
                    src_txt,
                    os.path.join(output_dir, 'labels', split, txt_file),
                )
            total_copied += 1

    # Create data.yaml. Built line by line so the two-space nesting of the
    # `names` mapping does not depend on this function's own indentation.
    data_yaml_content = "\n".join([
        "# PDT dataset configuration",
        f"path: {os.path.abspath(output_dir)}",
        "train: images/train",
        "val: images/val",
        "test: images/test",
        "",
        "# Classes",
        "names:",
        "  0: unhealthy",
        "nc: 1",
        "",
    ])
    yaml_path = os.path.join(output_dir, 'data.yaml')
    with open(yaml_path, 'w', encoding='utf-8') as f:
        f.write(data_yaml_content)

    print("\nDataset setup completed!")
    print(f"Total images copied: {total_copied}")
    # Verify the dataset
    for split in _SPLITS:
        img_dir = os.path.join(output_dir, 'images', split)
        lbl_dir = os.path.join(output_dir, 'labels', split)
        if os.path.exists(img_dir):
            img_count = len(_list_images(img_dir))
            lbl_count = len([f for f in os.listdir(lbl_dir) if f.endswith('.txt')])
            print(f"{split}: {img_count} images, {lbl_count} labels")
    return yaml_path
# Materialise the YOLO-format dataset; the returned data.yaml path feeds training
data_yaml_path = setup_yolo_dataset(dataset_info)

# Cell 6: Train the model
print("\nStarting model training...")

# First CUDA device when one is visible to torch, otherwise the CPU
training_device = '0' if torch.cuda.is_available() else 'cpu'

# All training options collected in one place
train_settings = {
    'data': data_yaml_path,
    'epochs': 50,  # Adjust based on your needs
    'imgsz': 640,
    'batch': 16,  # Adjust based on GPU memory
    'name': 'yolov8s_pdt',
    'patience': 10,
    'save': True,
    'device': training_device,
    'workers': 4,
    'project': 'runs/train',
    'exist_ok': True,
    'pretrained': False,
    'optimizer': 'SGD',
    'lr0': 0.01,
    'momentum': 0.9,
    'weight_decay': 0.001,
    'verbose': True,
    'plots': True,
}

# YOLOv8s built from its architecture config file (see pretrained=False above)
model = YOLO('yolov8s.yaml')
results = model.train(**train_settings)
print("Training completed!")
# Cell 7: Evaluate the model
print("\nEvaluating model performance...")

# Reload the best checkpoint written by the training run
best_model_path = 'runs/train/yolov8s_pdt/weights/best.pt'
model = YOLO(best_model_path)

# Run validation and report the headline box metrics
metrics = model.val()
print("\nValidation Metrics:")
box_metrics = metrics.box
for label, score in (
    ("mAP50", box_metrics.map50),
    ("mAP50-95", box_metrics.map),
    ("Precision", box_metrics.p.mean()),
    ("Recall", box_metrics.r.mean()),
):
    print(f"{label}: {score:.3f}")
# Cell 8: Test the model
print("\nTesting on sample images...")

# Test on validation images
val_img_dir = '/content/PDT_yolo/images/val'
fig, axes = plt.subplots(2, 3, figsize=(18, 12))
axes = axes.ravel()

# Take as many images as there are grid cells. Sorting makes the preview
# reproducible, because os.listdir returns entries in arbitrary order.
val_images = sorted(
    f for f in os.listdir(val_img_dir)
    if f.lower().endswith(('.jpg', '.jpeg', '.png'))
)[:len(axes)]

for ax, img_name in zip(axes, val_images):
    img_path = os.path.join(val_img_dir, img_name)
    # Run inference
    results = model(img_path, conf=0.25)
    # Plot results; the annotated array is converted BGR -> RGB for matplotlib
    img_with_boxes = results[0].plot()
    ax.imshow(cv2.cvtColor(img_with_boxes, cv2.COLOR_BGR2RGB))
    ax.set_title(f'{img_name}')
    ax.axis('off')

# Hide every unused cell, not just the last one, so a short image list does
# not leave empty framed axes in the figure.
for ax in axes[len(val_images):]:
    ax.axis('off')

plt.tight_layout()
plt.show()
# Cell 9: Create inference function
def detect_tree_disease(image_path, conf_threshold=0.25):
    """Run the module-level ``model`` on an image and show the detections.

    Args:
        image_path: Input passed straight to ``model(...)``.
        conf_threshold: Value forwarded as the ``conf`` argument.

    Returns:
        A list with one dict per detected box, holding ``'confidence'``
        (float), ``'bbox'`` (list taken from ``box.xyxy[0]``) and
        ``'class'`` (always ``'unhealthy'``).
    """
    predictions = model(image_path, conf=conf_threshold)

    # Flatten the boxes of every result into plain-Python records
    detections = [
        {
            'confidence': float(hit.conf[0]),
            'bbox': hit.xyxy[0].tolist(),
            'class': 'unhealthy'
        }
        for prediction in predictions
        if prediction.boxes is not None
        for hit in prediction.boxes
    ]

    # Visualize (only the first result is drawn)
    annotated = predictions[0].plot()
    plt.figure(figsize=(12, 8))
    plt.imshow(cv2.cvtColor(annotated, cv2.COLOR_BGR2RGB))
    plt.axis('off')
    plt.title(f'Detected {len(detections)} unhealthy tree(s)')
    plt.show()
    return detections
# Cell 10: Save the model
print("\nSaving model...")
final_model_path = 'tree_disease_detector.pt'
model.save(final_model_path)
print(f"Model saved to: {final_model_path}")

# Cell 11: Save to Google Drive (optional)
try:
    # Imported inside the try so that running outside Colab falls through to
    # the "saved locally" message instead of aborting on ImportError.
    from google.colab import drive

    drive.mount('/content/drive')
    save_dir = '/content/drive/MyDrive/tree_disease_detection'
    os.makedirs(save_dir, exist_ok=True)
    # Copy files
    shutil.copy(best_model_path, os.path.join(save_dir, 'best_model.pt'))
    shutil.copy(final_model_path, os.path.join(save_dir, 'tree_disease_detector.pt'))
    # Copy training results
    results_png = 'runs/train/yolov8s_pdt/results.png'
    if os.path.exists(results_png):
        shutil.copy(results_png, os.path.join(save_dir, 'training_results.png'))
    print(f"Results saved to Google Drive: {save_dir}")
except Exception as e:
    # `except Exception` (not a bare except) keeps Ctrl-C / SystemExit working;
    # the step stays optional, but the reason is now reported.
    print("Google Drive not mounted. Results saved locally.")
    print(f"Reason: {e}")
# Cell 12: Summary
summary_lines = (
    "\n=== Training Complete ===",
    "Model: YOLOv8s",
    "Dataset: PDT (Pests and Diseases Tree)",
    f"Best Model: {best_model_path}",
    "The model is ready for tree disease detection!",
    # Hint for testing with your own image
    "\nTo test with your own image:",
    "detections = detect_tree_disease('path/to/your/image.jpg')",
)
for summary_line in summary_lines:
    print(summary_line)
# Cell 1: Install Hugging Face Hub
# NOTE: "!pip ..." is a notebook shell escape, not plain-Python syntax.
!pip install huggingface_hub
# Cell 2: Login to Hugging Face
from huggingface_hub import login, HfApi, create_repo
import os
import shutil
# Login to Hugging Face (you'll need your token)
# Get your token from: https://huggingface.co/settings/tokens
# NOTE(review): login() is called without arguments, so it presumably prompts
# for the token interactively — confirm against the huggingface_hub docs.
login()
# Cell 3: Prepare model files for upload
# Staging directory whose contents are uploaded later
model_dir = "pdt_tree_disease_model"
os.makedirs(model_dir, exist_ok=True)

# Locations of the main training artifacts
best_model_path = 'runs/train/yolov8s_pdt/weights/best.pt'
results_path = 'runs/train/yolov8s_pdt/results.png'
confusion_matrix_path = 'runs/train/yolov8s_pdt/confusion_matrix.png'

# (source path, file name inside model_dir), in copy order
artifacts = [
    (best_model_path, "best.pt"),
    ('tree_disease_detector.pt', "tree_disease_detector.pt"),
    (results_path, "training_results.png"),
    (confusion_matrix_path, "confusion_matrix.png"),
]
# Other training plots keep their original file names
artifacts.extend(
    (f'runs/train/yolov8s_pdt/{curve_name}', curve_name)
    for curve_name in ['F1_curve.png', 'P_curve.png', 'R_curve.png', 'PR_curve.png']
)

# Every artifact is optional: copy only what the run actually produced
for source_path, target_name in artifacts:
    if os.path.exists(source_path):
        shutil.copy(source_path, os.path.join(model_dir, target_name))
# Cell 4: Create model card (README.md)
# Markdown template for the repository README. The four "0.xxx" cells in the
# metrics table are placeholders that are filled in after this assignment.
model_card = """---
tags:
- object-detection
- yolov8
- tree-disease-detection
- pdt-dataset
library_name: ultralytics
datasets:
- qwer0213/PDT_dataset
metrics:
- mAP50
- mAP50-95
---

# YOLOv8 Tree Disease Detection Model

This model is trained on the PDT (Pests and Diseases Tree) dataset for detecting unhealthy trees using YOLOv8.

## Model Description

- **Architecture**: YOLOv8s
- **Task**: Object Detection (Tree Disease Detection)
- **Classes**: 1 (unhealthy)
- **Input Size**: 640x640
- **Framework**: Ultralytics YOLOv8

## Training Details

- **Dataset**: PDT (Pests and Diseases Tree) dataset
- **Training Images**: 4,536
- **Validation Images**: 567
- **Test Images**: 567
- **Epochs**: 50
- **Batch Size**: 16
- **Optimizer**: SGD
- **Learning Rate**: 0.01

## Performance Metrics

| Metric | Value |
|--------|-------|
| mAP50 | 0.xxx |
| mAP50-95 | 0.xxx |
| Precision | 0.xxx |
| Recall | 0.xxx |

## Usage

```python
from ultralytics import YOLO

# Load model
model = YOLO('tree_disease_detector.pt')

# Run inference
results = model('path/to/image.jpg')

# Process results
for result in results:
    boxes = result.boxes
    if boxes is not None:
        for box in boxes:
            confidence = box.conf[0]
            bbox = box.xyxy[0].tolist()
            print(f"Unhealthy tree detected with confidence: {confidence}")
```

## Dataset

This model was trained on the PDT dataset, which contains high-resolution UAV images of trees with pest and disease annotations.

## Citation

```bibtex
@dataset{pdt_dataset,
  title={PDT: UAV Pests and Diseases Tree Dataset},
  author={Zhou et al.},
  year={2024},
  publisher={HuggingFace}
}
```

## License

MIT License
"""
# Fill in the actual metrics
def fill_metric_placeholders(card, values, placeholder='0.xxx'):
    """Replace successive *placeholder* occurrences in *card*, one per value.

    Each value is formatted with three decimals and substituted for the next
    remaining placeholder, in order. The replacement count of 1 matters:
    without it the first call would overwrite every placeholder at once.

    Args:
        card: Text containing the placeholders.
        values: Numbers to insert, in the order the placeholders appear.
        placeholder: Marker text to replace.

    Returns:
        The text with up to ``len(values)`` placeholders replaced.
    """
    for value in values:
        card = card.replace(placeholder, f'{value:.3f}', 1)
    return card


if 'metrics' in globals() and metrics is not None:
    # Same order as the rows of the model card's metrics table
    model_card = fill_metric_placeholders(
        model_card,
        [
            metrics.box.map50,
            metrics.box.map,
            metrics.box.p.mean(),
            metrics.box.r.mean(),
        ],
    )
# Save model card
with open(os.path.join(model_dir, "README.md"), "w", encoding="utf-8") as f:
    f.write(model_card)

# Cell 5: Create config file
# The section titles inside the YAML need their leading "#": a bare word on
# its own line is not valid in a YAML mapping.
config_content = """# YOLOv8 Tree Disease Detection Configuration
model_type: yolov8s
task: detect
nc: 1 # number of classes
names: ['unhealthy'] # class names

# Input
imgsz: 640

# Inference settings
conf: 0.25 # confidence threshold
iou: 0.45 # IoU threshold for NMS
"""
with open(os.path.join(model_dir, "config.yaml"), "w", encoding="utf-8") as f:
    f.write(config_content)
# Cell 6: Push to Hugging Face Hub
from huggingface_hub import HfApi

# Initialize API
api = HfApi()

# Create repository (replace 'your-username' with your HuggingFace username)
repo_id = "your-username/yolov8-tree-disease-detection"  # Change this!

# Create the repository
try:
    create_repo(
        repo_id=repo_id,
        repo_type="model",
        exist_ok=True
    )
    print(f"Repository created: https://huggingface.co/{repo_id}")
except Exception as e:
    # Reported but not fatal; the upload below still runs and will surface
    # any real access problem.
    print(f"Repository might already exist or error: {e}")

# Upload all files in the model directory
api.upload_folder(
    folder_path=model_dir,
    repo_id=repo_id,
    repo_type="model",
)
print(f"Model uploaded successfully to: https://huggingface.co/{repo_id}")
# Cell 7: Create a simple inference script for users
# The template contains literal braces (a dict literal and an f-string), so
# str.format() would raise on them. A dedicated token substituted with
# str.replace() leaves every other brace untouched.
_REPO_ID_TOKEN = "<REPO_ID>"
inference_template = """# Tree Disease Detection Inference
from ultralytics import YOLO
import cv2
import matplotlib.pyplot as plt

# Download and load model from Hugging Face
model = YOLO('https://huggingface.co/<REPO_ID>/resolve/main/tree_disease_detector.pt')


def detect_tree_disease(image_path):
    # Run inference
    results = model(image_path, conf=0.25)
    # Process results
    detections = []
    for result in results:
        boxes = result.boxes
        if boxes is not None:
            for box in boxes:
                detection = {
                    'confidence': float(box.conf[0]),
                    'bbox': box.xyxy[0].tolist(),
                    'class': 'unhealthy'
                }
                detections.append(detection)
    # Visualize
    annotated_img = results[0].plot()
    plt.figure(figsize=(12, 8))
    plt.imshow(cv2.cvtColor(annotated_img, cv2.COLOR_BGR2RGB))
    plt.axis('off')
    plt.title(f'Detected {len(detections)} unhealthy tree(s)')
    plt.show()
    return detections


# Example usage
if __name__ == "__main__":
    detections = detect_tree_disease('path/to/your/image.jpg')
    print(f"Found {len(detections)} unhealthy trees")
"""
inference_script = inference_template.replace(_REPO_ID_TOKEN, repo_id)

with open(os.path.join(model_dir, "inference.py"), "w", encoding="utf-8") as f:
    f.write(inference_script)

# Upload the inference script
api.upload_file(
    path_or_fileobj=os.path.join(model_dir, "inference.py"),
    path_in_repo="inference.py",
    repo_id=repo_id,
    repo_type="model",
)
# Cell 8: Create requirements.txt
requirements = """ultralytics>=8.0.0
torch>=2.0.0
opencv-python>=4.8.0
matplotlib>=3.7.0
pillow>=10.0.0
"""
with open(os.path.join(model_dir, "requirements.txt"), "w", encoding="utf-8") as f:
    f.write(requirements)

# Upload requirements
api.upload_file(
    path_or_fileobj=os.path.join(model_dir, "requirements.txt"),
    path_in_repo="requirements.txt",
    repo_id=repo_id,
    repo_type="model",
)

# Final pointers for the user
print("\nModel successfully uploaded to Hugging Face!")
print(f"View your model at: https://huggingface.co/{repo_id}")
print("\nTo use your model:")
print(f"model = YOLO('https://huggingface.co/{repo_id}/resolve/main/tree_disease_detector.pt')")
# ## Steps to upload your model:
# 1. **Get a Hugging Face token**:
#    - Go to https://huggingface.co/settings/tokens
#    - Create a new token with write permissions
#    - Copy the token
# 2. **Replace placeholder values**:
#    - Change `your-username` to your actual Hugging Face username
#    - Update the metrics in the model card with actual values
# 3. **Run the cells** in order
#
# ## After uploading, others can use your model like this:
# ```python
# from ultralytics import YOLO
# # Load model directly from Hugging Face
# model = YOLO('https://huggingface.co/your-username/yolov8-tree-disease-detection/resolve/main/tree_disease_detector.pt')
# # Run inference
# results = model('image.jpg')