# -*- coding: utf-8 -*-
"""crop_desease_detection.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1PCO8YxMl3tqzsbMVP1iiSylwED-u_VfW
"""

# Complete Pipeline for Tree Disease Detection with PDT Dataset

# Cell 1: Install required packages
# The notebook used IPython "!pip install ..." shell magics, which are a
# SyntaxError in a plain .py file.  Run pip through the current interpreter
# instead; check=False mirrors the notebook, where a failed install does not
# raise (a missing package then surfaces at the imports below).
import subprocess
import sys

subprocess.run(
    [sys.executable, "-m", "pip", "install",
     "ultralytics", "torch", "torchvision", "opencv-python", "matplotlib"],
    check=False,
)
subprocess.run(
    [sys.executable, "-m", "pip", "install", "huggingface_hub"],
    check=False,
)

import os
import shutil
import zipfile
from ultralytics import YOLO
import torch
import cv2
import matplotlib.pyplot as plt
import numpy as np
from huggingface_hub import snapshot_download
from IPython.display import Image, display

# Cell 2: Download the PDT dataset from HuggingFace
print("Downloading PDT dataset from HuggingFace...")

try:
    dataset_path = snapshot_download(
        repo_id='qwer0213/PDT_dataset',
        repo_type='dataset',
        local_dir='/content/PDT_dataset',
        resume_download=True
    )
    print(f"Dataset downloaded to: {dataset_path}")
except Exception as e:
    # Best effort: report and carry on; later cells print their own
    # warnings when the expected files are missing.
    print(f"Error downloading dataset: {e}")

# Cell 3: Find and extract the zip file
print("\nLooking for zip file in downloaded dataset...")

# Find the zip file (the first one encountered wins)
zip_file_path = None
for root, dirs, files in os.walk('/content/PDT_dataset'):
    for file in files:
        if file.endswith('.zip'):
            zip_file_path = os.path.join(root, file)
            print(f"Found zip file: {zip_file_path}")
            break
    if zip_file_path:
        break

if not zip_file_path:
    print("No zip file found in the downloaded dataset!")
else:
    # Extract the zip file
    extract_path = '/content/PDT_dataset_extracted'
    os.makedirs(extract_path, exist_ok=True)

    print(f"Extracting {zip_file_path} to {extract_path}")
    with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
        zip_ref.extractall(extract_path)

    print("Extraction completed!")

# Cell 4: Explore the extracted dataset structure
print("\nExploring dataset structure...")


def explore_dataset_structure(base_path):
    """Walk ``base_path`` and locate the YOLO_txt / VOC_xml directories.

    Args:
        base_path: Root directory of the extracted dataset.

    Returns:
        dict with the keys ``yolo_txt_path``, ``voc_xml_path``,
        ``train_path``, ``val_path`` and ``test_path``.  Each value is a
        directory path, or ``None`` when nothing matching was found.  The
        split paths are the ``train``/``val``/``test`` sub-directories that
        exist directly inside a YOLO_txt directory.  When several
        directories match, the one visited last by ``os.walk`` wins.
    """
    dataset_info = {
        'yolo_txt_path': None,
        'voc_xml_path': None,
        'train_path': None,
        'val_path': None,
        'test_path': None
    }

    for root, dirs, files in os.walk(base_path):
        # Match on the directory's own name, not on the whole path: a
        # substring test against ``root`` also matches every descendant of
        # YOLO_txt / VOC_xml, which left the recorded path pointing at
        # whichever nested sub-directory happened to be visited last.
        dir_name = os.path.basename(os.path.normpath(root))

        # Look for YOLO_txt directory
        if 'YOLO_txt' in dir_name:
            dataset_info['yolo_txt_path'] = root
            print(f"Found YOLO_txt at: {root}")

            # Check for train/val/test
            for split in ['train', 'val', 'test']:
                split_path = os.path.join(root, split)
                if os.path.exists(split_path):
                    dataset_info[f'{split}_path'] = split_path
                    print(f"Found {split} at: {split_path}")

        # Look for VOC_xml directory
        if 'VOC_xml' in dir_name:
            dataset_info['voc_xml_path'] = root
            print(f"Found VOC_xml at: {root}")

    return dataset_info


dataset_info = explore_dataset_structure('/content/PDT_dataset_extracted')

# Cell 5: Setup YOLO dataset from the PDT dataset


def setup_yolo_dataset(dataset_info, output_dir='/content/PDT_yolo'):
    """Copy the extracted PDT splits into a YOLO directory layout.

    Builds ``<output_dir>/images/<split>`` and ``<output_dir>/labels/<split>``
    for train/val/test, copies every image of each available split together
    with its same-named ``.txt`` label (when one exists), and writes
    ``<output_dir>/data.yaml`` describing a single class, ``unhealthy``.

    Note: an existing ``output_dir`` is deleted first.

    Args:
        dataset_info: Mapping as returned by ``explore_dataset_structure``;
            only the ``train_path``/``val_path``/``test_path`` entries are
            read.
        output_dir: Destination directory of the YOLO dataset.

    Returns:
        Path of the written ``data.yaml`` file.
    """
    splits = ['train', 'val', 'test']
    # Compared against the lower-cased file name so that upper-case
    # extensions such as ".JPG" are not silently skipped.
    image_exts = ('.jpg', '.jpeg', '.png')

    print(f"\nSetting up YOLO dataset to {output_dir}")

    # Clean output directory
    if os.path.exists(output_dir):
        shutil.rmtree(output_dir)
    os.makedirs(output_dir, exist_ok=True)

    # Create directory structure
    for split in splits:
        os.makedirs(os.path.join(output_dir, 'images', split), exist_ok=True)
        os.makedirs(os.path.join(output_dir, 'labels', split), exist_ok=True)

    total_copied = 0

    # Process each split
    for split in splits:
        split_path = dataset_info[f'{split}_path']

        if not split_path or not os.path.exists(split_path):
            print(f"Warning: {split} split not found")
            continue

        print(f"\nProcessing {split} from: {split_path}")

        # Find images and labels directories
        img_dir = os.path.join(split_path, 'images')
        lbl_dir = os.path.join(split_path, 'labels')

        if not os.path.exists(img_dir) or not os.path.exists(lbl_dir):
            print(f"Warning: Could not find images or labels for {split}")
            continue

        # Copy images and labels
        img_files = [f for f in os.listdir(img_dir)
                     if f.lower().endswith(image_exts)]
        print(f"Found {len(img_files)} images in {split}")

        for img_file in img_files:
            # Copy image
            src_img = os.path.join(img_dir, img_file)
            dst_img = os.path.join(output_dir, 'images', split, img_file)
            shutil.copy2(src_img, dst_img)

            # Copy corresponding label (same base name, .txt extension)
            base_name = os.path.splitext(img_file)[0]
            txt_file = base_name + '.txt'
            src_txt = os.path.join(lbl_dir, txt_file)
            dst_txt = os.path.join(output_dir, 'labels', split, txt_file)

            if os.path.exists(src_txt):
                shutil.copy2(src_txt, dst_txt)

            # Counts images, whether or not a label was found for them
            total_copied += 1

    # Create data.yaml
    data_yaml_content = f"""# PDT dataset configuration
path: {os.path.abspath(output_dir)}
train: images/train
val: images/val
test: images/test

# Classes
names:
  0: unhealthy

nc: 1
"""

    yaml_path = os.path.join(output_dir, 'data.yaml')
    with open(yaml_path, 'w', encoding='utf-8') as f:
        f.write(data_yaml_content)

    print("\nDataset setup completed!")
    print(f"Total images copied: {total_copied}")

    # Verify the dataset
    for split in splits:
        img_dir = os.path.join(output_dir, 'images', split)
        lbl_dir = os.path.join(output_dir, 'labels', split)

        if os.path.exists(img_dir):
            img_count = len([f for f in os.listdir(img_dir)
                             if f.lower().endswith(image_exts)])
            lbl_count = len([f for f in os.listdir(lbl_dir)
                             if f.endswith('.txt')])
            print(f"{split}: {img_count} images, {lbl_count} labels")

    return yaml_path


# Setup the dataset
data_yaml_path = setup_yolo_dataset(dataset_info)

# Cell 6: Train the model
print("\nStarting model training...")

# Use YOLOv8s model
model = YOLO('yolov8s.yaml')

# Train the model
results = model.train(
    data=data_yaml_path,
    epochs=50,  # Adjust based on your needs
    imgsz=640,
    batch=16,  # Adjust based on GPU memory
    name='yolov8s_pdt',
    patience=10,
    save=True,
    device='0' if torch.cuda.is_available() else 'cpu',
    workers=4,
    project='runs/train',
    exist_ok=True,
    pretrained=False,
    optimizer='SGD',
    lr0=0.01,
    momentum=0.9,
    weight_decay=0.001,
    verbose=True,
    plots=True,
)

print("Training completed!")

# Cell 7: Evaluate the model
print("\nEvaluating model performance...")

# Load the best model
best_model_path = 'runs/train/yolov8s_pdt/weights/best.pt'
model = YOLO(best_model_path)

# Validate
metrics = model.val()

print("\nValidation Metrics:")
print(f"mAP50: {metrics.box.map50:.3f}")
print(f"mAP50-95: {metrics.box.map:.3f}")
print(f"Precision: {metrics.box.p.mean():.3f}")
print(f"Recall: {metrics.box.r.mean():.3f}")

# Cell 8: Test the model
print("\nTesting on sample images...")

# Test on validation images (at most 5 are shown in the 2x3 grid)
val_img_dir = '/content/PDT_yolo/images/val'
val_images = [f for f in os.listdir(val_img_dir)
              if f.lower().endswith(('.jpg', '.jpeg', '.png'))][:5]

fig, axes = plt.subplots(2, 3, figsize=(18, 12))
axes = axes.ravel()

for i, img_name in enumerate(val_images):
    img_path = os.path.join(val_img_dir, img_name)

    # Run inference
    results = model(img_path, conf=0.25)

    # Plot results (plot() output is converted from BGR for matplotlib)
    img_with_boxes = results[0].plot()
    axes[i].imshow(cv2.cvtColor(img_with_boxes, cv2.COLOR_BGR2RGB))
    axes[i].set_title(f'{img_name}')
    axes[i].axis('off')

# Hide every subplot that did not receive an image, not just the last one;
# otherwise empty axes with ticks stay visible when fewer than 5 images exist.
for unused_ax in axes[len(val_images):]:
    unused_ax.axis('off')

plt.tight_layout()
plt.show()

# Cell 9: Create inference function


def detect_tree_disease(image_path, conf_threshold=0.25):
    """Detect unhealthy trees in an image and display the annotated result.

    Runs the module-level ``model`` on ``image_path``, collects one entry per
    predicted box and shows the first result's annotated image with
    matplotlib.

    Args:
        image_path: Image source passed straight to ``model(...)``.
        conf_threshold: Confidence threshold forwarded as ``conf``.

    Returns:
        list of dicts, one per detected box, with the keys ``confidence``
        (float), ``bbox`` (list taken from ``box.xyxy[0]``) and ``class``
        (always the string ``'unhealthy'``).
    """
    results = model(image_path, conf=conf_threshold)

    detections = []
    for result in results:
        boxes = result.boxes
        if boxes is not None:
            for box in boxes:
                detection = {
                    'confidence': float(box.conf[0]),
                    'bbox': box.xyxy[0].tolist(),
                    'class': 'unhealthy'
                }
                detections.append(detection)

    # Visualize
    img_with_boxes = results[0].plot()
    plt.figure(figsize=(12, 8))
    plt.imshow(cv2.cvtColor(img_with_boxes, cv2.COLOR_BGR2RGB))
    plt.axis('off')
    plt.title(f'Detected {len(detections)} unhealthy tree(s)')
    plt.show()

    return detections


# Cell 10: Save the model
print("\nSaving model...")
final_model_path = 'tree_disease_detector.pt'
model.save(final_model_path)
print(f"Model saved to: {final_model_path}")

# Cell 11: Save to Google Drive (optional)
try:
    # Imported inside the try block: outside Colab this module does not
    # exist, and an optional step must not abort the whole script.
    from google.colab import drive

    drive.mount('/content/drive')
    save_dir = '/content/drive/MyDrive/tree_disease_detection'
    os.makedirs(save_dir, exist_ok=True)

    # Copy files
    shutil.copy(best_model_path, os.path.join(save_dir, 'best_model.pt'))
    shutil.copy(final_model_path,
                os.path.join(save_dir, 'tree_disease_detector.pt'))

    # Copy training results
    results_png = 'runs/train/yolov8s_pdt/results.png'
    if os.path.exists(results_png):
        shutil.copy(results_png,
                    os.path.join(save_dir, 'training_results.png'))

    print(f"Results saved to Google Drive: {save_dir}")
except Exception as e:
    # ``Exception`` rather than a bare ``except`` so Ctrl-C still interrupts.
    print("Google Drive not mounted. Results saved locally.")
    print(f"Reason: {e}")

# Cell 12: Summary
print("\n=== Training Complete ===")
print("Model: YOLOv8s")
print("Dataset: PDT (Pests and Diseases Tree)")
print(f"Best Model: {best_model_path}")
print("The model is ready for tree disease detection!")

# Test with your own image
print("\nTo test with your own image:")
print("detections = detect_tree_disease('path/to/your/image.jpg')")

# Cell 1: Install Hugging Face Hub
# Was the notebook magic "!pip install huggingface_hub", which is not valid
# Python; check=False mirrors the notebook's non-raising behaviour.
import subprocess
import sys

subprocess.run(
    [sys.executable, "-m", "pip", "install", "huggingface_hub"],
    check=False,
)

# Cell 2: Login to Hugging Face
from huggingface_hub import login, HfApi, create_repo
import os
import shutil

# Login to Hugging Face (you'll need your token)
# Get your token from: https://huggingface.co/settings/tokens
login()

# Cell 3: Prepare model files for upload
# Create a directory for model files
model_dir = "pdt_tree_disease_model"
os.makedirs(model_dir, exist_ok=True)

# Copy the trained model
best_model_path = 'runs/train/yolov8s_pdt/weights/best.pt'
if os.path.exists(best_model_path):
    shutil.copy(best_model_path, os.path.join(model_dir, "best.pt"))

# Copy the final saved model
if os.path.exists('tree_disease_detector.pt'):
    shutil.copy('tree_disease_detector.pt',
                os.path.join(model_dir, "tree_disease_detector.pt"))

# Copy training results
results_path = 'runs/train/yolov8s_pdt/results.png'
if os.path.exists(results_path):
    shutil.copy(results_path, os.path.join(model_dir, "training_results.png"))

# Copy confusion matrix if exists
confusion_matrix_path = 'runs/train/yolov8s_pdt/confusion_matrix.png'
if os.path.exists(confusion_matrix_path):
    shutil.copy(confusion_matrix_path,
                os.path.join(model_dir, "confusion_matrix.png"))

# Copy other training plots
for plot_file in ['F1_curve.png', 'P_curve.png', 'R_curve.png', 'PR_curve.png']:
    plot_path = f'runs/train/yolov8s_pdt/{plot_file}'
    if os.path.exists(plot_path):
        shutil.copy(plot_path, os.path.join(model_dir, plot_file))

# Cell 4: Create model card (README.md)
# The markdown below has its section headings and code fences restored; the
# usage fence was previously never closed, so everything after it rendered
# as one code block.
model_card = """---
tags:
- object-detection
- yolov8
- tree-disease-detection
- pdt-dataset
library_name: ultralytics
datasets:
- qwer0213/PDT_dataset
metrics:
- mAP50
- mAP50-95
---

# YOLOv8 Tree Disease Detection Model

This model is trained on the PDT (Pests and Diseases Tree) dataset for detecting unhealthy trees using YOLOv8.

## Model Description

- **Architecture**: YOLOv8s
- **Task**: Object Detection (Tree Disease Detection)
- **Classes**: 1 (unhealthy)
- **Input Size**: 640x640
- **Framework**: Ultralytics YOLOv8

## Training Details

- **Dataset**: PDT (Pests and Diseases Tree) dataset
- **Training Images**: 4,536
- **Validation Images**: 567
- **Test Images**: 567
- **Epochs**: 50
- **Batch Size**: 16
- **Optimizer**: SGD
- **Learning Rate**: 0.01

## Performance Metrics

| Metric | Value |
|--------|-------|
| mAP50 | 0.xxx |
| mAP50-95 | 0.xxx |
| Precision | 0.xxx |
| Recall | 0.xxx |

## Usage

```python
from ultralytics import YOLO

# Load model
model = YOLO('tree_disease_detector.pt')

# Run inference
results = model('path/to/image.jpg')

# Process results
for result in results:
    boxes = result.boxes
    if boxes is not None:
        for box in boxes:
            confidence = box.conf[0]
            bbox = box.xyxy[0].tolist()
            print(f"Unhealthy tree detected with confidence: {confidence}")
```

## Dataset

This model was trained on the PDT dataset, which contains high-resolution UAV images of trees with pest and disease annotations.

## Citation

```bibtex
@dataset{pdt_dataset,
  title={PDT: UAV Pests and Diseases Tree Dataset},
  author={Zhou et al.},
  year={2024},
  publisher={HuggingFace}
}
```

## License

MIT License
"""

# Fill in the actual metrics
if 'metrics' in globals() and metrics is not None:
    # Same order as the table rows above.  Each replace is limited to ONE
    # occurrence: an unlimited replace would overwrite all four '0.xxx'
    # placeholders with the first value (mAP50).
    metric_values = (
        metrics.box.map50,
        metrics.box.map,
        metrics.box.p.mean(),
        metrics.box.r.mean(),
    )
    for metric_value in metric_values:
        model_card = model_card.replace('0.xxx', f'{metric_value:.3f}', 1)

# Save model card
with open(os.path.join(model_dir, "README.md"), "w", encoding="utf-8") as f:
    f.write(model_card)

# Cell 5: Create config file
config_content = """# YOLOv8 Tree Disease Detection Configuration
model_type: yolov8s
task: detect
nc: 1  # number of classes
names: ['unhealthy']  # class names

# Input
imgsz: 640

# Inference settings
conf: 0.25  # confidence threshold
iou: 0.45  # IoU threshold for NMS
"""

with open(os.path.join(model_dir, "config.yaml"), "w", encoding="utf-8") as f:
    f.write(config_content)

# Cell 6: Push to Hugging Face Hub
from huggingface_hub import HfApi

# Initialize API
api = HfApi()

# Create repository (replace 'your-username' with your HuggingFace username)
repo_id = "your-username/yolov8-tree-disease-detection"  # Change this!
# Create the repository
try:
    create_repo(
        repo_id=repo_id,
        repo_type="model",
        exist_ok=True
    )
    print(f"Repository created: https://huggingface.co/{repo_id}")
except Exception as e:
    print(f"Repository might already exist or error: {e}")

# Upload all files in the model directory
api.upload_folder(
    folder_path=model_dir,
    repo_id=repo_id,
    repo_type="model",
)

print(f"Model uploaded successfully to: https://huggingface.co/{repo_id}")

# Cell 7: Create a simple inference script for users
# The repository id is substituted with str.replace on a unique token rather
# than str.format: the template contains a dict literal and f-string braces,
# which str.format would try to interpret as replacement fields and fail on.
inference_script = """# Tree Disease Detection Inference
from ultralytics import YOLO
import cv2
import matplotlib.pyplot as plt

# Download and load model from Hugging Face
model = YOLO('https://huggingface.co/__REPO_ID__/resolve/main/tree_disease_detector.pt')

def detect_tree_disease(image_path):
    # Run inference
    results = model(image_path, conf=0.25)

    # Process results
    detections = []
    for result in results:
        boxes = result.boxes
        if boxes is not None:
            for box in boxes:
                detection = {
                    'confidence': float(box.conf[0]),
                    'bbox': box.xyxy[0].tolist(),
                    'class': 'unhealthy'
                }
                detections.append(detection)

    # Visualize
    annotated_img = results[0].plot()
    plt.figure(figsize=(12, 8))
    plt.imshow(cv2.cvtColor(annotated_img, cv2.COLOR_BGR2RGB))
    plt.axis('off')
    plt.title(f'Detected {len(detections)} unhealthy tree(s)')
    plt.show()

    return detections

# Example usage
if __name__ == "__main__":
    detections = detect_tree_disease('path/to/your/image.jpg')
    print(f"Found {len(detections)} unhealthy trees")
""".replace("__REPO_ID__", repo_id)

with open(os.path.join(model_dir, "inference.py"), "w", encoding="utf-8") as f:
    f.write(inference_script)

# Upload the inference script
api.upload_file(
    path_or_fileobj=os.path.join(model_dir, "inference.py"),
    path_in_repo="inference.py",
    repo_id=repo_id,
    repo_type="model",
)

# Cell 8: Create requirements.txt
requirements = """ultralytics>=8.0.0
torch>=2.0.0
opencv-python>=4.8.0
matplotlib>=3.7.0
pillow>=10.0.0
"""

with open(os.path.join(model_dir, "requirements.txt"), "w", encoding="utf-8") as f:
    f.write(requirements)

# Upload requirements
api.upload_file(
    path_or_fileobj=os.path.join(model_dir, "requirements.txt"),
    path_in_repo="requirements.txt",
    repo_id=repo_id,
    repo_type="model",
)

print("\nModel successfully uploaded to Hugging Face!")
print(f"View your model at: https://huggingface.co/{repo_id}")
print("\nTo use your model:")
print(f"model = YOLO('https://huggingface.co/{repo_id}/resolve/main/tree_disease_detector.pt')")

# ## Steps to upload your model:
#
# 1. **Get a Hugging Face token**:
#    - Go to https://huggingface.co/settings/tokens
#    - Create a new token with write permissions
#    - Copy the token
#
# 2. **Replace placeholder values**:
#    - Change `your-username` to your actual Hugging Face username
#    - Update the metrics in the model card with actual values
#
# 3. **Run the cells** in order
#
# ## After uploading, others can use your model like this:
#
# ```python
# from ultralytics import YOLO
#
# # Load model directly from Hugging Face
# model = YOLO('https://huggingface.co/your-username/yolov8-tree-disease-detection/resolve/main/tree_disease_detector.pt')
#
# # Run inference
# results = model('image.jpg')
# ```