# -*- coding: utf-8 -*-
"""crop_desease_detection.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1PCO8YxMl3tqzsbMVP1iiSylwED-u_VfW
"""


# Complete Pipeline for Tree Disease Detection with PDT Dataset

# Cell 1: Install required packages
!pip install ultralytics torch torchvision opencv-python matplotlib
!pip install huggingface_hub

import os
import shutil
import zipfile
from ultralytics import YOLO
import torch
import cv2
import matplotlib.pyplot as plt
import numpy as np
from huggingface_hub import snapshot_download
from IPython.display import Image, display

# Cell 2: Download the PDT dataset from HuggingFace
print("Downloading PDT dataset from HuggingFace...")

# NOTE: `resume_download` is deliberately not passed. huggingface_hub has
# deprecated that argument (interrupted downloads are resumed automatically
# when possible), and passing it would either emit a warning or, where the
# argument is no longer accepted, raise an error that the handler below
# would swallow - leaving the dataset silently missing.
try:
    dataset_path = snapshot_download(
        repo_id='qwer0213/PDT_dataset',
        repo_type='dataset',
        local_dir='/content/PDT_dataset',
    )
    print(f"Dataset downloaded to: {dataset_path}")
except Exception as e:
    # Kept non-fatal on purpose: the following cells read from the fixed
    # '/content/PDT_dataset' location, which may already be populated.
    print(f"Error downloading dataset: {e}")

# Cell 3: Find and extract the zip file
print("\nLooking for zip file in downloaded dataset...")

# Lazily enumerate every *.zip below the download directory; only the first
# hit (in os.walk order) is ever consumed.
zip_candidates = (
    os.path.join(folder, entry)
    for folder, _subdirs, entries in os.walk('/content/PDT_dataset')
    for entry in entries
    if entry.endswith('.zip')
)
zip_file_path = next(zip_candidates, None)

if zip_file_path:
    print(f"Found zip file: {zip_file_path}")

    # Unpack the archive next to the download directory
    extract_path = '/content/PDT_dataset_extracted'
    os.makedirs(extract_path, exist_ok=True)

    print(f"Extracting {zip_file_path} to {extract_path}")
    with zipfile.ZipFile(zip_file_path, 'r') as archive:
        archive.extractall(extract_path)
    print("Extraction completed!")
else:
    print("No zip file found in the downloaded dataset!")

# Cell 4: Explore the extracted dataset structure
print("\nExploring dataset structure...")

def explore_dataset_structure(base_path):
    """Locate the annotation folders and the train/val/test splits.

    Walks ``base_path`` top-down and inspects the *name* of each directory
    (not its full path), so sub-folders of ``YOLO_txt`` / ``VOC_xml`` are not
    mistaken for the annotation folder itself.

    Args:
        base_path: Root directory of the extracted dataset.

    Returns:
        dict with the keys ``yolo_txt_path``, ``voc_xml_path``,
        ``train_path``, ``val_path`` and ``test_path``. Each value is a
        directory path, or ``None`` when nothing matching was found. The
        split paths are the ``train``/``val``/``test`` directories located
        directly inside the ``YOLO_txt`` folder. If several matching folders
        exist, the one visited last by ``os.walk`` wins.
    """
    dataset_info = {
        'yolo_txt_path': None,
        'voc_xml_path': None,
        'train_path': None,
        'val_path': None,
        'test_path': None
    }

    for root, _dirs, _files in os.walk(base_path):
        # normpath strips a trailing separator so basename is never empty
        folder_name = os.path.basename(os.path.normpath(root))

        # Look for the YOLO_txt directory itself
        if 'YOLO_txt' in folder_name:
            dataset_info['yolo_txt_path'] = root
            print(f"Found YOLO_txt at: {root}")

            # Check for train/val/test directly below it
            for split in ('train', 'val', 'test'):
                split_path = os.path.join(root, split)
                if os.path.isdir(split_path):
                    dataset_info[f'{split}_path'] = split_path
                    print(f"Found {split} at: {split_path}")

        # Look for the VOC_xml directory itself
        if 'VOC_xml' in folder_name:
            dataset_info['voc_xml_path'] = root
            print(f"Found VOC_xml at: {root}")

    return dataset_info

dataset_info = explore_dataset_structure('/content/PDT_dataset_extracted')

# Cell 5: Setup YOLO dataset from the PDT dataset
def setup_yolo_dataset(dataset_info, output_dir='/content/PDT_yolo'):
    """Copy the PDT splits into a fresh YOLO-style tree and write data.yaml.

    The output layout is ``<output_dir>/images/<split>`` and
    ``<output_dir>/labels/<split>`` for the ``train``, ``val`` and ``test``
    splits. An existing ``output_dir`` is deleted first.

    Args:
        dataset_info: Mapping with optional ``train_path``, ``val_path`` and
            ``test_path`` entries. Each source split is expected to contain
            ``images`` and ``labels`` sub-directories. Entries that are
            missing, ``None`` or point to a non-existent directory are
            skipped with a warning.
        output_dir: Destination directory for the YOLO dataset.

    Returns:
        Path of the generated ``data.yaml`` file.
    """
    splits = ('train', 'val', 'test')
    image_exts = ('.jpg', '.jpeg', '.png')

    def is_image(file_name):
        # Case-insensitive so that e.g. "IMG_001.JPG" is not silently dropped
        return file_name.lower().endswith(image_exts)

    print(f"\nSetting up YOLO dataset to {output_dir}")

    # Clean output directory
    if os.path.exists(output_dir):
        shutil.rmtree(output_dir)
    os.makedirs(output_dir, exist_ok=True)

    # Create directory structure
    for split in splits:
        os.makedirs(os.path.join(output_dir, 'images', split), exist_ok=True)
        os.makedirs(os.path.join(output_dir, 'labels', split), exist_ok=True)

    # Images and labels are counted separately: an image may have no label file
    images_copied = 0
    labels_copied = 0

    # Process each split
    for split in splits:
        split_path = dataset_info.get(f'{split}_path')

        if not split_path or not os.path.exists(split_path):
            print(f"Warning: {split} split not found")
            continue

        print(f"\nProcessing {split} from: {split_path}")

        # Find images and labels directories
        img_dir = os.path.join(split_path, 'images')
        lbl_dir = os.path.join(split_path, 'labels')

        if not os.path.exists(img_dir) or not os.path.exists(lbl_dir):
            print(f"Warning: Could not find images or labels for {split}")
            continue

        img_files = sorted(f for f in os.listdir(img_dir) if is_image(f))
        print(f"Found {len(img_files)} images in {split}")

        for img_file in img_files:
            # Copy image
            shutil.copy2(
                os.path.join(img_dir, img_file),
                os.path.join(output_dir, 'images', split, img_file),
            )
            images_copied += 1

            # Copy the label that shares the image's base name, if present
            txt_file = os.path.splitext(img_file)[0] + '.txt'
            src_txt = os.path.join(lbl_dir, txt_file)
            if os.path.exists(src_txt):
                shutil.copy2(
                    src_txt,
                    os.path.join(output_dir, 'labels', split, txt_file),
                )
                labels_copied += 1

    # Create data.yaml
    data_yaml_content = f"""# PDT dataset configuration
path: {os.path.abspath(output_dir)}
train: images/train
val: images/val
test: images/test

# Classes
names:
  0: unhealthy
nc: 1
"""

    yaml_path = os.path.join(output_dir, 'data.yaml')
    with open(yaml_path, 'w', encoding='utf-8') as f:
        f.write(data_yaml_content)

    print("\nDataset setup completed!")
    print(f"Total images copied: {images_copied}")
    print(f"Total labels copied: {labels_copied}")

    # Verify the dataset
    for split in splits:
        img_dir = os.path.join(output_dir, 'images', split)
        lbl_dir = os.path.join(output_dir, 'labels', split)
        if os.path.exists(img_dir):
            img_count = sum(1 for f in os.listdir(img_dir) if is_image(f))
            lbl_count = sum(1 for f in os.listdir(lbl_dir) if f.endswith('.txt'))
            print(f"{split}: {img_count} images, {lbl_count} labels")

    return yaml_path

# Setup the dataset
data_yaml_path = setup_yolo_dataset(dataset_info)

# Cell 6: Train the model
print("\nStarting model training...")

# Instantiate YOLOv8s from its architecture definition file
model = YOLO('yolov8s.yaml')

# First CUDA device when one is available, CPU otherwise
train_device = '0' if torch.cuda.is_available() else 'cpu'

# All training settings gathered in one mapping
train_settings = dict(
    data=data_yaml_path,
    epochs=50,  # Adjust based on your needs
    imgsz=640,
    batch=16,  # Adjust based on GPU memory
    name='yolov8s_pdt',
    patience=10,
    save=True,
    device=train_device,
    workers=4,
    project='runs/train',
    exist_ok=True,
    pretrained=False,
    optimizer='SGD',
    lr0=0.01,
    momentum=0.9,
    weight_decay=0.001,
    verbose=True,
    plots=True,
)

# Run training with the settings above
results = model.train(**train_settings)

print("Training completed!")

# Cell 7: Evaluate the model
print("\nEvaluating model performance...")

# Reload the best checkpoint written by the training run
best_model_path = 'runs/train/yolov8s_pdt/weights/best.pt'
model = YOLO(best_model_path)

# Run validation and keep the metrics object for later cells
metrics = model.val()

# Report the headline box metrics, each with three decimals
box_metrics = metrics.box
print(f"\nValidation Metrics:")
print(f"mAP50: {box_metrics.map50:.3f}")
print(f"mAP50-95: {box_metrics.map:.3f}")
print(f"Precision: {box_metrics.p.mean():.3f}")
print(f"Recall: {box_metrics.r.mean():.3f}")

# Cell 8: Test the model
print("\nTesting on sample images...")

# Test on validation images
val_img_dir = '/content/PDT_yolo/images/val'
max_samples = 5  # the 2x3 grid below has room for up to 6
# Sorted so the same images are shown on every run (os.listdir order is
# arbitrary); extension match is case-insensitive.
val_images = sorted(
    f for f in os.listdir(val_img_dir)
    if f.lower().endswith(('.jpg', '.jpeg', '.png'))
)[:max_samples]

fig, axes = plt.subplots(2, 3, figsize=(18, 12))
axes = axes.ravel()

for ax, img_name in zip(axes, val_images):
    img_path = os.path.join(val_img_dir, img_name)

    # Run inference (a dedicated name keeps the training `results` intact)
    sample_results = model(img_path, conf=0.25)

    # Plot results; the annotated image is converted from BGR for matplotlib
    img_with_boxes = sample_results[0].plot()
    ax.imshow(cv2.cvtColor(img_with_boxes, cv2.COLOR_BGR2RGB))
    ax.set_title(f'{img_name}')
    ax.axis('off')

# Hide every grid cell that did not receive an image, not just the last one
for ax in axes[len(val_images):]:
    ax.axis('off')

plt.tight_layout()
plt.show()

# Cell 9: Create inference function
def detect_tree_disease(image_path, conf_threshold=0.25):
    """Run the module-level ``model`` on an image, show it, and list the hits.

    Args:
        image_path: Image passed straight to ``model``.
        conf_threshold: Confidence threshold forwarded as ``conf``.

    Returns:
        A list with one dict per detected box, holding ``confidence``
        (float), ``bbox`` (list taken from ``box.xyxy[0]``) and ``class``
        (always the string ``'unhealthy'``).
    """
    predictions = model(image_path, conf=conf_threshold)

    # Flatten every box of every result into a plain dict
    detections = [
        {
            'confidence': float(box.conf[0]),
            'bbox': box.xyxy[0].tolist(),
            'class': 'unhealthy'
        }
        for prediction in predictions
        if prediction.boxes is not None
        for box in prediction.boxes
    ]

    # Show the first result with its boxes drawn on top
    annotated = predictions[0].plot()
    plt.figure(figsize=(12, 8))
    plt.imshow(cv2.cvtColor(annotated, cv2.COLOR_BGR2RGB))
    plt.axis('off')
    plt.title(f'Detected {len(detections)} unhealthy tree(s)')
    plt.show()

    return detections

# Cell 10: Save the model
print("\nSaving model...")
final_model_path = 'tree_disease_detector.pt'
model.save(final_model_path)
print(f"Model saved to: {final_model_path}")

# Cell 11: Save to Google Drive (optional)
# The import lives inside the try block so that running outside Colab (where
# `google.colab` is not installed) skips this optional step instead of
# aborting the script.
try:
    from google.colab import drive

    drive.mount('/content/drive')

    save_dir = '/content/drive/MyDrive/tree_disease_detection'
    os.makedirs(save_dir, exist_ok=True)

    # Copy files
    shutil.copy(best_model_path, os.path.join(save_dir, 'best_model.pt'))
    shutil.copy(final_model_path, os.path.join(save_dir, 'tree_disease_detector.pt'))

    # Copy training results
    results_png = 'runs/train/yolov8s_pdt/results.png'
    if os.path.exists(results_png):
        shutil.copy(results_png, os.path.join(save_dir, 'training_results.png'))

    print(f"Results saved to Google Drive: {save_dir}")
except Exception as e:
    # `Exception` rather than a bare except, so Ctrl-C still interrupts; the
    # reason is reported instead of being discarded.
    print("Google Drive not mounted. Results saved locally.")
    print(f"Reason: {e}")

# Cell 12: Summary
print("\n=== Training Complete ===")
print("Model: YOLOv8s")
print("Dataset: PDT (Pests and Diseases Tree)")
print(f"Best Model: {best_model_path}")
print("The model is ready for tree disease detection!")

# Test with your own image
print("\nTo test with your own image:")
print("detections = detect_tree_disease('path/to/your/image.jpg')")


# Cell 1: Install Hugging Face Hub
!pip install huggingface_hub

# Cell 2: Login to Hugging Face
from huggingface_hub import login, HfApi, create_repo
import os
import shutil

# Login to Hugging Face (you'll need your token)
# Get your token from: https://huggingface.co/settings/tokens
login()

# Cell 3: Prepare model files for upload
# Staging directory that collects everything to be uploaded
model_dir = "pdt_tree_disease_model"
os.makedirs(model_dir, exist_ok=True)

# Locations of the artifacts produced by the training run
best_model_path = 'runs/train/yolov8s_pdt/weights/best.pt'
results_path = 'runs/train/yolov8s_pdt/results.png'
confusion_matrix_path = 'runs/train/yolov8s_pdt/confusion_matrix.png'

# (source path, file name inside model_dir): trained weights, final saved
# model, training curves, confusion matrix, then the remaining plots
artifacts = [
    (best_model_path, "best.pt"),
    ('tree_disease_detector.pt', "tree_disease_detector.pt"),
    (results_path, "training_results.png"),
    (confusion_matrix_path, "confusion_matrix.png"),
]
artifacts.extend(
    (f'runs/train/yolov8s_pdt/{plot_name}', plot_name)
    for plot_name in ['F1_curve.png', 'P_curve.png', 'R_curve.png', 'PR_curve.png']
)

# Copy whatever exists; artifacts that are absent are skipped silently
for source_path, target_name in artifacts:
    if os.path.exists(source_path):
        shutil.copy(source_path, os.path.join(model_dir, target_name))
# Cell 4: Create model card (README.md)
# The "0.xxx" cells in the metrics table are placeholders that are filled in
# below, in table order, when validation metrics are available.
model_card = """---
tags:
- object-detection
- yolov8
- tree-disease-detection
- pdt-dataset
library_name: ultralytics
datasets:
- qwer0213/PDT_dataset
metrics:
- mAP50
- mAP50-95
---

# YOLOv8 Tree Disease Detection Model

This model is trained on the PDT (Pests and Diseases Tree) dataset for detecting unhealthy trees using YOLOv8.

## Model Description

- **Architecture**: YOLOv8s
- **Task**: Object Detection (Tree Disease Detection)
- **Classes**: 1 (unhealthy)
- **Input Size**: 640x640
- **Framework**: Ultralytics YOLOv8

## Training Details

- **Dataset**: PDT (Pests and Diseases Tree) dataset
- **Training Images**: 4,536
- **Validation Images**: 567
- **Test Images**: 567
- **Epochs**: 50
- **Batch Size**: 16
- **Optimizer**: SGD
- **Learning Rate**: 0.01

## Performance Metrics

| Metric | Value |
|--------|-------|
| mAP50 | 0.xxx |
| mAP50-95 | 0.xxx |
| Precision | 0.xxx |
| Recall | 0.xxx |

## Usage

```python
from ultralytics import YOLO

# Load model
model = YOLO('tree_disease_detector.pt')

# Run inference
results = model('path/to/image.jpg')

# Process results
for result in results:
    boxes = result.boxes
    if boxes is not None:
        for box in boxes:
            confidence = box.conf[0]
            bbox = box.xyxy[0].tolist()
            print(f"Unhealthy tree detected with confidence: {confidence}")
```

## Dataset

This model was trained on the PDT dataset, which contains high-resolution UAV images of trees with pest and disease annotations.

## Citation

```bibtex
@dataset{pdt_dataset,
  title={PDT: UAV Pests and Diseases Tree Dataset},
  author={Zhou et al.},
  year={2024},
  publisher={HuggingFace}
}
```

## License

MIT License
"""

# Fill in the actual metrics
if 'metrics' in globals() and metrics is not None:
    # Same order as the table rows: mAP50, mAP50-95, Precision, Recall
    metric_values = (
        metrics.box.map50,
        metrics.box.map,
        metrics.box.p.mean(),
        metrics.box.r.mean(),
    )
    for metric_value in metric_values:
        # count=1: replace only the next placeholder; without it the first
        # call would overwrite all four cells with the mAP50 value
        model_card = model_card.replace('0.xxx', f'{metric_value:.3f}', 1)

# Save model card
with open(os.path.join(model_dir, "README.md"), "w", encoding="utf-8") as f:
    f.write(model_card)
# Cell 5: Create config file
# The section headers inside the YAML text must be `#` comments; a bare
# word on its own line would make the file invalid YAML.
config_content = """# YOLOv8 Tree Disease Detection Configuration
model_type: yolov8s
task: detect
nc: 1  # number of classes
names: ['unhealthy']  # class names

# Input
imgsz: 640

# Inference settings
conf: 0.25  # confidence threshold
iou: 0.45   # IoU threshold for NMS
"""

with open(os.path.join(model_dir, "config.yaml"), "w") as f:
    f.write(config_content)
# Cell 6: Push to Hugging Face Hub
from huggingface_hub import HfApi

# Initialize API
api = HfApi()

# Create repository (replace 'your-username' with your HuggingFace username)
repo_id = "your-username/yolov8-tree-disease-detection"  # Change this!

# Create the repository
try:
    create_repo(
        repo_id=repo_id,
        repo_type="model",
        exist_ok=True
    )
    print(f"Repository created: https://huggingface.co/{repo_id}")
except Exception as e:
    print(f"Repository might already exist or error: {e}")

# Upload all files in the model directory
api.upload_folder(
    folder_path=model_dir,
    repo_id=repo_id,
    repo_type="model",
)

print(f"Model uploaded successfully to: https://huggingface.co/{repo_id}")
# Cell 7: Create a simple inference script for users
# The template contains literal braces (a dict literal and f-strings), which
# str.format() would try to interpret as replacement fields. A plain token
# substituted with str.replace() avoids that.
inference_script = """# Tree Disease Detection Inference
from ultralytics import YOLO
import cv2
import matplotlib.pyplot as plt

# Download and load model from Hugging Face
model = YOLO('https://huggingface.co/__REPO_ID__/resolve/main/tree_disease_detector.pt')


def detect_tree_disease(image_path):
    # Run inference
    results = model(image_path, conf=0.25)

    # Process results
    detections = []
    for result in results:
        boxes = result.boxes
        if boxes is not None:
            for box in boxes:
                detection = {
                    'confidence': float(box.conf[0]),
                    'bbox': box.xyxy[0].tolist(),
                    'class': 'unhealthy'
                }
                detections.append(detection)

    # Visualize
    annotated_img = results[0].plot()
    plt.figure(figsize=(12, 8))
    plt.imshow(cv2.cvtColor(annotated_img, cv2.COLOR_BGR2RGB))
    plt.axis('off')
    plt.title(f'Detected {len(detections)} unhealthy tree(s)')
    plt.show()

    return detections


# Example usage
if __name__ == "__main__":
    detections = detect_tree_disease('path/to/your/image.jpg')
    print(f"Found {len(detections)} unhealthy trees")
""".replace("__REPO_ID__", repo_id)

with open(os.path.join(model_dir, "inference.py"), "w") as f:
    f.write(inference_script)

# Upload the inference script
api.upload_file(
    path_or_fileobj=os.path.join(model_dir, "inference.py"),
    path_in_repo="inference.py",
    repo_id=repo_id,
    repo_type="model",
)
# Cell 8: Create requirements.txt
requirements = """ultralytics>=8.0.0
torch>=2.0.0
opencv-python>=4.8.0
matplotlib>=3.7.0
pillow>=10.0.0
"""

with open(os.path.join(model_dir, "requirements.txt"), "w") as f:
    f.write(requirements)

# Upload requirements
api.upload_file(
    path_or_fileobj=os.path.join(model_dir, "requirements.txt"),
    path_in_repo="requirements.txt",
    repo_id=repo_id,
    repo_type="model",
)

print("\nModel successfully uploaded to Hugging Face!")
print(f"View your model at: https://huggingface.co/{repo_id}")
print("\nTo use your model:")
print(f"model = YOLO('https://huggingface.co/{repo_id}/resolve/main/tree_disease_detector.pt')")

## Steps to upload your model:

1. **Get a Hugging Face token**:
   - Go to https://huggingface.co/settings/tokens
   - Create a new token with write permissions
   - Copy the token

2. **Replace placeholder values**:
   - Change `your-username` to your actual Hugging Face username
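   - Check the metrics in the model card: the script fills in the `0.xxx` placeholders when the `metrics` variable from the evaluation step is still defined; otherwise replace them manually with the actual values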

3. **Run the cells** in order

## After uploading, others can use your model like this:

```python
from ultralytics import YOLO

# Load model directly from Hugging Face
model = YOLO('https://huggingface.co/your-username/yolov8-tree-disease-detection/resolve/main/tree_disease_detector.pt')

# Run inference
results = model('image.jpg')