crop_desease_detection / crop_desease_detection.py

Upload folder using huggingface_hub

ddf9b49 verified about 2 months ago

17.5 kB

	# -*- coding: utf-8 -*-
	"""crop_desease_detection.ipynb

	Automatically generated by Colab.

	Original file is located at
	https://colab.research.google.com/drive/1PCO8YxMl3tqzsbMVP1iiSylwED-u_VfW
	"""







	# Complete Pipeline for Tree Disease Detection with PDT Dataset

	# Cell 1: Install required packages
	# NOTE: the leading "!" is an IPython/Colab shell escape. These two lines
	# only work when the file is run as notebook cells; a plain Python
	# interpreter rejects them as a syntax error.
	!pip install ultralytics torch torchvision opencv-python matplotlib
	!pip install huggingface_hub

	import os
	import shutil
	import zipfile
	from ultralytics import YOLO
	import torch
	import cv2
	import matplotlib.pyplot as plt
	import numpy as np
	from huggingface_hub import snapshot_download
	from IPython.display import Image, display

	# Cell 2: Fetch the PDT dataset snapshot from the HuggingFace Hub
	print("Downloading PDT dataset from HuggingFace...")

	try:
	    dataset_path = snapshot_download(
	        repo_id='qwer0213/PDT_dataset',
	        repo_type='dataset',
	        local_dir='/content/PDT_dataset',
	        resume_download=True,
	    )
	    print(f"Dataset downloaded to: {dataset_path}")
	except Exception as download_error:
	    # Best effort: report the failure and carry on; the next cell reports
	    # separately when it cannot find an archive to unpack.
	    print(f"Error downloading dataset: {download_error}")

	# Cell 3: Locate the archive inside the snapshot and unpack it
	print("\nLooking for zip file in downloaded dataset...")

	# First "*.zip" met in os.walk order, or None when the tree holds none
	zip_file_path = next(
	    (
	        os.path.join(folder, entry)
	        for folder, _subdirs, entries in os.walk('/content/PDT_dataset')
	        for entry in entries
	        if entry.endswith('.zip')
	    ),
	    None,
	)

	if zip_file_path:
	    print(f"Found zip file: {zip_file_path}")

	    # Unpack next to the download so the raw snapshot stays untouched
	    extract_path = '/content/PDT_dataset_extracted'
	    os.makedirs(extract_path, exist_ok=True)

	    print(f"Extracting {zip_file_path} to {extract_path}")
	    with zipfile.ZipFile(zip_file_path, 'r') as archive:
	        archive.extractall(extract_path)
	    print("Extraction completed!")
	else:
	    print("No zip file found in the downloaded dataset!")

	# Cell 4: Explore the extracted dataset structure
	print("\nExploring dataset structure...")

	def explore_dataset_structure(base_path):
	    """Locate the YOLO_txt / VOC_xml folders and the train/val/test splits.

	    Walks ``base_path`` top-down and looks for directories that are *named*
	    ``YOLO_txt`` or ``VOC_xml``. Matching on the directory name (instead of
	    a substring of the whole path) keeps sub-folders such as
	    ``YOLO_txt/val/labels`` from overwriting the recorded location.

	    Args:
	        base_path: Directory to search. A path that does not exist simply
	            yields no matches.

	    Returns:
	        dict with the keys 'yolo_txt_path', 'voc_xml_path', 'train_path',
	        'val_path' and 'test_path'. Each value is a directory path, or None
	        when nothing was found. The split paths are the 'train', 'val' and
	        'test' directories directly below a YOLO_txt folder. If several
	        YOLO_txt folders exist, the one visited last wins.
	    """
	    dataset_info = {
	        'yolo_txt_path': None,
	        'voc_xml_path': None,
	        'train_path': None,
	        'val_path': None,
	        'test_path': None,
	    }

	    for root, _dirs, _files in os.walk(base_path):
	        # normpath drops a trailing separator so basename is never empty
	        folder_name = os.path.basename(os.path.normpath(root))

	        if folder_name == 'YOLO_txt':
	            dataset_info['yolo_txt_path'] = root
	            print(f"Found YOLO_txt at: {root}")

	            # The splits live directly below the YOLO_txt folder
	            for split in ('train', 'val', 'test'):
	                split_path = os.path.join(root, split)
	                if os.path.isdir(split_path):
	                    dataset_info[f'{split}_path'] = split_path
	                    print(f"Found {split} at: {split_path}")

	        elif folder_name == 'VOC_xml':
	            dataset_info['voc_xml_path'] = root
	            print(f"Found VOC_xml at: {root}")

	    return dataset_info

	dataset_info = explore_dataset_structure('/content/PDT_dataset_extracted')

	# Cell 5: Setup YOLO dataset from the PDT dataset
	def setup_yolo_dataset(dataset_info, output_dir='/content/PDT_yolo'):
	    """Copy the PDT splits into a YOLO directory layout and write data.yaml.

	    The output tree is ``images/<split>`` and ``labels/<split>`` for the
	    train, val and test splits, plus a ``data.yaml`` that declares a single
	    class (``unhealthy``).

	    Args:
	        dataset_info: Mapping with optional 'train_path', 'val_path' and
	            'test_path' entries. Each entry is a directory that holds
	            'images' and 'labels' sub-folders. Missing or empty entries are
	            skipped with a warning instead of raising.
	        output_dir: Destination directory. It is deleted and recreated, so
	            it must not contain anything worth keeping.

	    Returns:
	        Path of the generated data.yaml file.
	    """
	    splits = ('train', 'val', 'test')
	    image_exts = ('.jpg', '.jpeg', '.png')

	    print(f"\nSetting up YOLO dataset to {output_dir}")

	    # Start from a clean tree so files from an earlier run cannot leak in
	    if os.path.exists(output_dir):
	        shutil.rmtree(output_dir)
	    for split in splits:
	        os.makedirs(os.path.join(output_dir, 'images', split), exist_ok=True)
	        os.makedirs(os.path.join(output_dir, 'labels', split), exist_ok=True)

	    total_copied = 0

	    for split in splits:
	        # .get(): a partially filled mapping must not abort the whole setup
	        split_path = dataset_info.get(f'{split}_path')

	        if not split_path or not os.path.exists(split_path):
	            print(f"Warning: {split} split not found")
	            continue

	        print(f"\nProcessing {split} from: {split_path}")

	        img_dir = os.path.join(split_path, 'images')
	        lbl_dir = os.path.join(split_path, 'labels')

	        if not os.path.exists(img_dir) or not os.path.exists(lbl_dir):
	            print(f"Warning: Could not find images or labels for {split}")
	            continue

	        # Compare extensions case-insensitively so "IMG_1.JPG" is kept too
	        img_files = sorted(
	            f for f in os.listdir(img_dir) if f.lower().endswith(image_exts)
	        )
	        print(f"Found {len(img_files)} images in {split}")

	        unlabeled = 0
	        for img_file in img_files:
	            shutil.copy2(
	                os.path.join(img_dir, img_file),
	                os.path.join(output_dir, 'images', split, img_file),
	            )
	            total_copied += 1

	            # The label shares the image's base name with a .txt extension
	            txt_file = os.path.splitext(img_file)[0] + '.txt'
	            src_txt = os.path.join(lbl_dir, txt_file)
	            if os.path.exists(src_txt):
	                shutil.copy2(
	                    src_txt,
	                    os.path.join(output_dir, 'labels', split, txt_file),
	                )
	            else:
	                unlabeled += 1

	        if unlabeled:
	            print(f"Note: {unlabeled} images in {split} have no label file")

	    # Assemble data.yaml line by line so its layout does not depend on how
	    # this source file happens to be indented.
	    yaml_lines = [
	        "# PDT dataset configuration",
	        f"path: {os.path.abspath(output_dir)}",
	        "train: images/train",
	        "val: images/val",
	        "test: images/test",
	        "",
	        "# Classes",
	        "names:",
	        "  0: unhealthy",
	        "nc: 1",
	        "",
	    ]

	    yaml_path = os.path.join(output_dir, 'data.yaml')
	    with open(yaml_path, 'w', encoding='utf-8') as f:
	        f.write("\n".join(yaml_lines))

	    print("\nDataset setup completed!")
	    print(f"Total images copied: {total_copied}")

	    # Report what actually landed in the output tree
	    for split in splits:
	        img_dir = os.path.join(output_dir, 'images', split)
	        lbl_dir = os.path.join(output_dir, 'labels', split)
	        if os.path.exists(img_dir):
	            img_count = sum(
	                1 for f in os.listdir(img_dir) if f.lower().endswith(image_exts)
	            )
	            lbl_count = sum(1 for f in os.listdir(lbl_dir) if f.endswith('.txt'))
	            print(f"{split}: {img_count} images, {lbl_count} labels")

	    return yaml_path

	# Build the YOLO-format dataset; the returned data.yaml path feeds training
	data_yaml_path = setup_yolo_dataset(dataset_info)

	# Cell 6: Train the model
	print("\nStarting model training...")

	# YOLOv8s, created from its architecture file
	model = YOLO('yolov8s.yaml')

	# All training options in one place; tune epochs/batch to the hardware
	training_options = dict(
	    data=data_yaml_path,
	    epochs=50,  # Adjust based on your needs
	    imgsz=640,
	    batch=16,  # Adjust based on GPU memory
	    name='yolov8s_pdt',
	    patience=10,
	    save=True,
	    device='0' if torch.cuda.is_available() else 'cpu',
	    workers=4,
	    project='runs/train',
	    exist_ok=True,
	    pretrained=False,
	    optimizer='SGD',
	    lr0=0.01,
	    momentum=0.9,
	    weight_decay=0.001,
	    verbose=True,
	    plots=True,
	)
	results = model.train(**training_options)

	print("Training completed!")

	# Cell 7: Evaluate the model
	print("\nEvaluating model performance...")

	# Reload the checkpoint stored under the training run's weights folder
	best_model_path = 'runs/train/yolov8s_pdt/weights/best.pt'
	model = YOLO(best_model_path)

	# Run validation and keep the metrics object for later cells
	metrics = model.val()

	# One print call, same text as printing the five lines one after another
	print("\n".join([
	    "\nValidation Metrics:",
	    f"mAP50: {metrics.box.map50:.3f}",
	    f"mAP50-95: {metrics.box.map:.3f}",
	    f"Precision: {metrics.box.p.mean():.3f}",
	    f"Recall: {metrics.box.r.mean():.3f}",
	]))

	# Cell 8: Test the model
	print("\nTesting on sample images...")

	# Up to five validation images; sorted so repeated runs show the same ones
	val_img_dir = '/content/PDT_yolo/images/val'
	val_images = sorted(
	    f for f in os.listdir(val_img_dir)
	    if f.lower().endswith(('.jpg', '.jpeg', '.png'))
	)[:5]

	fig, axes = plt.subplots(2, 3, figsize=(18, 12))
	axes = axes.ravel()

	for ax, img_name in zip(axes, val_images):
	    img_path = os.path.join(val_img_dir, img_name)

	    # Run inference
	    results = model(img_path, conf=0.25)

	    # plot() output is passed through a BGR->RGB conversion for matplotlib
	    img_with_boxes = results[0].plot()
	    ax.imshow(cv2.cvtColor(img_with_boxes, cv2.COLOR_BGR2RGB))
	    ax.set_title(f'{img_name}')
	    ax.axis('off')

	# Hide every panel that received no image, not just the last one
	for ax in axes[len(val_images):]:
	    ax.axis('off')

	plt.tight_layout()
	plt.show()

	# Cell 9: Create inference function
	def detect_tree_disease(image_path, conf_threshold=0.25):
	    """Run the module-level ``model`` on one image and show the result.

	    Args:
	        image_path: Image source handed straight to ``model``.
	        conf_threshold: Confidence value passed to the model as ``conf``.

	    Returns:
	        A list with one dict per detected box, holding 'confidence'
	        (float), 'bbox' (the box's ``xyxy`` values as a list) and 'class'
	        (always 'unhealthy').
	    """
	    predictions = model(image_path, conf=conf_threshold)

	    # Flatten the boxes of every result into plain dictionaries
	    detections = [
	        {
	            'confidence': float(box.conf[0]),
	            'bbox': box.xyxy[0].tolist(),
	            'class': 'unhealthy',
	        }
	        for prediction in predictions
	        if prediction.boxes is not None
	        for box in prediction.boxes
	    ]

	    # Draw the annotated first result
	    annotated = predictions[0].plot()
	    plt.figure(figsize=(12, 8))
	    plt.imshow(cv2.cvtColor(annotated, cv2.COLOR_BGR2RGB))
	    plt.axis('off')
	    plt.title(f'Detected {len(detections)} unhealthy tree(s)')
	    plt.show()

	    return detections

	# Cell 10: Save the model
	print("\nSaving model...")
	final_model_path = 'tree_disease_detector.pt'
	model.save(final_model_path)
	print(f"Model saved to: {final_model_path}")

	# Cell 11: Save to Google Drive (optional)
	try:
	    # Imported inside the try block: outside Colab the package is missing,
	    # and this optional step should then be skipped, not crash the script.
	    from google.colab import drive

	    drive.mount('/content/drive')

	    save_dir = '/content/drive/MyDrive/tree_disease_detection'
	    os.makedirs(save_dir, exist_ok=True)

	    # Copy files
	    shutil.copy(best_model_path, os.path.join(save_dir, 'best_model.pt'))
	    shutil.copy(final_model_path, os.path.join(save_dir, 'tree_disease_detector.pt'))

	    # Copy training results
	    results_png = 'runs/train/yolov8s_pdt/results.png'
	    if os.path.exists(results_png):
	        shutil.copy(results_png, os.path.join(save_dir, 'training_results.png'))

	    print(f"Results saved to Google Drive: {save_dir}")
	except Exception as drive_error:
	    # "except Exception" lets KeyboardInterrupt/SystemExit propagate; the
	    # reason is printed because a failed copy also ends up here.
	    print("Google Drive not mounted. Results saved locally.")
	    print(f"Reason: {drive_error}")

	# Cell 12: Summary
	print("\n=== Training Complete ===")
	print("Model: YOLOv8s")
	print("Dataset: PDT (Pests and Diseases Tree)")
	print(f"Best Model: {best_model_path}")
	print("The model is ready for tree disease detection!")

	# Test with your own image
	print("\nTo test with your own image:")
	print("detections = detect_tree_disease('path/to/your/image.jpg')")









	# Cell 1: Install Hugging Face Hub
	# NOTE: "!" is an IPython/Colab shell escape; this line only works when run
	# as a notebook cell and is a syntax error in a plain Python interpreter.
	!pip install huggingface_hub

	# Cell 2: Login to Hugging Face
	from huggingface_hub import login, HfApi, create_repo
	import os
	import shutil

	# Authenticate against Hugging Face (a token is required)
	# Tokens are created at: https://huggingface.co/settings/tokens
	login()

	# Cell 3: Prepare model files for upload
	# Staging directory that will later be pushed as a whole
	model_dir = "pdt_tree_disease_model"
	os.makedirs(model_dir, exist_ok=True)

	best_model_path = 'runs/train/yolov8s_pdt/weights/best.pt'
	results_path = 'runs/train/yolov8s_pdt/results.png'
	confusion_matrix_path = 'runs/train/yolov8s_pdt/confusion_matrix.png'

	# (source file, file name inside model_dir), listed in copy order:
	# trained weights, final saved model, results plot, confusion matrix
	artifacts = [
	    (best_model_path, "best.pt"),
	    ('tree_disease_detector.pt', "tree_disease_detector.pt"),
	    (results_path, "training_results.png"),
	    (confusion_matrix_path, "confusion_matrix.png"),
	]
	# Remaining training plots keep their original file names
	artifacts += [
	    (f'runs/train/yolov8s_pdt/{curve}', curve)
	    for curve in ['F1_curve.png', 'P_curve.png', 'R_curve.png', 'PR_curve.png']
	]

	# Every artifact is optional: copy only what the training run produced
	for source_file, target_name in artifacts:
	    if os.path.exists(source_file):
	        shutil.copy(source_file, os.path.join(model_dir, target_name))

	# Cell 4: Create model card (README.md)
	import textwrap

	# dedent() strips the indentation this literal carries in the source file,
	# so the YAML front matter and the markdown start at column 0 in README.md.
	model_card = textwrap.dedent("""\
	    ---
	    tags:
	    - object-detection
	    - yolov8
	    - tree-disease-detection
	    - pdt-dataset
	    library_name: ultralytics
	    datasets:
	    - qwer0213/PDT_dataset
	    metrics:
	    - mAP50
	    - mAP50-95
	    ---

	    # YOLOv8 Tree Disease Detection Model

	    This model is trained on the PDT (Pests and Diseases Tree) dataset for detecting unhealthy trees using YOLOv8.

	    ## Model Description

	    - Architecture: YOLOv8s
	    - Task: Object Detection (Tree Disease Detection)
	    - Classes: 1 (unhealthy)
	    - Input Size: 640x640
	    - Framework: Ultralytics YOLOv8

	    ## Training Details

	    - Dataset: PDT (Pests and Diseases Tree) dataset
	    - Training Images: 4,536
	    - Validation Images: 567
	    - Test Images: 567
	    - Epochs: 50
	    - Batch Size: 16
	    - Optimizer: SGD
	    - Learning Rate: 0.01

	    ## Performance Metrics

	    | Metric | Value |
	    |--------|-------|
	    | mAP50 | 0.xxx |
	    | mAP50-95 | 0.xxx |
	    | Precision | 0.xxx |
	    | Recall | 0.xxx |

	    ## Usage

	    ```python
	    from ultralytics import YOLO

	    # Load model
	    model = YOLO('tree_disease_detector.pt')

	    # Run inference
	    results = model('path/to/image.jpg')

	    # Process results
	    for result in results:
	        boxes = result.boxes
	        if boxes is not None:
	            for box in boxes:
	                confidence = box.conf[0]
	                bbox = box.xyxy[0].tolist()
	                print(f"Unhealthy tree detected with confidence: {confidence}")
	    ```

	    ## Dataset

	    This model was trained on the PDT dataset, which contains high-resolution UAV images of trees with pest and disease annotations.

	    ## Citation

	    ```bibtex
	    @dataset{pdt_dataset,
	      title={PDT: UAV Pests and Diseases Tree Dataset},
	      author={Zhou et al.},
	      year={2024},
	      publisher={HuggingFace}
	    }
	    ```

	    ## License

	    MIT License
	    """)

	# Fill in the actual metrics
	if 'metrics' in globals() and metrics is not None:
	    # Same order as the table rows: mAP50, mAP50-95, Precision, Recall
	    metric_values = (
	        metrics.box.map50,
	        metrics.box.map,
	        metrics.box.p.mean(),
	        metrics.box.r.mean(),
	    )
	    for metric_value in metric_values:
	        # count=1 consumes only the next '0.xxx'; without it the first call
	        # would overwrite all four placeholders with the mAP50 value.
	        model_card = model_card.replace('0.xxx', f'{metric_value:.3f}', 1)

	# Save model card
	with open(os.path.join(model_dir, "README.md"), "w", encoding="utf-8") as f:
	    f.write(model_card)
	# Cell 5: Create config file
	# Built from a list of lines so the YAML never picks up this file's own
	# indentation; the section titles are YAML comments, not bare words.
	config_content = "\n".join([
	    "# YOLOv8 Tree Disease Detection Configuration",
	    "model_type: yolov8s",
	    "task: detect",
	    "nc: 1 # number of classes",
	    "names: ['unhealthy'] # class names",
	    "",
	    "# Input",
	    "imgsz: 640",
	    "",
	    "# Inference settings",
	    "conf: 0.25 # confidence threshold",
	    "iou: 0.45 # IoU threshold for NMS",
	    "",
	])

	with open(os.path.join(model_dir, "config.yaml"), "w", encoding="utf-8") as f:
	    f.write(config_content)
	# Cell 6: Push to Hugging Face Hub
	from huggingface_hub import HfApi

	# Initialize API
	api = HfApi()

	# Target repository (replace 'your-username' with your HuggingFace username)
	repo_id = "your-username/yolov8-tree-disease-detection"  # Change this!

	# Create the repository; exist_ok=True is passed so an existing repository
	# is meant to be reused rather than treated as an error
	try:
	    create_repo(
	        repo_id=repo_id,
	        repo_type="model",
	        exist_ok=True,
	    )
	    print(f"Repository created: https://huggingface.co/{repo_id}")
	except Exception as e:
	    # Reported but not fatal: the upload below is still attempted
	    print(f"Repository might already exist or error: {e}")

	# Upload all files in the model directory
	api.upload_folder(
	    folder_path=model_dir,
	    repo_id=repo_id,
	    repo_type="model",
	)

	print(f"Model uploaded successfully to: https://huggingface.co/{repo_id}")
	# Cell 7: Create a simple inference script for users
	import textwrap

	# The template contains literal braces (a dict literal and f-strings), so
	# str.format() cannot be used on it. The repository id is inserted with a
	# plain placeholder replacement instead. dedent() removes the indentation
	# this literal carries in the source file.
	inference_script = textwrap.dedent("""\
	    # Tree Disease Detection Inference
	    from ultralytics import YOLO
	    import cv2
	    import matplotlib.pyplot as plt

	    # Download and load model from Hugging Face
	    model = YOLO('https://huggingface.co/REPO_ID_PLACEHOLDER/resolve/main/tree_disease_detector.pt')


	    def detect_tree_disease(image_path):
	        # Run inference
	        results = model(image_path, conf=0.25)

	        # Process results
	        detections = []
	        for result in results:
	            boxes = result.boxes
	            if boxes is not None:
	                for box in boxes:
	                    detection = {
	                        'confidence': float(box.conf[0]),
	                        'bbox': box.xyxy[0].tolist(),
	                        'class': 'unhealthy'
	                    }
	                    detections.append(detection)

	        # Visualize
	        annotated_img = results[0].plot()
	        plt.figure(figsize=(12, 8))
	        plt.imshow(cv2.cvtColor(annotated_img, cv2.COLOR_BGR2RGB))
	        plt.axis('off')
	        plt.title(f'Detected {len(detections)} unhealthy tree(s)')
	        plt.show()

	        return detections


	    # Example usage
	    if __name__ == "__main__":
	        detections = detect_tree_disease('path/to/your/image.jpg')
	        print(f"Found {len(detections)} unhealthy trees")
	    """).replace("REPO_ID_PLACEHOLDER", repo_id)

	with open(os.path.join(model_dir, "inference.py"), "w", encoding="utf-8") as f:
	    f.write(inference_script)

	# Upload the inference script
	api.upload_file(
	    path_or_fileobj=os.path.join(model_dir, "inference.py"),
	    path_in_repo="inference.py",
	    repo_id=repo_id,
	    repo_type="model",
	)
	# Cell 8: Create requirements.txt
	# One requirement per line, joined explicitly so the file content does not
	# depend on how this source file is indented.
	requirements = "\n".join([
	    "ultralytics>=8.0.0",
	    "torch>=2.0.0",
	    "opencv-python>=4.8.0",
	    "matplotlib>=3.7.0",
	    "pillow>=10.0.0",
	    "",
	])

	with open(os.path.join(model_dir, "requirements.txt"), "w", encoding="utf-8") as f:
	    f.write(requirements)

	# Upload requirements
	api.upload_file(
	    path_or_fileobj=os.path.join(model_dir, "requirements.txt"),
	    path_in_repo="requirements.txt",
	    repo_id=repo_id,
	    repo_type="model",
	)

	print("\nModel successfully uploaded to Hugging Face!")
	print(f"View your model at: https://huggingface.co/{repo_id}")
	print("\nTo use your model:")
	print(f"model = YOLO('https://huggingface.co/{repo_id}/resolve/main/tree_disease_detector.pt')")

	## Steps to upload your model:

	1. Get a Hugging Face token:
	- Go to https://huggingface.co/settings/tokens
	- Create a new token with write permissions
	- Copy the token

	2. Replace placeholder values:
	- Change `your-username` to your actual Hugging Face username
	- Update the metrics in the model card with actual values

	3. Run the cells in order

	## After uploading, others can use your model like this:

	```python
	from ultralytics import YOLO

	# Load model directly from Hugging Face
	model = YOLO('https://huggingface.co/your-username/yolov8-tree-disease-detection/resolve/main/tree_disease_detector.pt')

	# Run inference
	results = model('image.jpg')