Upload folder using huggingface_hub
- .DS_Store +0 -0
- checkpoints/complete_diffusion_model.pth +3 -0
- checkpoints/diffusion_model_final.pth +3 -0
- checkpoints/inference_example.py +29 -0
- checkpoints/model_info.json +36 -0
- cifar10-diffusion-model.zip +3 -0
- implementation.ipynb +0 -0
- readme.md +92 -0
.DS_Store
ADDED
Binary file (6.15 kB).
checkpoints/complete_diffusion_model.pth
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:4266de3549124b61530bf87d88d108d0dca3602161f47a9a0979af9fd0d76c71
size 67281530
checkpoints/diffusion_model_final.pth
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:e3766a051e2774e04e0d07f33b86faf4e14581077660e8882851a3016c23f2c8
size 201861354
checkpoints/inference_example.py
ADDED
@@ -0,0 +1,29 @@
# Inference script for the trained diffusion model
import torch
import torch.nn as nn
import torch.nn.functional as F
import matplotlib.pyplot as plt
from tqdm import tqdm
import math

# [Copy all the model architecture classes here - TimeEmbedding, ResidualBlock, etc.]

def load_model(checkpoint_path, device='cuda'):
    """Load the trained diffusion model"""
    checkpoint = torch.load(checkpoint_path, map_location=device)

    # Initialize model with saved config
    model = SimpleUNet(**checkpoint['model_config'])
    model.load_state_dict(checkpoint['model_state_dict'])
    model.to(device)
    model.eval()

    # Initialize scheduler
    scheduler = DDPMScheduler(**checkpoint['diffusion_config'], device=device)

    return model, scheduler, checkpoint['model_info']

# Usage example:
# model, scheduler, info = load_model('complete_diffusion_model.pth')
# generated_images = generate_images(model, scheduler, num_images=4)
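The usage comment above calls `generate_images`, which is defined in `implementation.ipynb` rather than in this script. As a rough guide, a minimal DDPM ancestral sampler could look like the sketch below; the attribute names on `scheduler` (`num_timesteps`, `betas`, `alphas`, `alphas_cumprod`) and the `model(x, t)` call signature are assumptions, not the repository's confirmed API.

```python
# Hypothetical sketch of a DDPM ancestral sampler -- the real generate_images
# lives in implementation.ipynb. Scheduler attribute names are assumptions.
import torch

@torch.no_grad()
def generate_images(model, scheduler, num_images=4, image_size=32, device='cuda'):
    # Start from pure Gaussian noise x_T
    x = torch.randn(num_images, 3, image_size, image_size, device=device)
    for t in reversed(range(scheduler.num_timesteps)):
        t_batch = torch.full((num_images,), t, device=device, dtype=torch.long)
        eps = model(x, t_batch)  # predicted noise eps_theta(x_t, t)
        beta_t = scheduler.betas[t]
        alpha_t = scheduler.alphas[t]
        alpha_bar_t = scheduler.alphas_cumprod[t]
        # DDPM posterior mean: (x_t - beta_t / sqrt(1 - alpha_bar_t) * eps) / sqrt(alpha_t)
        mean = (x - beta_t / torch.sqrt(1.0 - alpha_bar_t) * eps) / torch.sqrt(alpha_t)
        if t > 0:
            # Add noise with variance beta_t at every step except the last
            x = mean + torch.sqrt(beta_t) * torch.randn_like(x)
        else:
            x = mean
    return x.clamp(-1, 1)
```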
checkpoints/model_info.json
ADDED
@@ -0,0 +1,36 @@
{
  "model_name": "CIFAR-10 Diffusion Model",
  "architecture": "SimpleUNet",
  "dataset": "CIFAR-10",
  "training_details": {
    "epochs": 20,
    "batch_size": 128,
    "learning_rate": 0.0001,
    "optimizer": "AdamW",
    "scheduler": "CosineAnnealingLR",
    "parameters": 16808835,
    "training_time_minutes": 14.54,
    "final_loss": 0.0363,
    "best_loss": 0.0358
  },
  "model_config": {
    "in_channels": 3,
    "out_channels": 3,
    "time_emb_dim": 128,
    "image_size": 32
  },
  "diffusion_config": {
    "num_timesteps": 1000,
    "beta_start": 0.0001,
    "beta_end": 0.02,
    "schedule": "linear"
  },
  "hardware": {
    "gpu": "NVIDIA GeForce RTX 3060",
    "vram_used": "0.43 GB",
    "total_vram": "11.66 GB"
  },
  "created_date": "2025-07-19T17:59:48.665409",
  "framework": "PyTorch",
  "python_version": "3.12"
}
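The `diffusion_config` block above fully determines the noise schedule under the standard DDPM formulation. As an illustration (not the repository's `DDPMScheduler` implementation), the quantities a sampler needs can be derived from those three numbers:

```python
# Noise schedule implied by diffusion_config (standard DDPM linear schedule;
# illustrative, not copied from the repository's DDPMScheduler).
import torch

num_timesteps, beta_start, beta_end = 1000, 0.0001, 0.02

betas = torch.linspace(beta_start, beta_end, num_timesteps)  # beta_t
alphas = 1.0 - betas                                         # alpha_t
alphas_cumprod = torch.cumprod(alphas, dim=0)                # alpha_bar_t

# Closed-form forward process: x_t = sqrt(alpha_bar_t) * x_0 + sqrt(1 - alpha_bar_t) * eps.
# By the last step alpha_bar is ~4e-5, so x_T is effectively pure Gaussian noise.
print(alphas_cumprod[0].item(), alphas_cumprod[-1].item())
```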
cifar10-diffusion-model.zip
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:514be590d30a8d5a8207e3f18cb7fd46d4aebbc8fb2130df30645e20dff9b412
size 246459190
implementation.ipynb
ADDED
The diff for this file is too large to render.
readme.md
ADDED
@@ -0,0 +1,92 @@
# CIFAR-10 Diffusion Model

🎨 **A diffusion model trained from scratch on the CIFAR-10 dataset**

## Model Details
- **Architecture**: SimpleUNet with 16.8M parameters
- **Dataset**: CIFAR-10 (50,000 training images)
- **Training Time**: 14.54 minutes on RTX 3060
- **Final Loss**: 0.0363
- **Image Size**: 32x32 RGB
- **Framework**: PyTorch

## Quick Start

```python
import torch
from model import SimpleUNet, DDPMScheduler, generate_images

# Load the trained model
checkpoint = torch.load('complete_diffusion_model.pth')
model = SimpleUNet(**checkpoint['model_config'])
model.load_state_dict(checkpoint['model_state_dict'])
model.eval()

# Initialize scheduler
scheduler = DDPMScheduler(**checkpoint['diffusion_config'])

# Generate images
generated_images = generate_images(model, scheduler, num_images=8)
```

## Installation

```bash
pip install "torch>=2.0.0" "torchvision>=0.15.0" matplotlib tqdm pillow numpy
```

## Files Included
- `complete_diffusion_model.pth` - Complete model with config (64MB)
- `model_info.json` - Training details and metadata
- `diffusion_model_final.pth` - Final training checkpoint (193MB)
- `inference_example.py` - Ready-to-use inference script

## Training Details
- **Epochs**: 20
- **Batch Size**: 128
- **Learning Rate**: 1e-4 (CosineAnnealingLR)
- **Optimizer**: AdamW
- **GPU**: NVIDIA RTX 3060 (0.43GB VRAM used)
- **Loss Reduction**: 73% (from 0.1349 to 0.0363)

## Hardware Requirements
- **Minimum**: 1GB VRAM for inference
- **Recommended**: 2GB+ VRAM for training extensions
- **CPU**: Works, but noticeably slower

## Results
The model generates colorful abstract patterns that capture CIFAR-10's color distributions. With more training epochs (50-100), it should produce more recognizable objects.

## Improvements
To get better results:
1. **Train longer**: 50-100 epochs instead of 20
2. **Larger model**: Increase channels/layers
3. **Advanced sampling**: DDIM or DPM-Solver (see the sketch after this list)
4. **Richer datasets**: CelebA, ImageNet
5. **Learning rate**: Experiment with schedules

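As a sketch of improvement 3: deterministic DDIM sampling reuses the same trained noise predictor but steps through a strided subset of timesteps, cutting 1000 denoising steps down to around 50. This is an illustrative implementation assuming the `model(x, t)` noise-prediction interface and the `alphas_cumprod` tensor from the linear schedule, not code shipped with this model:

```python
# Deterministic DDIM sampler sketch (eta = 0); assumes the model predicts
# noise and alphas_cumprod comes from the same linear schedule. Illustrative only.
import torch

@torch.no_grad()
def ddim_sample(model, alphas_cumprod, num_images=4, steps=50, image_size=32, device='cuda'):
    alphas_cumprod = alphas_cumprod.to(device)
    # Stride the 1000 training timesteps down to `steps` inference timesteps
    timesteps = torch.linspace(len(alphas_cumprod) - 1, 0, steps).long()
    x = torch.randn(num_images, 3, image_size, image_size, device=device)
    for i, t in enumerate(timesteps):
        t_batch = torch.full((num_images,), int(t), device=device, dtype=torch.long)
        eps = model(x, t_batch)
        a_t = alphas_cumprod[int(t)]
        a_prev = alphas_cumprod[int(timesteps[i + 1])] if i + 1 < steps else torch.tensor(1.0, device=device)
        # Predict x_0 from (x_t, eps), then jump deterministically to the previous timestep
        x0_pred = (x - torch.sqrt(1 - a_t) * eps) / torch.sqrt(a_t)
        x = torch.sqrt(a_prev) * x0_pred + torch.sqrt(1 - a_prev) * eps
    return x.clamp(-1, 1)
```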
## Model Architecture
- **U-Net based** with ResNet blocks
- **Time embedding** for diffusion timesteps (see the sketch below)
- **Attention layers** at multiple resolutions
- **Skip connections** for better gradient flow

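For reference, the standard sinusoidal timestep embedding from the Transformer/DDPM literature is sketched below, using the `time_emb_dim: 128` from `model_config`; the repository's actual `TimeEmbedding` module may differ in details.

```python
# Standard sinusoidal timestep embedding (Vaswani et al. / DDPM); the
# repository's TimeEmbedding module may differ in details.
import math
import torch

def sinusoidal_time_embedding(t: torch.Tensor, dim: int = 128) -> torch.Tensor:
    """Map integer timesteps of shape (batch,) to embeddings of shape (batch, dim)."""
    half = dim // 2
    freqs = torch.exp(-math.log(10000.0) * torch.arange(half, device=t.device) / half)
    args = t.float()[:, None] * freqs[None, :]                    # (batch, half)
    return torch.cat([torch.sin(args), torch.cos(args)], dim=-1)  # (batch, dim)

emb = sinusoidal_time_embedding(torch.tensor([0, 10, 999]))
print(emb.shape)  # torch.Size([3, 128])
```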
## Citation
```bibtex
@misc{cifar10-diffusion-2025,
  title={CIFAR-10 Diffusion Model},
  author={Your Name},
  year={2025},
  url={https://github.com/your-username/cifar10-diffusion}
}
```

## License
MIT License - Feel free to use and modify!

---
**Created**: July 19, 2025
**Training Time**: 14.54 minutes
**GPU**: NVIDIA RTX 3060
**Framework**: PyTorch