gemma-3-12b-it-sft / archive_checkpoints.sh
pvbhanuteja's picture
Training in progress, step 300
a4c0495 verified
#!/bin/bash
# Archive local training checkpoints to Azure with rclone.
#
# Copies the contents of every checkpoint-* directory under SOURCE_DIR to
# AZURE_DEST, then appends a listing of the checkpoint directories found at
# the destination to LOG_FILE for verification.
#
# Exit status: the exit code of `rclone copy` if the copy failed; otherwise
# the exit code of the final `rclone lsf` listing.

# Fail fast on typos in variable names.
set -u

# Path to the parent directory containing checkpoint folders
readonly SOURCE_DIR="/disk2/bhanu/research/open-r1-main/data/gemma-3-12b-it-sft"
# Azure destination
readonly AZURE_DEST="azai:private-model-checkpoints/gemma-3-12b-it-sft-graphllm"
# Log file (relative path: resolved against the directory the script runs in)
readonly LOG_FILE="rclone_archive.log"

# Append a timestamped message to LOG_FILE.
# Arguments: $* - message text
log_msg() {
  printf '[%s] %s\n' "$(date "+%Y-%m-%d %H:%M:%S")" "$*" >> "$LOG_FILE"
}

log_msg "Starting checkpoint archiving"

# Use rclone copy with optimizations to avoid unnecessary transfers.
# Options live in an array so each stays a separate, correctly quoted word.
copy_args=(
  --progress
  --include "checkpoint-*/**"   # only files inside checkpoint-* directories
  --log-file="$LOG_FILE"
  --stats 10s
  --checksum
  --skip-links
  --transfers=4
  --checkers=8
)

# Capture the exit code instead of assuming success, so a failed transfer is
# never recorded as "completed".
copy_status=0
rclone copy "$SOURCE_DIR" "$AZURE_DEST" "${copy_args[@]}" || copy_status=$?

# Log completion (or failure)
if (( copy_status == 0 )); then
  log_msg "Checkpoint archiving completed"
else
  log_msg "Checkpoint archiving FAILED (rclone exit code ${copy_status})"
  printf 'Checkpoint archiving failed (rclone exit code %d); see %s\n' \
    "$copy_status" "$LOG_FILE" >&2
fi

# Optional: List all checkpoints currently in Azure for verification.
# Done even after a failed copy, so the log shows what actually reached Azure.
log_msg "Current checkpoints in Azure:"
list_status=0
rclone lsf "$AZURE_DEST" --include "checkpoint-*/" >> "$LOG_FILE" \
  || list_status=$?

# A copy failure takes precedence over the listing result.
if (( copy_status != 0 )); then
  exit "$copy_status"
fi
exit "$list_status"