Upload upload_to_huggingface.py with huggingface_hub
Browse files- upload_to_huggingface.py +36 -0
upload_to_huggingface.py
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Upload a sample of new training checkpoints to the Hugging Face Hub.

The script looks in the current directory for checkpoint entries whose
names start with ``step-000``, skips everything up to and including the
last entry recorded in ``checkpoint_uploaded.txt``, picks at most ten of
the remaining ones, and uploads them with ``huggingface-cli upload``.
Each successful upload is appended to the log file so that the next run
resumes after it.
"""

import os
import subprocess

REPO = "keeeeenw/MicroLlama2-checkpoints"
LOG_PATH = "checkpoint_uploaded.txt"
CHECKPOINT_PREFIX = "step-000"
MAX_UPLOADS_PER_RUN = 10


def list_checkpoints(directory=".", prefix=CHECKPOINT_PREFIX):
    """Return the sorted names in *directory* that start with *prefix*."""
    return sorted(
        name for name in os.listdir(directory) if name.startswith(prefix)
    )


def read_last_uploaded(log_path=LOG_PATH):
    """Return the last checkpoint name recorded in *log_path*.

    Returns ``""`` when the log does not exist or holds no entries.
    Blank lines are ignored so that a stray empty line at the end of the
    log does not reset progress and trigger a full re-upload.
    """
    try:
        with open(log_path, "r") as log_file:
            entries = [
                line.strip() for line in log_file if line.strip()
            ]
    except FileNotFoundError:
        return ""
    return entries[-1] if entries else ""


def pending_checkpoints(checkpoints, last_uploaded):
    """Return the checkpoints that sort strictly after *last_uploaded*.

    The comparison is lexicographic on the names, so it matches training
    order only as long as the step numbers are zero-padded to equal width.
    An empty *last_uploaded* means nothing was uploaded yet.
    """
    if not last_uploaded:
        return list(checkpoints)
    return [ckpt for ckpt in checkpoints if ckpt > last_uploaded]


def select_checkpoints(remaining, limit=MAX_UPLOADS_PER_RUN):
    """Pick at most *limit* checkpoints from *remaining* at a fixed stride.

    The stride is ``len(remaining) // limit`` (at least 1) starting from
    the first element.  Note that for fewer than ``2 * limit`` entries the
    stride is 1, i.e. simply the first *limit* entries are returned; the
    rest stay pending for a later run.
    """
    if not remaining:
        return []
    stride = max(1, len(remaining) // limit)
    return list(remaining[::stride][:limit])


def run_command(command):
    """Run *command* (an argument list) and return its exit status.

    The command is executed without a shell, so checkpoint names are
    passed through verbatim.  If the executable cannot be started, the
    problem is reported and a non-zero status is returned.
    """
    try:
        return subprocess.call(command)
    except OSError as err:
        print(f"Could not run {command[0]}: {err}")
        return 127


def upload_checkpoints(checkpoints, repo=REPO, log_path=LOG_PATH, runner=None):
    """Upload *checkpoints* in order and log each success to *log_path*.

    *runner* is a callable taking the argument list and returning an exit
    status; it defaults to :func:`run_command`.

    Uploading stops at the first failure.  Continuing would move the
    "last uploaded" marker past the failed checkpoint, and because later
    runs only look at names after that marker, the failed checkpoint
    would never be retried.

    Returns the list of checkpoints that were uploaded successfully.
    """
    if runner is None:
        runner = run_command

    uploaded = []
    with open(log_path, "a") as log_file:
        for checkpoint in checkpoints:
            command = ["huggingface-cli", "upload", repo, checkpoint, checkpoint]
            print(f"Uploading {checkpoint}...")
            status = runner(command)
            if status != 0:
                print(f"Upload of {checkpoint} failed with exit status {status}")
                break
            log_file.write(checkpoint + "\n")
            # Persist progress immediately so an interrupted run does not
            # forget uploads that already completed.
            log_file.flush()
            uploaded.append(checkpoint)
    return uploaded


def main():
    """Run one upload pass; return 0 on success, 1 if an upload failed."""
    checkpoints = list_checkpoints()

    last_uploaded = read_last_uploaded()
    print("Last file uploaded", last_uploaded)

    remaining_checkpoints = pending_checkpoints(checkpoints, last_uploaded)
    print("Remaining files", remaining_checkpoints)

    selected_checkpoints = select_checkpoints(remaining_checkpoints)
    print("Files to upload", selected_checkpoints)

    uploaded = upload_checkpoints(selected_checkpoints)
    return 0 if len(uploaded) == len(selected_checkpoints) else 1


if __name__ == "__main__":
    raise SystemExit(main())
|