feat: add util script to upload/download checkpoints

main
thinhlpg 1 month ago
parent 9009440663
commit e7915a6a8e

@ -0,0 +1,37 @@
"""Download a model repository from the HuggingFace Hub.

Fetches a snapshot of ``REPO_ID`` into the local directory ``LOCAL_DIR``.
The access token is read from the ``HF_TOKEN`` environment variable, which
may be provided through a ``.env`` file.
"""
import os

from dotenv import load_dotenv
from huggingface_hub import snapshot_download

# Load .env before reading HF_TOKEN below; override=True lets values from
# the .env file replace variables that are already set in the environment.
load_dotenv(override=True)

# --- Configuration -------------------------------------------------------
HF_TOKEN = os.environ.get("HF_TOKEN")  # None when the variable is unset
REPO_ID = "janhq/250403-runpod-qwen7b-r1-distil"  # Hub repo to fetch
LOCAL_DIR = "downloaded_model"  # Destination directory for the snapshot

# Glob patterns for files that could be skipped during download.
# NOTE: this list is not passed to snapshot_download() below, so no
# filtering is applied. Add ignore_patterns=IGNORE_PATTERNS to the call
# to enable it.
IGNORE_PATTERNS = [
    "*.log",     # log files
    "*.pyc",     # Python bytecode cache
    ".git*",     # Git metadata
    "*.bin",     # binary files
    "*.pt",      # PyTorch checkpoints
    "*.ckpt",    # checkpoints
    "events.*",  # TensorBoard event files
    "wandb/*",   # Weights & Biases run data
    "runs/*",    # training runs
]

# --- Download ------------------------------------------------------------
snapshot_download(
    repo_id=REPO_ID,
    local_dir=LOCAL_DIR,
    token=HF_TOKEN,
)
print(f"✅ Done: {REPO_ID} -> {LOCAL_DIR}")

@ -0,0 +1,39 @@
"""Upload a local directory to the HuggingFace Hub.

Creates the model repository ``REPO_ID`` as private (an already existing
repository is accepted) and uploads the contents of ``LOCAL_DIR`` to it.
The access token is read from the ``HF_TOKEN`` environment variable, which
may be provided through a ``.env`` file.
"""
import os

from dotenv import load_dotenv
from huggingface_hub import HfApi

# Load .env before reading HF_TOKEN below; override=True lets values from
# the .env file replace variables that are already set in the environment.
load_dotenv(override=True)

# --- Configuration -------------------------------------------------------
HF_TOKEN = os.environ.get("HF_TOKEN")  # None when the variable is unset
REPO_ID = "janhq/250403-runpod-qwen7b-r1-distil"  # Target Hub repo
LOCAL_DIR = "trainer_output_deepseek-ai_DeepSeek-R1-Distill-Qwen-7B_gpu0_20250403_050520"

# Glob patterns for files that could be skipped during upload.
# NOTE: this list is not passed to upload_folder() below, so no filtering
# is applied. Add ignore_patterns=IGNORE_PATTERNS to the call to enable it.
IGNORE_PATTERNS = [
    "*.log",     # log files
    "*.pyc",     # Python bytecode cache
    ".git*",     # Git metadata
    "*.bin",     # binary files
    "*.pt",      # PyTorch checkpoints
    "*.ckpt",    # checkpoints
    "events.*",  # TensorBoard event files
    "wandb/*",   # Weights & Biases run data
    "runs/*",    # training runs
]

# --- Upload --------------------------------------------------------------
api = HfApi(token=HF_TOKEN)

# Make sure the target repo exists before uploading; exist_ok=True keeps
# this from failing when the repo was created by an earlier run.
api.create_repo(
    repo_id=REPO_ID,
    repo_type="model",
    private=True,
    exist_ok=True,
)
api.upload_folder(
    repo_id=REPO_ID,
    repo_type="model",
    folder_path=LOCAL_DIR,
)
print(f"✅ Done: {LOCAL_DIR} -> {REPO_ID}")
Loading…
Cancel
Save