feat: refactor download and upload scripts for improved argument handling (more notebook friendly :D)
parent fa3c0562fe
commit da60b52bd1
download_checkpoint.py
@@ -1,37 +1,63 @@
"""Download model from HuggingFace Hub.

This script downloads a model repository from HuggingFace Hub to local directory.

Example:
    python download_checkpoint.py --repo-id "org/model-name" --local-dir "models"
"""

import argparse
import os

from dotenv import load_dotenv
from huggingface_hub import snapshot_download

load_dotenv(override=True)

# Configuration
REPO_ID = "janhq/250403-runpod-qwen7b-r1-distil"
LOCAL_DIR = "downloaded_model"  # Where to save the model
HF_TOKEN = os.getenv("HF_TOKEN")

# Files to ignore during download
IGNORE_PATTERNS = [
    "*.log",  # Log files
    "*.pyc",  # Python cache
    ".git*",  # Git files
    "*.bin",  # Binary files
    "*.pt",  # PyTorch checkpoints
    "*.ckpt",  # Checkpoints
    "events.*",  # Tensorboard
    "wandb/*",  # Weights & Biases
    "runs/*",  # Training runs
]

# Download the model
snapshot_download(
    token=HF_TOKEN,
    repo_id=REPO_ID,
    local_dir=LOCAL_DIR,
    # ignore_patterns=IGNORE_PATTERNS,
)
print(f"✅ Done: {REPO_ID} -> {LOCAL_DIR}")

def parse_args() -> argparse.Namespace:
    """Parse command line arguments.

    Returns:
        argparse.Namespace: Parsed arguments
    """
    parser = argparse.ArgumentParser(description="Download model from HuggingFace Hub")
    parser.add_argument(
        "--repo-id", type=str, default="janhq/250403-llama-3.2-3b-instruct-grpo", help="HuggingFace repository ID"
    )
    parser.add_argument("--local-dir", type=str, default="downloaded_model", help="Local directory to save model")

    return parser.parse_args()


def main():
    """Main function to download model."""
    args = parse_args()
    load_dotenv(override=True)

    # Configuration
    HF_TOKEN = os.getenv("HF_TOKEN")

    # Files to ignore during download
    IGNORE_PATTERNS = [
        "*.log",  # Log files
        "*.pyc",  # Python cache
        ".git*",  # Git files
        "*.bin",  # Binary files
        "*.pt",  # PyTorch checkpoints
        "*.ckpt",  # Checkpoints
        "events.*",  # Tensorboard
        "wandb/*",  # Weights & Biases
        "runs/*",  # Training runs
    ]

    # Download the model
    snapshot_download(
        token=HF_TOKEN,
        repo_id=args.repo_id,
        local_dir=args.local_dir,
        repo_type="model",
        # ignore_patterns=IGNORE_PATTERNS
    )
    print(f"✅ Done: {args.repo_id} -> {args.local_dir}")


if __name__ == "__main__":
    main()
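The "notebook friendly" part of the refactor appears to come from the defaults on --repo-id and --local-dir: the download script can now be driven from a notebook cell with `!python download_checkpoint.py --repo-id "org/model-name" --local-dir "models"` instead of editing module-level constants. The snippet below is a minimal inline sketch of the same download for interactive use; it is not part of the commit, assumes HF_TOKEN is defined in a local .env file, and simply reuses the default repo ID from the diff above.

import os

from dotenv import load_dotenv
from huggingface_hub import snapshot_download

# Load HF_TOKEN from .env, mirroring what main() does in the refactored script.
load_dotenv(override=True)

snapshot_download(
    token=os.getenv("HF_TOKEN"),
    repo_id="janhq/250403-llama-3.2-3b-instruct-grpo",  # default from the diff; substitute your own repo ID
    local_dir="downloaded_model",
    repo_type="model",
)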
upload_checkpoint.py
@@ -1,39 +1,61 @@
"""Upload local directory to HuggingFace Hub.

This script uploads a specified local directory to HuggingFace Hub as a private repository.
It uses API token from HuggingFace for authentication.

Example:
    python upload_checkpoint.py --local-dir "models/my_model" --repo-id "org/model-name"
"""

import argparse
import os

from dotenv import load_dotenv
from huggingface_hub import HfApi

load_dotenv(override=True)

# Configuration
LOCAL_DIR = "trainer_output_deepseek-ai_DeepSeek-R1-Distill-Qwen-7B_gpu0_20250403_050520"
REPO_ID = "janhq/250403-runpod-qwen7b-r1-distil"
HF_TOKEN = os.getenv("HF_TOKEN")

# Files to ignore during upload
IGNORE_PATTERNS = [
    "*.log",  # Log files
    "*.pyc",  # Python cache
    ".git*",  # Git files
    "*.bin",  # Binary files
    "*.pt",  # PyTorch checkpoints
    "*.ckpt",  # Checkpoints
    "events.*",  # Tensorboard
    "wandb/*",  # Weights & Biases
    "runs/*",  # Training runs
]

api = HfApi(token=HF_TOKEN)
api.create_repo(repo_id=REPO_ID, private=True, exist_ok=True, repo_type="model")
api.upload_folder(
    folder_path=LOCAL_DIR,
    repo_id=REPO_ID,
    repo_type="model",
    # ignore_patterns=IGNORE_PATTERNS,
)
print(f"✅ Done: {LOCAL_DIR} -> {REPO_ID}")

def parse_args() -> argparse.Namespace:
    """Parse command line arguments.

    Returns:
        argparse.Namespace: Parsed arguments
    """
    parser = argparse.ArgumentParser(description="Upload model to HuggingFace Hub")
    parser.add_argument("--local-dir", type=str, required=True, help="Local directory to upload")
    parser.add_argument("--repo-id", type=str, required=True, help="HuggingFace repository ID")
    parser.add_argument("--public", action="store_true", help="Make repository public (default: private)")
    return parser.parse_args()


def main():
    """Main function to upload model."""
    args = parse_args()
    load_dotenv(override=True)

    # Configuration
    HF_TOKEN = os.getenv("HF_TOKEN")

    # Files to ignore during upload
    IGNORE_PATTERNS = [
        "*.log",  # Log files
        "*.pyc",  # Python cache
        ".git*",  # Git files
        "*.bin",  # Binary files
        "*.pt",  # PyTorch checkpoints
        "*.ckpt",  # Checkpoints
        "events.*",  # Tensorboard
        "wandb/*",  # Weights & Biases
        "runs/*",  # Training runs
    ]

    api = HfApi(token=HF_TOKEN)
    api.create_repo(repo_id=args.repo_id, private=not args.public, exist_ok=True, repo_type="model")
    api.upload_folder(
        folder_path=args.local_dir,
        repo_id=args.repo_id,
        repo_type="model",
        # ignore_patterns=IGNORE_PATTERNS
    )
    print(f"✅ Done: {args.local_dir} -> {args.repo_id}")


if __name__ == "__main__":
    main()
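On the upload side, the new --public flag is inverted into create_repo's private argument (private=not args.public), while ignore_patterns stays commented out in the upload_folder call. Below is a minimal inline sketch of the same flow with ignore_patterns switched on; it is not part of the commit, the repo ID is the placeholder from the docstring example, the folder path is hypothetical, and HF_TOKEN is again assumed to live in .env.

import os

from dotenv import load_dotenv
from huggingface_hub import HfApi

load_dotenv(override=True)

api = HfApi(token=os.getenv("HF_TOKEN"))
# No --public equivalent here, so the repository is created private.
api.create_repo(repo_id="org/model-name", private=True, exist_ok=True, repo_type="model")
api.upload_folder(
    folder_path="models/my_model",  # hypothetical checkpoint directory
    repo_id="org/model-name",
    repo_type="model",
    ignore_patterns=["*.log", "*.pyc", ".git*", "wandb/*", "runs/*"],  # subset of IGNORE_PATTERNS above
)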