From 7ee65269fb2e6d3e0ce234d9111d44eeb662c391 Mon Sep 17 00:00:00 2001 From: thinhlpg Date: Tue, 15 Apr 2025 05:02:29 +0000 Subject: [PATCH] feat: add new evaluation notebook for model testing and checkpoint evaluation --- notebooks/eval-resero.ipynb | 299 ++++++++++++++++++++++++++++++++++++ 1 file changed, 299 insertions(+) create mode 100644 notebooks/eval-resero.ipynb diff --git a/notebooks/eval-resero.ipynb b/notebooks/eval-resero.ipynb new file mode 100644 index 0000000..a388267 --- /dev/null +++ b/notebooks/eval-resero.ipynb @@ -0,0 +1,299 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "oqetzlVnGTh5" + }, + "source": [ + "\n", + "# Eval" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Yh8KQT1_vsnr", + "outputId": "834eec12-4822-4593-bb2e-4867b84bdd28" + }, + "outputs": [], + "source": [ + "!python scripts/download_checkpoint.py --repo-id \"janhq/250404-llama-3.2-3b-instruct-grpo-03\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "id": "bGqFk1NscN_S", + "outputId": "bcace81c-cc84-4d8c-fdc6-8fab23621d30" + }, + "outputs": [], + "source": [ + "# Update packages\n", + "# %pip install --force-reinstall unsloth transformers trl unsloth_zoo vllm==0.8.2 -q" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "IgKlwY2yGhDe", + "outputId": "ba653f89-1aeb-492e-9c1e-d735783b4875" + }, + "outputs": [], + "source": [ + "# Base model\n", + "!CUDA_VISIBLE_DEVICES=0 python scripts/eval_base.py --model_name \"\" # \"meta-llama/Llama-3.2-3B-Instruct\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!CUDA_VISIBLE_DEVICES=0 python scripts/eval_lora.py --model_name \"meta-llama/Llama-3.2-3B-Instruct\" --lora_path \"./downloaded_model/checkpoint-50\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "xUSegJWzGkQS", + "outputId": "098e6dcd-97b5-44f1-9ae4-ff9691189b0f" + }, + "outputs": [], + "source": [ + "!CUDA_VISIBLE_DEVICES=0 python scripts/eval_lora.py --model_name \"meta-llama/Llama-3.2-3B-Instruct\" --lora_path \"./downloaded_model/checkpoint-100\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!CUDA_VISIBLE_DEVICES=0 python scripts/eval_lora.py --model_name \"meta-llama/Llama-3.2-3B-Instruct\" --lora_path \"./downloaded_model/checkpoint-150\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "x0kY1u3yeRgr", + "outputId": "a1c20afe-14f1-4923-9bd2-974f66352f11" + }, + "outputs": [], + "source": [ + "!CUDA_VISIBLE_DEVICES=0 python scripts/eval_lora.py --model_name \"meta-llama/Llama-3.2-3B-Instruct\" --lora_path \"./downloaded_model/checkpoint-200\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!CUDA_VISIBLE_DEVICES=0 python scripts/eval_lora.py --model_name \"meta-llama/Llama-3.2-3B-Instruct\" --lora_path \"./downloaded_model/checkpoint-250\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "bpFIRhYPeTC8", + "outputId": "3cdaad40-fded-4f6b-e584-6917e8a2b8ed" + }, + "outputs": [], + "source": [ + "!CUDA_VISIBLE_DEVICES=0 python scripts/eval_lora.py --model_name \"meta-llama/Llama-3.2-3B-Instruct\" --lora_path \"./downloaded_model/checkpoint-300\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!CUDA_VISIBLE_DEVICES=0 python scripts/eval_lora.py --model_name \"meta-llama/Llama-3.2-3B-Instruct\" --lora_path \"./downloaded_model/checkpoint-350\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "BTFZ3S45eUDW", + "outputId": "75ea7813-f1a3-4861-e51c-063a31301cdc" + }, + "outputs": [], + "source": [ + "!CUDA_VISIBLE_DEVICES=0 python scripts/eval_lora.py --model_name \"meta-llama/Llama-3.2-3B-Instruct\" --lora_path \"./downloaded_model/checkpoint-400\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!CUDA_VISIBLE_DEVICES=0 python scripts/eval_lora.py --model_name \"meta-llama/Llama-3.2-3B-Instruct\" --lora_path \"./downloaded_model/checkpoint-450\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "61VvimFYeVCl", + "outputId": "3057f675-6829-4613-d489-1b18ac64de0c" + }, + "outputs": [], + "source": [ + "!CUDA_VISIBLE_DEVICES=0 python scripts/eval_lora.py --model_name \"meta-llama/Llama-3.2-3B-Instruct\" --lora_path \"./downloaded_model/checkpoint-500\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!CUDA_VISIBLE_DEVICES=0 python scripts/eval_lora.py --model_name \"meta-llama/Llama-3.2-3B-Instruct\" --lora_path \"./downloaded_model/checkpoint-550\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!CUDA_VISIBLE_DEVICES=0 python scripts/eval_lora.py --model_name \"meta-llama/Llama-3.2-3B-Instruct\" --lora_path \"./downloaded_model/checkpoint-600\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!CUDA_VISIBLE_DEVICES=0 python scripts/eval_lora.py --model_name \"meta-llama/Llama-3.2-3B-Instruct\" --lora_path \"./downloaded_model/checkpoint-650\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!CUDA_VISIBLE_DEVICES=0 python scripts/eval_lora.py --model_name \"meta-llama/Llama-3.2-3B-Instruct\" --lora_path \"./downloaded_model/checkpoint-700\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!CUDA_VISIBLE_DEVICES=0 python scripts/eval_lora.py --model_name \"meta-llama/Llama-3.2-3B-Instruct\" --lora_path \"./downloaded_model/checkpoint-750\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!CUDA_VISIBLE_DEVICES=0 python scripts/eval_lora.py --model_name \"meta-llama/Llama-3.2-3B-Instruct\" --lora_path \"./downloaded_model/checkpoint-800\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!CUDA_VISIBLE_DEVICES=0 python scripts/eval_lora.py --model_name \"meta-llama/Llama-3.2-3B-Instruct\" --lora_path \"./downloaded_model/checkpoint-850\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!CUDA_VISIBLE_DEVICES=0 python scripts/eval_lora.py --model_name \"meta-llama/Llama-3.2-3B-Instruct\" --lora_path \"./downloaded_model/checkpoint-900\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!CUDA_VISIBLE_DEVICES=0 python scripts/eval_lora.py --model_name \"meta-llama/Llama-3.2-3B-Instruct\" --lora_path \"./downloaded_model/checkpoint-950\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!CUDA_VISIBLE_DEVICES=0 python scripts/eval_lora.py --model_name \"meta-llama/Llama-3.2-3B-Instruct\" --lora_path \"./downloaded_model/checkpoint-1000\"" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "gpuType": "L4", + "machine_shape": "hm", + "provenance": [] + }, + "kernelspec": { + "display_name": "deepsearch-py311-2", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.11" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +}