From 3f011ecd3a56d42f928ab9fb81fc207dc5a852a2 Mon Sep 17 00:00:00 2001
From: Zack
Date: Sun, 10 Dec 2023 10:14:18 -0800
Subject: [PATCH] feat: add huggingface token

---
 app.py       | 41 ++++++++++++++++++++++-------------------
 serving.yaml | 20 --------------------
 2 files changed, 22 insertions(+), 39 deletions(-)
 delete mode 100644 serving.yaml

diff --git a/app.py b/app.py
index 62cd433b..1c86ba74 100644
--- a/app.py
+++ b/app.py
@@ -1,4 +1,5 @@
 import boto3
+from transformers import AutoTokenizer
 from botocore.exceptions import NoCredentialsError
 import tokenize
 import requests
@@ -321,15 +322,13 @@ def clear_history():
     yield gr.update(visible=True, value=return_msg)
 
 
-# Add this function to fetch the tokenizer from the Hugging Face Model Hub API
-def fetch_tokenizer(model_name: str):
-    response = requests.get(f"https://huggingface.co/{model_name}/resolve/main/tokenizer_config.json")
-    if response.status_code == 200:
-        tokenizer_config = response.json()
-        return tokenizer_config.get("tokenizer_class")
-    else:
-        return "Tokenizer not found for the selected model"
+def fetch_tokenizer(model_name: str):
+    try:
+        AutoTokenizer.from_pretrained(model_name)
+        return f"Tokenizer for {model_name} loaded successfully."
+    except Exception as e:
+        return f"Error loading tokenizer: {e}"
 
 
 # Add this function to handle the button click
 def deploy_on_sky_pilot(model_name: str, tokenizer: str, accelerators: str):
@@ -340,7 +339,7 @@ def deploy_on_sky_pilot(model_name: str, tokenizer: str, accelerators: str):
         },
         "envs": {
             "MODEL_NAME": model_name,
-            "TOKENIZER": tokenizer
+            "TOKENIZER": model_name  # env values must be plain strings; default to the model's own tokenizer repo
         },
         "setup": "conda create -n vllm python=3.9 -y\nconda activate vllm\ngit clone https://github.com/vllm-project/vllm.git\ncd vllm\npip install .\npip install gradio",
         "run": "conda activate vllm\necho 'Starting vllm api server...'\npython -u -m vllm.entrypoints.api_server --model $MODEL_NAME --tensor-parallel-size $SKYPILOT_NUM_GPUS_PER_NODE --tokenizer $TOKENIZER 2>&1 | tee api_server.log &\necho 'Waiting for vllm api server to start...'\nwhile ! `cat api_server.log | grep -q 'Uvicorn running on'`; do sleep 1; done\necho 'Starting gradio server...'\npython vllm/examples/gradio_webserver.py"
@@ -385,6 +384,7 @@ with gr.Blocks() as demo:
             SCENEX_API_KEY = gr.Textbox(label="Scenex api key:", placeholder="Key to use sceneXplain", type="text")
             STEAMSHIP_API_KEY = gr.Textbox(label="Steamship api key:", placeholder="Key to use image generation", type="text")
             HUGGINGFACE_API_KEY = gr.Textbox(label="Huggingface api key:", placeholder="Key to use models in huggingface hub", type="text")
+            HUGGINGFACE_TOKEN = gr.Textbox(label="HuggingFace Token:", placeholder="Token for huggingface", type="text")
             AMADEUS_ID = gr.Textbox(label="Amadeus id:", placeholder="Id to use Amadeus", type="text")
             AMADEUS_KEY = gr.Textbox(label="Amadeus key:", placeholder="Key to use Amadeus", type="text")
             AWS_ACCESS_KEY_ID = gr.Textbox(label="AWS Access Key ID:", placeholder="AWS Access Key ID", type="text")
@@ -416,10 +416,19 @@ with gr.Blocks() as demo:
             buttonDownload = gr.Button("Download Model");
             buttonDownload.click(fn=download_model, inputs=[model_url, memory_utilization]);
             model_chosen = gr.Dropdown(
-                list(available_models), value=DEFAULTMODEL, multiselect=False, label="Model provided",
+                list(available_models),
+                value=DEFAULTMODEL,
+                multiselect=False,
+                label="Model provided",
                 info="Choose the model to solve your question, Default means ChatGPT."
-
-            )
+            )
+            tokenizer_output = gr.Textbox(label="Tokenizer")
+            model_chosen.change(fetch_tokenizer, inputs=model_chosen, outputs=tokenizer_output)
+            available_accelerators = ["A100", "V100", "P100", "K80", "T4", "P4"]
+            accelerators = gr.Dropdown(available_accelerators, label="Accelerators:")
+            buttonDeploy = gr.Button("Deploy on SkyPilot")
+
+            buttonDeploy.click(deploy_on_sky_pilot, [model_chosen, tokenizer_output, accelerators])
         with gr.Row():
             tools_search = gr.Textbox(
                 lines=1,
@@ -434,13 +443,6 @@ with gr.Blocks() as demo:
                 info="Choose the tools to solve your question.",
             )
 
-            tokenizer_output = gr.outputs.Textbox()
-            model_chosen.change(fetch_tokenizer, outputs=tokenizer_output)
-            available_accelerators = ["A100", "V100", "P100", "K80", "T4", "P4"]
-            accelerators = gr.Dropdown(available_accelerators, label="Accelerators:")
-            buttonDeploy = gr.Button("Deploy on SkyPilot")
-
-            buttonDeploy.click(deploy_on_sky_pilot, [model_chosen, tokenizer_output, accelerators])
 
             # TODO finish integrating model flow
             # with gr.Tab("model"):
@@ -468,6 +470,7 @@ with gr.Blocks() as demo:
                 SCENEX_API_KEY,
                 STEAMSHIP_API_KEY,
                 HUGGINGFACE_API_KEY,
+                HUGGINGFACE_TOKEN,
                 AMADEUS_ID,
                 AMADEUS_KEY,
             ], outputs=key_set_btn)
diff --git a/serving.yaml b/serving.yaml
deleted file mode 100644
index 31027155..00000000
--- a/serving.yaml
+++ /dev/null
@@ -1,20 +0,0 @@
-envs:
-  MODEL_NAME: decapoda-research/llama-13b-hf
-  TOKENIZER: Tokenizer not found for the selected model
-resources:
-  accelerators: A100
-run: |-
-  conda activate vllm
-  echo 'Starting vllm api server...'
-  python -u -m vllm.entrypoints.api_server --model $MODEL_NAME --tensor-parallel-size $SKYPILOT_NUM_GPUS_PER_NODE --tokenizer $TOKENIZER 2>&1 | tee api_server.log &
-  echo 'Waiting for vllm api server to start...'
-  while ! `cat api_server.log | grep -q 'Uvicorn running on'`; do sleep 1; done
-  echo 'Starting gradio server...'
-  python vllm/examples/gradio_webserver.py
-setup: |-
-  conda create -n vllm python=3.9 -y
-  conda activate vllm
-  git clone https://github.com/vllm-project/vllm.git
-  cd vllm
-  pip install .
-  pip install gradio
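
Review note (not part of the patch): the new HUGGINGFACE_TOKEN field only reaches set_environ, so fetch_tokenizer still downloads tokenizers anonymously and gated repos (e.g. meta-llama models) will fail. A minimal sketch of how the token could be threaded through, assuming the `token` keyword of transformers' AutoTokenizer.from_pretrained (older releases spell it `use_auth_token`); the two-input wiring in the trailing comment is hypothetical, not code from app.py:

    from transformers import AutoTokenizer

    def fetch_tokenizer(model_name: str, hf_token: str = "") -> str:
        """Report whether the tokenizer for model_name can be loaded."""
        try:
            # An empty textbox falls back to any cached `huggingface-cli login` token.
            AutoTokenizer.from_pretrained(model_name, token=hf_token or None)
            return f"Tokenizer for {model_name} loaded successfully."
        except Exception as e:
            return f"Error loading tokenizer: {e}"

    # Hypothetical wiring inside the Blocks layout:
    # model_chosen.change(fetch_tokenizer,
    #                     inputs=[model_chosen, HUGGINGFACE_TOKEN],
    #                     outputs=tokenizer_output)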
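
Second note: deploy_on_sky_pilot builds a task dict mirroring the deleted serving.yaml, but the diff never shows the dict being submitted. One way to hand it to SkyPilot from Python, sketched under the assumption that the dict contains only plain strings (sky.Task.from_yaml and sky.launch are SkyPilot API calls; the launch_task helper and the cluster name are made up for illustration):

    import tempfile

    import sky
    import yaml

    def launch_task(task: dict, cluster_name: str = "vllm-serving") -> None:
        """Serialize the task dict to YAML and launch it with SkyPilot."""
        with tempfile.NamedTemporaryFile("w", suffix=".yaml", delete=False) as f:
            yaml.safe_dump(task, f)
        # The file is flushed and closed once the `with` block exits.
        sky.launch(sky.Task.from_yaml(f.name), cluster_name=cluster_name)

Exporting the token as an env on the task would also let the remote vLLM server fetch gated weights; recent huggingface_hub releases honor HF_TOKEN (older ones use HUGGING_FACE_HUB_TOKEN).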