feat: add cluster name stop start and reset functions to skypilot

pull/298/head
Zack 1 year ago
parent 3f011ecd3a
commit 7442144d7a

@ -1,3 +1,4 @@
import sky
import boto3 import boto3
from transformers import AutoTokenizer from transformers import AutoTokenizer
from botocore.exceptions import NoCredentialsError from botocore.exceptions import NoCredentialsError
@ -178,7 +179,15 @@ def load_tools():
print(f"all_tools_list: {all_tools_list}") # Debugging line print(f"all_tools_list: {all_tools_list}") # Debugging line
return gr.update(choices=all_tools_list) return gr.update(choices=all_tools_list)
def set_environ(OPENAI_API_KEY: str = "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", def start_sky_pilot(cluster_name: str):
sky.start(cluster_name)
def stop_sky_pilot(cluster_name: str):
    """Stop a SkyPilot cluster.

    Thin wrapper that forwards ``cluster_name`` to ``sky.stop``; nothing is
    returned to the caller.

    Args:
        cluster_name: Name of the cluster to pass to ``sky.stop``.
    """
    # Delegate entirely to SkyPilot; any error it raises propagates unchanged.
    sky.stop(cluster_name)
def status_sky_pilot(cluster_name: str):
    """Query SkyPilot for the status of a cluster.

    Args:
        cluster_name: Name of the cluster to pass to ``sky.status``.

    Returns:
        Whatever ``sky.status`` returns for ``cluster_name``, unmodified.
    """
    cluster_status = sky.status(cluster_name)
    return cluster_status
def set_environ(OPENAI_API_KEY: str = "",
WOLFRAMALPH_APP_ID: str = "", WOLFRAMALPH_APP_ID: str = "",
WEATHER_API_KEYS: str = "", WEATHER_API_KEYS: str = "",
BING_SUBSCRIPT_KEY: str = "", BING_SUBSCRIPT_KEY: str = "",
@ -331,26 +340,25 @@ def fetch_tokenizer(model_name):
return f"Error loading tokenizer: {str(e)}" return f"Error loading tokenizer: {str(e)}"
# Add this function to handle the button click # Add this function to handle the button click
import sky
def deploy_on_sky_pilot(model_name: str, tokenizer: str, accelerators: str): def deploy_on_sky_pilot(model_name: str, tokenizer: str, accelerators: str):
# Create serving.yaml for SkyPilot deployment # Create a SkyPilot Task
serving_yaml = { #TODO have ai generate a yaml file for the configuration the user desires add this as a tool
"resources": { task = sky.Task(
"accelerators": accelerators setup="conda create -n vllm python=3.9 -y\nconda activate vllm\ngit clone https://github.com/vllm-project/vllm.git\ncd vllm\npip install .\npip install gradio",
}, run="conda activate vllm\necho 'Starting vllm api server...'\npython -u -m vllm.entrypoints.api_server --model $MODEL_NAME --tensor-parallel-size $SKYPILOT_NUM_GPUS_PER_NODE --tokenizer $TOKENIZER 2>&1 | tee api_server.log &\necho 'Waiting for vllm api server to start...'\nwhile ! `cat api_server.log | grep -q 'Uvicorn running on'`; do sleep 1; done\necho 'Starting gradio server...'\npython vllm/examples/gradio_webserver.py",
"envs": { envs={
"MODEL_NAME": model_name, "MODEL_NAME": model_name,
"TOKENIZER": AutoTokenizer.from_pretrained(model_name) "TOKENIZER": AutoTokenizer.from_pretrained(model_name)
}, },
"setup": "conda create -n vllm python=3.9 -y\nconda activate vllm\ngit clone https://github.com/vllm-project/vllm.git\ncd vllm\npip install .\npip install gradio", resources={
"run": "conda activate vllm\necho 'Starting vllm api server...'\npython -u -m vllm.entrypoints.api_server --model $MODEL_NAME --tensor-parallel-size $SKYPILOT_NUM_GPUS_PER_NODE --tokenizer $TOKENIZER 2>&1 | tee api_server.log &\necho 'Waiting for vllm api server to start...'\nwhile ! `cat api_server.log | grep -q 'Uvicorn running on'`; do sleep 1; done\necho 'Starting gradio server...'\npython vllm/examples/gradio_webserver.py" "accelerators": accelerators
} }
)
# Write serving.yaml to file # Launch the task on SkyPilot
with open('serving.yaml', 'w') as f: sky.launch(task,cluster_name=cluster_name)
yaml.dump(serving_yaml, f)
# Deploy on SkyPilot
os.system("sky launch serving.yaml")
# Add this line where you define your Gradio interface # Add this line where you define your Gradio interface
@ -384,9 +392,9 @@ with gr.Blocks() as demo:
SCENEX_API_KEY = gr.Textbox(label="Scenex api key:", placeholder="Key to use sceneXplain", type="text") SCENEX_API_KEY = gr.Textbox(label="Scenex api key:", placeholder="Key to use sceneXplain", type="text")
STEAMSHIP_API_KEY = gr.Textbox(label="Steamship api key:", placeholder="Key to use image generation", type="text") STEAMSHIP_API_KEY = gr.Textbox(label="Steamship api key:", placeholder="Key to use image generation", type="text")
HUGGINGFACE_API_KEY = gr.Textbox(label="Huggingface api key:", placeholder="Key to use models in huggingface hub", type="text") HUGGINGFACE_API_KEY = gr.Textbox(label="Huggingface api key:", placeholder="Key to use models in huggingface hub", type="text")
HUGGINGFACE_TOKEN = gr.Textbox(label="HuggingFace Token:", placeholder="Token for huggingface", type="text"),
AMADEUS_ID = gr.Textbox(label="Amadeus id:", placeholder="Id to use Amadeus", type="text")
AMADEUS_KEY = gr.Textbox(label="Amadeus key:", placeholder="Key to use Amadeus", type="text") AMADEUS_KEY = gr.Textbox(label="Amadeus key:", placeholder="Key to use Amadeus", type="text")
AMADEUS_ID = gr.Textbox(label="Amadeus ID:", placeholder="Amadeus ID",
type="text")
AWS_ACCESS_KEY_ID = gr.Textbox(label="AWS Access Key ID:", placeholder="AWS Access Key ID", type="text") AWS_ACCESS_KEY_ID = gr.Textbox(label="AWS Access Key ID:", placeholder="AWS Access Key ID", type="text")
AWS_SECRET_ACCESS_KEY = gr.Textbox(label="AWS Secret Access Key:", placeholder="AWS Secret Access Key", type="text") AWS_SECRET_ACCESS_KEY = gr.Textbox(label="AWS Secret Access Key:", placeholder="AWS Secret Access Key", type="text")
AWS_DEFAULT_REGION = gr.Textbox(label="AWS Default Region:", placeholder="AWS Default Region", type="text") AWS_DEFAULT_REGION = gr.Textbox(label="AWS Default Region:", placeholder="AWS Default Region", type="text")
@ -423,12 +431,21 @@ with gr.Blocks() as demo:
info="Choose the model to solve your question, Default means ChatGPT." info="Choose the model to solve your question, Default means ChatGPT."
) )
tokenizer_output = gr.outputs.Textbox(label="Tokenizer") tokenizer_output = gr.outputs.Textbox(label="Tokenizer")
cluster_name = gr.outputs.Textbox(label="Cluster")
model_chosen.change(fetch_tokenizer, outputs=tokenizer_output) model_chosen.change(fetch_tokenizer, outputs=tokenizer_output)
available_accelerators = ["A100", "V100", "P100", "K80", "T4", "P4"] available_accelerators = ["A100", "V100", "P100", "K80", "T4", "P4"]
accelerators = gr.Dropdown(available_accelerators, label="Accelerators:") accelerators = gr.Dropdown(available_accelerators, label="Accelerators:")
buttonDeploy = gr.Button("Deploy on SkyPilot") buttonDeploy = gr.Button("Deploy on SkyPilot")
buttonDeploy.click(deploy_on_sky_pilot, [model_chosen, tokenizer_output, accelerators, HUGGINGFACE_TOKEN]) buttonDeploy.click(deploy_on_sky_pilot, [model_chosen, tokenizer_output, accelerators, HUGGINGFACE_API_KEY])
buttonStart = gr.Button("Start SkyPilot")
buttonStart.click(start_sky_pilot, [cluster_name])
buttonStop = gr.Button("Stop SkyPilot")
buttonStop.click(stop_sky_pilot, [cluster_name])
buttonStatus = gr.Button("Check SkyPilot Status")
buttonStatus.click(status_sky_pilot, [cluster_name])
with gr.Row(): with gr.Row():
tools_search = gr.Textbox( tools_search = gr.Textbox(
lines=1, lines=1,
@ -470,7 +487,6 @@ with gr.Blocks() as demo:
SCENEX_API_KEY, SCENEX_API_KEY,
STEAMSHIP_API_KEY, STEAMSHIP_API_KEY,
HUGGINGFACE_API_KEY, HUGGINGFACE_API_KEY,
HUGGINGFACE_TOKEN,
AMADEUS_ID, AMADEUS_ID,
AMADEUS_KEY, AMADEUS_KEY,
], outputs=key_set_btn) ], outputs=key_set_btn)

Loading…
Cancel
Save