From bd02305efbfdda5da9c96e049c612d81e61194d8 Mon Sep 17 00:00:00 2001 From: thinhlpg Date: Wed, 9 Apr 2025 07:07:13 +0000 Subject: [PATCH] chores: add cook notebooks --- notebooks/250407_cook_vllm_sglang.ipynb | 140 ++++++++++++++++++ notebooks/250408_cook_gradio_agent_demo.ipynb | 107 +++++++++++++ notebooks/250408_cook_search_api.ipynb | 80 ++++++++++ 3 files changed, 327 insertions(+) create mode 100644 notebooks/250407_cook_vllm_sglang.ipynb create mode 100644 notebooks/250408_cook_gradio_agent_demo.ipynb create mode 100644 notebooks/250408_cook_search_api.ipynb diff --git a/notebooks/250407_cook_vllm_sglang.ipynb b/notebooks/250407_cook_vllm_sglang.ipynb new file mode 100644 index 0000000..47c196b --- /dev/null +++ b/notebooks/250407_cook_vllm_sglang.ipynb @@ -0,0 +1,140 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Cook vllm - sglang - test eval script\n", + "## OpenAI API for reference\n", + "- https://platform.openai.com/docs/api-reference/completions/create\n", + "- https://platform.openai.com/docs/api-reference/chat/create" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Cook vllm\n", + "- https://docs.vllm.ai/en/latest/getting_started/examples/gradio_webserver.html\n", + "- https://docs.vllm.ai/en/latest/getting_started/examples/gradio_openai_chatbot_webserver.html\n", + "- https://docs.vllm.ai/en/latest/getting_started/examples/basic.html#gguf" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!vllm serve meta-llama/Llama-3.2-1B-Instruct --dtype auto" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Cook sglang\n", + "- https://docs.sglang.ai/backend/server_arguments.html\n", + "- > lora_paths: You may provide a list of adapters to your model as a list. Each batch element will get model response with the corresponding lora adapter applied.
Currently cuda_graph and radix_attention are not supported with this option so you need to disable them manually. We are still working on through these issues.\n", + "- > To enable multi-GPU data parallelism, add --dp 2. Data parallelism is better for throughput if there is enough memory." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# %pip install \"sglang[all]>=0.4.5\" --find-links https://flashinfer.ai/whl/cu124/torch2.5/flashinfer-python -q\n", + "!python3 -m sglang.launch_server --model-path meta-llama/Llama-3.2-1B-Instruct --dp 2 --port 30000" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import requests\n", + "\n", + "url = f\"http://localhost:30000/v1/chat/completions\"\n", + "\n", + "data = {\n", + " \"model\": \"meta-llama/Llama-3.2-1B-Instruct\",\n", + " \"messages\": [{\"role\": \"user\", \"content\": \"Hello em an com chua?\"}],\n", + "}\n", + "\n", + "response = requests.post(url, json=data)\n", + "print(response.json())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Cook eval script" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Command to launch SGLang server\n", + "!python3 -m sglang.launch_server \\\n", + " --model-path \"janhq/250404-llama-3.2-3b-instruct-grpo-03-s250\" \\\n", + " --context-length 8192 \\\n", + " --enable-metrics \\\n", + " --dtype bfloat16 \\\n", + " --host 0.0.0.0 \\\n", + " --port 8002 \\\n", + " --trust-remote-code \\\n", + " --disable-overlap \\\n", + " --disable-radix-cache" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# cd scripts/evaluation\n", + "!python run_eval.py \\\n", + " --config_path eval_config.yaml \\\n", + " --method_name research \\\n", + " --data_dir {root/path/to/evaluation/data} \\\n", + " 
--dataset_name bamboogle \\\n", + " --split test \\\n", + " --save_dir {your-save-dir} \\\n", + " --save_note research_qwen7b_ins \\\n", + " --sgl_remote_url {your-launched-sgl-url} \\\n", + " --remote_retriever_url {your-hosted-retriever-url} \\\n", + " --generator_model {your-local-model-path} \\\n", + " --apply_chat True" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "deepsearch-py311", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.11" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/notebooks/250408_cook_gradio_agent_demo.ipynb b/notebooks/250408_cook_gradio_agent_demo.ipynb new file mode 100644 index 0000000..43f3fbe --- /dev/null +++ b/notebooks/250408_cook_gradio_agent_demo.ipynb @@ -0,0 +1,108 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Getting Familiar\n", + "\n", + "https://www.gradio.app/guides/agents-and-tool-usage" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import gradio as gr\n", + "\n", + "with gr.Blocks() as demo:\n", + " chatbot = gr.Chatbot(\n", + " type=\"messages\",\n", + " value=[\n", + " gr.ChatMessage(role=\"user\", content=\"What is the weather in San Francisco?\"),\n", + " gr.ChatMessage(\n", + " role=\"assistant\", content=\"I need to use the weather API tool?\", metadata={\"title\": \"🧠 Thinking\"}\n", + " ),\n", + " ],\n", + " )\n", + "\n", + "demo.launch()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from dataclasses import asdict\n", + "import gradio as gr\n", + "from transformers import Tool, ReactCodeAgent # type: ignore\n", + "from transformers.agents import stream_to_gradio, HfApiEngine # type: ignore\n", +
"\n", + "# Import tool from Hub\n", + "image_generation_tool = Tool.from_space(\n", + " space_id=\"black-forest-labs/FLUX.1-schnell\",\n", + " name=\"image_generator\",\n", + " description=\"Generates an image following your prompt. Returns a PIL Image.\",\n", + " api_name=\"/infer\",\n", + ")\n", + "\n", + "llm_engine = HfApiEngine(\"Qwen/Qwen2.5-Coder-32B-Instruct\")\n", + "# Initialize the agent with both tools and engine\n", + "agent = ReactCodeAgent(tools=[image_generation_tool], llm_engine=llm_engine)\n", + "\n", + "def interact_with_agent(prompt, history):\n", + " messages = []\n", + " yield messages\n", + " for msg in stream_to_gradio(agent, prompt):\n", + " messages.append(asdict(msg))\n", + " yield messages\n", + " yield messages\n", + "\n", + "\n", + "demo = gr.ChatInterface(\n", + " interact_with_agent,\n", + " chatbot= gr.Chatbot(\n", + " label=\"Agent\",\n", + " type=\"messages\",\n", + " avatar_images=(\n", + " None,\n", + " \"https://em-content.zobj.net/source/twitter/53/robot-face_1f916.png\",\n", + " ),\n", + " ),\n", + " examples=[\n", + " [\"Generate an image of an astronaut riding an alligator\"],\n", + " [\"I am writing a children's book for my daughter. 
Can you help me with some illustrations?\"],\n", + " ],\n", + " type=\"messages\",\n", + ")\n", + "\n", + "demo.launch()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "deepsearch-py311-2", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.11" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/notebooks/250408_cook_search_api.ipynb b/notebooks/250408_cook_search_api.ipynb new file mode 100644 index 0000000..c868ce7 --- /dev/null +++ b/notebooks/250408_cook_search_api.ipynb @@ -0,0 +1,80 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Cook Search APIs\n", + "- This notebook is used to cook search APIs like Tavily and Serper." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import http.client\n", + "import json\n", + "import os\n", + "\n", + "from dotenv import load_dotenv\n", + "\n", + "load_dotenv(override=True)\n", + "\n", + "# https://docs.spaceserp.com/serp-api\n", + "conn = http.client.HTTPSConnection(\"google.serper.dev\")\n", + "payload = json.dumps({\"q\": \"apple inc\"})\n", + "headers = {\n", + " \"X-API-KEY\": os.getenv(\"SERPER_API_KEY\"),\n", + " \"Content-Type\": \"application/json\",\n", + "}\n", + "conn.request(\"POST\", \"/search\", payload, headers)\n", + "res = conn.getresponse()\n", + "data = res.read()\n", + "print(data.decode(\"utf-8\"))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "\n", + "from dotenv import load_dotenv\n", + "from tavily import TavilyClient\n", + "\n", + "load_dotenv(override=True)\n", + "\n", + "tavily_client = 
TavilyClient(api_key=os.getenv(\"TAVILY_API_KEY\"))\n", + "# https://docs.tavily.com/documentation/api-reference/endpoint/search\n", + "response = tavily_client.search(\n", + " query=\"Who is Leo Messi?\",\n", + " search_depth=\"basic\",\n", + " topic=\"general\",\n", + " max_results=2,\n", + " include_answer=True,\n", + " include_raw_content=False,\n", + " include_images=False,\n", + ")\n", + "\n", + "print(response)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "deepsearch-py311-2", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.11.11" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}