parent 1ad7061d75
commit 9e32b81b89
@@ -1,192 +0,0 @@

import asyncio
import os
from typing import List, Optional

import tiktoken
from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from swarms import Agent, Anthropic, GPT4o, GPT4VisionAPI, OpenAIChat
from swarms.utils.loguru_logger import logger

from swarms_cloud.schema.cog_vlm_schemas import (
    ChatCompletionResponse,
    UsageInfo,
)


# Define the input model using Pydantic
class AgentInput(BaseModel):
    agent_name: str = "Swarm Agent"
    system_prompt: Optional[str] = None
    agent_description: Optional[str] = None
    model_name: str = "OpenAIChat"
    max_loops: int = 1
    autosave: bool = False
    dynamic_temperature_enabled: bool = False
    dashboard: bool = False
    verbose: bool = False
    streaming_on: bool = True
    saved_state_path: Optional[str] = None
    sop: Optional[str] = None
    sop_list: Optional[List[str]] = None
    user_name: str = "User"
    retry_attempts: int = 3
    context_length: int = 8192
    task: Optional[str] = None


# Define the output model using Pydantic
class AgentOutput(BaseModel):
    agent: AgentInput
    completions: ChatCompletionResponse


async def count_tokens(text: str) -> int:
    try:
        # Get the encoding used by the model
        encoding = tiktoken.encoding_for_model("gpt-4o")

        # Encode the text and count the tokens
        tokens = encoding.encode(text)
        return len(tokens)
    except Exception as e:
        raise HTTPException(status_code=400, detail=str(e))


async def model_router(model_name: str):
    """
    Instantiate and return the specified model.

    Parameters:
    - model_name (str): The name of the model to switch to.

    Returns:
    - The instantiated LLM.

    Raises:
    - HTTPException: If the model name is not recognized.
    """
    # Logic to switch to the specified model
    if model_name == "OpenAIChat":
        llm = OpenAIChat()
    elif model_name == "GPT4o":
        llm = GPT4o(openai_api_key=os.getenv("OPENAI_API_KEY"))
    elif model_name == "GPT4VisionAPI":
        llm = GPT4VisionAPI()
    elif model_name == "Anthropic":
        llm = Anthropic(anthropic_api_key=os.getenv("ANTHROPIC_API_KEY"))
    else:
        # Invalid model name: fail fast instead of returning an unbound variable
        raise HTTPException(
            status_code=400, detail=f"Invalid model name: {model_name}"
        )

    return llm


# Create a FastAPI app
app = FastAPI(debug=True)

# Load the middleware to handle CORS
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)


# @app.get("/v1/models", response_model=ModelList)
# async def list_models():
#     """
#     An endpoint to list available models. It returns a list of model cards.
#     This is useful for clients to query and understand what models are available for use.
#     """
#     model_card = ModelCard(
#         id="cogvlm-chat-17b"
#     )  # can be replaced by your model id like cogagent-chat-18b
#     return ModelList(data=[model_card])


@app.post("/v1/agent/completions", response_model=AgentOutput)
async def agent_completions(agent_input: AgentInput):
    try:
        logger.info(f"Received request: {agent_input}")
        llm = await model_router(agent_input.model_name)

        agent = Agent(
            agent_name=agent_input.agent_name,
            system_prompt=agent_input.system_prompt,
            agent_description=agent_input.agent_description,
            llm=llm,
            max_loops=agent_input.max_loops,
            autosave=agent_input.autosave,
            dynamic_temperature_enabled=agent_input.dynamic_temperature_enabled,
            dashboard=agent_input.dashboard,
            verbose=agent_input.verbose,
            streaming_on=agent_input.streaming_on,
            saved_state_path=agent_input.saved_state_path,
            sop=agent_input.sop,
            sop_list=agent_input.sop_list,
            user_name=agent_input.user_name,
            retry_attempts=agent_input.retry_attempts,
            context_length=agent_input.context_length,
        )

        # Run the agent
        logger.info(f"Running agent with task: {agent_input.task}")
        completions = agent.run(agent_input.task)

        logger.info(f"Completions: {completions}")
        all_input_tokens, output_tokens = await asyncio.gather(
            count_tokens(agent.short_memory.return_history_as_string()),
            count_tokens(completions),
        )

        logger.info(f"Token counts: {all_input_tokens}, {output_tokens}")

        out = AgentOutput(
            agent=agent_input,
            completions=ChatCompletionResponse(
                choices=[
                    {
                        "index": 0,
                        "message": {
                            "role": agent_input.agent_name,
                            "content": completions,
                            "name": None,
                        },
                    }
                ],
                stream_choices=None,
                usage_info=UsageInfo(
                    prompt_tokens=all_input_tokens,
                    completion_tokens=output_tokens,
                    total_tokens=all_input_tokens + output_tokens,
                ),
            ),
        )

        return out

    except Exception as e:
        raise HTTPException(status_code=400, detail=str(e))


# if __name__ == "__main__":
#     import uvicorn
#
#     uvicorn.run(
#         app, host="0.0.0.0", port=8000, use_colors=True, log_level="info"
#     )
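For illustration, a client can exercise the endpoint above with a plain HTTP POST. The following is a minimal sketch, assuming the app is served locally on port 8000; the host, port, and sample task are assumptions, and any `AgentInput` field left out falls back to its default:

```python
import requests

# Assumed local deployment; adjust the host/port to wherever the app is served.
url = "http://localhost:8000/v1/agent/completions"

# Only a few AgentInput fields are set; the rest use their defaults.
payload = {
    "agent_name": "Swarm Agent",
    "model_name": "OpenAIChat",
    "task": "Summarize the benefits of multi-agent systems.",
}

response = requests.post(url, json=payload)
response.raise_for_status()

# The response body is an AgentOutput: the echoed agent config plus completions.
print(response.json()["completions"])
```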
@@ -1,29 +1,339 @@

- This is the class for the Task
- For the constructor, it takes in the description, agent, args, kwargs, result, history, schedule_time, scheduler, trigger, action, condition, priority, and dependencies
- The `execute` method runs the task by calling the agent or model with the arguments and keyword arguments
- It sets a trigger, action, and condition for the task
- Task completion is checked with the `is_completed` method
- `add_dependency` adds a task to the list of dependencies
- `set_priority` sets the priority of the task

```python
# Example 1: Creating and executing a Task
import datetime

from swarms.models import OpenAIChat
from swarms.structs import Agent, Task

agent = Agent(llm=OpenAIChat(openai_api_key=""), max_loops=1, dashboard=False)
task = Task(agent=agent)
task.execute("What's the weather in Miami?")
print(task.result)

# Example 2: Adding a dependency and setting priority
task2 = Task(description="Task 2", agent=agent)
task.add_dependency(task2)
task.set_priority(1)

# Example 3: Executing a scheduled task
task3 = Task(description="Scheduled Task", agent=agent)
task3.schedule_time = datetime.datetime.now() + datetime.timedelta(minutes=30)
task3.handle_scheduled_task()
print(task3.is_completed())
```

# Task Class Documentation

The `Task` class is a pivotal component designed for managing tasks in a sequential workflow. This class allows for the execution of tasks using various agents, which can be callable objects or specific instances of the `Agent` class. It supports the scheduling of tasks, handling their dependencies, and setting conditions and actions that govern their execution.

Key features of the `Task` class include:

- Executing tasks with specified agents and handling their results.
- Scheduling tasks to run at specified times.
- Setting triggers, actions, and conditions for tasks.
- Managing task dependencies and priorities.
- Providing a history of task executions for tracking purposes.

## Class Definition

The `Task` class is defined as follows:
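The hunk does not reproduce the source itself; as a hedged sketch, a definition with the shape implied by the attribute table below would look roughly like this (the dataclass form, field order, and defaults are assumptions, not the actual source):

```python
import sched
import time
from dataclasses import dataclass, field
from datetime import datetime
from typing import Any, Callable, Dict, List, Optional, Union

from swarms import Agent


@dataclass
class Task:
    # The agent (or any callable) that performs the work.
    agent: Union[Callable, Agent]
    description: Optional[str] = None
    result: Any = None
    history: List[Any] = field(default_factory=list)
    schedule_time: Optional[datetime] = None
    scheduler: sched.scheduler = field(
        default_factory=lambda: sched.scheduler(time.time, time.sleep)
    )
    trigger: Optional[Callable] = None
    action: Optional[Callable] = None
    condition: Optional[Callable] = None
    priority: int = 0
    dependencies: List["Task"] = field(default_factory=list)
    args: List[Any] = field(default_factory=list)
    kwargs: Dict[str, Any] = field(default_factory=dict)
```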
### Attributes

| Attribute       | Type                     | Description                                                               |
|-----------------|--------------------------|---------------------------------------------------------------------------|
| `agent`         | `Union[Callable, Agent]` | The agent or callable object that runs the task.                           |
| `description`   | `str`                    | Description of the task.                                                   |
| `result`        | `Any`                    | Result of the task.                                                        |
| `history`       | `List[Any]`              | History of the task's executions.                                          |
| `schedule_time` | `datetime`               | Time at which to schedule the task.                                        |
| `scheduler`     | `sched.scheduler`        | Scheduler used to run the task at its scheduled time.                      |
| `trigger`       | `Callable`               | Trigger callable for the task.                                             |
| `action`        | `Callable`               | Action callable for the task.                                              |
| `condition`     | `Callable`               | Condition that must be met for the task to run.                            |
| `priority`      | `int`                    | Priority of the task.                                                      |
| `dependencies`  | `List[Task]`             | Tasks that must be completed before this task can be executed.             |
| `args`          | `List[Any]`              | Arguments to pass to the agent or callable object.                         |
| `kwargs`        | `Dict[str, Any]`         | Keyword arguments to pass to the agent or callable object.                 |

## Methods

### `execute(self, *args, **kwargs)`

Executes the task by calling the agent or model with the specified arguments and keyword arguments. If a condition is set, the task only executes when the condition returns `True`.

#### Parameters

- `args`: Arguments to pass to the agent or callable object.
- `kwargs`: Keyword arguments to pass to the agent or callable object.

#### Examples

```python
>>> from swarms.structs import Agent, Task
>>> from swarms.models import OpenAIChat
>>> agent = Agent(llm=OpenAIChat(openai_api_key=""), max_loops=1, dashboard=False)
>>> task = Task(description="What's the weather in Miami?", agent=agent)
>>> task.execute()
>>> task.result
```

### `handle_scheduled_task(self)`

Handles the execution of a scheduled task. If the schedule time is not set or has already passed, the task is executed immediately. Otherwise, the task is scheduled to run at the specified schedule time.

#### Examples

```python
>>> from datetime import datetime, timedelta
>>> task.schedule_time = datetime.now() + timedelta(seconds=10)
>>> task.handle_scheduled_task()
```

### `set_trigger(self, trigger: Callable)`

Sets the trigger for the task.

#### Parameters

- `trigger` (`Callable`): The trigger to set.

#### Examples

```python
>>> def my_trigger():
...     print("Trigger executed")
>>> task.set_trigger(my_trigger)
```

### `set_action(self, action: Callable)`

Sets the action for the task.

#### Parameters

- `action` (`Callable`): The action to set.

#### Examples

```python
>>> def my_action():
...     print("Action executed")
>>> task.set_action(my_action)
```

### `set_condition(self, condition: Callable)`

Sets the condition for the task.

#### Parameters

- `condition` (`Callable`): The condition to set.

#### Examples

```python
>>> def my_condition():
...     print("Condition checked")
...     return True
>>> task.set_condition(my_condition)
```

### `is_completed(self)`

Checks whether the task has been completed.

#### Returns

- `bool`: `True` if the task has been completed, `False` otherwise.

#### Examples

```python
>>> task.is_completed()
```

### `add_dependency(self, task)`

Adds a task to the list of dependencies.

#### Parameters

- `task` (`Task`): The task to add as a dependency.

#### Examples

```python
>>> dependent_task = Task(description="Dependent Task")
>>> task.add_dependency(dependent_task)
```

### `set_priority(self, priority: int)`

Sets the priority of the task.

#### Parameters

- `priority` (`int`): The priority to set.

#### Examples

```python
>>> task.set_priority(5)
```

### `check_dependency_completion(self)`

Checks whether all of the task's dependencies have been completed.

#### Returns

- `bool`: `True` if all the dependencies have been completed, `False` otherwise.

#### Examples

```python
>>> task.check_dependency_completion()
```

### `context(self, task: "Task" = None, context: List["Task"] = None, *args, **kwargs)`

Sets the context for the task. For a sequential workflow, it sequentially adds the context of the previous task in the list.

#### Parameters

- `task` (`Task`, optional): The task whose context is to be set.
- `context` (`List[Task]`, optional): The list of tasks from which to build the context.

#### Examples

```python
>>> task1 = Task(description="Task 1")
>>> task2 = Task(description="Task 2")
>>> task2.context(context=[task1])
```

## Usage Examples

### Basic Usage

```python
import os

from dotenv import load_dotenv

from swarms import Agent, OpenAIChat, Task

# Load the environment variables
load_dotenv()


# Define a function to be used as the action
def my_action():
    print("Action executed")


# Define a function to be used as the condition
def my_condition():
    print("Condition checked")
    return True


# Create an agent
agent = Agent(
    llm=OpenAIChat(openai_api_key=os.environ["OPENAI_API_KEY"]),
    max_loops=1,
    dashboard=False,
)

# Create a task
task = Task(
    description="Generate a report on the top 3 biggest expenses for small businesses and how businesses can save 20%",
    agent=agent,
)

# Set the action and condition
task.set_action(my_action)
task.set_condition(my_condition)

# Execute the task
print("Executing task...")
task.run()

# Check if the task is completed
if task.is_completed():
    print("Task completed")
else:
    print("Task not completed")

# Output the result of the task
print(f"Task result: {task.result}")
```

### Scheduled Task Execution

```python
import os
from datetime import datetime, timedelta

from dotenv import load_dotenv

from swarms import Agent, OpenAIChat, Task

# Load the environment variables
load_dotenv()

# Create an agent
agent = Agent(
    llm=OpenAIChat(openai_api_key=os.environ["OPENAI_API_KEY"]),
    max_loops=1,
    dashboard=False,
)

# Create a task scheduled to run 10 seconds from now
task = Task(
    description="Scheduled task example",
    agent=agent,
    schedule_time=datetime.now() + timedelta(seconds=10),
)

# Handle the scheduled task
task.handle_scheduled_task()
```

### Task with Dependencies

```python
import os

from dotenv import load_dotenv

from swarms import Agent, OpenAIChat, Task

# Load the environment variables
load_dotenv()

# Create agents
agent1 = Agent(
    llm=OpenAIChat(openai_api_key=os.environ["OPENAI_API_KEY"]),
    max_loops=1,
    dashboard=False,
)
agent2 = Agent(
    llm=OpenAIChat(openai_api_key=os.environ["OPENAI_API_KEY"]),
    max_loops=1,
    dashboard=False,
)

# Create tasks
task1 = Task(description="First task", agent=agent1)
task2 = Task(description="Second task", agent=agent2)

# Add dependency
task2.add_dependency(task1)

# Execute tasks
print("Executing first task...")
task1.run()

print("Executing second task...")
task2.run()

# Check if tasks are completed
print(f"Task 1 completed: {task1.is_completed()}")
print(f"Task 2 completed: {task2.is_completed()}")
```

### Task Context

```python
import os

from dotenv import load_dotenv

from swarms import Agent, OpenAIChat, Task

# Load the environment variables
load_dotenv()

# Create an agent
agent = Agent(
    llm=OpenAIChat(openai_api_key=os.environ["OPENAI_API_KEY"]),
    max_loops=1,
    dashboard=False,
)

# Create tasks
task1 = Task(description="First task", agent=agent)
task2 = Task(description="Second task", agent=agent)

# Set context for the second task
task2.context(context=[task1])

# Execute tasks
print("Executing first task...")
task1.run()

print("Executing second task...")
task2.run()

# Output the context of the second task
print(f"Task 2 context: {task2.history}")
```
@@ -1,84 +0,0 @@

envs:
  # MODEL_NAME: meta-llama/Meta-Llama-3-70B-Instruct
  MODEL_NAME: meta-llama/Meta-Llama-3-8B
  HF_TOKEN: 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  # Change to your own Hugging Face token, or use --env to pass.
  HF_HUB_ENABLE_HF_TRANSFER: True

# Service configuration
service:
  readiness_probe:
    path: /v1/chat/completions  # Path for the readiness probe
    post_data:
      model: $MODEL_NAME  # Specify the model name
      messages:
        - role: user
          content: Hello! What is your name?  # Specify the initial message
      max_tokens: 1  # Maximum number of tokens
  # readiness_probe: /v1/health  # Simpler alternative probe (would duplicate the key above)

  # Replica Policy
  replica_policy:
    min_replicas: 1  # Minimum number of replicas
    max_replicas: 10  # Maximum number of replicas
    target_qps_per_replica: 2.5  # Target queries per second per replica
    upscale_delay_seconds: 200  # Delay before upscaling replicas
    downscale_delay_seconds: 1200  # Delay before downscaling replicas

resources:
  # accelerators: {L4:8, A10g:8, A10:8, A100:4, A100:8, A100-80GB:2, A100-80GB:4, A100-80GB:8}
  accelerators: {A10g, A10, L40, A40}  # We can use cheaper accelerators for the 8B model.
  # cpus: 32+
  use_spot: True
  disk_size: 100  # Ensure model checkpoints can fit.
  # disk_tier: best
  ports: 8081  # Expose to internet traffic.

setup: |
  # Install vLLM
  conda activate vllm
  if [ $? -ne 0 ]; then
    conda create -n vllm python=3.10 -y
    conda activate vllm
  fi

  pip install vllm==0.4.0.post1

  # Install Gradio for the web UI.
  pip install gradio openai
  pip install flash-attn==2.5.7
  pip install hf_transfer

run: |
  # Serve the LLM
  conda activate vllm
  echo 'Starting vllm api server...'
  # https://github.com/vllm-project/vllm/issues/3098
  export PATH=$PATH:/sbin

  # NOTE: --gpu-memory-utilization 0.95 needed for 4-GPU nodes.
  # Run the server in the background on the exposed port so the Gradio UI
  # below can start and reach it at --model-url.
  python3 -u -m vllm.entrypoints.openai.api_server \
    --port 8081 \
    --model meta-llama/Meta-Llama-3-8B \
    --trust-remote-code --tensor-parallel-size 4 \
    --gpu-memory-utilization 0.95 \
    --max-num-seqs 64 &

  # Serve Gradio

  # echo 'Starting gradio server...'
  # git clone https://github.com/vllm-project/vllm.git || true
  # python vllm/examples/gradio_openai_chatbot_webserver.py \
  #   -m $MODEL_NAME \
  #   --port 8811 \
  #   --model-url http://localhost:8081/v1 \
  #   --stop-token-ids 128009,128001
  #   --share

  echo 'Starting gradio server...'
  git clone https://github.com/vllm-project/vllm.git || true
  python3 vllm/examples/gradio_openai_chatbot_webserver.py \
    -m meta-llama/Meta-Llama-3-8B \
    --port 8811 \
    --model-url http://localhost:8081/v1 \
    --stop-token-ids 128009,128001
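Once a service like this is up (SkyPilot service YAMLs are typically launched with `sky serve up`), the vLLM server speaks the standard OpenAI chat-completions protocol, so it can be queried with the `openai` client installed in the setup step. A minimal sketch, where the placeholder host is an assumption to replace with the actual service endpoint:

```python
from openai import OpenAI

# Placeholder endpoint; substitute the address SkyPilot reports for the service.
client = OpenAI(
    base_url="http://<service-endpoint>:8081/v1",
    api_key="EMPTY",  # vLLM's server does not require a real API key by default.
)

response = client.chat.completions.create(
    model="meta-llama/Meta-Llama-3-8B",
    messages=[{"role": "user", "content": "Hello! What is your name?"}],
    max_tokens=64,
)
print(response.choices[0].message.content)
```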