parent 9ce26a1ba8
commit 6cc31a549a
@@ -0,0 +1,125 @@
from __future__ import annotations

from enum import Enum
from typing import Any, List, Optional

from pydantic import BaseModel, Field


class TaskInput(BaseModel):
    __root__: Any = Field(
        ...,
        description="The input parameters for the task. Any value is allowed.",
        example='{\n"debug": false,\n"mode": "benchmarks"\n}',
    )


class Artifact(BaseModel):
    artifact_id: str = Field(
        ...,
        description="Id of the artifact",
        example="b225e278-8b4c-4f99-a696-8facf19f0e56",
    )
    file_name: str = Field(
        ..., description="Filename of the artifact", example="main.py"
    )
    relative_path: Optional[str] = Field(
        None,
        description="Relative path of the artifact in the agent's workspace",
        example="python/code/"
    )


class ArtifactUpload(BaseModel):
    file: bytes = Field(
        ...,
        description="File to upload"
    )
    relative_path: Optional[str] = Field(
        None,
        description="Relative path of the artifact in the agent's workspace",
        example="python/code/"
    )


class StepInput(BaseModel):
    __root__: Any = Field(
        ...,
        description="Input parameters for the task step. Any value is allowed.",
        example='{\n"file_to_refactor": "models.py"\n}',
    )


class StepOutput(BaseModel):
    __root__: Any = Field(
        ...,
        description="Output that the task step has produced. Any value is allowed.",
        example='{\n"tokens": 7894,\n"estimated_cost": "0,24$"\n}',
    )


class TaskRequestBody(BaseModel):
    input: Optional[str] = Field(
        None,
        description="Input prompt for the task.",
        example="Write the words you receive to the file 'output.txt'.",
    )
    additional_input: Optional[TaskInput] = None


class Task(TaskRequestBody):
    task_id: str = Field(
        ...,
        description="The ID of the task.",
        example="50da533e-3904-4401-8a07-c49adf88b5eb",
    )
    artifacts: List[Artifact] = Field(
        [],
        description="A list of artifacts that the task has produced.",
        example=[
            "7a49f31c-f9c6-4346-a22c-e32bc5af4d8e",
            "ab7b4091-2560-4692-a4fe-d831ea3ca7d6",
        ],
    )


class StepRequestBody(BaseModel):
    input: Optional[str] = Field(
        None, description="Input prompt for the step.", example="Washington"
    )
    additional_input: Optional[StepInput] = None


class Status(Enum):
    created = "created"
    running = "running"
    completed = "completed"


class Step(StepRequestBody):
    task_id: str = Field(
        ...,
        description="The ID of the task this step belongs to.",
        example="50da533e-3904-4401-8a07-c49adf88b5eb",
    )
    step_id: str = Field(
        ...,
        description="The ID of the task step.",
        example="6bb1801a-fd80-45e8-899a-4dd723cc602e",
    )
    name: Optional[str] = Field(
        None, description="The name of the task step.", example="Write to file"
    )
    status: Status = Field(..., description="The status of the task step.")
    output: Optional[str] = Field(
        None,
        description="Output of the task step.",
        example="I am going to use the write_to_file command and write Washington to a file called output.txt <write_to_file('output.txt', 'Washington')",
    )
    additional_output: Optional[StepOutput] = None
    artifacts: List[Artifact] = Field(
        [], description="A list of artifacts that the step has produced."
    )
    is_last: Optional[bool] = Field(
        False, description="Whether this is the last step in the task."
    )
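
# Example usage (illustrative sketch, not prescribed by this diff): construct a Task
# from the models above and serialize it. The UUID below is just a placeholder value.
if __name__ == "__main__":
    example_task = Task(
        task_id="50da533e-3904-4401-8a07-c49adf88b5eb",
        input="Write the words you receive to the file 'output.txt'.",
        artifacts=[],
    )
    print(example_task.json())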
@@ -1,63 +1,249 @@
import logging

import torch
from torch.multiprocessing import set_start_method
from torch.nn.parallel import DistributedDataParallel as DDP
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    GPTQConfig,
)

# set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


class HFLLM:
    """
    A class for running inference on a given model.

    Attributes:
        model_id (str): The ID of the model.
        device (str): The device to run the model on (either 'cuda' or 'cpu').
        max_length (int): The maximum length of the output sequence.
    """

    def __init__(
        self,
        model_id: str,
        device: str = None,
        max_length: int = 20,
        quantize: bool = False,
        quantization_config: dict = None,
        verbose=False,
        # logger=None,
        distributed=False,
        decoding=False,
    ):
        """
        Initialize the inference object.

        Args:
            model_id (str): The ID of the model.
            device (str, optional): The device to run the model on. Defaults to 'cuda' if available.
            max_length (int, optional): The maximum length of the output sequence. Defaults to 20.
            quantize (bool, optional): Whether to use quantization. Defaults to False.
            quantization_config (dict, optional): The configuration for quantization.
            verbose (bool, optional): Whether to print verbose logs. Defaults to False.
            distributed (bool, optional): Whether to wrap the model in DistributedDataParallel. Defaults to False.
            decoding (bool, optional): Whether to stream generated tokens one at a time. Defaults to False.
        """
        self.logger = logging.getLogger(__name__)
        self.device = device if device else ('cuda' if torch.cuda.is_available() else 'cpu')
        self.model_id = model_id
        self.max_length = max_length
        self.verbose = verbose
        self.distributed = distributed
        self.decoding = decoding
        self.model, self.tokenizer = None, None

        if self.distributed:
            assert torch.cuda.device_count() > 1, "You need more than 1 gpu for distributed processing"

        bnb_config = None
        if quantize:
            if not quantization_config:
                quantization_config = {
                    'load_in_4bit': True,
                    'bnb_4bit_use_double_quant': True,
                    'bnb_4bit_quant_type': "nf4",
                    'bnb_4bit_compute_dtype': torch.bfloat16
                }
            bnb_config = BitsAndBytesConfig(**quantization_config)

        # Remember the (possibly defaulted) quantization settings so load_model() can rebuild them.
        self.quantization_config = quantization_config

        try:
            self.tokenizer = AutoTokenizer.from_pretrained(self.model_id)
            self.model = AutoModelForCausalLM.from_pretrained(
                self.model_id,
                quantization_config=bnb_config
            )
            if bnb_config is None:
                # A quantized model is already placed on the GPU by the loader,
                # so only move the weights explicitly when no quantization is used.
                self.model.to(self.device)
        except Exception as e:
            self.logger.error(f"Failed to load the model or the tokenizer: {e}")
            raise

    def load_model(self):
        if not self.model or not self.tokenizer:
            try:
                self.tokenizer = AutoTokenizer.from_pretrained(self.model_id)

                bnb_config = BitsAndBytesConfig(
                    **self.quantization_config
                ) if self.quantization_config else None

                self.model = AutoModelForCausalLM.from_pretrained(
                    self.model_id,
                    quantization_config=bnb_config
                ).to(self.device)

                if self.distributed:
                    self.model = DDP(self.model)
            except Exception as error:
                self.logger.error(f"Failed to load the model or the tokenizer: {error}")
                raise

    def run(
        self,
        prompt_text: str,
        max_length: int = None
    ):
        """
        Generate a response based on the prompt text.

        Args:
        - prompt_text (str): Text to prompt the model.
        - max_length (int): Maximum length of the response.

        Returns:
        - Generated text (str).
        """
        self.load_model()

        max_length = max_length if max_length else self.max_length
        try:
            inputs = self.tokenizer.encode(
                prompt_text,
                return_tensors="pt"
            ).to(self.device)

            if self.decoding:
                # Stream one token at a time, printing each token as it is produced.
                output_sequence = []
                with torch.no_grad():
                    for _ in range(max_length):
                        outputs = self.model.generate(
                            inputs,
                            max_length=inputs.shape[1] + 1,
                            do_sample=True
                        )
                        output_tokens = outputs[0][-1]
                        output_sequence.append(output_tokens.item())

                        # print the token in real time
                        print(self.tokenizer.decode(
                            [output_tokens],
                            skip_special_tokens=True),
                            end="",
                            flush=True
                        )
                        inputs = outputs
            else:
                with torch.no_grad():
                    outputs = self.model.generate(
                        inputs,
                        max_length=max_length,
                        do_sample=True
                    )

            del inputs

            return self.tokenizer.decode(outputs[0], skip_special_tokens=True)
        except Exception as e:
            self.logger.error(f"Failed to generate the text: {e}")
            raise


class GPTQInference:
    def __init__(
        self,
        model_id,
        quantization_config_bits,
        quantization_config_dataset,
        max_length,
        verbose=False,
        distributed=False,
    ):
        self.model_id = model_id
        self.quantization_config_bits = quantization_config_bits
        self.quantization_config_dataset = quantization_config_dataset
        self.max_length = max_length
        self.verbose = verbose
        self.distributed = distributed

        if self.distributed:
            assert torch.cuda.device_count() > 1, "You need more than 1 gpu for distributed processing"
            set_start_method("spawn", force=True)

        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        self.tokenizer = AutoTokenizer.from_pretrained(self.model_id)
        self.quantization_config = GPTQConfig(
            bits=self.quantization_config_bits,
            dataset=quantization_config_dataset,
            tokenizer=self.tokenizer
        )

        # device_map="auto" already dispatches the quantized weights, so no explicit .to(device) call.
        self.model = AutoModelForCausalLM.from_pretrained(
            self.model_id,
            device_map="auto",
            quantization_config=self.quantization_config
        )

        if self.distributed:
            self.model = DDP(
                self.model,
                device_ids=[0],
                output_device=0,
            )

        logger.info(f"Model loaded from {self.model_id} on {self.device}")

    def run(
        self,
        prompt: str,
        max_length: int = 500,
    ):
        max_length = self.max_length or max_length

        try:
            inputs = self.tokenizer.encode(
                prompt,
                return_tensors="pt"
            ).to(self.device)

            with torch.no_grad():
                outputs = self.model.generate(
                    inputs,
                    max_length=max_length,
                    do_sample=True
                )

            return self.tokenizer.decode(
                outputs[0],
                skip_special_tokens=True
            )

        except Exception as error:
            print(f"Error: {error} in inference mode, please change the inference logic or try again")
            raise

    def __del__(self):
        # free up GPU memory when the object is garbage collected
        torch.cuda.empty_cache()
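
# Example usage (illustrative sketch): "gpt2" is only a placeholder model id, not
# something this commit specifies; any causal LM on the Hugging Face Hub would do.
if __name__ == "__main__":
    llm = HFLLM(model_id="gpt2", max_length=50)
    print(llm.run("The meaning of life is"))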
@@ -0,0 +1,204 @@
import enum
import os
from pathlib import Path
import sys
import time
import shutil
import argparse
import asyncio
import re
from typing import List, Optional, Callable, Any

import openai
from openai_function_call import openai_function
from tenacity import retry, stop_after_attempt, wait_random_exponential
import logging

# generate_code is the async variant used by _generate_code below.
from smol_dev.prompts import plan, specify_file_paths, generate_code, generate_code_sync
from smol_dev.utils import generate_folder, write_file

from agent_protocol import Agent, Step, Task


class DeveloperAgent:
    class StepTypes(str, enum.Enum):
        PLAN = "plan"
        SPECIFY_FILE_PATHS = "specify_file_paths"
        GENERATE_CODE = "generate_code"

    @staticmethod
    async def _generate_shared_deps(step: Step) -> Step:
        task = await Agent.db.get_task(step.task_id)
        shared_deps = plan(task.input)
        await Agent.db.create_step(
            step.task_id,
            DeveloperAgent.StepTypes.SPECIFY_FILE_PATHS,
            additional_properties={
                "shared_deps": shared_deps,
            },
        )
        step.output = shared_deps
        return step

    @staticmethod
    async def _generate_file_paths(task: Task, step: Step) -> Step:
        shared_deps = step.additional_properties["shared_deps"]
        file_paths = specify_file_paths(task.input, shared_deps)
        for file_path in file_paths[:-1]:
            await Agent.db.create_step(
                task.task_id,
                f"Generate code for {file_path}",
                additional_properties={
                    "shared_deps": shared_deps,
                    "file_path": file_path,
                },
            )

        await Agent.db.create_step(
            task.task_id,
            f"Generate code for {file_paths[-1]}",
            is_last=True,
            additional_properties={
                "shared_deps": shared_deps,
                "file_path": file_paths[-1],
            },
        )

        step.output = f"File paths are: {str(file_paths)}"
        return step

    @staticmethod
    async def _generate_code(task: Task, step: Step) -> Step:
        shared_deps = step.additional_properties["shared_deps"]
        file_path = step.additional_properties["file_path"]

        code = await generate_code(task.input, shared_deps, file_path)
        step.output = code

        write_file(os.path.join(Agent.get_workspace(task.task_id), file_path), code)
        path = Path("./" + file_path)
        await Agent.db.create_artifact(
            task_id=task.task_id,
            step_id=step.step_id,
            relative_path=str(path.parent),
            file_name=path.name,
        )

        return step

    @staticmethod
    async def task_handler(task: Task) -> None:
        if not task.input:
            raise Exception("No task prompt")
        await Agent.db.create_step(task.task_id, DeveloperAgent.StepTypes.PLAN)

    @staticmethod
    async def step_handler(step: Step):
        task = await Agent.db.get_task(step.task_id)
        if step.name == DeveloperAgent.StepTypes.PLAN:
            return await DeveloperAgent._generate_shared_deps(step)
        elif step.name == DeveloperAgent.StepTypes.SPECIFY_FILE_PATHS:
            return await DeveloperAgent._generate_file_paths(task, step)
        else:
            return await DeveloperAgent._generate_code(task, step)

    @classmethod
    def setup_agent(cls, task_handler, step_handler):
        # Setup agent here
        pass

    @staticmethod
    def generate_folder(folder_path: str):
        if not os.path.exists(folder_path):
            os.makedirs(folder_path)
        else:
            shutil.rmtree(folder_path)
            os.makedirs(folder_path)

    @staticmethod
    def write_file(file_path: str, content: str):
        if not os.path.exists(os.path.dirname(file_path)):
            os.makedirs(os.path.dirname(file_path))
        with open(file_path, "w") as f:
            f.write(content)

    @staticmethod
    def main(prompt, generate_folder_path="generated", debug=False, model: str = 'gpt-4-0613'):
        DeveloperAgent.generate_folder(generate_folder_path)

        if debug:
            print("--------shared_deps---------")
        with open(f"{generate_folder_path}/shared_deps.md", "wb") as f:
            start_time = time.time()

            def stream_handler(chunk):
                f.write(chunk)
                if debug:
                    end_time = time.time()
                    sys.stdout.write("\r \033[93mChars streamed\033[0m: {}. \033[93mChars per second\033[0m: {:.2f}".format(stream_handler.count, stream_handler.count / (end_time - start_time)))
                    sys.stdout.flush()
                stream_handler.count += len(chunk)

            stream_handler.count = 0
            stream_handler.onComplete = lambda x: sys.stdout.write("\033[0m\n")

            shared_deps = plan(prompt, stream_handler, model=model)
        if debug:
            print(shared_deps)
        DeveloperAgent.write_file(f"{generate_folder_path}/shared_deps.md", shared_deps)
        if debug:
            print("--------shared_deps---------")

        if debug:
            print("--------specify_filePaths---------")
        file_paths = specify_file_paths(prompt, shared_deps, model=model)
        if debug:
            print(file_paths)
        if debug:
            print("--------file_paths---------")

        for file_path in file_paths:
            file_path = f"{generate_folder_path}/{file_path}"
            if debug:
                print(f"--------generate_code: {file_path} ---------")

            start_time = time.time()

            def stream_handler(chunk):
                if debug:
                    end_time = time.time()
                    sys.stdout.write("\r \033[93mChars streamed\033[0m: {}. \033[93mChars per second\033[0m: {:.2f}".format(stream_handler.count, stream_handler.count / (end_time - start_time)))
                    sys.stdout.flush()
                stream_handler.count += len(chunk)

            stream_handler.count = 0
            stream_handler.onComplete = lambda x: sys.stdout.write("\033[0m\n")

            code = generate_code_sync(prompt, shared_deps, file_path, stream_handler, model=model)
            if debug:
                print(code)
            if debug:
                print(f"--------generate_code: {file_path} ---------")
            DeveloperAgent.write_file(file_path, code)

        print("--------Smol Dev done!---------")


if __name__ == "__main__":
    prompt = """
    a simple JavaScript/HTML/CSS/Canvas app that is a one-player game of PONG.
    The left paddle is controlled by the player, following where the mouse goes.
    The right paddle is controlled by a simple AI algorithm, which slowly moves the paddle toward the ball at every frame, with some probability of error.
    Make the canvas a 400 x 400 black square and center it in the app.
    Make the paddles 100px long, yellow, and the ball small and red.
    Make sure to render the paddles and name them so they can be controlled in JavaScript.
    Implement the collision detection and scoring as well.
    Every time the ball bounces off a paddle, the ball should move faster.
    It is meant to run in the Chrome browser, so don't use anything that is not supported by Chrome, and don't use the import and export keywords.
    """

    generate_folder_path = "generated"
    debug = False

    if len(sys.argv) == 2:
        prompt = sys.argv[1]
    else:
        parser = argparse.ArgumentParser()
        parser.add_argument("--prompt", type=str, required=True, help="Prompt for the app to be created.")
        parser.add_argument("--generate_folder_path", type=str, default="generated", help="Path of the folder for generated code.")
        parser.add_argument("--debug", type=bool, default=False, help="Enable or disable debug mode.")
        args = parser.parse_args()
        if args.prompt:
            prompt = args.prompt
        generate_folder_path = args.generate_folder_path
        debug = args.debug

    print(prompt)

    DeveloperAgent.main(prompt=prompt, generate_folder_path=generate_folder_path, debug=debug)
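
# Example invocation (illustrative sketch): assumes this module is saved as main.py,
# which the diff does not state.
#
#   python main.py --prompt "a simple TODO list web app in plain HTML/JS" --debug True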