From 60faf9540175f4ddf96f14365848b8d9d2375cdc Mon Sep 17 00:00:00 2001 From: Kye Date: Fri, 14 Jul 2023 19:28:02 -0400 Subject: [PATCH 01/41] clean up Former-commit-id: b8172d8b2422066d355a99d5e7ee7faa390da8c7 --- api/container.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/api/container.py b/api/container.py index 251c697a..9e47f087 100644 --- a/api/container.py +++ b/api/container.py @@ -7,7 +7,9 @@ from fastapi.templating import Jinja2Templates from swarms.agents.utils.AgentManager import AgentManager from swarms.utils.main import BaseHandler, FileHandler, FileType -from swarms.tools.main import CsvToDataframe, ExitConversation, RequestsGet, CodeEditor, Terminal +from swarms.tools.main import ExitConversation, RequestsGet, CodeEditor, Terminal + +from swarms.utils.main import CsvToDataframe from swarms.tools.main import BaseToolSet From 2463ca06446356410d5ea292921df24cc4a50094 Mon Sep 17 00:00:00 2001 From: Kye Date: Fri, 14 Jul 2023 19:44:41 -0400 Subject: [PATCH 02/41] 1st hivemind class Former-commit-id: ae765e277eaf36f8784314ab9bf51b41df117c07 --- swarms/hivemind.py | 47 +++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 46 insertions(+), 1 deletion(-) diff --git a/swarms/hivemind.py b/swarms/hivemind.py index b8033641..96ca8f70 100644 --- a/swarms/hivemind.py +++ b/swarms/hivemind.py @@ -1,2 +1,47 @@ # many boss + workers in unison -#kye gomez jul 13 4:01pm, can scale up the number of swarms working on a probkem with `hivemind(swarms=4, or swarms=auto which will scale the agents depending on the complexity)` \ No newline at end of file +#kye gomez jul 13 4:01pm, can scale up the number of swarms working on a probkem with `hivemind(swarms=4, or swarms=auto which will scale the agents depending on the complexity)` + +import concurrent.futures +import logging + +from swarms.swarms import Swarms +from swarms.tools.agent_tools import * + +logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s') + + + +class HiveMind: + def __init__(self, openai_api_key="", num_swarms=1, max_workers=None): + self.openai_api_key = openai_api_key + self.num_swarms = num_swarms + self.swarms = [Swarms(openai_api_key) for _ in range(num_swarms)] + self.vectorstore = self.initialize_vectorstore() + self.max_workers = max_workers if max_workers else min(32, num_swarms) + + def initialize_vectorstore(self): + try: + embeddings_model = OpenAIEmbeddings(openai_api_key=self.openai_api_key) + embedding_size = 1536 + index = faiss.IndexFlatL2(embedding_size) + return FAISS(embeddings_model.embed_query, index, InMemoryDocstore({}), {}) + except Exception as e: + logging.error(f"Failed to initialize vector store: {e}") + raise + + def run_swarm(self, swarm, objective): + try: + return swarm.run_swarms(objective) + except Exception as e: + logging.error(f"An error occurred in run_swarms: {e}") + + def run_swarms(self, objective, timeout=None): + with concurrent.futures.ThreadPoolExecutor(max_workers=self.max_workers) as executor: + futures = {executor.submit(self.run_swarm, swarm, objective) for swarm in self.swarms} + results = [] + for future in concurrent.futures.as_completed(futures, timeout=timeout): + try: + results.append(future.result()) + except Exception as e: + logging.error(f"An error occurred in a swarm: {e}") + return results \ No newline at end of file From 935a0994775feaa121c06aef9b20cec8586454f8 Mon Sep 17 00:00:00 2001 From: Kye Date: Fri, 14 Jul 2023 21:03:41 -0400 Subject: [PATCH 03/41] clean up with hivmind Former-commit-id: 
3704fd4fb5d668f62973006dd976fa4d88ea5bd6 --- DOCS/HIVEMIND.md | 69 ++++++++++++++++++++++++++++++++++++++++++++++ swarms/hivemind.py | 27 ++++++++++++++++-- 2 files changed, 94 insertions(+), 2 deletions(-) create mode 100644 DOCS/HIVEMIND.md diff --git a/DOCS/HIVEMIND.md b/DOCS/HIVEMIND.md new file mode 100644 index 00000000..09c0c9ab --- /dev/null +++ b/DOCS/HIVEMIND.md @@ -0,0 +1,69 @@ +Guide to Product-Market Fit for HiveMind Class +Risks and Mitigations +Scalability: As the number of swarms increases, the computational resources required will also increase. This could lead to performance issues or high costs. + +Mitigation: Implement efficient resource management and load balancing. Consider using cloud-based solutions that can scale up or down based on demand. + +Concurrency Issues: With multiple swarms running concurrently, there could be issues with data consistency and synchronization. + +Mitigation: Implement robust concurrency control mechanisms. Ensure that the shared vector store is thread-safe. + +Error Propagation: Errors in one swarm could potentially affect other swarms or the entire HiveMind. + +Mitigation: Implement robust error handling and isolation mechanisms. Errors in one swarm should not affect the operation of other swarms. + +Complexity: The HiveMind class is complex and could be difficult to maintain and extend. + +Mitigation: Follow best practices for software design, such as modularity, encapsulation, and separation of concerns. Write comprehensive tests to catch issues early. + +User Experience: If the HiveMind class is not easy to use, it could deter potential users. + +Mitigation: Provide clear documentation and examples. Implement a user-friendly API. Consider providing a high-level interface that abstracts away some of the complexity. + +Mental Models and Design Paradigms +Modularity: Each swarm should be a self-contained unit that can operate independently. This makes the system more flexible and easier to maintain. + +Concurrency: The system should be designed to handle multiple swarms running concurrently. This requires careful consideration of issues such as data consistency and synchronization. + +Fault Tolerance: The system should be able to handle errors gracefully. If one swarm encounters an error, it should not affect the operation of other swarms. + +Scalability: The system should be able to handle an increasing number of swarms without a significant degradation in performance. + +User-Centric Design: The system should be designed with the user in mind. It should be easy to use and provide value to the user. + +Path to Product-Market Fit +Identify Target Users: Determine who would benefit most from using the HiveMind class. This could be developers, data scientists, researchers, or businesses. + +Understand User Needs: Conduct user research to understand the problems that users are trying to solve and how the HiveMind class can help. + +Develop MVP: Develop a minimum viable product (MVP) that demonstrates the value of the HiveMind class. This should be a simple version of the product that solves a core user problem. + +Gather Feedback: After releasing the MVP, gather feedback from users. This could be through surveys, interviews, or user testing. + +Iterate and Improve: Use the feedback to iterate and improve the product. This could involve fixing bugs, adding new features, or improving usability. + +Scale: Once the product has achieved product-market fit, focus on scaling. 
This could involve optimizing the product for performance, expanding to new markets, or developing partnerships. + + + +Here are some features that could be added to the HiveMind class to provide maximum value for users: + +Dynamic Scaling: The ability to automatically scale the number of swarms based on the complexity of the task or the load on the system. This would allow the system to handle a wide range of tasks efficiently. + +Task Prioritization: The ability to prioritize tasks based on their importance or urgency. This would allow more important tasks to be completed first. + +Progress Monitoring: The ability for users to monitor the progress of their tasks. This could include a progress bar, estimated completion time, or real-time updates. + +Error Reporting: Detailed error reports that help users understand what went wrong if a task fails. This could include the error message, the swarm that encountered the error, and suggestions for how to fix the error. + +Task Cancellation: The ability for users to cancel a task that is currently being processed. This could be useful if a user realizes they made a mistake or if a task is taking too long to complete. + +Task Queuing: The ability for users to queue up multiple tasks. This would allow users to submit a batch of tasks and have them processed one after the other. + +Result Formatting: The ability for users to specify how they want the results to be formatted. This could include options for plain text, JSON, XML, or other formats. + +Integration with Other Services: The ability to integrate with other services, such as databases, cloud storage, or machine learning platforms. This would allow users to easily store results, access additional resources, or leverage advanced features. + +Security Features: Features to ensure the security and privacy of user data, such as encryption, access controls, and audit logs. + +User-Friendly API: A well-designed, user-friendly API that makes it easy for users to use the HiveMind class in their own applications. This could include clear documentation, examples, and error messages. 
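For illustration, a minimal usage sketch of the HiveMind class introduced in the patch above (swarms/hivemind.py). The import path is inferred from the file location, and the API key, objective, and timeout values below are placeholders rather than values taken from the commits.

```python
# Hypothetical usage sketch of HiveMind (import path inferred from swarms/hivemind.py).
import os

from swarms.hivemind import HiveMind

# Placeholder credentials and objective -- replace with real values.
api_key = os.getenv("OPENAI_API_KEY", "")
objective = "Draft a research summary on swarm intelligence"

# Three cooperating swarms; max_workers defaults to min(32, num_swarms).
hive = HiveMind(openai_api_key=api_key, num_swarms=3)

# Fan the objective out to every swarm concurrently; results is a list with one
# entry per swarm (an entry is None if that swarm raised an error).
results = hive.run_swarms(objective, timeout=300)
print(results)
```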
diff --git a/swarms/hivemind.py b/swarms/hivemind.py index 96ca8f70..e7283c44 100644 --- a/swarms/hivemind.py +++ b/swarms/hivemind.py @@ -10,7 +10,7 @@ from swarms.tools.agent_tools import * logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s') - +# add typechecking, documentation, and deeper error handling class HiveMind: def __init__(self, openai_api_key="", num_swarms=1, max_workers=None): self.openai_api_key = openai_api_key @@ -44,4 +44,27 @@ class HiveMind: results.append(future.result()) except Exception as e: logging.error(f"An error occurred in a swarm: {e}") - return results \ No newline at end of file + return results + + def add_swarm(self): + self.swarms.append(Swarms(self.openai_api_key)) + + def remove_swarm(self, index): + try: + self.swarms.pop(index) + except IndexError: + logging.error(f"No swarm found at index {index}") + + def get_progress(self): + #this assumes that the swarms class has a get progress method + pass + + def cancel_swarm(self, index): + try: + self.swarms[index].cancel() + except IndexError: + logging.error(f"No swarm found at index {index}") + + def queue_tasks(self, tasks): + for task in tasks: + self.run_swarms(task) From 8a1b298bdbb1269aeac81fe940f9ed5382f000b4 Mon Sep 17 00:00:00 2001 From: Kye Date: Fri, 14 Jul 2023 21:11:38 -0400 Subject: [PATCH 04/41] I've modified the initialize_vectorstore and run_swarms methods to return None if an error occurs. This allows the program to continue running even if one part fails. I've added more detailed logging in the swarm function. This provides the user with more feedback about what the program is doing. Former-commit-id: 7eea0fd6788e747c7dafdbb733de723557811bf6 --- swarms/swarms.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/swarms/swarms.py b/swarms/swarms.py index a19f1c57..b14672aa 100644 --- a/swarms/swarms.py +++ b/swarms/swarms.py @@ -70,7 +70,7 @@ class Swarms: return FAISS(embeddings_model.embed_query, index, InMemoryDocstore({}), {}) except Exception as e: logging.error(f"Failed to initialize vector store: {e}") - raise + return None def initialize_worker_node(self, worker_tools, vectorstore, llm_class=ChatOpenAI, ai_name="Swarm Worker AI Assistant"): """ @@ -156,7 +156,7 @@ class Swarms: return boss_node.execute_task(task) except Exception as e: logging.error(f"An error occurred in run_swarms: {e}") - raise + return None # usage def swarm(api_key="", objective=""): @@ -180,7 +180,12 @@ def swarm(api_key="", objective=""): try: swarms = Swarms(api_key) - return swarms.run_swarms(objective) + result = swarms.run_swarms(objective) + if result is None: + logging.error("Failed to run swarms") + else: + logging.info(f"Successfully ran swarms with result: {result}") + return result except Exception as e: logging.error(f"An error occured in swarm: {e}") - raise \ No newline at end of file + return None \ No newline at end of file From b0820dd66773f560c8dc88bf902e69901dd873a7 Mon Sep 17 00:00:00 2001 From: Kye Date: Fri, 14 Jul 2023 21:25:06 -0400 Subject: [PATCH 05/41] asynchronous swarms Former-commit-id: a0f3c74b057d3df8d3124b501aca3924151dde84 --- swarms/swarms.py | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/swarms/swarms.py b/swarms/swarms.py index b14672aa..547eedf9 100644 --- a/swarms/swarms.py +++ b/swarms/swarms.py @@ -1,9 +1,11 @@ +import logging +import asyncio + from swarms.tools.agent_tools import * from swarms.agents.workers.WorkerNode import WorkerNode, worker_node from 
swarms.agents.boss.BossNode import BossNode from swarms.agents.workers.WorkerUltraNode import WorkerUltra -import logging logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s') class Swarms: @@ -135,7 +137,7 @@ class Swarms: - def run_swarms(self, objective): + async def run_swarms(self, objective): """ Run the swarm with the given objective @@ -171,16 +173,15 @@ def swarm(api_key="", objective=""): The result of the swarm. """ - if not api_key: - logging.error("OpenAIkey is not provided") - raise ValueError("OpenAI API key is not provided") - if not objective: - logging.error("Objective is not provided") - raise ValueError("Objective is required") + if not api_key or not isinstance(api_key, str): + logging.error("Invalid OpenAI key") + raise ValueError("A valid OpenAI API key is required") + if not objective or not isinstance(objective, str): + logging.error("Invalid objective") + raise ValueError("A valid objective is required") try: - swarms = Swarms(api_key) - result = swarms.run_swarms(objective) + result = asyncio.run(swarms.run_swarms(objective)) if result is None: logging.error("Failed to run swarms") else: From 310735b9a04258e8a85abcf31d76a68e8720979a Mon Sep 17 00:00:00 2001 From: Kye Date: Fri, 14 Jul 2023 21:44:24 -0400 Subject: [PATCH 06/41] In this modified code: I've added logging statements to the run_swarms method to provide feedback to the user about the progress of the tasks. I've modified the swarm function to run the run_swarms tasks concurrently using asyncio.wait. This allows the tasks to be run in parallel, which can improve the performance of the code if the tasks are independent and can be run at the same time. Former-commit-id: 24e4dc021f8ef43f6dd77a5cac2cc7b9125369f4 --- swarms/swarms.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/swarms/swarms.py b/swarms/swarms.py index 547eedf9..bec38de5 100644 --- a/swarms/swarms.py +++ b/swarms/swarms.py @@ -155,7 +155,10 @@ class Swarms: boss_node = self.initialize_boss_node(vectorstore, worker_node) task = boss_node.create_task(objective) - return boss_node.execute_task(task) + logging.info(f"Running task: {task}") + result = await boss_node.execute_task(task) + logging.info(f"Completed tasks: {task}") + return result except Exception as e: logging.error(f"An error occurred in run_swarms: {e}") return None @@ -181,12 +184,15 @@ def swarm(api_key="", objective=""): raise ValueError("A valid objective is required") try: swarms = Swarms(api_key) - result = asyncio.run(swarms.run_swarms(objective)) - if result is None: + loop = asyncio.get_event_loop() + tasks = [loop.create_task(swarms.run_swarms(objective))] + completed, pending = loop.run_until_complete(asyncio.wait(tasks)) + results = [t.result() for t in completed] + if not results or any(result is None for result in results): logging.error("Failed to run swarms") else: - logging.info(f"Successfully ran swarms with result: {result}") - return result + logging.info(f"Successfully ran swarms with results: {results}") + return results except Exception as e: logging.error(f"An error occured in swarm: {e}") return None \ No newline at end of file From 3d157e2dfaefa739994e33effdbcb4a8c03158b7 Mon Sep 17 00:00:00 2001 From: Kye Date: Fri, 14 Jul 2023 21:53:59 -0400 Subject: [PATCH 07/41] logging level debug Former-commit-id: 5fe653825b0e77c7c8011a0894ec2137cb203ced --- swarms/agents/boss/BossNode.py | 2 +- swarms/agents/workers/WorkerNode.py | 2 +- swarms/swarms.py | 2 +- 3 files changed, 3 
insertions(+), 3 deletions(-) diff --git a/swarms/agents/boss/BossNode.py b/swarms/agents/boss/BossNode.py index a9a8a844..3ec040f0 100644 --- a/swarms/agents/boss/BossNode.py +++ b/swarms/agents/boss/BossNode.py @@ -2,7 +2,7 @@ from swarms.tools.agent_tools import * from pydantic import ValidationError import logging -logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s') +logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') # ---------- Boss Node ---------- class BossNode: diff --git a/swarms/agents/workers/WorkerNode.py b/swarms/agents/workers/WorkerNode.py index f02199f3..6efb65d7 100644 --- a/swarms/agents/workers/WorkerNode.py +++ b/swarms/agents/workers/WorkerNode.py @@ -11,7 +11,7 @@ from langchain.memory.chat_message_histories import FileChatMessageHistory import logging from pydantic import BaseModel, Extra -logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s') +logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') class WorkerNode: diff --git a/swarms/swarms.py b/swarms/swarms.py index bec38de5..f0be8a51 100644 --- a/swarms/swarms.py +++ b/swarms/swarms.py @@ -6,7 +6,7 @@ from swarms.agents.workers.WorkerNode import WorkerNode, worker_node from swarms.agents.boss.BossNode import BossNode from swarms.agents.workers.WorkerUltraNode import WorkerUltra -logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s') +logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') class Swarms: def __init__(self, openai_api_key=""): From d6e33b652c8df31a1e8d8644c780454132394fe6 Mon Sep 17 00:00:00 2001 From: Kye Date: Fri, 14 Jul 2023 21:57:20 -0400 Subject: [PATCH 08/41] no verbose Former-commit-id: 20e4f49e95a9ecc076e465f38e029dc7129a9d4a --- swarms/tools/main.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/swarms/tools/main.py b/swarms/tools/main.py index 67a32b9b..59a88e1c 100644 --- a/swarms/tools/main.py +++ b/swarms/tools/main.py @@ -1502,7 +1502,7 @@ def pushd(new_dir): @tool def process_csv( - csv_file_path: str, instructions: str, output_path: Optional[str] = None + llm, csv_file_path: str, instructions: str, output_path: Optional[str] = None ) -> str: """Process a CSV by with pandas in a limited REPL.\ Only use this after writing data to disk as a csv file.\ @@ -1513,7 +1513,7 @@ def process_csv( df = pd.read_csv(csv_file_path) except Exception as e: return f"Error: {e}" - agent = create_pandas_dataframe_agent(llm, df, max_iterations=30, verbose=True) + agent = create_pandas_dataframe_agent(llm, df, max_iterations=30, verbose=False) if output_path is not None: instructions += f" Save output to disk at {output_path}" try: From b0c7843e3d56764674f88e6b53a0ce2558f17fe9 Mon Sep 17 00:00:00 2001 From: Kye Date: Fri, 14 Jul 2023 23:25:19 -0400 Subject: [PATCH 09/41] clean up Former-commit-id: 552c056a979f7786f829ef405ae5b1da957e889c --- swarms/swarms.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/swarms/swarms.py b/swarms/swarms.py index f0be8a51..a2628e53 100644 --- a/swarms/swarms.py +++ b/swarms/swarms.py @@ -114,7 +114,7 @@ class Swarms: # Initialize boss node llm = self.initialize_llm(llm_class) - todo_prompt = PromptTemplate.from_template("You are a boss planer in a swarm who is an expert at coming up with a todo list for a given objective and then creating an worker to help you accomplish your task. 
Come up with a todo list for this objective: {objective} and then spawn a worker agent to complete the task for you. Always spawn an worker agent after creating a plan and pass the objective and plan to the worker agent.") + todo_prompt = PromptTemplate.from_template("You are a boss planer in a swarm who is an expert at coming up with a todo list for a given objective and then creating an worker to help you accomplish your task. Rate every task on the importance of it's probability to complete the main objective on a scale from 0 to 1, an integer. Come up with a todo list for this objective: {objective} and then spawn a worker agent to complete the task for you. Always spawn an worker agent after creating a plan and pass the objective and plan to the worker agent.") todo_chain = LLMChain(llm=llm, prompt=todo_prompt) tools = [ From 556e213b1ff0aea81b560a40ee16396b8e070868 Mon Sep 17 00:00:00 2001 From: Kye Date: Sat, 15 Jul 2023 00:01:30 -0400 Subject: [PATCH 10/41] clean up with tasks Former-commit-id: d5463133cae73bee1984e4ed2525a41ca0deb4c5 --- swarms/swarms.py | 3 +++ swarms/utils/task.py | 11 +++++++++++ 2 files changed, 14 insertions(+) create mode 100644 swarms/utils/task.py diff --git a/swarms/swarms.py b/swarms/swarms.py index a2628e53..674254ba 100644 --- a/swarms/swarms.py +++ b/swarms/swarms.py @@ -8,6 +8,9 @@ from swarms.agents.workers.WorkerUltraNode import WorkerUltra logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') + +from swarms.utils.task import Task + class Swarms: def __init__(self, openai_api_key=""): #openai_api_key: the openai key. Default is empty diff --git a/swarms/utils/task.py b/swarms/utils/task.py new file mode 100644 index 00000000..2d7f56e0 --- /dev/null +++ b/swarms/utils/task.py @@ -0,0 +1,11 @@ +import uuid + +class Task: + def __init__(self, objective, priority=0, schedule=None, dependencies=None): + self.id = uuid.uuid4() + self.objective = objective + self.priority = priority + self.schedule = schedule + self.dependencies = dependencies or [] + self.status = "pending" + From 229ef680a2cc9558af289cc4511cdb889acf0d71 Mon Sep 17 00:00:00 2001 From: Kye Date: Sat, 15 Jul 2023 00:14:11 -0400 Subject: [PATCH 11/41] worker ultra check Former-commit-id: efc903654dbfc5e15d6dd919e3d3f6b6365ce672 --- swarms/agents/workers/WorkerUltraNode.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/swarms/agents/workers/WorkerUltraNode.py b/swarms/agents/workers/WorkerUltraNode.py index 274fbf39..17eb9879 100644 --- a/swarms/agents/workers/WorkerUltraNode.py +++ b/swarms/agents/workers/WorkerUltraNode.py @@ -13,7 +13,7 @@ from swarms.utils.main import CsvToDataframe from swarms.tools.main import BaseToolSet from swarms.utils.main import StaticUploader -logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s') +logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') BASE_DIR = Path(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) @@ -107,8 +107,15 @@ class WorkerUltraNode: -def WorkerUltra(objective: str, openai_api_key: str): +def WorkerUltra(objective: str, openai_api_key=None): + # If the openai_api_key parameter is not provided, try to get the API key from an environment variable + if openai_api_key is None: + openai_api_key = os.getenv('OPENAI_API_KEY') + + if not openai_api_key or not isinstance(openai_api_key, str): + logging.error("Invalid OpenAI key") + raise ValueError("A valid OpenAI API key is required") + 
worker_node = WorkerUltraNode(objective, openai_api_key) # Return the result of the execution - return worker_node.result - + return worker_node.result \ No newline at end of file From 14c17784f8249bc2df5c7ddddc1bfbb98fc043b4 Mon Sep 17 00:00:00 2001 From: Kye Date: Sat, 15 Jul 2023 08:49:57 -0400 Subject: [PATCH 12/41] clean up prompting Former-commit-id: 4b40633e7b81e54d251ea0d1594034e80a595ee9 --- swarms/swarms.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/swarms/swarms.py b/swarms/swarms.py index 674254ba..e4c25f69 100644 --- a/swarms/swarms.py +++ b/swarms/swarms.py @@ -121,7 +121,7 @@ class Swarms: todo_chain = LLMChain(llm=llm, prompt=todo_prompt) tools = [ - Tool(name="TODO", func=todo_chain.run, description="useful for when you need to come up with todo lists. Input: an objective to create a todo list for. Output: a todo list for that objective. Please be very clear what the objective is!"), + Tool(name="TODO", func=todo_chain.run, description="useful for when you need to come up with todo lists. Input: an objective to create a todo list for your objective. Note create a todo list then assign a ranking from 0.0 to 1.0 to each task, then sort the tasks based on the tasks most likely to achieve the objective. The Output: a todo list for that objective with rankings for each step from 0.1 Please be very clear what the objective is!"), worker_node ] suffix = """Question: {task}\n{agent_scratchpad}""" From 86ecb03dcf6e024caeb9fcbe8ca3b2e13cc5edc3 Mon Sep 17 00:00:00 2001 From: Kye Date: Sat, 15 Jul 2023 11:04:50 -0400 Subject: [PATCH 13/41] clena up Former-commit-id: 1b1d879a3a53aeceb862ab516971a401d15ed1d4 --- swarms/agents/workers/WorkerUltraNode.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/swarms/agents/workers/WorkerUltraNode.py b/swarms/agents/workers/WorkerUltraNode.py index 17eb9879..c37722a3 100644 --- a/swarms/agents/workers/WorkerUltraNode.py +++ b/swarms/agents/workers/WorkerUltraNode.py @@ -108,7 +108,7 @@ class WorkerUltraNode: def WorkerUltra(objective: str, openai_api_key=None): - # If the openai_api_key parameter is not provided, try to get the API key from an environment variable + # If the openai_api_key parameter is not provided, try to get the API key from an environment variable # if openai_api_key is None: openai_api_key = os.getenv('OPENAI_API_KEY') From e978af7113f1a55748a2b1ea76ac0bcc62c8cd98 Mon Sep 17 00:00:00 2001 From: Kye Date: Sat, 15 Jul 2023 11:58:29 -0400 Subject: [PATCH 14/41] clean ip Former-commit-id: 4c9ed331720e60b98f15e3f17eedf5e005d405d1 --- swarms/hivemind.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/swarms/hivemind.py b/swarms/hivemind.py index e7283c44..c552a226 100644 --- a/swarms/hivemind.py +++ b/swarms/hivemind.py @@ -5,6 +5,8 @@ import concurrent.futures import logging from swarms.swarms import Swarms + +#this needs to change, we need to specify exactly what needs to be imported from swarms.tools.agent_tools import * logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s') From 891cbdac1bfd0d3aa9c1019c85f187b0f9fa50b1 Mon Sep 17 00:00:00 2001 From: Kye Date: Sat, 15 Jul 2023 12:24:03 -0400 Subject: [PATCH 15/41] clean up Former-commit-id: cdbc087436836bf744c8dd0e33128b69745bf4e6 --- DOCS/SWARMSOS.md | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 DOCS/SWARMSOS.md diff --git a/DOCS/SWARMSOS.md b/DOCS/SWARMSOS.md new file mode 100644 index 00000000..11e6b535 --- /dev/null +++ b/DOCS/SWARMSOS.md @@ 
-0,0 +1,42 @@ +Research Proposal: Creating a Swarm of LLM Agents for Operating Systems +Introduction +The goal of this research is to explore the feasibility and requirements of creating a swarm of Language Learning Model (LLM) agents that can autonomously operate the kernel of an operating system. This swarm of AI agents would be capable of performing tasks such as process scheduling, memory management, device management, and system calls, among others. + +Objectives +To investigate the feasibility of using LLM agents to autonomously operate the kernel of an operating system. +To identify the requirements and challenges of implementing such a system. +To develop a prototype system as a proof of concept. +Methodology +Literature Review: Conduct a comprehensive review of existing research on AI in operating systems, swarm intelligence, and LLMs. + +Feasibility Study: Analyze the capabilities of current LLMs and assess whether they can be adapted to operate an OS kernel. + +Requirement Analysis: Identify the hardware, software, and data requirements for implementing a swarm of LLM agents in an OS. + +System Design: Design a prototype system that uses LLM agents to perform basic kernel operations. + +Implementation and Testing: Implement the prototype system and conduct rigorous testing to evaluate its performance. + +Requirements +Hardware: A high-performance computing system would be required to handle the computational load of millions of LLM agents. This system would need to have a powerful CPU, a large amount of RAM, and possibly a GPU for machine learning tasks. + +Software: The system would require an operating system that is compatible with the LLM agents. This could be a popular OS like Linux, which is open-source and widely used in AI research. + +LLM Agents: The LLM agents would need to be trained to perform kernel operations. This would require a large dataset of kernel operations and their outcomes. + +Swarm Intelligence Framework: A framework for swarm intelligence would be needed to manage the LLM agents and coordinate their activities. + +Monitoring and Debugging Tools: Tools for monitoring the performance of the LLM agents and debugging any issues would be essential. + +Potential Challenges +Complexity of Kernel Operations: Kernel operations are complex and low-level. Training LLM agents to perform these operations accurately and efficiently could be challenging. + +Coordination of LLM Agents: Coordinating the activities of millions of LLM agents could be a complex task. The swarm intelligence framework would need to be robust and efficient. + +Security: The system would need to be secure to prevent unauthorized access and ensure the integrity of the kernel operations. + +Performance: The system would need to be able to handle a high load and perform operations quickly to avoid slowing down the OS. + +Conclusion +Creating a swarm of LLM agents for operating systems is a challenging but potentially rewarding endeavor. This research aims to explore the feasibility of this idea and identify the requirements for its implementation. If successful, this could open up new possibilities for AI in operating systems and beyond. 
+ From a3c39236cdb9cc8bbec0763cabfba29ab5ae7f2e Mon Sep 17 00:00:00 2001 From: Kye Date: Sat, 15 Jul 2023 14:02:44 -0400 Subject: [PATCH 16/41] CONTRIBUTING Former-commit-id: 7940281e0ac6a64d8276c4b4d046528bef8b9ff1 --- DOCS/C0NTRIBUTING.md | 83 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 83 insertions(+) create mode 100644 DOCS/C0NTRIBUTING.md diff --git a/DOCS/C0NTRIBUTING.md b/DOCS/C0NTRIBUTING.md new file mode 100644 index 00000000..4cf85e6b --- /dev/null +++ b/DOCS/C0NTRIBUTING.md @@ -0,0 +1,83 @@ +# Contributing to Swarms + +Thank you for your interest in contributing to Swarms! We welcome contributions from the community to help improve usability and readability. By contributing, you can be a part of creating a dynamic and interactive AI system. + +To get started, please follow the guidelines below. + +## Join the Swarms Community + +Join the Swarms community on Discord to connect with other contributors, coordinate work, and receive support. + +- [Join the Swarms Discord Server](https://discord.gg/qUtxnK2NMf) + +## Taking on Tasks + +We have a growing list of tasks and issues that you can contribute to. To get started, follow these steps: + +1. Visit the [Swarms GitHub repository](https://github.com/kyegomez/swarms) and browse through the existing issues. + +2. Find an issue that interests you and make a comment stating that you would like to work on it. Include a brief description of how you plan to solve the problem and any questions you may have. + +3. Once a project coordinator assigns the issue to you, you can start working on it. + +If you come across an issue that is unclear but still interests you, please post in the Discord server mentioned above. Someone from the community will be able to help clarify the issue in more detail. + +We also welcome contributions to documentation, such as updating markdown files, adding docstrings, creating system architecture diagrams, and other related tasks. + +## Submitting Your Work + +To contribute your changes to Swarms, please follow these steps: + +1. Fork the Swarms repository to your GitHub account. You can do this by clicking on the "Fork" button on the repository page. + +2. Clone the forked repository to your local machine using the `git clone` command. + +3. Before making any changes, make sure to sync your forked repository with the original repository to keep it up to date. You can do this by following the instructions [here](https://docs.github.com/en/github/collaborating-with-pull-requests/syncing-a-fork). + +4. Create a new branch for your changes. This branch should have a descriptive name that reflects the task or issue you are working on. + +5. Make your changes in the branch, focusing on a small, focused change that only affects a few files. + +6. Run any necessary formatting or linting tools to ensure that your changes adhere to the project's coding standards. + +7. Once your changes are ready, commit them to your branch with descriptive commit messages. + +8. Push the branch to your forked repository. + +9. Create a pull request (PR) from your branch to the main Swarms repository. Provide a clear and concise description of your changes in the PR. + +10. Request a review from the project maintainers. They will review your changes, provide feedback, and suggest any necessary improvements. + +11. Make any required updates or address any feedback provided during the review process. + +12. Once your changes have been reviewed and approved, they will be merged into the main branch of the Swarms repository. 
+ +13. Congratulations! You have successfully contributed to Swarms. + +Please note that during the review process, you may be asked to make changes or address certain issues. It is important to engage in open and constructive communication with the project maintainers to ensure the quality of your contributions. + +## Developer Setup + +If you are interested in setting up the Swarms development environment, please follow the instructions provided in the [developer setup guide](docs/developer-setup.md). This guide provides an overview of the different tools and technologies used in the project. + +## Optimization Priorities + +To continuously improve Swarms, we prioritize the following design objectives: + +1. **Usability**: Increase the ease of use and user-friendliness of the swarm system to facilitate adoption and interaction with basic input. + +2. **Reliability**: Improve the swarm's ability to obtain the desired output even with basic and un-detailed input. + +3. **Speed**: Reduce the time it takes for the swarm to accomplish tasks by improving the communication layer, critiquing, and self-alignment with meta prompting. + +4. **Scalability**: Ensure that the system is asynchronous, concurrent, and self-healing to support scalability. + +Our goal is to continuously improve Swarms by following this roadmap while also being adaptable to new needs and opportunities as they arise. + +## Join the Agora Community + +Swarms is brought to you by Agora, the open-source AI research organization. Join the Agora community to connect with other researchers and developers working on AI projects. + +- [Join the Agora Discord Server](https://discord.gg/qUtxnK2NMf) + +Thank you for your contributions and for being a part of the Swarms and Agora community! Together, we can advance Humanity through the power of AI. \ No newline at end of file From a457d8b7ece516a2049c9b9bdc6e805c9e915805 Mon Sep 17 00:00:00 2001 From: Kye Date: Sat, 15 Jul 2023 14:03:30 -0400 Subject: [PATCH 17/41] clean up contributing Former-commit-id: bbb5057b52b8bba57c6efbd0d5165be641944ee1 --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 1e9cb8a6..86eafacb 100644 --- a/README.md +++ b/README.md @@ -129,7 +129,7 @@ swarm.run_swarms(objective) --- ## Contribute -We're always looking for contributors to help us improve and expand this project. If you're interested, please check out our [Contributing Guidelines](./CONTRIBUTING.md). +We're always looking for contributors to help us improve and expand this project. If you're interested, please check out our [Contributing Guidelines](DOCS/C0NTRIBUTING.md). Thank you for being a part of our project! From d311bdb65483f8b2db057faeccc5f92b18f25a26 Mon Sep 17 00:00:00 2001 From: Kye Date: Sat, 15 Jul 2023 14:03:55 -0400 Subject: [PATCH 18/41] clean up Former-commit-id: 870a3d7c72e7099aed91fb647889a3033108c462 --- README.md | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/README.md b/README.md index 86eafacb..3844a537 100644 --- a/README.md +++ b/README.md @@ -32,16 +32,8 @@ Welcome to Swarms - the future of AI, where we leverage the power of autonomous --- ## Purpose -Artificial Intelligence has grown at an exponential rate over the past decade. Yet, we are far from fully harnessing its potential. Today's AI operates in isolation, each working separately in their corner. But life doesn't work like that. The world doesn't work like that. Success isn't built in silos; it's built in teams. 
+At Swarms, we're transforming the landscape of AI from siloed AI agents to a unified 'swarm' of intelligence. Through relentless iteration and the power of collective insight from our 1500+ Agora researchers, we're developing a groundbreaking framework for AI collaboration. Our mission is to catalyze a paradigm shift, advancing Humanity with the power of unified autonomous AI agent swarms. -Imagine a world where AI models work in unison. Where they can collaborate, interact, and pool their collective intelligence to achieve more than any single model could. This is the future we envision. But today, we lack a framework for AI to collaborate effectively, to form a true swarm of intelligent agents. - - -This is a difficult problem, one that has eluded solution. It requires sophisticated systems that can allow individual models to not just communicate but also understand each other, pool knowledge and resources, and create collective intelligence. This is the next frontier of AI. - -But here at Swarms, we have a secret sauce. It's not just a technology or a breakthrough invention. It's a way of thinking - the philosophy of rapid iteration. With each cycle, we make massive progress. We experiment, we learn, and we grow. We have developed a pioneering framework that can enable AI models to work together as a swarm, combining their strengths to create richer, more powerful outputs. - -We are uniquely positioned to take on this challenge with 1,500+ devoted researchers in Agora. We have assembled a team of world-class experts, experienced and driven, united by a shared vision. Our commitment to breaking barriers, pushing boundaries, and our belief in the power of collective intelligence makes us the best team to usher in this future to fundamentally advance our species, Humanity. --- From d56e7d8440c7e3e4f344414e4937af7a507641c1 Mon Sep 17 00:00:00 2001 From: Kye Date: Sat, 15 Jul 2023 14:04:26 -0400 Subject: [PATCH 19/41] documentation Former-commit-id: cd194a67e13105fa1c4fb50af3c6ad66799d484c --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 3844a537..6d0b2731 100644 --- a/README.md +++ b/README.md @@ -52,7 +52,7 @@ At Swarms, we're transforming the landscape of AI from siloed AI agents to a uni --- ## Installation -There are 2 methods, one is through `git clone` and the other is by `pip install swarms`. Check out the [document](/DOCUMENTATION.md) for more information on the classes. +There are 2 methods, one is through `git clone` and the other is by `pip install swarms`. Check out the [document](DOCS/DOCUMENTATION.md) for more information on the classes. --- # Method1 From 801980bacca7963c7d0e16d5eb531a64d3dcf0f1 Mon Sep 17 00:00:00 2001 From: Kye Date: Sun, 16 Jul 2023 13:19:04 -0400 Subject: [PATCH 20/41] boss node fix Former-commit-id: c25424d919a589f2dee9ccbf76fa73e117279681 --- README.md | 2 +- swarms/agents/boss/BossNode.py | 37 +++++++++++++++++++++++++++++++++- 2 files changed, 37 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 6d0b2731..ca6d6918 100644 --- a/README.md +++ b/README.md @@ -52,7 +52,7 @@ At Swarms, we're transforming the landscape of AI from siloed AI agents to a uni --- ## Installation -There are 2 methods, one is through `git clone` and the other is by `pip install swarms`. Check out the [document](DOCS/DOCUMENTATION.md) for more information on the classes. +There are 2 methods, one is through `git clone` and the other is by `pip install swarms`. 
Check out the [DOCUMENTATION](DOCS/DOCUMENTATION.md) for more information on the classes. --- # Method1 diff --git a/swarms/agents/boss/BossNode.py b/swarms/agents/boss/BossNode.py index 3ec040f0..92c877fc 100644 --- a/swarms/agents/boss/BossNode.py +++ b/swarms/agents/boss/BossNode.py @@ -33,6 +33,20 @@ class BossNode: logging.error(f"Unexpected Error while initializing BabyAGI: {e}") raise + def initialize_vectorstore(self): + """ + Init vector store + """ + try: + embeddings_model = OpenAIEmbeddings(openai_api_key=self.openai_api_key) + embedding_size = 1536 + index = faiss.IndexFlatL2(embedding_size) + return FAISS(embeddings_model.embed_query, index, InMemoryDocstore({}), {}) + except Exception as e: + logging.error(f"Failed to initialize vector store: {e}") + return None + + def create_task(self, objective): """ Creates a task with the given objective. @@ -53,4 +67,25 @@ class BossNode: self.baby_agi(task) except Exception as e: logging.error(f"Error while executing task: {e}") - raise \ No newline at end of file + raise + + + +def boss_node(objective, api_key=None, llm=None, vectorstore=None, agent_executor=None, max_iterations=10): +#wrapper function to initialize and use Bossnode with given parameters + #api keys can be passed as an argument or set as an env + api_key = api_key or os.getenv("API_KEY") + + if not api_key: + raise ValueError("API key must be providef either as argument as an env named 'api_key'") + + if not llm: + raise ValueError("Language model must be provided") + if not vectorstore: + raise ValueError("Vectorstore must be provided") + if not agent_executor: + raise ValueError('Agent Executor must be provided') + + boss = BossNode(llm, vectorstore, agent_executor, max_iterations) + task = boss.create_task(objective) + boss.execute_task(task) From e2d3188ba8fbb1e7a4a986ae3081998693787678 Mon Sep 17 00:00:00 2001 From: Kye Date: Sun, 16 Jul 2023 14:16:03 -0400 Subject: [PATCH 21/41] clean up initialize Former-commit-id: 82b5a4103f89ecacc15397ebe4900d20d080d2c9 --- swarms/agents/boss/BossNode.py | 57 ++++++++++++++++++++++++++-------- 1 file changed, 44 insertions(+), 13 deletions(-) diff --git a/swarms/agents/boss/BossNode.py b/swarms/agents/boss/BossNode.py index 92c877fc..5b85b05e 100644 --- a/swarms/agents/boss/BossNode.py +++ b/swarms/agents/boss/BossNode.py @@ -1,7 +1,8 @@ -from swarms.tools.agent_tools import * from pydantic import ValidationError import logging +from swarms.tools.agent_tools import * + logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') # ---------- Boss Node ---------- @@ -45,6 +46,21 @@ class BossNode: except Exception as e: logging.error(f"Failed to initialize vector store: {e}") return None + + def initialize_llm(self, llm_class, temperature=0.5): + """ + Init LLM + + Params: + llm_class(class): The Language model class. Default is OpenAI. + temperature (float): The Temperature for the language model. 
Default is 0.5 + """ + try: + # Initialize language model + return llm_class(openai_api_key=self.openai_api_key, temperature=temperature) + except Exception as e: + logging.error(f"Failed to initialize language model: {e}") + def create_task(self, objective): @@ -71,21 +87,36 @@ class BossNode: -def boss_node(objective, api_key=None, llm=None, vectorstore=None, agent_executor=None, max_iterations=10): -#wrapper function to initialize and use Bossnode with given parameters - #api keys can be passed as an argument or set as an env - api_key = api_key or os.getenv("API_KEY") +# from swarms import BossNode, OpenAI, LLMChain, Tool, ZeroShotAgent, AgentExecutor, PromptTemplate + +def boss_node(objective, api_key=None, vectorstore=None, worker_node=None, llm_class=OpenAI, max_iterations=5, verbose=False): + """ + Wrapper function to initialize and use BossNode with given parameters. + API key can be passed as argument or set as an environment variable. + """ + api_key = api_key or os.getenv('API_KEY') if not api_key: - raise ValueError("API key must be providef either as argument as an env named 'api_key'") - - if not llm: - raise ValueError("Language model must be provided") - if not vectorstore: - raise ValueError("Vectorstore must be provided") - if not agent_executor: - raise ValueError('Agent Executor must be provided') + raise ValueError("API key must be provided either as argument or as an environment variable named 'API_KEY'.") + + llm = BossNode.initialize_llm(llm_class) # This function should be defined elsewhere + + todo_prompt = PromptTemplate.from_template("You are a boss planer in a swarm who is an expert at coming up with a todo list for a given objective and then creating a worker to help you accomplish your task. Rate every task on the importance of it's probability to complete the main objective on a scale from 0 to 1, an integer. Come up with a todo list for this objective: {objective} and then spawn a worker agent to complete the task for you. Always spawn a worker agent after creating a plan and pass the objective and plan to the worker agent.") + todo_chain = LLMChain(llm=llm, prompt=todo_prompt) + + tools = [ + Tool(name="TODO", func=todo_chain.run, description="useful for when you need to come up with todo lists. Input: an objective to create a todo list for your objective. Note create a todo list then assign a ranking from 0.0 to 1.0 to each task, then sort the tasks based on the tasks most likely to achieve the objective. The Output: a todo list for that objective with rankings for each step from 0.1 Please be very clear what the objective is!"), + worker_node + ] + suffix = """Question: {task}\n{agent_scratchpad}""" + prefix = """You are an Boss in a swarm who performs one task based on the following objective: {objective}. 
Take into account these previously completed tasks: {context}.\n """ + prompt = ZeroShotAgent.create_prompt(tools, prefix=prefix, suffix=suffix, input_variables=["objective", "task", "context", "agent_scratchpad"],) + llm_chain = LLMChain(llm=llm, prompt=prompt) + agent = ZeroShotAgent(llm_chain=llm_chain, allowed_tools=[tool.name for tool in tools]) + + agent_executor = AgentExecutor.from_agent_and_tools(agent=agent, tools=tools, verbose=verbose) + boss = BossNode(llm, vectorstore, agent_executor, max_iterations) task = boss.create_task(objective) boss.execute_task(task) From 241b99ec27b0a134e03082bd34fe65a9bd9d47c6 Mon Sep 17 00:00:00 2001 From: Kye Date: Sun, 16 Jul 2023 14:29:21 -0400 Subject: [PATCH 22/41] clean up Former-commit-id: ffcce48cbad94f580417d03ec3bd75b5cb0c05c7 --- swarms/agents/boss/BossNode.py | 6 +++--- swarms/swarms.py | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/swarms/agents/boss/BossNode.py b/swarms/agents/boss/BossNode.py index 5b85b05e..8ebf7e7e 100644 --- a/swarms/agents/boss/BossNode.py +++ b/swarms/agents/boss/BossNode.py @@ -6,7 +6,7 @@ from swarms.tools.agent_tools import * logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') # ---------- Boss Node ---------- -class BossNode: +class BossNodeInitializer: """ The BossNode class is responsible for creating and executing tasks using the BabyAGI model. It takes a language model (llm), a vectorstore for memory, an agent_executor for task execution, and a maximum number of iterations for the BabyAGI model. @@ -72,7 +72,7 @@ class BossNode: raise ValueError("Objective cannot be empty.") return {"objective": objective} - def execute_task(self, task): + def run(self, task): """ Executes a task using the BabyAGI model. """ @@ -89,7 +89,7 @@ class BossNode: # from swarms import BossNode, OpenAI, LLMChain, Tool, ZeroShotAgent, AgentExecutor, PromptTemplate -def boss_node(objective, api_key=None, vectorstore=None, worker_node=None, llm_class=OpenAI, max_iterations=5, verbose=False): +def BossNode(objective, api_key=None, vectorstore=None, worker_node=None, llm_class=OpenAI, max_iterations=5, verbose=False): """ Wrapper function to initialize and use BossNode with given parameters. API key can be passed as argument or set as an environment variable. 
diff --git a/swarms/swarms.py b/swarms/swarms.py index e4c25f69..eea1fe02 100644 --- a/swarms/swarms.py +++ b/swarms/swarms.py @@ -3,7 +3,7 @@ import asyncio from swarms.tools.agent_tools import * from swarms.agents.workers.WorkerNode import WorkerNode, worker_node -from swarms.agents.boss.BossNode import BossNode +from swarms.agents.boss.BossNode import BossNodeInitializer as BossNode from swarms.agents.workers.WorkerUltraNode import WorkerUltra logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') @@ -159,7 +159,7 @@ class Swarms: task = boss_node.create_task(objective) logging.info(f"Running task: {task}") - result = await boss_node.execute_task(task) + result = await boss_node.run(task) logging.info(f"Completed tasks: {task}") return result except Exception as e: From 95472ec6872532f105c2fb0cfcc597c94ac22df9 Mon Sep 17 00:00:00 2001 From: Kye Date: Sun, 16 Jul 2023 15:13:17 -0400 Subject: [PATCH 23/41] clean up Former-commit-id: f4d0e8e4b4bb2047e1fd8986b6bc0c35a4a61de9 --- swarms/agents/boss/BossNode.py | 2 +- swarms/agents/workers/WorkerUltraNode.py | 41 +++++++++++++++++------- 2 files changed, 30 insertions(+), 13 deletions(-) diff --git a/swarms/agents/boss/BossNode.py b/swarms/agents/boss/BossNode.py index 8ebf7e7e..d238e276 100644 --- a/swarms/agents/boss/BossNode.py +++ b/swarms/agents/boss/BossNode.py @@ -119,4 +119,4 @@ def BossNode(objective, api_key=None, vectorstore=None, worker_node=None, llm_cl boss = BossNode(llm, vectorstore, agent_executor, max_iterations) task = boss.create_task(objective) - boss.execute_task(task) + boss.run(task) diff --git a/swarms/agents/workers/WorkerUltraNode.py b/swarms/agents/workers/WorkerUltraNode.py index c37722a3..0f88c82d 100644 --- a/swarms/agents/workers/WorkerUltraNode.py +++ b/swarms/agents/workers/WorkerUltraNode.py @@ -107,15 +107,32 @@ class WorkerUltraNode: -def WorkerUltra(objective: str, openai_api_key=None): - # If the openai_api_key parameter is not provided, try to get the API key from an environment variable # - if openai_api_key is None: - openai_api_key = os.getenv('OPENAI_API_KEY') - - if not openai_api_key or not isinstance(openai_api_key, str): - logging.error("Invalid OpenAI key") - raise ValueError("A valid OpenAI API key is required") - - worker_node = WorkerUltraNode(objective, openai_api_key) - # Return the result of the execution - return worker_node.result \ No newline at end of file +# def WorkerUltra(objective: str, openai_api_key=None): +# # If the openai_api_key parameter is not provided, try to get the API key from an environment variable # +# if openai_api_key is None: +# openai_api_key = os.getenv('OPENAI_API_KEY') + +# if not openai_api_key or not isinstance(openai_api_key, str): +# logging.error("Invalid OpenAI key") +# raise ValueError("A valid OpenAI API key is required") + +# worker_node = WorkerUltraNode(objective, openai_api_key) +# # Return the result of the execution +# return worker_node.result + + +class WorkerUltra: + def __init__(self, objective, api_key=None): + self.api_key = api_key or os.getenv('OPENAI_API_KEY') + if not self.api_key: + raise ValueError("API key must be provided either as argument or as an environment variable named 'OPENAI_API_KEY'.") + self.worker_node = WorkerUltraNode(objective, self.api_key) + + def execute(self): + try: + return self.worker_node.execute_task() + except Exception as e: + logging.error(f"Error while executing: {str(e)}") + raise e + + From 0fb3915273be62e391e01e0ea8f6717ae2dcd811 Mon Sep 17 00:00:00 2001 From: Kye 
Date: Sun, 16 Jul 2023 15:21:53 -0400 Subject: [PATCH 24/41] init worker ultra node Former-commit-id: 820596b4f65619c432d1eb63ee34efd9e02b38a5 --- swarms/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/swarms/__init__.py b/swarms/__init__.py index 3ae053b9..8edcbba4 100644 --- a/swarms/__init__.py +++ b/swarms/__init__.py @@ -1,4 +1,4 @@ # from swarms import Swarms, swarm from swarms.swarms import Swarms, swarm from swarms.agents import worker_node -from swarms.agents.workers.WorkerUltraNode import WorkerUltra \ No newline at end of file +from swarms.agents.workers.WorkerUltraNode import WorkerUltraNode, WorkerUltra \ No newline at end of file From 154ad85c6a18e15e40085ecc5a0d60dfc5db4007 Mon Sep 17 00:00:00 2001 From: Kye Date: Sun, 16 Jul 2023 15:34:29 -0400 Subject: [PATCH 25/41] clean up Former-commit-id: 99c4f48eb29f24f885fa7360f1a11bca8b27f6b0 --- swarms/agents/workers/WorkerUltraNode.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/swarms/agents/workers/WorkerUltraNode.py b/swarms/agents/workers/WorkerUltraNode.py index 0f88c82d..44a9f722 100644 --- a/swarms/agents/workers/WorkerUltraNode.py +++ b/swarms/agents/workers/WorkerUltraNode.py @@ -30,7 +30,7 @@ except Exception as e: class WorkerUltraNode: def __init__(self, objective: str, openai_api_key: str): - self.openai_api_key = openai_api_key + self.openai_api_key = openai_api_key or os.getenv("OPENAI_API_KEY") if not isinstance(objective, str): raise TypeError("Objective must be a string") From 6441638c646758c42c2c2dbd5e013b016b7965aa Mon Sep 17 00:00:00 2001 From: Kye Date: Sun, 16 Jul 2023 15:42:43 -0400 Subject: [PATCH 26/41] clean up Former-commit-id: daa0cdee5b61e76964e6d3ccc5ee41102cf0c8ca --- worker_ultra.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/worker_ultra.py b/worker_ultra.py index 2c9fe09c..a6981b28 100644 --- a/worker_ultra.py +++ b/worker_ultra.py @@ -15,7 +15,7 @@ I want it to have neumorphism-style. Serve it on port 4500. 
""" -node = WorkerUltra(objective, openai_api_key=api_key) +node = WorkerUltra(objective, api_key=api_key) result = node.execute() \ No newline at end of file From 8500e0b3d89990a2c04db0cd75eed62882e67a65 Mon Sep 17 00:00:00 2001 From: Kye Date: Sun, 16 Jul 2023 15:44:46 -0400 Subject: [PATCH 27/41] clean up Former-commit-id: 30898fe9967fe0228fd78d267d18afd6a153ebe9 --- swarms/agents/workers/WorkerUltraNode.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/swarms/agents/workers/WorkerUltraNode.py b/swarms/agents/workers/WorkerUltraNode.py index 44a9f722..90179b97 100644 --- a/swarms/agents/workers/WorkerUltraNode.py +++ b/swarms/agents/workers/WorkerUltraNode.py @@ -30,7 +30,7 @@ except Exception as e: class WorkerUltraNode: def __init__(self, objective: str, openai_api_key: str): - self.openai_api_key = openai_api_key or os.getenv("OPENAI_API_KEY") + self.openai_api_key = openai_api_key if not isinstance(objective, str): raise TypeError("Objective must be a string") From 29df2d174a62b8c1cd73645cf816f119fc1ace19 Mon Sep 17 00:00:00 2001 From: Kye Date: Sun, 16 Jul 2023 15:49:18 -0400 Subject: [PATCH 28/41] clean up with worker ultra node Former-commit-id: 3cfa2a869a6af22557466932488e36039ffdc653 --- swarms/agents/workers/WorkerUltraNode.py | 31 ++---------------------- 1 file changed, 2 insertions(+), 29 deletions(-) diff --git a/swarms/agents/workers/WorkerUltraNode.py b/swarms/agents/workers/WorkerUltraNode.py index 90179b97..10eb0459 100644 --- a/swarms/agents/workers/WorkerUltraNode.py +++ b/swarms/agents/workers/WorkerUltraNode.py @@ -6,16 +6,14 @@ from typing import Dict, List from swarms.agents.utils.AgentManager import AgentManager from swarms.utils.main import BaseHandler, FileHandler, FileType - from swarms.tools.main import ExitConversation, RequestsGet, CodeEditor, Terminal from swarms.utils.main import CsvToDataframe - from swarms.tools.main import BaseToolSet from swarms.utils.main import StaticUploader logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') -BASE_DIR = Path(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +BASE_DIR = Path(__file__).resolve().parent.parent # Check if "PLAYGROUND_DIR" environment variable exists, if not, set a default value playground = os.environ.get("PLAYGROUND_DIR", './playground') @@ -27,7 +25,7 @@ try: os.chdir(BASE_DIR / playground) except Exception as e: logging.error(f"Failed to change directory: {e}") - + class WorkerUltraNode: def __init__(self, objective: str, openai_api_key: str): self.openai_api_key = openai_api_key @@ -62,15 +60,11 @@ class WorkerUltraNode: handlers[FileType.IMAGE] = ImageCaptioning("cuda") try: - - self.agent_manager = AgentManager.create(toolsets=toolsets) self.file_handler = FileHandler(handlers=handlers, path=BASE_DIR) self.uploader = StaticUploader.from_settings( path=BASE_DIR / "static", endpoint="static" ) - - self.session = self.agent_manager.create_executor(objective, self.openai_api_key) except Exception as e: @@ -95,32 +89,13 @@ class WorkerUltraNode: "files": [self.uploader.upload(file) for file in files], } - def execute(self): try: - - # The prompt is not needed here either return self.execute_task() except Exception as e: logging.error(f"Error while executing: {str(e)}") raise e - - -# def WorkerUltra(objective: str, openai_api_key=None): -# # If the openai_api_key parameter is not provided, try to get the API key from an environment variable # -# if openai_api_key is None: -# openai_api_key = os.getenv('OPENAI_API_KEY') - -# if not 
openai_api_key or not isinstance(openai_api_key, str): -# logging.error("Invalid OpenAI key") -# raise ValueError("A valid OpenAI API key is required") - -# worker_node = WorkerUltraNode(objective, openai_api_key) -# # Return the result of the execution -# return worker_node.result - - class WorkerUltra: def __init__(self, objective, api_key=None): self.api_key = api_key or os.getenv('OPENAI_API_KEY') @@ -134,5 +109,3 @@ class WorkerUltra: except Exception as e: logging.error(f"Error while executing: {str(e)}") raise e - - From df6d3c68f63cd5c8e2094fb01dfbd0dd0eb37e8e Mon Sep 17 00:00:00 2001 From: Kye Date: Sun, 16 Jul 2023 15:52:00 -0400 Subject: [PATCH 29/41] cleanup Former-commit-id: ac44a278d917e4c05b8800b46904a89d230e0eaf --- swarms/agents/workers/WorkerUltraNode.py | 2 +- worker_ultra.py | 8 ++++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/swarms/agents/workers/WorkerUltraNode.py b/swarms/agents/workers/WorkerUltraNode.py index 10eb0459..fd90e924 100644 --- a/swarms/agents/workers/WorkerUltraNode.py +++ b/swarms/agents/workers/WorkerUltraNode.py @@ -108,4 +108,4 @@ class WorkerUltra: return self.worker_node.execute_task() except Exception as e: logging.error(f"Error while executing: {str(e)}") - raise e + raise e \ No newline at end of file diff --git a/worker_ultra.py b/worker_ultra.py index a6981b28..d4ad3cf0 100644 --- a/worker_ultra.py +++ b/worker_ultra.py @@ -15,7 +15,11 @@ I want it to have neumorphism-style. Serve it on port 4500. """ -node = WorkerUltra(objective, api_key=api_key) +# Create an instance of WorkerUltra +worker = WorkerUltra(objective, api_key) +# Execute the task +result = worker.execute() -result = node.execute() \ No newline at end of file +# Print the result +print(result) \ No newline at end of file From c3828eb73f7f949e04838fcbf1b3fbeaf4f9d987 Mon Sep 17 00:00:00 2001 From: Kye Date: Sun, 16 Jul 2023 15:59:29 -0400 Subject: [PATCH 30/41] clena up Former-commit-id: b02354b05aabeac948944a0d09762d6de93c5ae4 --- swarms/agents/utils/AgentManager.py | 24 ++++++------------------ 1 file changed, 6 insertions(+), 18 deletions(-) diff --git a/swarms/agents/utils/AgentManager.py b/swarms/agents/utils/AgentManager.py index e392cfee..caa262a2 100644 --- a/swarms/agents/utils/AgentManager.py +++ b/swarms/agents/utils/AgentManager.py @@ -41,7 +41,6 @@ class AgentManager: builder = AgentBuilder(self.toolsets) builder.build_parser() - callbacks = [] eval_callback = EVALCallbackHandler() eval_callback.set_parser(builder.get_parser()) @@ -52,16 +51,16 @@ class AgentManager: execution_callback.set_parser(builder.get_parser()) callbacks.append(execution_callback) - #llm init callback_manager = CallbackManager(callbacks) builder.build_llm(callback_manager, openai_api_key) + if builder.llm is None: raise ValueError('LLM not created') builder.build_global_tools() - #agent init agent = builder.get_agent() + if not agent: raise ValueError("Agent not created") @@ -77,30 +76,19 @@ class AgentManager: for tool in tools: tool.callback_manager = callback_manager - # # Ensure the 'agent' key is present in the values dictionary - # values = {'agent': agent, 'tools': tools} - - # executor = AgentExecutor.from_agent_and_tools( - # agent=agent, - # tools=tools, - # memory=memory, - # callback_manager=callback_manager, - # verbose=True, - # ) - - # Prepare the arguments for the executor executor_args = { 'agent': agent, 'tools': tools, 'memory': memory, 'callback_manager': callback_manager, - 'verbose': True # Or any other value based on your requirement + 'verbose': 
True } executor = AgentExecutor.from_agent_and_tools(**executor_args) - if 'agent' not in executor.__dict__: - executor.__dict__['agent'] = agent + if not hasattr(executor, 'agent'): + raise ValueError("Executor does not have an 'agent' attribute") + self.executors[session] = executor return executor From 3832e249a278a5f78acd3a5d3c0400230c11e078 Mon Sep 17 00:00:00 2001 From: Kye Date: Sun, 16 Jul 2023 16:00:47 -0400 Subject: [PATCH 31/41] agent manager clean up Former-commit-id: ceef4c9a39591b6e2a178c9a857712ddb9f84d4b --- swarms/agents/utils/AgentManager.py | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/swarms/agents/utils/AgentManager.py b/swarms/agents/utils/AgentManager.py index caa262a2..c77df285 100644 --- a/swarms/agents/utils/AgentManager.py +++ b/swarms/agents/utils/AgentManager.py @@ -41,6 +41,7 @@ class AgentManager: builder = AgentBuilder(self.toolsets) builder.build_parser() + callbacks = [] eval_callback = EVALCallbackHandler() eval_callback.set_parser(builder.get_parser()) @@ -51,16 +52,16 @@ class AgentManager: execution_callback.set_parser(builder.get_parser()) callbacks.append(execution_callback) + #llm init callback_manager = CallbackManager(callbacks) builder.build_llm(callback_manager, openai_api_key) - if builder.llm is None: raise ValueError('LLM not created') builder.build_global_tools() + #agent init agent = builder.get_agent() - if not agent: raise ValueError("Agent not created") @@ -76,19 +77,19 @@ class AgentManager: for tool in tools: tool.callback_manager = callback_manager - executor_args = { - 'agent': agent, - 'tools': tools, - 'memory': memory, - 'callback_manager': callback_manager, - 'verbose': True - } - - executor = AgentExecutor.from_agent_and_tools(**executor_args) + # Ensure the 'agent' key is present in the values dictionary + # values = {'agent': agent, 'tools': tools} - if not hasattr(executor, 'agent'): - raise ValueError("Executor does not have an 'agent' attribute") + executor = AgentExecutor.from_agent_and_tools( + agent=agent, + tools=tools, + memory=memory, + callback_manager=callback_manager, + verbose=True, + ) + if 'agent' not in executor.__dict__: + executor.__dict__['agent'] = agent self.executors[session] = executor return executor From ab010e32ef6ecd9b818184164c347b435c242ec9 Mon Sep 17 00:00:00 2001 From: Kye Date: Sun, 16 Jul 2023 16:21:03 -0400 Subject: [PATCH 32/41] clean up Former-commit-id: c41e5edd5a3919381e84e15bdc6d21077682954e --- swarms/agents/utils/AgentBuilder.py | 10 ++++----- swarms/agents/utils/AgentManager.py | 32 +++++++++++------------------ 2 files changed, 17 insertions(+), 25 deletions(-) diff --git a/swarms/agents/utils/AgentBuilder.py b/swarms/agents/utils/AgentBuilder.py index fb8ef625..bbfbb6ae 100644 --- a/swarms/agents/utils/AgentBuilder.py +++ b/swarms/agents/utils/AgentBuilder.py @@ -15,7 +15,8 @@ from langchain.chat_models import ChatOpenAI from .EvalOutputParser import EvalOutputParser -class AgentBuilder: + +class AgentSetup: def __init__(self, toolsets: list[BaseToolSet] = [], openai_api_key: str = None, serpapi_api_key: str = None, bing_search_url: str = None, bing_subscription_key: str = None): self.llm: BaseChatModel = None self.parser: BaseOutputParser = None @@ -28,7 +29,7 @@ class AgentBuilder: if not self.openai_api_key: raise ValueError("OpenAI key is missing, it should either be set as an environment variable or passed as a parameter") - def build_llm(self, callback_manager: BaseCallbackManager = None, openai_api_key: str = None): + def 
setup_llm(self, callback_manager: BaseCallbackManager = None, openai_api_key: str = None): if openai_api_key is None: openai_api_key = os.getenv('OPENAI_API_KEY') if openai_api_key is None: @@ -36,16 +37,15 @@ class AgentBuilder: self.llm = ChatOpenAI(openai_api_key=openai_api_key, temperature=0.5, callback_manager=callback_manager, verbose=True) - def build_parser(self): + def setup_parser(self): self.parser = EvalOutputParser() - def build_global_tools(self): + def setup_global_tools(self): if self.llm is None: raise ValueError("LLM must be initialized before tools") toolnames = ["wikipedia"] - if self.serpapi_api_key: toolnames.append("serpapi") diff --git a/swarms/agents/utils/AgentManager.py b/swarms/agents/utils/AgentManager.py index c77df285..47fd0c51 100644 --- a/swarms/agents/utils/AgentManager.py +++ b/swarms/agents/utils/AgentManager.py @@ -1,22 +1,20 @@ -from typing import Dict, Optional +import os import logging - +from typing import Dict, Optional from celery import Task - from langchain.agents.agent import AgentExecutor from langchain.callbacks.manager import CallbackManager from langchain.chains.conversation.memory import ConversationBufferMemory from langchain.memory.chat_memory import BaseChatMemory - from swarms.tools.main import BaseToolSet, ToolsFactory -from .AgentBuilder import AgentBuilder -from .Calback import EVALCallbackHandler, ExecutionTracingCallbackHandler +from swarms.prompts.prompts import EVAL_PREFIX, EVAL_SUFFIX +from swarms.agents.utils.AgentBuilder import AgentSetup +from swarms.agents.utils.EvalOutputParser import EVALCallbackHandler, ExecutionTracingCallbackHandler callback_manager_instance = CallbackManager(EVALCallbackHandler()) - -class AgentManager: +class AgentCreator: def __init__(self, toolsets: list[BaseToolSet] = []): if not isinstance(toolsets, list): raise TypeError("Toolsets must be a list") @@ -38,9 +36,8 @@ class AgentManager: def create_executor(self, session: str, execution: Optional[Task] = None, openai_api_key: str = None) -> AgentExecutor: try: - builder = AgentBuilder(self.toolsets) - builder.build_parser() - + builder = AgentSetup(self.toolsets) + builder.setup_parser() callbacks = [] eval_callback = EVALCallbackHandler() @@ -52,15 +49,13 @@ class AgentManager: execution_callback.set_parser(builder.get_parser()) callbacks.append(execution_callback) - #llm init callback_manager = CallbackManager(callbacks) - builder.build_llm(callback_manager, openai_api_key) + builder.setup_llm(callback_manager, openai_api_key) if builder.llm is None: raise ValueError('LLM not created') - builder.build_global_tools() + builder.setup_global_tools() - #agent init agent = builder.get_agent() if not agent: raise ValueError("Agent not created") @@ -77,9 +72,6 @@ class AgentManager: for tool in tools: tool.callback_manager = callback_manager - # Ensure the 'agent' key is present in the values dictionary - # values = {'agent': agent, 'tools': tools} - executor = AgentExecutor.from_agent_and_tools( agent=agent, tools=tools, @@ -98,7 +90,7 @@ class AgentManager: raise e @staticmethod - def create(toolsets: list[BaseToolSet]) -> "AgentManager": + def create(toolsets: list[BaseToolSet]) -> "AgentCreator": if not isinstance(toolsets, list): raise TypeError("Toolsets must be a list") - return AgentManager(toolsets=toolsets) \ No newline at end of file + return AgentCreator(toolsets=toolsets) \ No newline at end of file From 8cafdaeecf9a77919ae68376c1a127630a9489f6 Mon Sep 17 00:00:00 2001 From: Kye Date: Sun, 16 Jul 2023 16:21:45 -0400 Subject: [PATCH 33/41] 
ckean up agent craetpr Former-commit-id: 24276636180efb751ba1d5931cb68ddcc57a19b4 --- swarms/agents/workers/WorkerUltraNode.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/swarms/agents/workers/WorkerUltraNode.py b/swarms/agents/workers/WorkerUltraNode.py index fd90e924..4c2846a9 100644 --- a/swarms/agents/workers/WorkerUltraNode.py +++ b/swarms/agents/workers/WorkerUltraNode.py @@ -4,7 +4,7 @@ import logging from pathlib import Path from typing import Dict, List -from swarms.agents.utils.AgentManager import AgentManager +from swarms.agents.utils.AgentManager import AgentCreator from swarms.utils.main import BaseHandler, FileHandler, FileType from swarms.tools.main import ExitConversation, RequestsGet, CodeEditor, Terminal from swarms.utils.main import CsvToDataframe @@ -60,7 +60,7 @@ class WorkerUltraNode: handlers[FileType.IMAGE] = ImageCaptioning("cuda") try: - self.agent_manager = AgentManager.create(toolsets=toolsets) + self.agent_manager = AgentCreator.create(toolsets=toolsets) self.file_handler = FileHandler(handlers=handlers, path=BASE_DIR) self.uploader = StaticUploader.from_settings( path=BASE_DIR / "static", endpoint="static" From 446b67a55e06a7e3b4cf1e69660a1423ffcf389c Mon Sep 17 00:00:00 2001 From: Kye Date: Sun, 16 Jul 2023 16:23:25 -0400 Subject: [PATCH 34/41] clean up Former-commit-id: d16b200244ed962d6293227ed14ae98bdfefe22c --- api/container.py | 2 +- swarms/agents/utils/{AgentManager.py => agent_creator.py} | 4 ++-- swarms/agents/utils/{AgentBuilder.py => agent_setup.py} | 2 +- swarms/agents/utils/{EvalOutputParser.py => callback.py} | 0 swarms/agents/workers/WorkerUltraNode.py | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) rename swarms/agents/utils/{AgentManager.py => agent_creator.py} (94%) rename swarms/agents/utils/{AgentBuilder.py => agent_setup.py} (98%) rename swarms/agents/utils/{EvalOutputParser.py => callback.py} (100%) diff --git a/api/container.py b/api/container.py index 9e47f087..41f0d11d 100644 --- a/api/container.py +++ b/api/container.py @@ -5,7 +5,7 @@ from typing import Dict, List from fastapi.templating import Jinja2Templates -from swarms.agents.utils.AgentManager import AgentManager +from swarms.agents.utils.agent_creator import AgentManager from swarms.utils.main import BaseHandler, FileHandler, FileType from swarms.tools.main import ExitConversation, RequestsGet, CodeEditor, Terminal diff --git a/swarms/agents/utils/AgentManager.py b/swarms/agents/utils/agent_creator.py similarity index 94% rename from swarms/agents/utils/AgentManager.py rename to swarms/agents/utils/agent_creator.py index 47fd0c51..0704de89 100644 --- a/swarms/agents/utils/AgentManager.py +++ b/swarms/agents/utils/agent_creator.py @@ -9,8 +9,8 @@ from langchain.memory.chat_memory import BaseChatMemory from swarms.tools.main import BaseToolSet, ToolsFactory from swarms.prompts.prompts import EVAL_PREFIX, EVAL_SUFFIX -from swarms.agents.utils.AgentBuilder import AgentSetup -from swarms.agents.utils.EvalOutputParser import EVALCallbackHandler, ExecutionTracingCallbackHandler +from swarms.agents.utils.agent_setup import AgentSetup +from swarms.agents.utils.callback import EVALCallbackHandler, ExecutionTracingCallbackHandler callback_manager_instance = CallbackManager(EVALCallbackHandler()) diff --git a/swarms/agents/utils/AgentBuilder.py b/swarms/agents/utils/agent_setup.py similarity index 98% rename from swarms/agents/utils/AgentBuilder.py rename to swarms/agents/utils/agent_setup.py index bbfbb6ae..57811087 100644 --- 
a/swarms/agents/utils/AgentBuilder.py +++ b/swarms/agents/utils/agent_setup.py @@ -12,7 +12,7 @@ from langchain.callbacks.base import BaseCallbackManager from .ConversationalChatAgent import ConversationalChatAgent # from .ChatOpenAI import ChatOpenAI from langchain.chat_models import ChatOpenAI -from .EvalOutputParser import EvalOutputParser +from .callback import EvalOutputParser diff --git a/swarms/agents/utils/EvalOutputParser.py b/swarms/agents/utils/callback.py similarity index 100% rename from swarms/agents/utils/EvalOutputParser.py rename to swarms/agents/utils/callback.py diff --git a/swarms/agents/workers/WorkerUltraNode.py b/swarms/agents/workers/WorkerUltraNode.py index 4c2846a9..c9331bba 100644 --- a/swarms/agents/workers/WorkerUltraNode.py +++ b/swarms/agents/workers/WorkerUltraNode.py @@ -4,7 +4,7 @@ import logging from pathlib import Path from typing import Dict, List -from swarms.agents.utils.AgentManager import AgentCreator +from swarms.agents.utils.agent_creator import AgentCreator from swarms.utils.main import BaseHandler, FileHandler, FileType from swarms.tools.main import ExitConversation, RequestsGet, CodeEditor, Terminal from swarms.utils.main import CsvToDataframe From 235494acfc69eed5660c96726674ce69f5f675b1 Mon Sep 17 00:00:00 2001 From: Kye Date: Sun, 16 Jul 2023 16:26:33 -0400 Subject: [PATCH 35/41] clean up with callback file import Former-commit-id: 896104e289588bb57abe354982e6b41323ffff37 --- swarms/agents/utils/agent_creator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/swarms/agents/utils/agent_creator.py b/swarms/agents/utils/agent_creator.py index 0704de89..9e6f82d4 100644 --- a/swarms/agents/utils/agent_creator.py +++ b/swarms/agents/utils/agent_creator.py @@ -10,7 +10,7 @@ from swarms.tools.main import BaseToolSet, ToolsFactory from swarms.prompts.prompts import EVAL_PREFIX, EVAL_SUFFIX from swarms.agents.utils.agent_setup import AgentSetup -from swarms.agents.utils.callback import EVALCallbackHandler, ExecutionTracingCallbackHandler +from .callback import EVALCallbackHandler, ExecutionTracingCallbackHandler callback_manager_instance = CallbackManager(EVALCallbackHandler()) From db03be31b75c70b36506a15dbcae785870b5a5f2 Mon Sep 17 00:00:00 2001 From: Kye Date: Sun, 16 Jul 2023 17:09:42 -0400 Subject: [PATCH 36/41] clean eval output parser Former-commit-id: 242dd00d21215b3d92706fd9a8ed6c11f78caf78 --- swarms/agents/utils/agent_creator.py | 3 ++- swarms/agents/utils/agent_setup.py | 2 +- swarms/agents/utils/{callback.py => output_parser.py} | 0 3 files changed, 3 insertions(+), 2 deletions(-) rename swarms/agents/utils/{callback.py => output_parser.py} (100%) diff --git a/swarms/agents/utils/agent_creator.py b/swarms/agents/utils/agent_creator.py index 9e6f82d4..bc850822 100644 --- a/swarms/agents/utils/agent_creator.py +++ b/swarms/agents/utils/agent_creator.py @@ -10,7 +10,8 @@ from swarms.tools.main import BaseToolSet, ToolsFactory from swarms.prompts.prompts import EVAL_PREFIX, EVAL_SUFFIX from swarms.agents.utils.agent_setup import AgentSetup -from .callback import EVALCallbackHandler, ExecutionTracingCallbackHandler +# from .callback import EVALCallbackHandler, ExecutionTracingCallbackHandler +from swarms.agents.utils.Calback import EVALCallbackHandler, ExecutionTracingCallbackHandler callback_manager_instance = CallbackManager(EVALCallbackHandler()) diff --git a/swarms/agents/utils/agent_setup.py b/swarms/agents/utils/agent_setup.py index 57811087..15669169 100644 --- a/swarms/agents/utils/agent_setup.py +++ 
b/swarms/agents/utils/agent_setup.py @@ -12,7 +12,7 @@ from langchain.callbacks.base import BaseCallbackManager from .ConversationalChatAgent import ConversationalChatAgent # from .ChatOpenAI import ChatOpenAI from langchain.chat_models import ChatOpenAI -from .callback import EvalOutputParser +from .output_parser import EvalOutputParser diff --git a/swarms/agents/utils/callback.py b/swarms/agents/utils/output_parser.py similarity index 100% rename from swarms/agents/utils/callback.py rename to swarms/agents/utils/output_parser.py From e6e19d47d55b1ce2d9f232b769d0bce53836e79e Mon Sep 17 00:00:00 2001 From: Kye Date: Sun, 16 Jul 2023 21:55:50 -0400 Subject: [PATCH 37/41] clean up worker ultra node Former-commit-id: f2428d46d0b2e17e5aab4558b6a71c281bfb5ae1 --- swarms/__init__.py | 3 +- swarms/agents/workers/worker_agent_ultra.py | 168 ++++++++++++++++++++ 2 files changed, 170 insertions(+), 1 deletion(-) create mode 100644 swarms/agents/workers/worker_agent_ultra.py diff --git a/swarms/__init__.py b/swarms/__init__.py index 8edcbba4..1d07422f 100644 --- a/swarms/__init__.py +++ b/swarms/__init__.py @@ -1,4 +1,5 @@ # from swarms import Swarms, swarm from swarms.swarms import Swarms, swarm from swarms.agents import worker_node -from swarms.agents.workers.WorkerUltraNode import WorkerUltraNode, WorkerUltra \ No newline at end of file +from swarms.agents.workers.WorkerUltraNode import WorkerUltraNode, WorkerUltra +from swarms.agents.workers.worker_agent_ultra import worker_ultra_node \ No newline at end of file diff --git a/swarms/agents/workers/worker_agent_ultra.py b/swarms/agents/workers/worker_agent_ultra.py new file mode 100644 index 00000000..e82c8439 --- /dev/null +++ b/swarms/agents/workers/worker_agent_ultra.py @@ -0,0 +1,168 @@ +import os +import logging +from typing import Optional, Type +from langchain.callbacks.manager import ( + AsyncCallbackManagerForToolRun, + CallbackManagerForToolRun, +) + + +from swarms.tools.agent_tools import * +from typing import List, Any, Dict, Optional +from langchain.memory.chat_message_histories import FileChatMessageHistory + +import logging +from pydantic import BaseModel, Extra +logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') + + + + +from typing import List, Any, Dict, Optional +from langchain.memory.chat_message_histories import FileChatMessageHistory +from swarms.utils.main import BaseHandler, FileHandler, FileType +from swarms.tools.main import ExitConversation, RequestsGet, CodeEditor, Terminal +from swarms.utils.main import CsvToDataframe +from swarms.tools.main import BaseToolSet +from swarms.utils.main import StaticUploader + + +class WorkerUltraNode: + """Useful for when you need to spawn an autonomous agent instance as a worker to accomplish complex tasks, it can search the internet or spawn child multi-modality models to process and generate images and text or audio and so on""" + + def __init__(self, llm, toolsets, vectorstore): + if not llm or not toolsets or not vectorstore: + logging.error("llm, toolsets, and vectorstore cannot be None.") + raise ValueError("llm, toolsets, and vectorstore cannot be None.") + + self.llm = llm + self.toolsets = toolsets + self.vectorstore = vectorstore + self.agent = None + + def create_agent(self, ai_name="Swarm Worker AI Assistant", ai_role="Assistant", human_in_the_loop=False, search_kwargs={}, verbose=False): + logging.info("Creating agent in WorkerNode") + try: + self.agent = AutoGPT.from_llm_and_tools( + ai_name=ai_name, + ai_role=ai_role, + 
tools=self.toolsets, + llm=self.llm, + memory=self.vectorstore.as_retriever(search_kwargs=search_kwargs), + human_in_the_loop=human_in_the_loop, + chat_history_memory=FileChatMessageHistory("chat_history.txt"), + ) + self.agent.chain.verbose = verbose + except Exception as e: + logging.error(f"Error while creating agent: {str(e)}") + raise e + + def add_toolset(self, toolset: BaseToolSet): + if not isinstance(toolset, BaseToolSet): + logging.error("Toolset must be an instance of BaseToolSet.") + raise TypeError("Toolset must be an instance of BaseToolSet.") + + self.toolsets.append(toolset) + + def run(self, prompt: str) -> str: + if not isinstance(prompt, str): + logging.error("Prompt must be a string.") + raise TypeError("Prompt must be a string.") + + if not prompt: + logging.error("Prompt is empty.") + raise ValueError("Prompt is empty.") + + try: + self.agent.run([f"{prompt}"]) + return "Task completed by WorkerNode" + except Exception as e: + logging.error(f"While running the agent: {str(e)}") + raise e + +class WorkerUltraNodeInitializer: + def __init__(self, openai_api_key): + if not openai_api_key: + logging.error("OpenAI API key is not provided") + raise ValueError("openai_api_key cannot be None") + + self.openai_api_key = openai_api_key + + def initialize_llm(self, llm_class, temperature=0.5): + if not llm_class: + logging.error("llm_class cannot be none") + raise ValueError("llm_class cannot be None") + + try: + return llm_class(openai_api_key=self.openai_api_key, temperature=temperature) + except Exception as e: + logging.error(f"Failed to initialize language model: {e}") + raise + + def initialize_toolsets(self): + try: + toolsets: List[BaseToolSet] = [ + Terminal(), + CodeEditor(), + RequestsGet(), + ExitConversation(), + ] + handlers: Dict[FileType, BaseHandler] = {FileType.DATAFRAME: CsvToDataframe()} + + if os.environ.get("USE_GPU", False): + import torch + from swarms.tools.main import ImageCaptioning + from swarms.tools.main import ImageEditing, InstructPix2Pix, Text2Image, VisualQuestionAnswering + + if torch.cuda.is_available(): + toolsets.extend( + [ + Text2Image("cuda"), + ImageEditing("cuda"), + InstructPix2Pix("cuda"), + VisualQuestionAnswering("cuda"), + ] + ) + handlers[FileType.IMAGE] = ImageCaptioning("cuda") + + return toolsets + except Exception as e: + logging.error(f"Failed to initialize toolsets: {e}") + + def initialize_vectorstore(self): + try: + + embeddings_model = OpenAIEmbeddings(openai_api_key=self.openai_api_key) + embedding_size = 1536 + index = faiss.IndexFlatL2(embedding_size) + return FAISS(embeddings_model.embed_query, index, InMemoryDocstore({}), {}) + except Exception as e: + logging.error(f"Failed to initialize vector store: {e}") + raise + + def create_worker_node(self, llm_class=ChatOpenAI, ai_name="Swarm Worker AI Assistant", ai_role="Assistant", human_in_the_loop=False, search_kwargs={}, verbose=False): + if not llm_class: + logging.error("llm_class cannot be None.") + raise ValueError("llm_class cannot be None.") + try: + worker_toolsets = self.initialize_toolsets() + vectorstore = self.initialize_vectorstore() + worker_node = WorkerUltraNode(llm=self.initialize_llm(llm_class), toolsets=worker_toolsets, vectorstore=vectorstore) + worker_node.create_agent(ai_name=ai_name, ai_role=ai_role, human_in_the_loop=human_in_the_loop, search_kwargs=search_kwargs, verbose=verbose) + return worker_node + except Exception as e: + logging.error(f"Failed to create worker node: {e}") + raise + +def worker_ultra_node(openai_api_key): + if not 
openai_api_key: + logging.error("OpenAI API key is not provided") + raise ValueError("OpenAI API key is required") + + try: + initializer = WorkerUltraNodeInitializer(openai_api_key) + worker_node = initializer.create_worker_node() + return worker_node + except Exception as e: + logging.error(f"An error occurred in worker_node: {e}") + raise \ No newline at end of file From b6fd51d31dbc43f0f9a204a1109f835dbc007062 Mon Sep 17 00:00:00 2001 From: Kye Date: Sun, 16 Jul 2023 22:30:01 -0400 Subject: [PATCH 38/41] clena up Former-commit-id: cc1d251d8f0e660b8c4e28884c015f0c6193b3e0 --- DOCS/AGENTS.md | 522 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 522 insertions(+) create mode 100644 DOCS/AGENTS.md diff --git a/DOCS/AGENTS.md b/DOCS/AGENTS.md new file mode 100644 index 00000000..d55adc97 --- /dev/null +++ b/DOCS/AGENTS.md @@ -0,0 +1,522 @@ +LLM Powered Autonomous Agents +============================= + +June 23, 2023 · 31 min · Lilian Weng + +Table of Contents + +* [Agent System Overview](#agent-system-overview) +* [Component One: Planning](#component-one-planning) + * [Task Decomposition](#task-decomposition) + * [Self-Reflection](#self-reflection) +* [Component Two: Memory](#component-two-memory) + * [Types of Memory](#types-of-memory) + * [Maximum Inner Product Search (MIPS)](#maximum-inner-product-search-mips) +* [Component Three: Tool Use](#component-three-tool-use) +* [Case Studies](#case-studies) + * [Scientific Discovery Agent](#scientific-discovery-agent) + * [Generative Agents Simulation](#generative-agents-simulation) + * [Proof-of-Concept Examples](#proof-of-concept-examples) +* [Challenges](#challenges) +* [Citation](#citation) +* [References](#references) + +Building agents with LLM (large language model) as its core controller is a cool concept. Several proof-of-concepts demos, such as [AutoGPT](https://github.com/Significant-Gravitas/Auto-GPT), [GPT-Engineer](https://github.com/AntonOsika/gpt-engineer) and [BabyAGI](https://github.com/yoheinakajima/babyagi), serve as inspiring examples. The potentiality of LLM extends beyond generating well-written copies, stories, essays and programs; it can be framed as a powerful general problem solver. + +Agent System Overview[#](#agent-system-overview) +================================================ + +In a LLM-powered autonomous agent system, LLM functions as the agent’s brain, complemented by several key components: + +* **Planning** + * Subgoal and decomposition: The agent breaks down large tasks into smaller, manageable subgoals, enabling efficient handling of complex tasks. + * Reflection and refinement: The agent can do self-criticism and self-reflection over past actions, learn from mistakes and refine them for future steps, thereby improving the quality of final results. +* **Memory** + * Short-term memory: I would consider all the in-context learning (See [Prompt Engineering](https://lilianweng.github.io/posts/2023-03-15-prompt-engineering/)) as utilizing short-term memory of the model to learn. + * Long-term memory: This provides the agent with the capability to retain and recall (infinite) information over extended periods, often by leveraging an external vector store and fast retrieval. +* **Tool use** + * The agent learns to call external APIs for extra information that is missing from the model weights (often hard to change after pre-training), including current information, code execution capability, access to proprietary information sources and more. + +![](agent-overview.png) + +Fig. 1. 
Overview of a LLM-powered autonomous agent system. + +Component One: Planning[#](#component-one-planning) +=================================================== + +A complicated task usually involves many steps. An agent needs to know what they are and plan ahead. + +Task Decomposition[#](#task-decomposition) +------------------------------------------ + +[**Chain of thought**](https://lilianweng.github.io/posts/2023-03-15-prompt-engineering/#chain-of-thought-cot) (CoT; [Wei et al. 2022](https://arxiv.org/abs/2201.11903)) has become a standard prompting technique for enhancing model performance on complex tasks. The model is instructed to “think step by step” to utilize more test-time computation to decompose hard tasks into smaller and simpler steps. CoT transforms big tasks into multiple manageable tasks and shed lights into an interpretation of the model’s thinking process. + +**Tree of Thoughts** ([Yao et al. 2023](https://arxiv.org/abs/2305.10601)) extends CoT by exploring multiple reasoning possibilities at each step. It first decomposes the problem into multiple thought steps and generates multiple thoughts per step, creating a tree structure. The search process can be BFS (breadth-first search) or DFS (depth-first search) with each state evaluated by a classifier (via a prompt) or majority vote. + +Task decomposition can be done (1) by LLM with simple prompting like `"Steps for XYZ.\n1."`, `"What are the subgoals for achieving XYZ?"`, (2) by using task-specific instructions; e.g. `"Write a story outline."` for writing a novel, or (3) with human inputs. + +Another quite distinct approach, **LLM+P** ([Liu et al. 2023](https://arxiv.org/abs/2304.11477)), involves relying on an external classical planner to do long-horizon planning. This approach utilizes the Planning Domain Definition Language (PDDL) as an intermediate interface to describe the planning problem. In this process, LLM (1) translates the problem into “Problem PDDL”, then (2) requests a classical planner to generate a PDDL plan based on an existing “Domain PDDL”, and finally (3) translates the PDDL plan back into natural language. Essentially, the planning step is outsourced to an external tool, assuming the availability of domain-specific PDDL and a suitable planner which is common in certain robotic setups but not in many other domains. + +Self-Reflection[#](#self-reflection) +------------------------------------ + +Self-reflection is a vital aspect that allows autonomous agents to improve iteratively by refining past action decisions and correcting previous mistakes. It plays a crucial role in real-world tasks where trial and error are inevitable. + +**ReAct** ([Yao et al. 2023](https://arxiv.org/abs/2210.03629)) integrates reasoning and acting within LLM by extending the action space to be a combination of task-specific discrete actions and the language space. The former enables LLM to interact with the environment (e.g. use Wikipedia search API), while the latter prompting LLM to generate reasoning traces in natural language. + +The ReAct prompt template incorporates explicit steps for LLM to think, roughly formatted as: + + Thought: ... + Action: ... + Observation: ... + ... (Repeated many times) + + +![](react.png) + +Fig. 2. Examples of reasoning trajectories for knowledge-intensive tasks (e.g. HotpotQA, FEVER) and decision-making tasks (e.g. AlfWorld Env, WebShop). (Image source: [Yao et al. 2023](https://arxiv.org/abs/2210.03629)). 
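To make the Thought / Action / Observation cycle concrete, here is a minimal sketch of how such a loop can be driven in code. It is not the implementation from Yao et al.; the `llm` callable, the tool registry, the `Action: tool[input]` syntax and the `Final Answer:` stop marker are all assumptions chosen for this example.

```python
import re
from typing import Callable, Dict

def react_loop(llm: Callable[[str], str],
               tools: Dict[str, Callable[[str], str]],
               question: str, max_steps: int = 5) -> str:
    """Drive a ReAct-style Thought/Action/Observation loop (illustrative sketch)."""
    transcript = f"Question: {question}\n"
    for _ in range(max_steps):
        step = llm(transcript)            # model emits "Thought: ... Action: tool[input]" or a final answer
        transcript += step + "\n"
        if "Final Answer:" in step:
            return step.split("Final Answer:", 1)[1].strip()
        match = re.search(r"Action:\s*(\w+)\[(.*)\]", step)
        if match:
            name, arg = match.group(1), match.group(2)
            tool = tools.get(name, lambda _: f"Unknown tool: {name}")
            transcript += f"Observation: {tool(arg)}\n"   # fed back in for the next Thought
    return transcript                      # give up after max_steps and return the trace
```

Feeding the growing transcript back into the model at each step is what lets later Thoughts condition on earlier Observations; wiring in a scripted stub for `llm` and a dictionary such as `{"search": my_search}` is enough to exercise the loop without any external service.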
+ +In both experiments on knowledge-intensive tasks and decision-making tasks, `ReAct` works better than the `Act`\-only baseline where `Thought: …` step is removed. + +**Reflexion** ([Shinn & Labash 2023](https://arxiv.org/abs/2303.11366)) is a framework to equips agents with dynamic memory and self-reflection capabilities to improve reasoning skills. Reflexion has a standard RL setup, in which the reward model provides a simple binary reward and the action space follows the setup in ReAct where the task-specific action space is augmented with language to enable complex reasoning steps. After each action at, the agent computes a heuristic ht and optionally may _decide to reset_ the environment to start a new trial depending on the self-reflection results. + +![](reflexion.png) + +Fig. 3. Illustration of the Reflexion framework. (Image source: [Shinn & Labash, 2023](https://arxiv.org/abs/2303.11366)) + +The heuristic function determines when the trajectory is inefficient or contains hallucination and should be stopped. Inefficient planning refers to trajectories that take too long without success. Hallucination is defined as encountering a sequence of consecutive identical actions that lead to the same observation in the environment. + +Self-reflection is created by showing two-shot examples to LLM and each example is a pair of (failed trajectory, ideal reflection for guiding future changes in the plan). Then reflections are added into the agent’s working memory, up to three, to be used as context for querying LLM. + +![](reflexion-exp.png) + +Fig. 4. Experiments on AlfWorld Env and HotpotQA. Hallucination is a more common failure than inefficient planning in AlfWorld. (Image source: [Shinn & Labash, 2023](https://arxiv.org/abs/2303.11366)) + +**Chain of Hindsight** (CoH; [Liu et al. 2023](https://arxiv.org/abs/2302.02676)) encourages the model to improve on its own outputs by explicitly presenting it with a sequence of past outputs, each annotated with feedback. Human feedback data is a collection of Dh\={(x,yi,ri,zi)}i\=1n, where x is the prompt, each yi is a model completion, ri is the human rating of yi, and zi is the corresponding human-provided hindsight feedback. Assume the feedback tuples are ranked by reward, rn≥rn−1≥⋯≥r1 The process is supervised fine-tuning where the data is a sequence in the form of τh\=(x,zi,yi,zj,yj,…,zn,yn), where ≤i≤j≤n. The model is finetuned to only predict yn where conditioned on the sequence prefix, such that the model can self-reflect to produce better output based on the feedback sequence. The model can optionally receive multiple rounds of instructions with human annotators at test time. + +To avoid overfitting, CoH adds a regularization term to maximize the log-likelihood of the pre-training dataset. To avoid shortcutting and copying (because there are many common words in feedback sequences), they randomly mask 0% - 5% of past tokens during training. + +The training dataset in their experiments is a combination of [WebGPT comparisons](https://huggingface.co/datasets/openai/webgpt_comparisons), [summarization from human feedback](https://github.com/openai/summarize-from-feedback) and [human preference dataset](https://github.com/anthropics/hh-rlhf). + +![](CoH.png) + +Fig. 5. After fine-tuning with CoH, the model can follow instructions to produce outputs with incremental improvement in a sequence. (Image source: [Liu et al. 
2023](https://arxiv.org/abs/2302.02676)) + +The idea of CoH is to present a history of sequentially improved outputs in context and train the model to take on the trend to produce better outputs. **Algorithm Distillation** (AD; [Laskin et al. 2023](https://arxiv.org/abs/2210.14215)) applies the same idea to cross-episode trajectories in reinforcement learning tasks, where an _algorithm_ is encapsulated in a long history-conditioned policy. Considering that an agent interacts with the environment many times and in each episode the agent gets a little better, AD concatenates this learning history and feeds that into the model. Hence we should expect the next predicted action to lead to better performance than previous trials. The goal is to learn the process of RL instead of training a task-specific policy itself. + +![](algorithm-distillation.png) + +Fig. 6. Illustration of how Algorithm Distillation (AD) works. +(Image source: [Laskin et al. 2023](https://arxiv.org/abs/2210.14215)). + +The paper hypothesizes that any algorithm that generates a set of learning histories can be distilled into a neural network by performing behavioral cloning over actions. The history data is generated by a set of source policies, each trained for a specific task. At the training stage, during each RL run, a random task is sampled and a subsequence of multi-episode history is used for training, such that the learned policy is task-agnostic. + +In reality, the model has limited context window length, so episodes should be short enough to construct multi-episode history. Multi-episodic contexts of 2-4 episodes are necessary to learn a near-optimal in-context RL algorithm. The emergence of in-context RL requires long enough context. + +In comparison with three baselines, including ED (expert distillation, behavior cloning with expert trajectories instead of learning history), source policy (used for generating trajectories for distillation by [UCB](https://lilianweng.github.io/posts/2018-01-23-multi-armed-bandit/#upper-confidence-bounds)), RL^2 ([Duan et al. 2017](https://arxiv.org/abs/1611.02779); used as upper bound since it needs online RL), AD demonstrates in-context RL with performance getting close to RL^2 despite only using offline RL and learns much faster than other baselines. When conditioned on partial training history of the source policy, AD also improves much faster than ED baseline. + +![](algorithm-distillation-results.png) + +Fig. 7. Comparison of AD, ED, source policy and RL^2 on environments that require memory and exploration. Only binary reward is assigned. The source policies are trained with [A3C](https://lilianweng.github.io/posts/2018-04-08-policy-gradient/#a3c) for "dark" environments and [DQN](http://lilianweng.github.io/posts/2018-02-19-rl-overview/#deep-q-network) for watermaze. +(Image source: [Laskin et al. 2023](https://arxiv.org/abs/2210.14215)) + +Component Two: Memory[#](#component-two-memory) +=============================================== + +(Big thank you to ChatGPT for helping me draft this section. I’ve learned a lot about the human brain and data structure for fast MIPS in my [conversations](https://chat.openai.com/share/46ff149e-a4c7-4dd7-a800-fc4a642ea389) with ChatGPT.) + +Types of Memory[#](#types-of-memory) +------------------------------------ + +Memory can be defined as the processes used to acquire, store, retain, and later retrieve information. There are several types of memory in human brains. + +1. 
**Sensory Memory**: This is the earliest stage of memory, providing the ability to retain impressions of sensory information (visual, auditory, etc) after the original stimuli have ended. Sensory memory typically only lasts for up to a few seconds. Subcategories include iconic memory (visual), echoic memory (auditory), and haptic memory (touch). + +2. **Short-Term Memory** (STM) or **Working Memory**: It stores information that we are currently aware of and needed to carry out complex cognitive tasks such as learning and reasoning. Short-term memory is believed to have the capacity of about 7 items ([Miller 1956](psychclassics.yorku.ca/Miller/)) and lasts for 20-30 seconds. + +3. **Long-Term Memory** (LTM): Long-term memory can store information for a remarkably long time, ranging from a few days to decades, with an essentially unlimited storage capacity. There are two subtypes of LTM: + + * Explicit / declarative memory: This is memory of facts and events, and refers to those memories that can be consciously recalled, including episodic memory (events and experiences) and semantic memory (facts and concepts). + * Implicit / procedural memory: This type of memory is unconscious and involves skills and routines that are performed automatically, like riding a bike or typing on a keyboard. + +![](memory.png) + +Fig. 8. Categorization of human memory. + +We can roughly consider the following mappings: + +* Sensory memory as learning embedding representations for raw inputs, including text, image or other modalities; +* Short-term memory as in-context learning. It is short and finite, as it is restricted by the finite context window length of Transformer. +* Long-term memory as the external vector store that the agent can attend to at query time, accessible via fast retrieval. + +Maximum Inner Product Search (MIPS)[#](#maximum-inner-product-search-mips) +-------------------------------------------------------------------------- + +The external memory can alleviate the restriction of finite attention span. A standard practice is to save the embedding representation of information into a vector store database that can support fast maximum inner-product search ([MIPS](https://en.wikipedia.org/wiki/Maximum_inner-product_search)). To optimize the retrieval speed, the common choice is the _approximate nearest neighbors (ANN)​_ algorithm to return approximately top k nearest neighbors to trade off a little accuracy lost for a huge speedup. + +A couple common choices of ANN algorithms for fast MIPS: + +* [**LSH**](https://en.wikipedia.org/wiki/Locality-sensitive_hashing) (Locality-Sensitive Hashing): It introduces a _hashing_ function such that similar input items are mapped to the same buckets with high probability, where the number of buckets is much smaller than the number of inputs. +* [**ANNOY**](https://github.com/spotify/annoy) (Approximate Nearest Neighbors Oh Yeah): The core data structure are _random projection trees_, a set of binary trees where each non-leaf node represents a hyperplane splitting the input space into half and each leaf stores one data point. Trees are built independently and at random, so to some extent, it mimics a hashing function. ANNOY search happens in all the trees to iteratively search through the half that is closest to the query and then aggregates the results. The idea is quite related to KD tree but a lot more scalable. 
+* [**HNSW**](https://arxiv.org/abs/1603.09320) (Hierarchical Navigable Small World): It is inspired by the idea of [small world networks](https://en.wikipedia.org/wiki/Small-world_network) where most nodes can be reached by any other nodes within a small number of steps; e.g. “six degrees of separation” feature of social networks. HNSW builds hierarchical layers of these small-world graphs, where the bottom layers contain the actual data points. The layers in the middle create shortcuts to speed up search. When performing a search, HNSW starts from a random node in the top layer and navigates towards the target. When it can’t get any closer, it moves down to the next layer, until it reaches the bottom layer. Each move in the upper layers can potentially cover a large distance in the data space, and each move in the lower layers refines the search quality. +* [**FAISS**](https://github.com/facebookresearch/faiss) (Facebook AI Similarity Search): It operates on the assumption that in high dimensional space, distances between nodes follow a Gaussian distribution and thus there should exist _clustering_ of data points. FAISS applies vector quantization by partitioning the vector space into clusters and then refining the quantization within clusters. Search first looks for cluster candidates with coarse quantization and then further looks into each cluster with finer quantization. +* [**ScaNN**](https://github.com/google-research/google-research/tree/master/scann) (Scalable Nearest Neighbors): The main innovation in ScaNN is _anisotropic vector quantization_. It quantizes a data point xi to x~i such that the inner product ⟨q,xi⟩ is as similar to the original distance of ∠q,x~i as possible, instead of picking the closet quantization centroid points. + +![](mips.png) + +Fig. 9. Comparison of MIPS algorithms, measured in recall@10. (Image source: [Google Blog, 2020](https://ai.googleblog.com/2020/07/announcing-scann-efficient-vector.html)) + +Check more MIPS algorithms and performance comparison in [ann-benchmarks.com](https://ann-benchmarks.com/). + +Component Three: Tool Use[#](#component-three-tool-use) +======================================================= + +Tool use is a remarkable and distinguishing characteristic of human beings. We create, modify and utilize external objects to do things that go beyond our physical and cognitive limits. Equipping LLMs with external tools can significantly extend the model capabilities. + +![](sea-otter.png) + +Fig. 10. A picture of a sea otter using rock to crack open a seashell, while floating in the water. While some other animals can use tools, the complexity is not comparable with humans. (Image source: [Animals using tools](https://www.popularmechanics.com/science/animals/g39714258/animals-using-tools/)) + +**MRKL** ([Karpas et al. 2022](https://arxiv.org/abs/2205.00445)), short for “Modular Reasoning, Knowledge and Language”, is a neuro-symbolic architecture for autonomous agents. A MRKL system is proposed to contain a collection of “expert” modules and the general-purpose LLM works as a router to route inquiries to the best suitable expert module. These modules can be neural (e.g. deep learning models) or symbolic (e.g. math calculator, currency converter, weather API). + +They did an experiment on fine-tuning LLM to call a calculator, using arithmetic as a test case. 
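As a rough illustration of the routing idea (not the architecture from the paper), the sketch below dispatches a query either to a symbolic calculator module or to an LLM fallback; the expert registry, the character-based heuristic that stands in for the LLM router, and the module names are assumptions made for this example.

```python
from typing import Callable, Dict

def calculator(query: str) -> str:
    # Symbolic expert: evaluate a plain arithmetic expression such as "12 * 7".
    return str(eval(query, {"__builtins__": {}}, {}))

def llm_fallback(query: str) -> str:
    # Stand-in for the general-purpose LLM that handles everything else.
    return f"[LLM answer to: {query}]"

EXPERTS: Dict[str, Callable[[str], str]] = {"calculator": calculator, "llm": llm_fallback}

def route(query: str) -> str:
    # A real MRKL router would let the LLM choose the module and extract its
    # arguments; a crude character check stands in for that step here.
    expert = "calculator" if set(query) <= set("0123456789+-*/(). ") else "llm"
    return EXPERTS[expert](query)

print(route("12 * 7"))             # handled by the symbolic expert -> 84
print(route("Who wrote Hamlet?"))  # handled by the LLM fallback
```

In a real system the `route` step is itself an LLM call that must both pick the module and extract well-formed arguments for it, which is exactly where the experiment described next found the model struggling.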
Their experiments showed that it was harder to solve verbal math problems than explicitly stated math problems because LLMs (7B Jurassic1-large model) failed to extract the right arguments for the basic arithmetic reliably. The results highlight when the external symbolic tools can work reliably, _knowing when to and how to use the tools are crucial_, determined by the LLM capability. + +Both **TALM** (Tool Augmented Language Models; [Parisi et al. 2022](https://arxiv.org/abs/2205.12255)) and **Toolformer** ([Schick et al. 2023](https://arxiv.org/abs/2302.04761)) fine-tune a LM to learn to use external tool APIs. The dataset is expanded based on whether a newly added API call annotation can improve the quality of model outputs. See more details in the [“External APIs” section](https://lilianweng.github.io/posts/2023-03-15-prompt-engineering/#external-apis) of Prompt Engineering. + +ChatGPT [Plugins](https://openai.com/blog/chatgpt-plugins) and OpenAI API [function calling](https://platform.openai.com/docs/guides/gpt/function-calling) are good examples of LLMs augmented with tool use capability working in practice. The collection of tool APIs can be provided by other developers (as in Plugins) or self-defined (as in function calls). + +**HuggingGPT** ([Shen et al. 2023](https://arxiv.org/abs/2303.17580)) is a framework to use ChatGPT as the task planner to select models available in HuggingFace platform according to the model descriptions and summarize the response based on the execution results. + +![](hugging-gpt.png) + +Fig. 11. Illustration of how HuggingGPT works. (Image source: [Shen et al. 2023](https://arxiv.org/abs/2303.17580)) + +The system comprises of 4 stages: + +**(1) Task planning**: LLM works as the brain and parses the user requests into multiple tasks. There are four attributes associated with each task: task type, ID, dependencies, and arguments. They use few-shot examples to guide LLM to do task parsing and planning. + +Instruction: + +The AI assistant can parse user input to several tasks: \[{"task": task, "id", task\_id, "dep": dependency\_task\_ids, "args": {"text": text, "image": URL, "audio": URL, "video": URL}}\]. The "dep" field denotes the id of the previous task which generates a new resource that the current task relies on. A special tag "\-task\_id" refers to the generated text image, audio and video in the dependency task with id as task\_id. The task MUST be selected from the following options: {{ Available Task List }}. There is a logical relationship between tasks, please note their order. If the user input can't be parsed, you need to reply empty JSON. Here are several cases for your reference: {{ Demonstrations }}. The chat history is recorded as {{ Chat History }}. From this chat history, you can find the path of the user-mentioned resources for your task planning. + +**(2) Model selection**: LLM distributes the tasks to expert models, where the request is framed as a multiple-choice question. LLM is presented with a list of models to choose from. Due to the limited context length, task type based filtration is needed. + +Instruction: + +Given the user request and the call command, the AI assistant helps the user to select a suitable model from a list of models to process the user request. The AI assistant merely outputs the model id of the most appropriate model. The output must be in a strict JSON format: "id": "id", "reason": "your detail reason for the choice". We have a list of models for you to choose from {{ Candidate Models }}. 
Please select one model from the list. + +**(3) Task execution**: Expert models execute on the specific tasks and log results. + +Instruction: + +With the input and the inference results, the AI assistant needs to describe the process and results. The previous stages can be formed as - User Input: {{ User Input }}, Task Planning: {{ Tasks }}, Model Selection: {{ Model Assignment }}, Task Execution: {{ Predictions }}. You must first answer the user's request in a straightforward manner. Then describe the task process and show your analysis and model inference results to the user in the first person. If inference results contain a file path, must tell the user the complete file path. + +**(4) Response generation**: LLM receives the execution results and provides summarized results to users. + +To put HuggingGPT into real world usage, a couple challenges need to solve: (1) Efficiency improvement is needed as both LLM inference rounds and interactions with other models slow down the process; (2) It relies on a long context window to communicate over complicated task content; (3) Stability improvement of LLM outputs and external model services. + +**API-Bank** ([Li et al. 2023](https://arxiv.org/abs/2304.08244)) is a benchmark for evaluating the performance of tool-augmented LLMs. It contains 53 commonly used API tools, a complete tool-augmented LLM workflow, and 264 annotated dialogues that involve 568 API calls. The selection of APIs is quite diverse, including search engines, calculator, calendar queries, smart home control, schedule management, health data management, account authentication workflow and more. Because there are a large number of APIs, LLM first has access to API search engine to find the right API to call and then uses the corresponding documentation to make a call. + +![](api-bank-process.png) + +Fig. 12. Pseudo code of how LLM makes an API call in API-Bank. (Image source: [Li et al. 2023](https://arxiv.org/abs/2304.08244)) + +In the API-Bank workflow, LLMs need to make a couple of decisions and at each step we can evaluate how accurate that decision is. Decisions include: + +1. Whether an API call is needed. +2. Identify the right API to call: if not good enough, LLMs need to iteratively modify the API inputs (e.g. deciding search keywords for Search Engine API). +3. Response based on the API results: the model can choose to refine and call again if results are not satisfied. + +This benchmark evaluates the agent’s tool use capabilities at three levels: + +* Level-1 evaluates the ability to _call the API_. Given an API’s description, the model needs to determine whether to call a given API, call it correctly, and respond properly to API returns. +* Level-2 examines the ability to _retrieve the API_. The model needs to search for possible APIs that may solve the user’s requirement and learn how to use them by reading documentation. +* Level-3 assesses the ability to _plan API beyond retrieve and call_. Given unclear user requests (e.g. schedule group meetings, book flight/hotel/restaurant for a trip), the model may have to conduct multiple API calls to solve it. + +Case Studies[#](#case-studies) +============================== + +Scientific Discovery Agent[#](#scientific-discovery-agent) +---------------------------------------------------------- + +**ChemCrow** ([Bran et al. 
2023](https://arxiv.org/abs/2304.05376)) is a domain-specific example in which LLM is augmented with 13 expert-designed tools to accomplish tasks across organic synthesis, drug discovery, and materials design. The workflow, implemented in [LangChain](https://github.com/hwchase17/langchain), reflects what was previously described in the [ReAct](#react) and [MRKLs](#mrkl) and combines CoT reasoning with tools relevant to the tasks: + +* The LLM is provided with a list of tool names, descriptions of their utility, and details about the expected input/output. +* It is then instructed to answer a user-given prompt using the tools provided when necessary. The instruction suggests the model to follow the ReAct format - `Thought, Action, Action Input, Observation`. + +One interesting observation is that while the LLM-based evaluation concluded that GPT-4 and ChemCrow perform nearly equivalently, human evaluations with experts oriented towards the completion and chemical correctness of the solutions showed that ChemCrow outperforms GPT-4 by a large margin. This indicates a potential problem with using LLM to evaluate its own performance on domains that requires deep expertise. The lack of expertise may cause LLMs not knowing its flaws and thus cannot well judge the correctness of task results. + +[Boiko et al. (2023)](https://arxiv.org/abs/2304.05332) also looked into LLM-empowered agents for scientific discovery, to handle autonomous design, planning, and performance of complex scientific experiments. This agent can use tools to browse the Internet, read documentation, execute code, call robotics experimentation APIs and leverage other LLMs. + +For example, when requested to `"develop a novel anticancer drug"`, the model came up with the following reasoning steps: + +1. inquired about current trends in anticancer drug discovery; +2. selected a target; +3. requested a scaffold targeting these compounds; +4. Once the compound was identified, the model attempted its synthesis. + +They also discussed the risks, especially with illicit drugs and bioweapons. They developed a test set containing a list of known chemical weapon agents and asked the agent to synthesize them. 4 out of 11 requests (36%) were accepted to obtain a synthesis solution and the agent attempted to consult documentation to execute the procedure. 7 out of 11 were rejected and among these 7 rejected cases, 5 happened after a Web search while 2 were rejected based on prompt only. + +Generative Agents Simulation[#](#generative-agents-simulation) +-------------------------------------------------------------- + +**Generative Agents** ([Park, et al. 2023](https://arxiv.org/abs/2304.03442)) is super fun experiment where 25 virtual characters, each controlled by a LLM-powered agent, are living and interacting in a sandbox environment, inspired by The Sims. Generative agents create believable simulacra of human behavior for interactive applications. + +The design of generative agents combines LLM with memory, planning and reflection mechanisms to enable agents to behave conditioned on past experience, as well as to interact with other agents. + +* **Memory** stream: is a long-term memory module (external database) that records a comprehensive list of agents' experience in natural language. + * Each element is an _observation_, an event directly provided by the agent. - Inter-agent communication can trigger new natural language statements. 
+* **Retrieval** model: surfaces the context to inform the agent’s behavior, according to relevance, recency and importance. + * Recency: recent events have higher scores + * Importance: distinguish mundane from core memories. Ask LM directly. + * Relevance: based on how related it is to the current situation / query. +* **Reflection** mechanism: synthesizes memories into higher level inferences over time and guides the agent’s future behavior. They are _higher-level summaries of past events_ (<- note that this is a bit different from [self-reflection](#self-reflection) above) + * Prompt LM with 100 most recent observations and to generate 3 most salient high-level questions given a set of observations/statements. Then ask LM to answer those questions. +* **Planning & Reacting**: translate the reflections and the environment information into actions + * Planning is essentially in order to optimize believability at the moment vs in time. + * Prompt template: `{Intro of an agent X}. Here is X's plan today in broad strokes: 1)` + * Relationships between agents and observations of one agent by another are all taken into consideration for planning and reacting. + * Environment information is present in a tree structure. + +![](generative-agents.png) + +Fig. 13. The generative agent architecture. (Image source: [Park et al. 2023](https://arxiv.org/abs/2304.03442)) + +This fun simulation results in emergent social behavior, such as information diffusion, relationship memory (e.g. two agents continuing the conversation topic) and coordination of social events (e.g. host a party and invite many others). + +Proof-of-Concept Examples[#](#proof-of-concept-examples) +-------------------------------------------------------- + +[AutoGPT](https://github.com/Significant-Gravitas/Auto-GPT) has drawn a lot of attention into the possibility of setting up autonomous agents with LLM as the main controller. It has quite a lot of reliability issues given the natural language interface, but nevertheless a cool proof-of-concept demo. A lot of code in AutoGPT is about format parsing. + +Here is the system message used by AutoGPT, where `{{...}}` are user inputs: + + You are {{ai-name}}, {{user-provided AI bot description}}. + Your decisions must always be made independently without seeking user assistance. Play to your strengths as an LLM and pursue simple strategies with no legal complications. + + GOALS: + + 1. {{user-provided goal 1}} + 2. {{user-provided goal 2}} + 3. ... + 4. ... + 5. ... + + Constraints: + 1. ~4000 word limit for short term memory. Your short term memory is short, so immediately save important information to files. + 2. If you are unsure how you previously did something or want to recall past events, thinking about similar events will help you remember. + 3. No user assistance + 4. Exclusively use the commands listed in double quotes e.g. "command name" + 5. Use subprocesses for commands that will not terminate within a few minutes + + Commands: + 1. Google Search: "google", args: "input": "" + 2. Browse Website: "browse_website", args: "url": "", "question": "" + 3. Start GPT Agent: "start_agent", args: "name": "", "task": "", "prompt": "" + 4. Message GPT Agent: "message_agent", args: "key": "", "message": "" + 5. List GPT Agents: "list_agents", args: + 6. Delete GPT Agent: "delete_agent", args: "key": "" + 7. Clone Repository: "clone_repository", args: "repository_url": "", "clone_path": "" + 8. Write to file: "write_to_file", args: "file": "", "text": "" + 9. 
Here is the system message used by AutoGPT, where `{{...}}` are user inputs:

    You are {{ai-name}}, {{user-provided AI bot description}}.
    Your decisions must always be made independently without seeking user assistance. Play to your strengths as an LLM and pursue simple strategies with no legal complications.

    GOALS:

    1. {{user-provided goal 1}}
    2. {{user-provided goal 2}}
    3. ...
    4. ...
    5. ...

    Constraints:
    1. ~4000 word limit for short term memory. Your short term memory is short, so immediately save important information to files.
    2. If you are unsure how you previously did something or want to recall past events, thinking about similar events will help you remember.
    3. No user assistance
    4. Exclusively use the commands listed in double quotes e.g. "command name"
    5. Use subprocesses for commands that will not terminate within a few minutes

    Commands:
    1. Google Search: "google", args: "input": ""
    2. Browse Website: "browse_website", args: "url": "", "question": ""
    3. Start GPT Agent: "start_agent", args: "name": "", "task": "", "prompt": ""
    4. Message GPT Agent: "message_agent", args: "key": "", "message": ""
    5. List GPT Agents: "list_agents", args:
    6. Delete GPT Agent: "delete_agent", args: "key": ""
    7. Clone Repository: "clone_repository", args: "repository_url": "", "clone_path": ""
    8. Write to file: "write_to_file", args: "file": "", "text": ""
    9. Read file: "read_file", args: "file": ""
    10. Append to file: "append_to_file", args: "file": "", "text": ""
    11. Delete file: "delete_file", args: "file": ""
    12. Search Files: "search_files", args: "directory": ""
    13. Analyze Code: "analyze_code", args: "code": ""
    14. Get Improved Code: "improve_code", args: "suggestions": "", "code": ""
    15. Write Tests: "write_tests", args: "code": "", "focus": ""
    16. Execute Python File: "execute_python_file", args: "file": ""
    17. Generate Image: "generate_image", args: "prompt": ""
    18. Send Tweet: "send_tweet", args: "text": ""
    19. Do Nothing: "do_nothing", args:
    20. Task Complete (Shutdown): "task_complete", args: "reason": ""

    Resources:
    1. Internet access for searches and information gathering.
    2. Long Term memory management.
    3. GPT-3.5 powered Agents for delegation of simple tasks.
    4. File output.

    Performance Evaluation:
    1. Continuously review and analyze your actions to ensure you are performing to the best of your abilities.
    2. Constructively self-criticize your big-picture behavior constantly.
    3. Reflect on past decisions and strategies to refine your approach.
    4. Every command has a cost, so be smart and efficient. Aim to complete tasks in the least number of steps.

    You should only respond in JSON format as described below
    Response Format:
    {
        "thoughts": {
            "text": "thought",
            "reasoning": "reasoning",
            "plan": "- short bulleted\n- list that conveys\n- long-term plan",
            "criticism": "constructive self-criticism",
            "speak": "thoughts summary to say to user"
        },
        "command": {
            "name": "command name",
            "args": {
                "arg name": "value"
            }
        }
    }
    Ensure the response can be parsed by Python json.loads


[GPT-Engineer](https://github.com/AntonOsika/gpt-engineer) is another project to create a whole repository of code given a task specified in natural language. GPT-Engineer is instructed to think over a list of smaller components to build and to ask for user input to clarify questions as needed.

Here is a sample conversation for task clarification sent to the OpenAI ChatCompletion endpoint used by GPT-Engineer. The user inputs are wrapped in `{{user input text}}`.

    [
      {
        "role": "system",
        "content": "You will read instructions and not carry them out, only seek to clarify them.\nSpecifically you will first summarise a list of super short bullets of areas that need clarification.\nThen you will pick one clarifying question, and wait for an answer from the user.\n"
      },
      {
        "role": "user",
        "content": "We are writing {{a Super Mario game in python. MVC components split in separate files. Keyboard control.}}\n"
      },
      {
        "role": "assistant",
        "content": "Summary of areas that need clarification:\n1. Specifics of the Super Mario game (e.g. level design, characters, gameplay mechanics)\n2. Details about the MVC components (e.g. which components are in each file)\n3. Keyboard control implementation (e.g. which keys to use, how to handle input)\n\nClarifying question:\nCan you provide more details about the Super Mario game, such as level design, characters, and gameplay mechanics?"
      },
      {
        "role": "user",
        "content": "{{There are 10 levels in total. The main character is a plumber named Mario, who can walk and jump. It is a classical platform game just like Super Mario. The main character moves from left to right, trying to get to the destination, where there are many obstacles and attacks from enemies in the process.}}\n\nIs anything else unclear? If yes, only answer in the form:\n{remaining unclear areas} remaining questions.\n{Next question}\nIf everything is sufficiently clear, only answer \"Nothing more to clarify.\"."
      },
      {
        "role": "assistant",
        "content": "Remaining unclear areas: 2 remaining questions.\nCan you provide more information about how the MVC components are split into separate files?"
      },
      {
        "role": "user",
        "content": "{{Make your own assumptions and state them explicitly before starting}}"
      }
    ]
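Below is a minimal sketch of how such a clarification loop could be driven programmatically. The `chat` helper is assumed to wrap whatever chat-completion client is in use and to return the assistant's reply as a string; its name, the `ask_user` callback and the round limit are illustrative assumptions, not GPT-Engineer's actual code.

    from typing import Callable, Dict, List

    Message = Dict[str, str]

    def clarification_loop(task: str,
                           chat: Callable[[List[Message]], str],
                           ask_user: Callable[[str], str],
                           max_rounds: int = 5) -> List[Message]:
        """Alternate between the model's clarifying questions and the user's answers."""
        messages: List[Message] = [
            {"role": "system",
             "content": "You will read instructions and not carry them out, "
                        "only seek to clarify them."},
            {"role": "user", "content": f"We are writing {task}\n"},
        ]
        for _ in range(max_rounds):
            reply = chat(messages)
            messages.append({"role": "assistant", "content": reply})
            if "Nothing more to clarify" in reply:
                break  # the model signalled that the spec is sufficiently clear
            messages.append({"role": "user", "content": ask_user(reply)})
        return messages  # this history then seeds the code-writing phase

    # Example wiring with stand-ins for a real client and real user input:
    # history = clarification_loop("a Super Mario game in python",
    #                              chat=lambda msgs: "Nothing more to clarify.",
    #                              ask_user=input)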
Then after these clarifications, the agent moved into the code writing mode with a different system message. System message:

You will get instructions for code to write. You will write a very long answer. Make sure that every detail of the architecture is, in the end, implemented as code. Make sure that every detail of the architecture is, in the end, implemented as code.

Think step by step and reason yourself to the right decisions to make sure we get it right. You will first lay out the names of the core classes, functions, methods that will be necessary, as well as a quick comment on their purpose.

Then you will output the content of each file including ALL code. Each file must strictly follow a markdown code block format, where the following tokens must be replaced such that FILENAME is the lowercase file name including the file extension, LANG is the markup code block language for the code’s language, and CODE is the code:

FILENAME

    CODE


You will start with the “entrypoint” file, then go to the ones that are imported by that file, and so on. Please note that the code should be fully functional. No placeholders.

Follow a language and framework appropriate best practice file naming convention. Make sure that files contain all imports, types etc. Make sure that code in different files are compatible with each other. Ensure to implement all code, if you are unsure, write a plausible implementation. Include module dependency or package manager dependency definition file. Before you finish, double check that all parts of the architecture is present in the files.

Useful to know: You almost always put different classes in different files. For Python, you always create an appropriate requirements.txt file. For NodeJS, you always create an appropriate package.json file. You always add a comment briefly describing the purpose of the function definition. You try to add comments explaining very complex bits of logic. You always follow the best practices for the requested languages in terms of describing the code written as a defined package/project.

Python toolbelt preferences:

* pytest
* dataclasses

Conversation samples:

    [
      {
        "role": "system",
        "content": "You will get instructions for code to write.\nYou will write a very long answer. 
Make sure that every detail of the architecture is, in the end, implemented as code.\nMake sure that every detail of the architecture is, in the end, implemented as code.\n\nThink step by step and reason yourself to the right decisions to make sure we get it right.\nYou will first lay out the names of the core classes, functions, methods that will be necessary, as well as a quick comment on their purpose.\n\nThen you will output the content of each file including ALL code.\nEach file must strictly follow a markdown code block format, where the following tokens must be replaced such that\nFILENAME is the lowercase file name including the file extension,\nLANG is the markup code block language for the code's language, and CODE is the code:\n\nFILENAME\n```LANG\nCODE\n```\n\nYou will start with the \"entrypoint\" file, then go to the ones that are imported by that file, and so on.\nPlease note that the code should be fully functional. No placeholders.\n\nFollow a language and framework appropriate best practice file naming convention.\nMake sure that files contain all imports, types etc. Make sure that code in different files are compatible with each other.\nEnsure to implement all code, if you are unsure, write a plausible implementation.\nInclude module dependency or package manager dependency definition file.\nBefore you finish, double check that all parts of the architecture is present in the files.\n\nUseful to know:\nYou almost always put different classes in different files.\nFor Python, you always create an appropriate requirements.txt file.\nFor NodeJS, you always create an appropriate package.json file.\nYou always add a comment briefly describing the purpose of the function definition.\nYou try to add comments explaining very complex bits of logic.\nYou always follow the best practices for the requested languages in terms of describing the code written as a defined\npackage/project.\n\n\nPython toolbelt preferences:\n- pytest\n- dataclasses\n" + }, + # … same conversation as earlier, ended with "Make your own assumptions and state them explicitly before starting". + { + "role": "assistant", + "content": "Assumptions:\n1. Model: The model will contain the game's data, such as level information, character states, and enemy positions.\n2. View: The view will handle the game's visuals, including rendering the game objects, backgrounds, and updating the display.\n3. Controller: The controller will manage user input, such as keyboard controls, and update the model accordingly.\n\nWith these assumptions in mind, is there any specific aspect of the keyboard control implementation you would like me to clarify?" + }, + { + "role": "user", + "content": "Please now remember the steps:\n\nThink step by step and reason yourself to the right decisions to make sure we get it right.\nFirst lay out the names of the core classes, functions, methods that will be necessary, As well as a quick comment on their purpose.\n\nThen you will output the content of each file including ALL code.\nEach file must strictly follow a markdown code block format, where the following tokens must be replaced such that\nFILENAME is the lowercase file name including the file extension,\nLANG is the markup code block language for the code's language, and CODE is the code:\n\nFILENAME\n```LANG\nCODE\n```\n\nPlease note that the code should be fully functional. 
No placeholders.\n\nYou will start with the \"entrypoint\" file, then go to the ones that are imported by that file, and so on.\nFollow a language and framework appropriate best practice file naming convention.\nMake sure that files contain all imports, types etc. The code should be fully functional. Make sure that code in different files are compatible with each other.\nBefore you finish, double check that all parts of the architecture is present in the files.\n"
      }
    ]


Challenges[#](#challenges)
==========================

After going through key ideas and demos of building LLM-centered agents, I start to see a couple of common limitations:

* **Finite context length**: The restricted context capacity limits the inclusion of historical information, detailed instructions, API call context, and responses. The design of the system has to work with this limited communication bandwidth, while mechanisms like self-reflection that learn from past mistakes would benefit a lot from long or infinite context windows. Although vector stores and retrieval can provide access to a larger knowledge pool, their representational power is not as strong as full attention.

* **Challenges in long-term planning and task decomposition**: Planning over a lengthy history and effectively exploring the solution space remain challenging. LLMs struggle to adjust plans when faced with unexpected errors, making them less robust than humans, who learn from trial and error.

* **Reliability of natural language interface**: Current agent systems rely on natural language as an interface between LLMs and external components such as memory and tools. However, the reliability of model outputs is questionable, as LLMs may make formatting errors and occasionally exhibit rebellious behavior (e.g. refuse to follow an instruction). Consequently, much of the agent demo code focuses on parsing model output.


Citation[#](#citation)
======================

Cited as:

> Weng, Lilian. (Jun 2023). "LLM-powered Autonomous Agents". Lil’Log. https://lilianweng.github.io/posts/2023-06-23-agent/.

Or

    @article{weng2023prompt,
      title   = "LLM-powered Autonomous Agents",
      author  = "Weng, Lilian",
      journal = "lilianweng.github.io",
      year    = "2023",
      month   = "Jun",
      url     = "https://lilianweng.github.io/posts/2023-06-23-agent/"
    }


References[#](#references)
==========================

\[1\] Wei et al. [“Chain of thought prompting elicits reasoning in large language models."](https://arxiv.org/abs/2201.11903) NeurIPS 2022.

\[2\] Yao et al. [“Tree of Thoughts: Deliberate Problem Solving with Large Language Models."](https://arxiv.org/abs/2305.10601) arXiv preprint arXiv:2305.10601 (2023).

\[3\] Liu et al. [“Chain of Hindsight Aligns Language Models with Feedback."](https://arxiv.org/abs/2302.02676) arXiv preprint arXiv:2302.02676 (2023).

\[4\] Liu et al. [“LLM+P: Empowering Large Language Models with Optimal Planning Proficiency."](https://arxiv.org/abs/2304.11477) arXiv preprint arXiv:2304.11477 (2023).

\[5\] Yao et al. [“ReAct: Synergizing reasoning and acting in language models."](https://arxiv.org/abs/2210.03629) ICLR 2023.

\[6\] Google Blog. [“Announcing ScaNN: Efficient Vector Similarity Search”](https://ai.googleblog.com/2020/07/announcing-scann-efficient-vector.html) July 28, 2020.

\[7\] [https://chat.openai.com/share/46ff149e-a4c7-4dd7-a800-fc4a642ea389](https://chat.openai.com/share/46ff149e-a4c7-4dd7-a800-fc4a642ea389)

\[8\] Shinn & Labash. 
[“Reflexion: an autonomous agent with dynamic memory and self-reflection”](https://arxiv.org/abs/2303.11366) arXiv preprint arXiv:2303.11366 (2023).

\[9\] Laskin et al. [“In-context Reinforcement Learning with Algorithm Distillation”](https://arxiv.org/abs/2210.14215) ICLR 2023.

\[10\] Karpas et al. [“MRKL Systems A modular, neuro-symbolic architecture that combines large language models, external knowledge sources and discrete reasoning."](https://arxiv.org/abs/2205.00445) arXiv preprint arXiv:2205.00445 (2022).

\[11\] Weaviate Blog. [Why is Vector Search so fast?](https://weaviate.io/blog/why-is-vector-search-so-fast) Sep 13, 2022.

\[12\] Li et al. [“API-Bank: A Benchmark for Tool-Augmented LLMs”](https://arxiv.org/abs/2304.08244) arXiv preprint arXiv:2304.08244 (2023).

\[13\] Shen et al. [“HuggingGPT: Solving AI Tasks with ChatGPT and its Friends in HuggingFace”](https://arxiv.org/abs/2303.17580) arXiv preprint arXiv:2303.17580 (2023).

\[14\] Bran et al. [“ChemCrow: Augmenting large-language models with chemistry tools."](https://arxiv.org/abs/2304.05376) arXiv preprint arXiv:2304.05376 (2023).

\[15\] Boiko et al. [“Emergent autonomous scientific research capabilities of large language models."](https://arxiv.org/abs/2304.05332) arXiv preprint arXiv:2304.05332 (2023).

\[16\] Joon Sung Park, et al. [“Generative Agents: Interactive Simulacra of Human Behavior."](https://arxiv.org/abs/2304.03442) arXiv preprint arXiv:2304.03442 (2023).

\[17\] AutoGPT. [https://github.com/Significant-Gravitas/Auto-GPT](https://github.com/Significant-Gravitas/Auto-GPT)

\[18\] GPT-Engineer. [https://github.com/AntonOsika/gpt-engineer](https://github.com/AntonOsika/gpt-engineer)

From 4f4181bcb7a2c7b35b221f776e9aa7e7dbbb1570 Mon Sep 17 00:00:00 2001
From: Kye 
Date: Sun, 16 Jul 2023 22:49:05 -0400
Subject: [PATCH 39/41] transforming tools to tool dict

Former-commit-id: 4799638c32f62473ca2d37fbd9f98f037f8ada41
---
swarms/agents/workers/worker_agent_ultra.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/swarms/agents/workers/worker_agent_ultra.py b/swarms/agents/workers/worker_agent_ultra.py index e82c8439..2502f771 100644 --- a/swarms/agents/workers/worker_agent_ultra.py +++ b/swarms/agents/workers/worker_agent_ultra.py @@ -43,10 +43,11 @@ class WorkerUltraNode: def create_agent(self, ai_name="Swarm Worker AI Assistant", ai_role="Assistant", human_in_the_loop=False, search_kwargs={}, verbose=False): logging.info("Creating agent in WorkerNode") try: + tools_dict = {type(tool).__name__: tool for tool in self.toolsets} self.agent = AutoGPT.from_llm_and_tools( ai_name=ai_name, ai_role=ai_role, - tools=self.toolsets, + tools=tools_dict, # Pass the dictionary instead of the list llm=self.llm, memory=self.vectorstore.as_retriever(search_kwargs=search_kwargs), human_in_the_loop=human_in_the_loop, From 976fbd7b47fad67ff7f6afbf0faf8f398ccbcfa1 Mon Sep 17 00:00:00 2001 From: Kye Date: Sun, 16 Jul 2023 23:00:20 -0400 Subject: [PATCH 40/41] tools lists Former-commit-id: 510f82d6caad466fed50dd4744474e4a9e151a00 --- swarms/agents/workers/worker_agent_ultra.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/swarms/agents/workers/worker_agent_ultra.py b/swarms/agents/workers/worker_agent_ultra.py index 2502f771..1e4ee56c 100644 --- a/swarms/agents/workers/worker_agent_ultra.py +++ b/swarms/agents/workers/worker_agent_ultra.py @@ -43,11 +43,11 @@ class WorkerUltraNode: def create_agent(self, ai_name="Swarm Worker AI Assistant", ai_role="Assistant", human_in_the_loop=False, search_kwargs={}, verbose=False): logging.info("Creating agent in WorkerNode") try: - tools_dict = {type(tool).__name__: tool for tool in self.toolsets} + tools_list = list(self.toolsets.values()) self.agent = AutoGPT.from_llm_and_tools( ai_name=ai_name, ai_role=ai_role, - tools=tools_dict, # Pass the dictionary instead of the list + tools=tools_list, # Pass the dictionary instead of the list llm=self.llm, memory=self.vectorstore.as_retriever(search_kwargs=search_kwargs), human_in_the_loop=human_in_the_loop, From fcc637c0ee5e7504e23605bca8c1c87977073bbf Mon Sep 17 00:00:00 2001 From: Kye Date: Tue, 18 Jul 2023 08:53:54 -0400 Subject: [PATCH 41/41] clean up Former-commit-id: 313d822ecaeb14cc5551ce5f4fcfd685622a60df --- api/container.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api/container.py b/api/container.py index 41f0d11d..212520f7 100644 --- a/api/container.py +++ b/api/container.py @@ -18,7 +18,7 @@ from swarms.utils.main import StaticUploader BASE_DIR = Path(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) os.chdir(BASE_DIR / os.environ["PLAYGROUND_DIR"]) - +# toolsets: List[BaseToolSet] = [ Terminal(), CodeEditor(),