From 60d777080d7df910f3df2fa6b02d352a61fd8413 Mon Sep 17 00:00:00 2001 From: Kye Date: Sat, 2 Dec 2023 01:33:50 -0800 Subject: [PATCH] [FEAT][ChromaDB] [FEAT][tool_func_doc_scraper] [FEAT][BaseVectorStore] [FEAT][memory -> short_memory] [FEAT][memory: BaseVectorDB] --- example.py | 2 +- futures.py | 15 - playground/demos/ad_gen/ad_gen.py | 71 ++-- .../structs/autoscaler.py | 0 playground/tools/tool_prompt_scaper.py | 21 ++ pyproject.toml | 2 +- swarms/memory/base_vector_db.py | 60 ++++ swarms/memory/chroma_db.py | 172 +++++++++ swarms/structs/agent.py | 327 ++++++++++++------ swarms/tools/__init__.py | 3 + swarms/tools/tool_func_doc_scraper.py | 36 ++ swarms/utils/token_count_tiktoken.py | 27 ++ 12 files changed, 587 insertions(+), 149 deletions(-) delete mode 100644 futures.py rename autoscaler.py => playground/structs/autoscaler.py (100%) create mode 100644 playground/tools/tool_prompt_scaper.py create mode 100644 swarms/memory/base_vector_db.py create mode 100644 swarms/memory/chroma_db.py create mode 100644 swarms/tools/tool_func_doc_scraper.py create mode 100644 swarms/utils/token_count_tiktoken.py diff --git a/example.py b/example.py index 67ea34f9..4bab46cd 100644 --- a/example.py +++ b/example.py @@ -20,7 +20,7 @@ llm = OpenAIChat( ## Initialize the workflow -agent = Agent(llm=llm, max_loops=1, dashboard=True) +agent = Agent(llm=llm, max_loops=1, dashboard=True, autosave=True) # Run the workflow on a task out = agent.run("Generate a 10,000 word blog on health and wellness.") diff --git a/futures.py b/futures.py deleted file mode 100644 index c5c468df..00000000 --- a/futures.py +++ /dev/null @@ -1,15 +0,0 @@ -import concurrent.futures -import time -import random -from swarms.utils.futures import execute_futures_dict - - -def f(x): - time.sleep(random.random()) - return x - - -with concurrent.futures.ThreadPoolExecutor() as executor: - """Create a dictionary of futures.""" - fs_dict = {str(i): executor.submit(f, i) for i in range(10)} - print(execute_futures_dict(fs_dict)) diff --git a/playground/demos/ad_gen/ad_gen.py b/playground/demos/ad_gen/ad_gen.py index 3d16eb25..b665b63a 100644 --- a/playground/demos/ad_gen/ad_gen.py +++ b/playground/demos/ad_gen/ad_gen.py @@ -10,43 +10,62 @@ openai_api_key = os.getenv("OPENAI_API_KEY") stability_api_key = os.getenv("STABILITY_API_KEY") # Initialize the language model and image generation model -llm = OpenAIChat(openai_api_key=openai_api_key, temperature=0.5, max_tokens=3000) +llm = OpenAIChat( + openai_api_key=openai_api_key, temperature=0.5, max_tokens=3000 +) sd_api = StableDiffusion(api_key=stability_api_key) + # Creative Concept Generator for Product Ads class ProductAdConceptGenerator: def __init__(self, product_name): self.product_name = product_name self.themes = [ - "futuristic", "rustic", "luxurious", "minimalistic", "vibrant", "elegant", - "retro", "urban", "ethereal", "surreal", "artistic", "tech-savvy", - "vintage", "natural", "sophisticated", "playful", "dynamic", "serene", "lasers," "lightning" + "futuristic", + "rustic", + "luxurious", + "minimalistic", + "vibrant", + "elegant", + "retro", + "urban", + "ethereal", + "surreal", + "artistic", + "tech-savvy", + "vintage", + "natural", + "sophisticated", + "playful", + "dynamic", + "serene", + "lasers,lightning", ] self.contexts = [ - "in an everyday setting", "in a rave setting", "in an abstract environment", - "in an adventurous context", "surrounded by nature", "in a high-tech setting", - "in a historical context", "in a busy urban scene", "in a tranquil and peaceful setting", - "against a backdrop of city lights", "in a surreal dreamscape", "in a festive atmosphere", - "in a luxurious setting", "in a playful and colorful background", "in an ice cave setting", - "in a serene and calm landscape" + "in an everyday setting", + "in a rave setting", + "in an abstract environment", + "in an adventurous context", + "surrounded by nature", + "in a high-tech setting", + "in a historical context", + "in a busy urban scene", + "in a tranquil and peaceful setting", + "against a backdrop of city lights", + "in a surreal dreamscape", + "in a festive atmosphere", + "in a luxurious setting", + "in a playful and colorful background", + "in an ice cave setting", + "in a serene and calm landscape", ] -<<<<<<< HEAD -======= self.contexts = [ "high realism product ad (extremely creative)" ] ->>>>>>> 831147e ([CODE QUALITY]) def generate_concept(self): theme = random.choice(self.themes) context = random.choice(self.contexts) -<<<<<<< HEAD - return f"An ad for {self.product_name} that embodies a {theme} theme {context}" - -# User input -product_name = input("Enter a product name for ad creation (e.g., 'PS5', 'AirPods', 'Kirkland Vodka'): ") -social_media_platform = input("Enter a social media platform (e.g., 'Facebook', 'Twitter', 'Instagram'): ") -======= return ( f"{theme} inside a {style} {self.product_name}, {context}" ) @@ -57,7 +76,6 @@ product_name = input( "Enter a product name for ad creation (e.g., 'PS5', 'AirPods'," " 'Kirkland Vodka'): " ) ->>>>>>> 831147e ([CODE QUALITY]) # Generate creative concept concept_generator = ProductAdConceptGenerator(product_name) @@ -68,15 +86,13 @@ image_paths = sd_api.run(creative_concept) # Generate ad copy ad_copy_agent = Agent(llm=llm, max_loops=1) -ad_copy_prompt = f"Write a compelling {social_media_platform} ad copy for a product photo showing {product_name} {creative_concept}." +ad_copy_prompt = ( + f"Write a compelling {social_media_platform} ad copy for a" + f" product photo showing {product_name} {creative_concept}." +) ad_copy = ad_copy_agent.run(task=ad_copy_prompt) # Output the results -<<<<<<< HEAD -print("Creative Concept:", creative_concept) -print("Image Path:", image_paths[0] if image_paths else "No image generated") -print("Ad Copy:", ad_copy) -======= print("Creative Concept:", concept_result) print("Design Ideas:", design_result) print("Ad Copy:", copywriting_result) @@ -84,4 +100,3 @@ print( "Image Path:", image_paths[0] if image_paths else "No image generated", ) ->>>>>>> 831147e ([CODE QUALITY]) diff --git a/autoscaler.py b/playground/structs/autoscaler.py similarity index 100% rename from autoscaler.py rename to playground/structs/autoscaler.py diff --git a/playground/tools/tool_prompt_scaper.py b/playground/tools/tool_prompt_scaper.py new file mode 100644 index 00000000..162211c3 --- /dev/null +++ b/playground/tools/tool_prompt_scaper.py @@ -0,0 +1,21 @@ +from swarms.tools.tool import tool +from swarms.tools.tool_func_doc_scraper import scrape_tool_func_docs + +# Define a tool by decorating a function with the tool decorator and providing a docstring + +@tool(return_direct=True) +def search_api(query: str): + """Search the web for the query + + Args: + query (str): _description_ + + Returns: + _type_: _description_ + """ + return f"Search results for {query}" + + +# Scrape the tool func docs to prepare for injection into the agent prompt +out = scrape_tool_func_docs(search_api) +print(out) diff --git a/pyproject.toml b/pyproject.toml index b9b10e4e..13547aa0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api" [tool.poetry] name = "swarms" -version = "2.5.0" +version = "2.5.2" description = "Swarms - Pytorch" license = "MIT" authors = ["Kye Gomez "] diff --git a/swarms/memory/base_vector_db.py b/swarms/memory/base_vector_db.py new file mode 100644 index 00000000..fc58bf36 --- /dev/null +++ b/swarms/memory/base_vector_db.py @@ -0,0 +1,60 @@ +from abc import ABC, abstractmethod +from typing import Any, Dict + + +class VectorDatabase(ABC): + @abstractmethod + def add( + self, vector: Dict[str, Any], metadata: Dict[str, Any] + ) -> None: + """ + add a vector into the database. + + Args: + vector (Dict[str, Any]): The vector to add. + metadata (Dict[str, Any]): Metadata associated with the vector. + """ + pass + + @abstractmethod + def query( + self, vector: Dict[str, Any], num_results: int + ) -> Dict[str, Any]: + """ + Query the database for vectors similar to the given vector. + + Args: + vector (Dict[str, Any]): The vector to compare against. + num_results (int): The number of similar vectors to return. + + Returns: + Dict[str, Any]: The most similar vectors and their associated metadata. + """ + pass + + @abstractmethod + def delete(self, vector_id: str) -> None: + """ + Delete a vector from the database. + + Args: + vector_id (str): The ID of the vector to delete. + """ + pass + + @abstractmethod + def update( + self, + vector_id: str, + vector: Dict[str, Any], + metadata: Dict[str, Any], + ) -> None: + """ + Update a vector in the database. + + Args: + vector_id (str): The ID of the vector to update. + vector (Dict[str, Any]): The new vector. + metadata (Dict[str, Any]): The new metadata. + """ + pass diff --git a/swarms/memory/chroma_db.py b/swarms/memory/chroma_db.py new file mode 100644 index 00000000..a225524e --- /dev/null +++ b/swarms/memory/chroma_db.py @@ -0,0 +1,172 @@ +import os +from termcolor import colored +import logging +from typing import Dict, List, Optional +import chromadb +import tiktoken as tiktoken +from chromadb.config import Settings +from chromadb.utils.embedding_functions import OpenAIEmbeddingFunction +from dotenv import load_dotenv +from swarms.utils.token_count_tiktoken import limit_tokens_from_string + +load_dotenv() + +# ChromaDB settings +client = chromadb.Client(Settings(anonymized_telemetry=False)) + + +# ChromaDB client +def get_chromadb_client(): + return client + + +# OpenAI API key +OPENAI_API_KEY = os.getenv("OPENAI_API_KEY") + + +# Results storage using local ChromaDB +class ChromaDB: + """ + + ChromaDB database + + Args: + metric (str): _description_ + RESULTS_STORE_NAME (str): _description_ + LLM_MODEL (str): _description_ + openai_api_key (str): _description_ + + Methods: + add: _description_ + query: _description_ + + Examples: + >>> chromadb = ChromaDB( + >>> metric="cosine", + >>> RESULTS_STORE_NAME="results", + >>> LLM_MODEL="gpt3", + >>> openai_api_key=OPENAI_API_KEY, + >>> ) + >>> chromadb.add(task, result, result_id) + >>> chromadb.query(query, top_results_num) + """ + + def __init__( + self, + metric: str, + RESULTS_STORE_NAME: str, + LLM_MODEL: str, + openai_api_key: str = OPENAI_API_KEY, + top_results_num: int = 3, + limit_tokens: Optional[int] = 1000, + ): + self.metric = metric + self.RESULTS_STORE_NAME = RESULTS_STORE_NAME + self.LLM_MODEL = LLM_MODEL + self.openai_api_key = openai_api_key + self.top_results_num = top_results_num + self.limit_tokens = limit_tokens + + # Disable ChromaDB logging + logging.getLogger("chromadb").setLevel(logging.ERROR) + # Create Chroma collection + chroma_persist_dir = "chroma" + chroma_client = chromadb.PersistentClient( + settings=chromadb.config.Settings( + persist_directory=chroma_persist_dir, + ) + ) + + # Create embedding function + embedding_function = OpenAIEmbeddingFunction( + api_key=openai_api_key + ) + + # Create Chroma collection + self.collection = chroma_client.get_or_create_collection( + name=RESULTS_STORE_NAME, + metadata={"hnsw:space": metric}, + embedding_function=embedding_function, + ) + + def add(self, task: Dict, result: str, result_id: str): + """Adds a result to the ChromaDB collection + + Args: + task (Dict): _description_ + result (str): _description_ + result_id (str): _description_ + """ + + try: + # Embed the result + embeddings = ( + self.collection.embedding_function.embed([result])[0] + .tolist() + .copy() + ) + + # If the result is a list, flatten it + if ( + len( + self.collection.get(ids=[result_id], include=[])[ + "ids" + ] + ) + > 0 + ): # Check if the result already exists + self.collection.update( + ids=result_id, + embeddings=embeddings, + documents=result, + metadatas={ + "task": task["task_name"], + "result": result, + }, + ) + + # If the result is not a list, add it + else: + self.collection.add( + ids=result_id, + embeddings=embeddings, + documents=result, + metadatas={ + "task": task["task_name"], + "result": result, + }, + ) + except Exception as error: + print( + colored(f"Error adding to ChromaDB: {error}", "red") + ) + + def query( + self, + query: str, + ) -> List[dict]: + """Queries the ChromaDB collection with a query for the top results + + Args: + query (str): _description_ + top_results_num (int): _description_ + + Returns: + List[dict]: _description_ + """ + try: + count: int = self.collection.count() + if count == 0: + return [] + results = self.collection.query( + query_texts=query, + n_results=min(self.top_results_num, count), + include=["metadatas"], + ) + out = [item["task"] for item in results["metadatas"][0]] + out = limit_tokens_from_string( + out, "gpt-4", self.limit_tokens + ) + return out + except Exception as error: + print(colored(f"Error querying ChromaDB: {error}", "red")) diff --git a/swarms/structs/agent.py b/swarms/structs/agent.py index fe62dc20..1ebc8880 100644 --- a/swarms/structs/agent.py +++ b/swarms/structs/agent.py @@ -10,6 +10,7 @@ from typing import Any, Callable, Dict, List, Optional, Tuple from termcolor import colored +from swarms.memory.base_vector_db import VectorDatabase from swarms.prompts.agent_system_prompts import ( FLOW_SYSTEM_PROMPT, agent_system_prompt_2, @@ -26,6 +27,7 @@ from swarms.utils.parse_code import ( extract_code_in_backticks_in_string, ) from swarms.utils.pdf_to_text import pdf_to_text +from swarms.utils.token_count_tiktoken import limit_tokens_from_string # Utils @@ -35,11 +37,13 @@ def stop_when_repeats(response: str) -> bool: return "Stop" in response.lower() +# Parse done token def parse_done_token(response: str) -> bool: """Parse the response to see if the done token is present""" return "" in response +# Agent ID generator def agent_id(): """Generate an agent id""" return str(uuid.uuid4()) @@ -58,16 +62,40 @@ class Agent: * Ability to provide a loop interval Args: + id (str): The id of the agent llm (Any): The language model to use - max_loops (int): The maximum number of loops to run - stopping_condition (Optional[Callable[[str], bool]]): A stopping condition - loop_interval (int): The interval between loops - retry_attempts (int): The number of retry attempts - retry_interval (int): The interval between retry attempts - interactive (bool): Whether or not to run in interactive mode - dashboard (bool): Whether or not to print the dashboard - dynamic_temperature_enabled(bool): Dynamical temperature handling - **kwargs (Any): Any additional keyword arguments + template (Optional[str]): The template to use + max_loops (int): The maximum number of loops + stopping_condition (Optional[Callable[[str], bool]]): The stopping condition + loop_interval (int): The loop interval + retry_attempts (int): The retry attempts + retry_interval (int): The retry interval + return_history (bool): Return the history + stopping_token (str): The stopping token + dynamic_loops (Optional[bool]): Dynamic loops + interactive (bool): Interactive mode + dashboard (bool): Dashboard mode + agent_name (str): The name of the agent + agent_description (str): The description of the agent + system_prompt (str): The system prompt + tools (List[BaseTool]): The tools + dynamic_temperature_enabled (Optional[bool]): Dynamic temperature enabled + sop (Optional[str]): The standard operating procedure + sop_list (Optional[List[str]]): The standard operating procedure list + saved_state_path (Optional[str]): The saved state path + autosave (Optional[bool]): Autosave + context_length (Optional[int]): The context length + user_name (str): The user name + self_healing_enabled (Optional[bool]): Self healing enabled + code_interpreter (Optional[bool]): Code interpreter + multi_modal (Optional[bool]): Multi modal + pdf_path (Optional[str]): The pdf path + list_of_pdf (Optional[str]): The list of pdf + tokenizer (Optional[Any]): The tokenizer + memory (Optional[VectorDatabase]): The memory + preset_stopping_token (Optional[bool]): Preset stopping token + *args: Variable length argument list. + **kwargs: Arbitrary keyword arguments. Methods: run(task: str, **kwargs: Any): Run the agent on a task @@ -143,15 +171,14 @@ class Agent: dynamic_loops: Optional[bool] = False, interactive: bool = False, dashboard: bool = False, - agent_name: str = "Autonomous Agent XYZ1B", + agent_name: str = "Autonomous-Agent-XYZ1B", agent_description: str = None, system_prompt: str = FLOW_SYSTEM_PROMPT, tools: List[BaseTool] = None, dynamic_temperature_enabled: Optional[bool] = False, sop: Optional[str] = None, sop_list: Optional[List[str]] = None, - # memory: Optional[Vectorstore] = None, - saved_state_path: Optional[str] = "flow_state.json", + saved_state_path: Optional[str] = None, autosave: Optional[bool] = False, context_length: Optional[int] = 8192, user_name: str = "Human:", @@ -161,6 +188,8 @@ class Agent: pdf_path: Optional[str] = None, list_of_pdf: Optional[str] = None, tokenizer: Optional[Any] = None, + memory: Optional[VectorDatabase] = None, + preset_stopping_token: Optional[bool] = False, *args, **kwargs: Any, ): @@ -187,7 +216,7 @@ class Agent: self.system_prompt = system_prompt self.agent_name = agent_name self.agent_description = agent_description - self.saved_state_path = saved_state_path + self.saved_state_path = f"{self.agent_name}_state.json" self.autosave = autosave self.response_filters = [] self.self_healing_enabled = self_healing_enabled @@ -196,6 +225,8 @@ class Agent: self.pdf_path = pdf_path self.list_of_pdf = list_of_pdf self.tokenizer = tokenizer + self.memory = memory + self.preset_stopping_token = preset_stopping_token # The max_loops will be set dynamically if the dynamic_loop if self.dynamic_loops: @@ -211,11 +242,15 @@ class Agent: # Memory self.feedback = [] - self.memory = [] + self.short_memory = [] # Initialize the code executor self.code_executor = SubprocessCodeInterpreter() + # If the preset stopping token is enabled then set the stopping token to the preset stopping token + if preset_stopping_token: + self.stopping_token = "" + def provide_feedback(self, feedback: str) -> None: """Allow users to provide feedback on the responses.""" self.feedback.append(feedback) @@ -349,20 +384,29 @@ class Agent: """ Take the history and truncate it to fit into the model context length """ - truncated_history = self.memory[-1][-self.context_length :] - self.memory[-1] = truncated_history + # truncated_history = self.short_memory[-1][-self.context_length :] + # self.short_memory[-1] = truncated_history + # out = limit_tokens_from_string( + # "\n".join(truncated_history), self.llm.model_name + # ) + truncated_history = self.short_memory[-1][ + -self.context_length : + ] + text = "\n".join(truncated_history) + out = limit_tokens_from_string(text, "gpt-4") + return out def add_task_to_memory(self, task: str): """Add the task to the memory""" - self.memory.append([f"{self.user_name}: {task}"]) + self.short_memory.append([f"{self.user_name}: {task}"]) def add_message_to_memory(self, message: str): """Add the message to the memory""" - self.memory[-1].append(message) + self.short_memory[-1].append(message) def add_message_to_memory_and_truncate(self, message: str): """Add the message to the memory and truncate""" - self.memory[-1].append(message) + self.short_memory[-1].append(message) self.truncate_history() def print_dashboard(self, task: str): @@ -404,7 +448,36 @@ class Agent: ) ) - # print(dashboard) + def add_message_to_memory_db( + self, message: Dict[str, Any], metadata: Dict[str, Any] + ) -> None: + """Add the message to the memory + + Args: + message (Dict[str, Any]): _description_ + metadata (Dict[str, Any]): _description_ + """ + if self.memory is not None: + self.memory.add(message, metadata) + + def query_memorydb( + self, + message: Dict[str, Any], + num_results: int = 100, + ) -> Dict[str, Any]: + """Query the memory database + + Args: + message (Dict[str, Any]): _description_ + num_results (int): _description_ + + Returns: + Dict[str, Any]: _description_ + """ + if self.memory is not None: + return self.memory.query(message, num_results) + else: + return {} def activate_autonomous_agent(self): """Print the autonomous agent activation message""" @@ -588,18 +661,20 @@ class Agent: time.sleep(self.loop_interval) # Add the history to the memory - self.memory.append(history) + self.short_memory.append(history) # If autosave is enabled then save the state if self.autosave: - save_path = self.saved_state_path or "flow_state.json" print( colored( - f"Autosaving agent state to {save_path}", + ( + "Autosaving agent state to" + f" {self.saved_state_path}" + ), "green", ) ) - self.save_state(save_path) + self.save_state(self.saved_state_path) # If return history is enabled then return the response and history if self.return_history: @@ -685,13 +760,16 @@ class Agent: self.memory.append(history) if self.autosave: - save_path = self.saved_state_path or "flow_state.json" print( colored( - f"Autosaving agent state to {save_path}", "green" + ( + "Autosaving agent state to" + f" {self.saved_state_path}" + ), + "green", ) ) - self.save_state(save_path) + self.save_state(self.saved_state_path) if self.return_history: return response, history @@ -776,8 +854,13 @@ class Agent: return Agent(llm=llm, template=template) def save(self, file_path) -> None: + """Save the agent history to a file. + + Args: + file_path (_type_): _description_ + """ with open(file_path, "w") as f: - json.dump(self.memory, f) + json.dump(self.short_memory, f) print(f"Saved agent history to {file_path}") def load(self, file_path: str): @@ -788,7 +871,7 @@ class Agent: file_path (str): The path to the file containing the saved agent history. """ with open(file_path, "r") as f: - self.memory = json.load(f) + self.short_memory = json.load(f) print(f"Loaded agent history from {file_path}") def validate_response(self, response: str) -> bool: @@ -813,7 +896,9 @@ class Agent: "========================", "cyan", attrs=["bold"] ) ) - for loop_index, history in enumerate(self.memory, start=1): + for loop_index, history in enumerate( + self.short_memory, start=1 + ): print( colored( f"\nLoop {loop_index}:", "yellow", attrs=["bold"] @@ -856,10 +941,10 @@ class Agent: # Update the agent's history with the new interaction if self.interactive: - self.memory.append(f"AI: {response}") - self.memory.append(f"Human: {task}") + self.short_memory.append(f"AI: {response}") + self.short_memory.append(f"Human: {task}") else: - self.memory.append(f"AI: {response}") + self.short_memory.append(f"AI: {response}") return response except Exception as error: @@ -903,14 +988,14 @@ class Agent: print(message) """ - if len(self.memory) < 2: + if len(self.short_memory) < 2: return None, None # Remove the last response - self.memory.pop() + self.short_memory.pop() # Get the previous state - previous_state = self.memory[-1][-1] + previous_state = self.short_memory[-1][-1] return previous_state, f"Restored to {previous_state}" # Response Filtering @@ -930,7 +1015,6 @@ class Agent: """ Apply the response filters to the response - """ for word in self.response_filters: response = response.replace(word, "[FILTERED]") @@ -1029,21 +1113,28 @@ class Agent: >>> agent.save_state('saved_flow.json') """ state = { - "memory": self.memory, - # "llm_params": self.get_llm_params(), + "agent_id": str(self.id), + "agent_name": self.agent_name, + "agent_description": self.agent_description, + "system_prompt": self.system_prompt, + "sop": self.sop, + "memory": self.short_memory, "loop_interval": self.loop_interval, "retry_attempts": self.retry_attempts, "retry_interval": self.retry_interval, "interactive": self.interactive, "dashboard": self.dashboard, "dynamic_temperature": self.dynamic_temperature_enabled, + "autosave": self.autosave, + "saved_state_path": self.saved_state_path, + "max_loops": self.max_loops, } with open(file_path, "w") as f: json.dump(state, f, indent=4) - saved = colored("Saved agent state to", "green") - print(f"{saved} {file_path}") + saved = colored(f"Saved agent state to: {file_path}", "green") + print(saved) def load_state(self, file_path: str): """ @@ -1060,7 +1151,16 @@ class Agent: state = json.load(f) # Restore other saved attributes - self.memory = state.get("memory", []) + self.id = state.get("agent_id", self.id) + self.agent_name = state.get("agent_name", self.agent_name) + self.agent_description = state.get( + "agent_description", self.agent_description + ) + self.system_prompt = state.get( + "system_prompt", self.system_prompt + ) + self.sop = state.get("sop", self.sop) + self.short_memory = state.get("short_memory", []) self.max_loops = state.get("max_loops", 5) self.loop_interval = state.get("loop_interval", 1) self.retry_attempts = state.get("retry_attempts", 3) @@ -1120,7 +1220,7 @@ class Agent: def reset(self): """Reset the agent""" - self.memory = [] + self.short_memory = [] def run_code(self, code: str): """ @@ -1143,7 +1243,7 @@ class Agent: text = pdf_to_text(pdf) return text - def pdf_chunker(self, text: str = None): + def pdf_chunker(self, text: str = None, num_limits: int = 1000): """Chunk the pdf into sentences Args: @@ -1153,13 +1253,21 @@ class Agent: _type_: _description_ """ text = text or self.pdf_connector() - pass + text = limit_tokens_from_string(text, num_limits) + return text def tools_prompt_prep( self, docs: str = None, scenarios: str = None ): """ - Prepare the tool prompt + Tools prompt prep + + Args: + docs (str, optional): _description_. Defaults to None. + scenarios (str, optional): _description_. Defaults to None. + + Returns: + _type_: _description_ """ PROMPT = f""" # Task @@ -1214,61 +1322,72 @@ class Agent: ‘‘‘ """ - # def self_healing(self, **kwargs): - # """ - # Self healing by debugging errors and refactoring its own code - - # Args: - # **kwargs (Any): Any additional keyword arguments - # """ - # pass - - # def refactor_code( - # self, - # file: str, - # changes: List, - # confirm: bool = False - # ): - # """ - # Refactor the code - # """ - # with open(file) as f: - # original_file_lines = f.readlines() - - # # Filter out the changes that are not confirmed - # operation_changes = [ - # change for change in changes if "operation" in change - # ] - # explanations = [ - # change["explanation"] for change in changes if "explanation" in change - # ] - - # # Sort the changes in reverse line order - # # explanations.sort(key=lambda x: x["line", reverse=True]) - - # def error_prompt_inject( - # self, - # file_path: str, - # args: List, - # error: str, - # ): - # with open(file_path, "r") as f: - # file_lines = f.readlines() - - # file_with_lines = [] - # for i, line in enumerate(file_lines): - # file_with_lines.append(str(i + 1) + "" + line) - # file_with_lines = "".join(file_with_lines) - - # prompt = f""" - # Here is the script that needs fixing:\n\n - # {file_with_lines}\n\n - # Here are the arguments it was provided:\n\n - # {args}\n\n - # Here is the error message:\n\n - # {error}\n - # "Please provide your suggested changes, and remember to stick to the " - # exact format as described above. - # """ - - # print(prompt) + def self_healing(self, **kwargs): + """ + Self healing by debugging errors and refactoring its own code + + Args: + **kwargs (Any): Any additional keyword arguments + """ + pass + + def refactor_code( + self, file: str, changes: List, confirm: bool = False + ): + """_summary_ + + Args: + file (str): _description_ + changes (List): _description_ + confirm (bool, optional): _description_. Defaults to False. + """ + # with open(file) as f: + # original_file_lines = f.readlines() + + # # Filter out the changes that are not confirmed + # operation_changes = [ + # change for change in changes if "operation" in change + # ] + # explanations = [ + # change["explanation"] for change in changes if "explanation" in change + # ] + + # Sort the changes in reverse line order + # explanations.sort(key=lambda x: x["line", reverse=True]) + pass + + def error_prompt_inject( + self, + file_path: str, + args: List, + error: str, + ): + """ + Error prompt injection + + Args: + file_path (str): _description_ + args (List): _description_ + error (str): _description_ + + """ + # with open(file_path, "r") as f: + # file_lines = f.readlines() + + # file_with_lines = [] + # for i, line in enumerate(file_lines): + # file_with_lines.append(str(i + 1) + "" + line) + # file_with_lines = "".join(file_with_lines) + + # prompt = f""" + # Here is the script that needs fixing:\n\n + # {file_with_lines}\n\n + # Here are the arguments it was provided:\n\n + # {args}\n\n + # Here is the error message:\n\n + # {error}\n + # "Please provide your suggested changes, and remember to stick to the " + # exact format as described above. + # """ + # print(prompt) + pass diff --git a/swarms/tools/__init__.py b/swarms/tools/__init__.py index e69de29b..dc81462c 100644 --- a/swarms/tools/__init__.py +++ b/swarms/tools/__init__.py @@ -0,0 +1,3 @@ +from swarms.tools.tool_func_doc_scraper import scrape_tool_func_docs + +__all__ = ["scrape_tool_func_docs"] \ No newline at end of file diff --git a/swarms/tools/tool_func_doc_scraper.py b/swarms/tools/tool_func_doc_scraper.py new file mode 100644 index 00000000..003d7101 --- /dev/null +++ b/swarms/tools/tool_func_doc_scraper.py @@ -0,0 +1,36 @@ +import inspect +from typing import Callable +from termcolor import colored + +def scrape_tool_func_docs(fn: Callable) -> str: + """ + Scrape the docstrings and parameters of a function decorated with `tool` and return a formatted string. + + Args: + fn (Callable): The function to scrape. + + Returns: + str: A string containing the function's name, documentation string, and a list of its parameters. Each parameter is represented as a line containing the parameter's name, default value, and annotation. + """ + try: + + # If the function is a tool, get the original function + if hasattr(fn, "func"): + fn = fn.func + + signature = inspect.signature(fn) + parameters = [] + for name, param in signature.parameters.items(): + parameters.append( + f"Name: {name}, Type:" + f" {param.default if param.default is not param.empty else 'None'}," + " Annotation:" + f" {param.annotation if param.annotation is not param.empty else 'None'}" + ) + parameters_str = "\n".join(parameters) + return ( + f"Function: {fn.__name__}\nDocstring:" + f" {inspect.getdoc(fn)}\nParameters:\n{parameters_str}" + ) + except Exception as error: + print(colored(f"Error scraping tool function docs {error} try optimizing your inputs with different variables and attempt once more.", "red")) \ No newline at end of file diff --git a/swarms/utils/token_count_tiktoken.py b/swarms/utils/token_count_tiktoken.py new file mode 100644 index 00000000..f8a47b98 --- /dev/null +++ b/swarms/utils/token_count_tiktoken.py @@ -0,0 +1,27 @@ +import tiktoken + + +def limit_tokens_from_string( + string: str, model: str = "gpt-4", limit: int = 500 +) -> str: + """Limits the number of tokens in a string + + Args: + string (str): _description_ + model (str): _description_ + limit (int): _description_ + + Returns: + str: _description_ + """ + try: + encoding = tiktoken.encoding_for_model(model) + except Exception: + encoding = tiktoken.encoding_for_model( + "gpt2" + ) # Fallback for others. + + encoded = encoding.encode(string) + + out = encoding.decode(encoded[:limit]) + return out