From ef4759472e833e5c8bc48d832b54cef0019062ad Mon Sep 17 00:00:00 2001 From: Kye Date: Tue, 16 Jan 2024 14:30:19 -0500 Subject: [PATCH] [FEATS][ csv_to_text] [json_to_text] [txt_to_text] [data_to_text] [Agent][ingest_docs] --- swarms/structs/agent.py | 65 ++++++++++++++++++++---------------- swarms/utils/__init__.py | 10 ++++++ swarms/utils/data_to_text.py | 49 +++++++++++++++++++++++++++ 3 files changed, 95 insertions(+), 29 deletions(-) create mode 100644 swarms/utils/data_to_text.py diff --git a/swarms/structs/agent.py b/swarms/structs/agent.py index 3903d4ad..af770466 100644 --- a/swarms/structs/agent.py +++ b/swarms/structs/agent.py @@ -29,6 +29,7 @@ from swarms.utils.parse_code import ( ) from swarms.utils.pdf_to_text import pdf_to_text from swarms.utils.token_count_tiktoken import limit_tokens_from_string +from swarms.utils.data_to_text import data_to_text # Utils @@ -52,15 +53,10 @@ def agent_id(): class Agent: """ - Agent is the structure that provides autonomy to any llm in a reliable and effective fashion. - The agent structure is designed to be used with any llm and provides the following features: + Agent is the backbone to connect LLMs with tools and long term memory. Agent can also ingest + documents (CSV, JSON, TXT, and PDF files) passed via `docs`. Here is a list of features. 
+ - Features: - * Interactive, AI generates, then user input - * Message history and performance history fed -> into context -> truncate if too long - * Ability to save and load flows - * Ability to provide feedback on responses - * Ability to provide a loop interval Args: llm (Any): The language model to use @@ -191,6 +187,7 @@ class Agent: traceback: Any = None, traceback_handlers: Any = None, streaming_on: Optional[bool] = False, + docs: Optional[List[str]] = None, *args, **kwargs: Any, ): @@ -234,9 +231,8 @@ class Agent: self.traceback = traceback self.traceback_handlers = traceback_handlers self.streaming_on = streaming_on - - # self.system_prompt = AGENT_SYSTEM_PROMPT_3 - + self.docs = docs + # The max_loops will be set dynamically if the dynamic_loop if self.dynamic_loops: self.max_loops = "auto" @@ -266,6 +262,12 @@ class Agent: self.tools_prompt_prep(self.tool_docs, SCENARIOS) ) + # self.short_memory_test = Conversation(time_enabled=True) + + # If the docs exist then ingest the docs + if self.docs: + self.ingest_docs(self.docs) + def set_system_prompt(self, system_prompt: str): """Set the system prompt""" self.system_prompt = system_prompt @@ -640,10 +642,6 @@ class Agent: AGENT_SYSTEM_PROMPT_3, response ) - # # Retreiving long term memory - # if self.memory: - # task = self.agent_memory_prompt(response, task) - attempt = 0 while attempt < self.retry_attempts: try: @@ -717,6 +715,15 @@ class Agent: print(f"Error running agent: {error}") raise + def __call__(self, task: str, img: str = None, *args, **kwargs): + """Call the agent + + Args: + task (str): Task forwarded to `run`. + img (str, optional): Image argument forwarded to `run`. Defaults to None. 
+ """ + return self.run(task, img, *args, **kwargs) + def _run(self, **kwargs: Any) -> str: """Run the agent on a task @@ -823,20 +830,6 @@ class Agent: except Exception as error: print(colored(f"Error running bulk run: {error}", "red")) - @staticmethod - def from_llm_and_template(llm: Any, template: str) -> "Agent": - """Create AgentStream from LLM and a string template.""" - return Agent(llm=llm, template=template) - - @staticmethod - def from_llm_and_template_file( - llm: Any, template_file: str - ) -> "Agent": - """Create AgentStream from LLM and a template file.""" - with open(template_file, "r") as f: - template = f.read() - return Agent(llm=llm, template=template) - def save(self, file_path) -> None: """Save the agent history to a file. @@ -1352,3 +1345,17 @@ class Agent: ‘‘‘ """ return PROMPT + + def ingest_docs(self, docs: List[str], *args, **kwargs): + """Convert each doc to text and append it to short_memory + + Args: + docs (List[str]): Paths of the files to ingest + + Returns: + Result of the last short_memory.append call, or None if docs is empty + """ + result = None + for doc in docs: + result = self.short_memory.append(data_to_text(doc)) + return result diff --git a/swarms/utils/__init__.py b/swarms/utils/__init__.py index c1479507..77c99ccd 100644 --- a/swarms/utils/__init__.py +++ b/swarms/utils/__init__.py @@ -12,6 +12,12 @@ from swarms.utils.prep_torch_model_inference import ( prep_torch_inference, ) from swarms.utils.token_count_tiktoken import limit_tokens_from_string +from swarms.utils.data_to_text import ( + csv_to_text, + json_to_text, + txt_to_text, + data_to_text, +) __all__ = [ @@ -27,4 +33,8 @@ __all__ = [ "prep_torch_inference", "print_class_parameters", "check_device", + "csv_to_text", + "json_to_text", + "txt_to_text", + "data_to_text", ] diff --git a/swarms/utils/data_to_text.py b/swarms/utils/data_to_text.py new file mode 100644 index 00000000..5e220f19 --- /dev/null +++ b/swarms/utils/data_to_text.py @@ -0,0 +1,49 @@ +import os +import csv +import json +from swarms.utils.pdf_to_text import pdf_to_text + + +def csv_to_text(file): + with 
open(file, "r", newline="") as f: + reader = csv.reader(f) + data = list(reader) + return str(data) + + +def json_to_text(file): + with open(file, "r") as f: + data = json.load(f) + return json.dumps(data) + + +def txt_to_text(file): + with open(file, "r") as f: + data = f.read() + return data + + +def data_to_text(file): + """ + Converts the given data file to text format. + + Args: + file (str): The path to the data file (.csv, .json, .txt or .pdf, matched case-insensitively). + + Returns: + str: The text representation of the data file. + + Raises: + ValueError: If the file extension is not supported. + """ + _, ext = os.path.splitext(file) + kind = ext.lower() + if kind == ".csv": + return csv_to_text(file) + if kind == ".json": + return json_to_text(file) + if kind == ".txt": + return txt_to_text(file) + if kind == ".pdf": + return pdf_to_text(file) + raise ValueError(f"Unsupported file extension: {ext}")