[FEATS][ csv_to_text] [json_to_text] [txt_to_text] [data_to_text] [Agent][ingest_docs]

2 years ago · ef4759472e
parent b61c250140
commit ef4759472e
3 changed files with 95 additions and 29 deletions
--- a/swarms/structs/agent.py
+++ b/swarms/structs/agent.py
@ -29,6 +29,7 @@ from swarms.utils.parse_code import (
 )
 from swarms.utils.pdf_to_text import pdf_to_text
 from swarms.utils.token_count_tiktoken import limit_tokens_from_string
 from swarms.utils.data_to_text import data_to_text
 # Utils
@ -52,15 +53,10 @@ def agent_id():
 class Agent:
    """
-    Agent is the structure that provides autonomy to any llm in a reliable and effective fashion.
+    Agent is the backbone to connect LLMs with tools and long term memory. Agent also provides the ability to
-    The agent structure is designed to be used with any llm and provides the following features:
+    ingest any type of document (PDF, TXT, Markdown, JSON, etc.) for the agent. Here is a list of features.
    Features:
    * Interactive, AI generates, then user input
    * Message history and performance history fed -> into context -> truncate if too long
    * Ability to save and load flows
    * Ability to provide feedback on responses
    * Ability to provide a loop interval
    Args:
        llm (Any): The language model to use
@ -191,6 +187,7 @@ class Agent:
        traceback: Any = None,
        traceback_handlers: Any = None,
        streaming_on: Optional[bool] = False,
        docs: List[str] = None,
        *args,
        **kwargs: Any,
    ):
@ -234,8 +231,7 @@ class Agent:
        self.traceback = traceback
        self.traceback_handlers = traceback_handlers
        self.streaming_on = streaming_on
-
+        self.docs = docs
        # self.system_prompt = AGENT_SYSTEM_PROMPT_3
        # The max_loops will be set dynamically if the dynamic_loop
        if self.dynamic_loops:
@ -266,6 +262,12 @@ class Agent:
                self.tools_prompt_prep(self.tool_docs, SCENARIOS)
            )
        # self.short_memory_test = Conversation(time_enabled=True)
        # If the docs exist then ingest the docs
        if self.docs:
            self.ingest_docs(self.docs)
    def set_system_prompt(self, system_prompt: str):
        """Replace the agent's system prompt.

        Args:
            system_prompt (str): The new prompt text, stored on the
                instance as ``self.system_prompt``.
        """
        self.system_prompt = system_prompt
@ -640,10 +642,6 @@ class Agent:
                    AGENT_SYSTEM_PROMPT_3, response
                )
                # # Retreiving long term memory
                # if self.memory:
                #     task = self.agent_memory_prompt(response, task)
                attempt = 0
                while attempt < self.retry_attempts:
                    try:
@ -717,6 +715,15 @@ class Agent:
            print(f"Error running agent: {error}")
            raise
    def __call__(self, task: str, img: str = None, *args, **kwargs):
        """Run the agent on a task by calling the instance directly.

        Thin wrapper that forwards every argument to ``self.run``.

        Args:
            task (str): The task handed to ``run``.
            img (str, optional): Image argument passed through to ``run``
                unchanged. Defaults to None.
            *args: Extra positional arguments forwarded to ``run``.
            **kwargs: Extra keyword arguments forwarded to ``run``.

        Returns:
            Whatever ``self.run`` returns for these arguments.
        """
        # Propagate the result: it used to be dropped, so ``agent(task)``
        # always evaluated to None regardless of what ``run`` produced.
        return self.run(task, img, *args, **kwargs)
    def _run(self, **kwargs: Any) -> str:
        """Run the agent on a task
@ -823,20 +830,6 @@ class Agent:
        except Exception as error:
            print(colored(f"Error running bulk run: {error}", "red"))
    @staticmethod
    def from_llm_and_template(llm: Any, template: str) -> "Agent":
        """Build an Agent from a language model and a template string.

        Args:
            llm (Any): The language model passed to the Agent constructor.
            template (str): The template text passed as ``template``.

        Returns:
            Agent: A new Agent constructed with ``llm`` and ``template``.
        """
        agent = Agent(llm=llm, template=template)
        return agent
    @staticmethod
    def from_llm_and_template_file(llm: Any, template_file: str) -> "Agent":
        """Build an Agent whose template is read from a file.

        Args:
            llm (Any): The language model passed to the Agent constructor.
            template_file (str): Path of the file whose full contents are
                used as the template.

        Returns:
            Agent: A new Agent constructed with ``llm`` and the file's text.
        """
        with open(template_file, "r") as handle:
            contents = handle.read()
        return Agent(llm=llm, template=contents)
    def save(self, file_path) -> None:
        """Save the agent history to a file.
@ -1352,3 +1345,17 @@ class Agent:
        ‘‘‘
        """
        return PROMPT
    def ingest_docs(self, docs: List[str], *args, **kwargs):
        """Convert each document to text and append it to short-term memory.

        Every path in ``docs`` is converted with ``data_to_text`` and the
        resulting text is appended to ``self.short_memory``, in order.

        Args:
            docs (List[str]): Paths of the document files to ingest. An
                empty list (or None) ingests nothing.
            *args: Accepted for call compatibility; not used.
            **kwargs: Accepted for call compatibility; not used.

        Returns:
            The value returned by the final ``self.short_memory.append``
            call, or None when there was nothing to ingest.
        """
        result = None
        for doc in docs or ():
            # Append inside the loop so every document is stored. Appending
            # once after the loop kept only the last document and failed on
            # an empty list because ``data`` was never assigned.
            result = self.short_memory.append(data_to_text(doc))
        return result
--- a/swarms/utils/init.py
+++ b/swarms/utils/init.py
@ -12,6 +12,12 @@ from swarms.utils.prep_torch_model_inference import (
    prep_torch_inference,
 )
 from swarms.utils.token_count_tiktoken import limit_tokens_from_string
 from swarms.utils.data_to_text import (
    csv_to_text,
    json_to_text,
    txt_to_text,
    data_to_text,
 )
 __all__ = [
@ -27,4 +33,8 @@ __all__ = [
    "prep_torch_inference",
    "print_class_parameters",
    "check_device",
    "csv_to_text",
    "json_to_text",
    "txt_to_text",
    "data_to_text",
 ]
--- a/swarms/utils/data_to_text.py
+++ b/swarms/utils/data_to_text.py
@ -0,0 +1,49 @@
 import os
 import csv
 import json
 from swarms.utils.pdf_to_text import pdf_to_text
 def csv_to_text(file):
    """Read a CSV file and return its rows as a string.

    Args:
        file (str): Path to the CSV file.

    Returns:
        str: ``str()`` of the list of rows, where each row is a list of
        field strings.
    """
    # newline="" leaves line-ending handling to the csv module, so line
    # breaks embedded in quoted fields are preserved exactly as written.
    with open(file, "r", newline="") as handle:
        rows = list(csv.reader(handle))
    return str(rows)
 def json_to_text(file):
    """Load a JSON file and return it re-serialized as a JSON string.

    Args:
        file (str): Path to the JSON file.

    Returns:
        str: The parsed content dumped back with ``json.dumps`` defaults.
    """
    with open(file, "r") as handle:
        parsed = json.loads(handle.read())
    serialized = json.dumps(parsed)
    return serialized
 def txt_to_text(file):
    """Return the entire contents of a text file.

    Args:
        file (str): Path to the text file.

    Returns:
        str: Everything read from the file in a single call.
    """
    with open(file, "r") as handle:
        return handle.read()
 def data_to_text(file):
    """
    Converts the given data file to text format.

    The converter is chosen from the file extension, compared
    case-insensitively: ``.csv``, ``.json``, ``.txt`` / ``.md`` (read as
    plain text) and ``.pdf``.

    Args:
        file (str): The path to the data file.

    Returns:
        str: The text representation of the data file.

    Raises:
        ValueError: If the file extension is not supported.
    """
    _, ext = os.path.splitext(file)
    # Normalise case so "REPORT.PDF" is handled the same as "report.pdf".
    kind = ext.lower()
    if kind == ".csv":
        return csv_to_text(file)
    elif kind == ".json":
        return json_to_text(file)
    elif kind in (".txt", ".md"):
        # Markdown is plain text, so it shares the text reader.
        return txt_to_text(file)
    elif kind == ".pdf":
        return pdf_to_text(file)
    else:
        # Report the extension exactly as it appeared in the path.
        raise ValueError(f"Unsupported file extension: {ext}")