parent c529244913
commit 87c0d7477c
@@ -0,0 +1,26 @@
import os

from dotenv import load_dotenv

from swarms import Agent
from langchain.llms import OpenAIChat

# Loading environment variables from .env file
load_dotenv()

# Initialize the model
llm = OpenAIChat(
    openai_api_key=os.getenv("OPENAI_API_KEY"),
    max_tokens=1000,
)

# Initialize the agent
agent = Agent(
    llm=llm,
    max_loops="auto",
    autosave=True,
    dashboard=False,
    streaming_on=True,
    verbose=True,
)

# Run the workflow on a task
agent.run("Generate a 10,000 word blog on health and wellness.")
@@ -0,0 +1,30 @@
from swarms import Agent, OpenAIChat, ChromaDB

# Create an instance of the ChromaDB class for long-term memory
memory = ChromaDB(
    metric="cosine",
    n_results=3,
    output_dir="results",
    docs_folder="docs",
)

# Initialize the agent with the OpenAIChat model and other parameters
agent = Agent(
    agent_name="Covid-19-Chat",
    agent_description=(
        "This agent provides information about COVID-19 symptoms."
    ),
    llm=OpenAIChat(),
    max_loops="auto",
    autosave=True,
    verbose=True,
    long_term_memory=memory,
    stopping_condition="finish",
)

# Define the task
task = "What are the symptoms of COVID-19?"

# Run the agent on the task
out = agent.run(task)
print(out)
@@ -0,0 +1,24 @@
# Text embeddings, image embeddings, and multimodal embeddings
# Add text and image embeddings into a PostgreSQL database

import os

from swarms.models.jina_embeds import JinaEmbeddings
from swarms.models.gigabind import Gigabind

# Text embedding model (the API key is read from the environment
# instead of being hard-coded into the source)
model = JinaEmbeddings(
    max_length=8192,
    device="cuda",
    quantize=True,
    huggingface_api_key=os.getenv("HUGGINGFACE_API_KEY"),
)

# Encode text
embeddings = model("Encode this super long document text")

# Embed images and text with a multimodal model
model = Gigabind()

# Placeholder inputs for the multimodal embedding call
text = "A description of the images"
img1, img2, img3 = "image1.jpg", "image2.jpg", "image3.jpg"

multi_modal_embeddings = model(text=[text], imgs=[img1, img2, img3])
@@ -0,0 +1,89 @@
import io

import requests
import torch
from PIL import Image
from transformers import (
    AutoModelForCausalLM,
    AutoProcessor,
    GenerationConfig,
)

from swarms.models.base_multimodal_model import (
    BaseMultiModalModel,
)  # noqa: F401


class ChestMultiModalAgent(BaseMultiModalModel):
    """
    A multimodal agent for analyzing chest X-ray images with the CheXagent model.

    Args:
        device (str): The device to run the model on. Default is "cuda".
        dtype (torch.dtype): The data type to use for the model. Default is torch.float16.
        model_name (str): The name or path of the pre-trained model to use. Default is "StanfordAIMI/CheXagent-8b".

    Example:
        >>> agent = ChestMultiModalAgent()
        >>> agent.run("What are the symptoms of COVID-19?", "https://example.com/image.jpg")

    """

    def __init__(
        self,
        device="cuda",
        dtype=torch.float16,
        model_name="StanfordAIMI/CheXagent-8b",
        *args,
        **kwargs,
    ):
        # Step 1: Setup constants
        self.device = device
        self.dtype = dtype

        # Step 2: Load Processor and Model
        self.processor = AutoProcessor.from_pretrained(
            model_name, trust_remote_code=True
        )
        self.generation_config = GenerationConfig.from_pretrained(
            model_name
        )
        self.model = AutoModelForCausalLM.from_pretrained(
            model_name,
            torch_dtype=self.dtype,
            trust_remote_code=True,
            *args,
            **kwargs,
        )

    def run(self, task: str, img: str, *args, **kwargs):
        """
        Run the ChestMultiModalAgent to generate findings based on an image and a prompt.

        Args:
            task (str): The prompt to use for generating findings.
            img (str): The URL or local path of the image.

        Returns:
            str: The generated findings.
        """
        # Step 3: Fetch the image
        images = [
            Image.open(io.BytesIO(requests.get(img).content)).convert(
                "RGB"
            )
        ]

        # Step 4: Generate the Findings section
        inputs = self.processor(
            images=images,
            text=f" USER: <s>{task} ASSISTANT: <s>",
            return_tensors="pt",
        ).to(device=self.device, dtype=self.dtype)
        output = self.model.generate(
            **inputs,
            generation_config=self.generation_config,
        )[0]
        response = self.processor.tokenizer.decode(
            output, skip_special_tokens=True
        )

        return response
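A minimal usage sketch for the class above; the prompt and image URL are placeholders, and a CUDA device plus the CheXagent weights are assumed to be available.

# Hypothetical usage of ChestMultiModalAgent; the image URL is illustrative.
agent = ChestMultiModalAgent(device="cuda", dtype=torch.float16)
findings = agent.run(
    "Describe the findings in this chest X-ray.",
    "https://example.com/chest_xray.jpg",
)
print(findings)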
@@ -0,0 +1,97 @@
import re
from collections import Counter


def extract_last_python_code_block(text):
    """
    Extracts the last Python code block from the given text.

    Args:
        text (str): The text to search for Python code blocks.

    Returns:
        str or None: The last Python code block found in the text, or None if no code block is found.
    """
    # The regular expression pattern for Python code blocks
    pattern = r"```[pP]ython(.*?)```"

    # Find all matches in the text
    matches = re.findall(pattern, text, re.DOTALL)

    # If there are matches, return the last one
    if matches:
        return matches[-1].strip()
    else:
        return None


def parse_code_completion(agent_response, question):
    """
    Parses the code completion response from the agent and extracts the last Python code block.

    Args:
        agent_response (str): The response from the agent.
        question (str): The original question.

    Returns:
        tuple: A tuple containing the parsed Python code and a boolean indicating success.
    """
    python_code = extract_last_python_code_block(agent_response)
    if python_code is None:
        if agent_response.count("impl]") == 0:
            python_code = agent_response
        else:
            python_code_lines = agent_response.split("\n")
            python_code = ""
            in_func = False
            for line in python_code_lines:
                if in_func:
                    python_code += line + "\n"
                if "impl]" in line:
                    in_func = True
    if python_code.count("def") == 0:
        python_code = question + python_code
    return python_code, True


def most_frequent(
    clist: list,
    cmp_func: callable = None,
):
    """
    Finds the most frequent element in a list based on a comparison function.

    Args:
        clist (list): The list of elements to search.
        cmp_func (function, optional): The comparison function used to decide whether
            two elements match. If not provided, plain equality is used.

    Returns:
        tuple: A tuple containing the most frequent element and its frequency.
    """
    # Fall back to plain equality when no comparison function is given
    if cmp_func is None:
        cmp_func = lambda a, b: a == b

    counter = 0
    num = clist[0]

    for i in clist:
        current_frequency = sum(cmp_func(i, item) for item in clist)
        if current_frequency > counter:
            counter = current_frequency
            num = i

    return num, counter


def majority_voting(answers: list):
    """
    Performs majority voting on a list of answers and returns the most common answer.

    Args:
        answers (list): A list of answers.

    Returns:
        The most common answer in the list.
    """
    counter = Counter(answers)
    answer = counter.most_common(1)[0][0]
    return answer
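A brief usage sketch for the parsing helpers above; the sample agent response is invented for illustration.

# Hypothetical agent reply containing a fenced Python block
sample_response = (
    "Here is my solution:\n"
    "```python\n"
    "def add(a, b):\n"
    "    return a + b\n"
    "```"
)

# Extract the last code block from the reply
code = extract_last_python_code_block(sample_response)
print(code)  # def add(a, b): ...

# Majority voting over several candidate answers
print(majority_voting(["4", "4", "5"]))  # -> "4"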
@@ -0,0 +1,249 @@
import signal
import platform
import os
import multiprocessing
import tempfile
import contextlib
from typing import Dict, Optional
import faulthandler
import io


class WriteOnlyStringIO(io.StringIO):
    """StringIO that throws an exception when it's read from"""

    def read(self, *args, **kwargs):
        raise IOError

    def readline(self, *args, **kwargs):
        raise IOError

    def readlines(self, *args, **kwargs):
        raise IOError

    def readable(self, *args, **kwargs):
        """Returns True if the IO object can be read."""
        return False


@contextlib.contextmanager
def chdir(root):
    """
    Context manager that changes the current working directory to the specified root directory.

    Args:
        root (str): The root directory to change to.

    Yields:
        None

    Raises:
        BaseException: If an error occurs within the context.

    Returns:
        None
    """
    if root == ".":
        yield
        return
    cwd = os.getcwd()
    os.chdir(root)
    try:
        yield
    except BaseException as error:
        raise error
    finally:
        os.chdir(cwd)


@contextlib.contextmanager
def create_tempdir():
    """Context manager that creates a temporary directory and chdirs into it."""
    with tempfile.TemporaryDirectory() as dirname:
        with chdir(dirname):
            yield dirname


@contextlib.contextmanager
def swallow_io():
    """
    A context manager that redirects stdout, stderr, and stdin to a WriteOnlyStringIO object.
    This allows for capturing and suppressing the output of a function.

    Usage:
        with swallow_io():
            # code to capture output
    """
    stream = WriteOnlyStringIO()
    with contextlib.redirect_stdout(stream):
        with contextlib.redirect_stderr(stream):
            with redirect_stdin(stream):
                yield


class redirect_stdin(contextlib._RedirectStream):  # type: ignore
    _stream = "stdin"


@contextlib.contextmanager
def time_limit(seconds: float):
    """Context manager that raises TimeoutError if the block runs longer than `seconds`."""

    def signal_handler(signum, frame):
        raise TimeoutError("Timed out")

    signal.setitimer(signal.ITIMER_REAL, seconds)
    signal.signal(signal.SIGALRM, signal_handler)
    try:
        yield
    finally:
        signal.setitimer(signal.ITIMER_REAL, 0)


def check_function_result(
    python_code: str, timeout: float = 5.0
) -> Dict:
    """
    Evaluates the functional correctness of the given code by executing it in a
    separate, restricted process.

    Args:
        python_code (str): The code (including its test suite) to execute.
        timeout (float): Maximum execution time in seconds. Defaults to 5.0.

    Returns:
        Dict: A dictionary with a boolean "passed" flag and a "result" message.
    """

    def unsafe_execute():
        with create_tempdir():
            # These system calls are needed when cleaning up tempdir.
            import os
            import shutil

            rmtree = shutil.rmtree
            rmdir = os.rmdir
            chdir = os.chdir

            # Disable functionalities that can make destructive changes to the test.
            reliability_guard()

            # Construct the check program and run it.
            check_program = python_code + "\n"

            try:
                exec_globals = {}
                with swallow_io():
                    with time_limit(timeout):
                        exec(check_program, exec_globals)
                result.append("passed")
            except TimeoutError:
                result.append("timed out")
            except BaseException as e:
                result.append(f"failed: {e}")

            # Needed for cleaning up.
            shutil.rmtree = rmtree
            os.rmdir = rmdir
            os.chdir = chdir

    manager = multiprocessing.Manager()
    result = manager.list()

    p = multiprocessing.Process(target=unsafe_execute)
    p.start()
    p.join(timeout=timeout + 1)
    if p.is_alive():
        p.kill()

    if not result:
        result.append("timed out")

    return dict(
        passed=result[0] == "passed",
        result=result[0],
    )


def reliability_guard(maximum_memory_bytes: Optional[int] = None):
    """
    This disables various destructive functions and prevents the generated code
    from interfering with the test (e.g. fork bomb, killing other processes,
    removing filesystem files, etc.)

    WARNING
    This function is NOT a security sandbox. Untrusted code, including model-
    generated code, should not be blindly executed outside of one. See the
    Codex paper for more information about OpenAI's code sandbox, and proceed
    with caution.
    """

    if maximum_memory_bytes is not None:
        import resource

        resource.setrlimit(
            resource.RLIMIT_AS,
            (maximum_memory_bytes, maximum_memory_bytes),
        )
        resource.setrlimit(
            resource.RLIMIT_DATA,
            (maximum_memory_bytes, maximum_memory_bytes),
        )
        if not platform.uname().system == "Darwin":
            resource.setrlimit(
                resource.RLIMIT_STACK,
                (maximum_memory_bytes, maximum_memory_bytes),
            )

    faulthandler.disable()

    import builtins

    builtins.exit = None
    builtins.quit = None

    import os

    os.environ["OMP_NUM_THREADS"] = "1"

    os.kill = None
    os.system = None
    os.putenv = None
    os.remove = None
    os.removedirs = None
    os.rmdir = None
    os.fchdir = None
    os.setuid = None
    os.fork = None
    os.forkpty = None
    os.killpg = None
    os.rename = None
    os.renames = None
    os.truncate = None
    os.replace = None
    os.unlink = None
    os.fchmod = None
    os.fchown = None
    os.chmod = None
    os.chown = None
    os.chroot = None
    os.lchflags = None
    os.lchmod = None
    os.lchown = None
    os.getcwd = None
    os.chdir = None

    import shutil

    shutil.rmtree = None
    shutil.move = None
    shutil.chown = None

    import subprocess

    subprocess.Popen = None  # type: ignore

    __builtins__["help"] = None

    import sys

    sys.modules["ipdb"] = None
    sys.modules["joblib"] = None
    sys.modules["resource"] = None
    sys.modules["psutil"] = None
    sys.modules["tkinter"] = None
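A small usage sketch for the sandboxed checker above; the snippets being checked are illustrative, and the SIGALRM-based time limit plus fork-based multiprocessing assume a Unix-like system.

# Hypothetical checks: a passing snippet and a failing one.
ok = check_function_result("assert 1 + 1 == 2", timeout=2.0)
print(ok)   # {'passed': True, 'result': 'passed'}

bad = check_function_result("assert 1 + 1 == 3", timeout=2.0)
print(bad)  # {'passed': False, 'result': 'failed: ...'}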
@@ -0,0 +1,71 @@
import asyncio
import logging
import traceback
from typing import Tuple


async def execute_code_async(code: str) -> Tuple[str, str]:
    """
    This function takes a string of code as input, adds some documentation to it,
    and then attempts to execute the code asynchronously. If the code execution is successful,
    the function returns the new code and an empty string. If the code execution
    fails, the function returns the new code and the error message.

    Args:
        code (str): The original code.

    Returns:
        Tuple[str, str]: The new code with added documentation and the error message (if any).
    """

    # Validate the input
    if not isinstance(code, str):
        raise ValueError("The code must be a string.")

    # Add some documentation to the code
    documentation = """
    '''
    This code has been prepared for deployment in an execution sandbox.
    '''
    """

    # Combine the documentation and the original code
    new_code = documentation + "\n" + code

    # Attempt to execute the code
    error_message = ""
    try:
        # Use a secure environment to execute the code (e.g., a Docker container)
        # This is just a placeholder and would require additional setup and dependencies
        # exec_in_docker(new_code)
        exec(new_code)
        logging.info("Code executed successfully.")
    except Exception:
        error_message = traceback.format_exc()
        logging.error(
            "Code execution failed. Error: %s", error_message
        )

    # Return the new code and the error message (empty string on success)
    return new_code, error_message


def execute_code_sandbox(code: str, async_on: bool = False):
    """
    Executes the given code in a sandbox environment.

    Args:
        code (str): The code to be executed.
        async_on (bool, optional): If True, return the coroutine so the caller can
            await it in an existing event loop. If False, run the code to completion
            synchronously. Defaults to False.

    Returns:
        Tuple[str, str]: The prepared code and the error message from the execution
        (empty on success), or a coroutine yielding that tuple when async_on is True.
    """
    if async_on:
        # Hand the coroutine back so the caller can await it
        return execute_code_async(code)
    else:
        # Run the coroutine to completion and return its result
        return asyncio.run(execute_code_async(code))
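A quick usage sketch for the sandbox helpers above; note that, as the comments in the code point out, exec is only a placeholder for a real sandbox.

# Synchronous path: runs the code and returns (prepared_code, error_message)
prepared, err = execute_code_sandbox("print('hello world')")
print(err == "")  # True when execution succeeded

# Asynchronous path: the caller awaits the returned coroutine
async def main():
    prepared, err = await execute_code_sandbox("1 / 0", async_on=True)
    print(err)  # traceback text for the ZeroDivisionError

asyncio.run(main())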
@@ -0,0 +1,63 @@
from typing import Iterable, Dict
import gzip
import json
import os


ROOT = os.path.dirname(os.path.abspath(__file__))


def read_problems_from_jsonl(filename: str) -> Iterable[Dict]:
    """Read a JSONL file of tasks into a dict keyed by task_id."""
    return {task["task_id"]: task for task in stream_jsonl(filename)}


def stream_jsonl(filename: str) -> Iterable[Dict]:
    """
    Stream JSONL data from a file.

    Args:
        filename (str): The path to the JSONL file.

    Yields:
        Dict: A dictionary representing each JSON object in the file.
    """
    if filename.endswith(".gz"):
        with open(filename, "rb") as gzfp:
            with gzip.open(gzfp, "rt") as fp:
                for line in fp:
                    if any(not x.isspace() for x in line):
                        yield json.loads(line)
    else:
        with open(filename, "r") as fp:
            for line in fp:
                if any(not x.isspace() for x in line):
                    yield json.loads(line)


def write_jsonl(
    filename: str, data: Iterable[Dict], append: bool = False
):
    """
    Write a list of dictionaries to a JSONL file.

    Args:
        filename (str): The path to the output file.
        data (Iterable[Dict]): The data to be written to the file.
        append (bool, optional): If True, append to an existing file.
            If False, overwrite the file. Defaults to False.
    """
    if append:
        mode = "ab"
    else:
        mode = "wb"
    filename = os.path.expanduser(filename)
    if filename.endswith(".gz"):
        with open(filename, mode) as fp:
            with gzip.GzipFile(fileobj=fp, mode="wb") as gzfp:
                for x in data:
                    gzfp.write((json.dumps(x) + "\n").encode("utf-8"))
    else:
        with open(filename, mode) as fp:
            for x in data:
                fp.write((json.dumps(x) + "\n").encode("utf-8"))
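A short round-trip sketch for the JSONL helpers above; the file name and record contents are placeholders.

# Hypothetical round trip: write two records, then read them back keyed by task_id.
records = [
    {"task_id": "demo/0", "prompt": "def add(a, b):"},
    {"task_id": "demo/1", "prompt": "def sub(a, b):"},
]
write_jsonl("problems.jsonl", records)

problems = read_problems_from_jsonl("problems.jsonl")
print(problems["demo/0"]["prompt"])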