From 997fd1e1430429127fd977f9628d006e47f329a6 Mon Sep 17 00:00:00 2001
From: Kye <kye@apacmediasolutions.com>
Date: Tue, 26 Mar 2024 10:14:00 -0700
Subject: [PATCH] [CLEANUP] Remove chunkers package; move agent_process from schedulers to structs

---
 swarms/chunkers/__init__.py                   |   5 -
 swarms/chunkers/base_chunker.py               | 163 ------------------
 swarms/chunkers/chunk_seperator.py            |   7 -
 swarms/chunkers/text_chunker.py               |  13 --
 swarms/memory/__init__.py                     |   2 +-
 swarms/models/__init__.py                     |   5 +-
 swarms/models/petals.py                       |   1 +
 swarms/schedulers/__init__.py                 |   6 -
 swarms/structs/__init__.py                    |   7 +
 .../{schedulers => structs}/agent_process.py  |   0
 10 files changed, 11 insertions(+), 198 deletions(-)
 delete mode 100644 swarms/chunkers/__init__.py
 delete mode 100644 swarms/chunkers/base_chunker.py
 delete mode 100644 swarms/chunkers/chunk_seperator.py
 delete mode 100644 swarms/chunkers/text_chunker.py
 delete mode 100644 swarms/schedulers/__init__.py
 rename swarms/{schedulers => structs}/agent_process.py (100%)

diff --git a/swarms/chunkers/__init__.py b/swarms/chunkers/__init__.py
deleted file mode 100644
index b55d15c2..00000000
--- a/swarms/chunkers/__init__.py
+++ /dev/null
@@ -1,5 +0,0 @@
-from swarms.chunkers.base_chunker import BaseChunker
-from swarms.chunkers.chunk_seperator import ChunkSeparator
-from swarms.chunkers.text_chunker import TextChunker
-
-__all__ = ["ChunkSeparator", "BaseChunker", "TextChunker"]
diff --git a/swarms/chunkers/base_chunker.py b/swarms/chunkers/base_chunker.py
deleted file mode 100644
index 47f73a4e..00000000
--- a/swarms/chunkers/base_chunker.py
+++ /dev/null
@@ -1,163 +0,0 @@
-from __future__ import annotations
-
-from abc import ABC
-from dataclasses import dataclass, field
-
-from swarms.artifacts.text_artifact import TextArtifact
-from swarms.chunkers.chunk_seperator import ChunkSeparator
-from swarms.tokenizers.base_tokenizer import BaseTokenizer
-from swarms.tokenizers.openai_tokenizers import OpenAITokenizer
-
-
-@dataclass
-class BaseChunker(ABC):
-    """
-    Base class for chunking text into smaller chunks.
-    """
-
-    DEFAULT_SEPARATORS = [ChunkSeparator(" ")]
-
-    separators: list[ChunkSeparator] = field(
-        default_factory=lambda: BaseChunker.DEFAULT_SEPARATORS
-    )
-    tokenizer: BaseTokenizer = field(
-        default_factory=lambda: OpenAITokenizer(
-            model=OpenAITokenizer.DEFAULT_OPENAI_GPT_3_CHAT_MODEL
-        )
-    )
-    max_tokens: int = field(
-        default_factory=lambda: BaseChunker.tokenizer.max_tokens
-    )
-
-    def chunk(self, text: str | str) -> list[str]:
-        """
-        Chunk the given text into smaller chunks.
-
-        Args:
-            text (TextArtifact | str): The text to be chunked.
-
-        Returns:
-            list[TextArtifact]: The list of chunked text artifacts.
-        """
-        text = text.value if isinstance(text, str) else text
-
-        return [
-            TextArtifact(c) for c in self._chunk_recursively(text)
-        ]
-
-    def _chunk_recursively(
-        self,
-        chunk: str,
-        current_separator: ChunkSeparator | None = None,
-    ) -> list[str]:
-        """
-        Recursively chunk the given chunk into smaller subchunks.
-
-        Args:
-            chunk (str): The chunk to be recursively chunked.
-            current_separator (Optional[ChunkSeparator], optional): The current separator to be used. Defaults to None.
-
-        Returns:
-            list[str]: The list of recursively chunked subchunks.
-        """
-        token_count = self.tokenizer.count_tokens(chunk)
-
-        if token_count <= self.max_tokens:
-            return [chunk]
-        else:
-            balance_index = -1
-            balance_diff = float("inf")
-            tokens_count = 0
-            half_token_count = token_count // 2
-
-            # If a separator is provided, only use separators after it.
-            if current_separator:
-                separators = self.separators[
-                    self.separators.index(current_separator) :
-                ]
-            else:
-                separators = self.separators
-
-            # Loop through available separators to find the best split.
-            for separator in separators:
-                # Split the chunk into subchunks using the current separator.
-                subchunks = list(
-                    filter(None, chunk.split(separator.value))
-                )
-
-                # Check if the split resulted in more than one subchunk.
-                if len(subchunks) > 1:
-                    # Iterate through the subchunks and calculate token counts.
-                    for index, subchunk in enumerate(subchunks):
-                        if index < len(subchunks):
-                            if separator.is_prefix:
-                                subchunk = separator.value + subchunk
-                            else:
-                                subchunk = subchunk + separator.value
-
-                        tokens_count += self.tokenizer.count_tokens(
-                            subchunk
-                        )
-
-                        # Update the best split if the current one is more balanced.
-                        if (
-                            abs(tokens_count - half_token_count)
-                            < balance_diff
-                        ):
-                            balance_index = index
-                            balance_diff = abs(
-                                tokens_count - half_token_count
-                            )
-
-                    # Create the two subchunks based on the best separator.
-                    if separator.is_prefix:
-                        # If the separator is a prefix, append it before this subchunk.
-                        first_subchunk = (
-                            separator.value
-                            + separator.value.join(
-                                subchunks[: balance_index + 1]
-                            )
-                        )
-                        second_subchunk = (
-                            separator.value
-                            + separator.value.join(
-                                subchunks[balance_index + 1 :]
-                            )
-                        )
-                    else:
-                        # If the separator is not a prefix, append it after this subchunk.
-                        first_subchunk = (
-                            separator.value.join(
-                                subchunks[: balance_index + 1]
-                            )
-                            + separator.value
-                        )
-                        second_subchunk = separator.value.join(
-                            subchunks[balance_index + 1 :]
-                        )
-
-                    # Continue recursively chunking the subchunks.
-                    first_subchunk_rec = self._chunk_recursively(
-                        first_subchunk.strip(), separator
-                    )
-                    second_subchunk_rec = self._chunk_recursively(
-                        second_subchunk.strip(), separator
-                    )
-
-                    # Return the concatenated results of the subchunks if both are non-empty.
-                    if first_subchunk_rec and second_subchunk_rec:
-                        return (
-                            first_subchunk_rec + second_subchunk_rec
-                        )
-                    # If only one subchunk is non-empty, return it.
-                    elif first_subchunk_rec:
-                        return first_subchunk_rec
-                    elif second_subchunk_rec:
-                        return second_subchunk_rec
-                    else:
-                        return []
-            # If none of the separators result in a balanced split, split the chunk in half.
-            midpoint = len(chunk) // 2
-            return self._chunk_recursively(
-                chunk[:midpoint]
-            ) + self._chunk_recursively(chunk[midpoint:])
diff --git a/swarms/chunkers/chunk_seperator.py b/swarms/chunkers/chunk_seperator.py
deleted file mode 100644
index d554be48..00000000
--- a/swarms/chunkers/chunk_seperator.py
+++ /dev/null
@@ -1,7 +0,0 @@
-from dataclasses import dataclass
-
-
-@dataclass
-class ChunkSeparator:
-    value: str
-    is_prefix: bool = False
diff --git a/swarms/chunkers/text_chunker.py b/swarms/chunkers/text_chunker.py
deleted file mode 100644
index b8b17bf1..00000000
--- a/swarms/chunkers/text_chunker.py
+++ /dev/null
@@ -1,13 +0,0 @@
-from swarms.chunkers.base_chunker import BaseChunker
-from swarms.chunkers.chunk_seperator import ChunkSeparator
-
-
-class TextChunker(BaseChunker):
-    DEFAULT_SEPARATORS = [
-        ChunkSeparator("\n\n"),
-        ChunkSeparator("\n"),
-        ChunkSeparator(". "),
-        ChunkSeparator("! "),
-        ChunkSeparator("? "),
-        ChunkSeparator(" "),
-    ]
diff --git a/swarms/memory/__init__.py b/swarms/memory/__init__.py
index b92e35e1..7b56e444 100644
--- a/swarms/memory/__init__.py
+++ b/swarms/memory/__init__.py
@@ -14,4 +14,4 @@ __all__ = [
     "DictSharedMemory",
     "ShortTermMemory",
     "VisualShortTermMemory",
-]
\ No newline at end of file
+]
diff --git a/swarms/models/__init__.py b/swarms/models/__init__.py
index f26a2eeb..18f25b53 100644
--- a/swarms/models/__init__.py
+++ b/swarms/models/__init__.py
@@ -38,8 +38,6 @@ from swarms.models.popular_llms import (
     ReplicateLLM as Replicate,
 )
 from swarms.models.qwen import QwenVLMultiModal  # noqa: E402
-
-# from swarms.models.roboflow_model import RoboflowMultiModal
 from swarms.models.sam_supervision import SegmentAnythingMarkGenerator
 from swarms.models.sampling_params import SamplingParams, SamplingType
 from swarms.models.together import TogetherLLM  # noqa: E402
@@ -88,4 +86,5 @@ __all__ = [
     "AudioModality",
     "ImageModality",
     "VideoModality",
-]
\ No newline at end of file
+    "MosaicML",
+]
diff --git a/swarms/models/petals.py b/swarms/models/petals.py
index 699d7d9d..7a7823f2 100644
--- a/swarms/models/petals.py
+++ b/swarms/models/petals.py
@@ -1,6 +1,7 @@
 from transformers import AutoModelForCausalLM, AutoTokenizer
 from swarms.models.base_llm import AbstractLLM
 
+
 class Petals(AbstractLLM):
     """Petals Bloom models."""
 
diff --git a/swarms/schedulers/__init__.py b/swarms/schedulers/__init__.py
deleted file mode 100644
index 803b2278..00000000
--- a/swarms/schedulers/__init__.py
+++ /dev/null
@@ -1,6 +0,0 @@
-from swarms.schedulers.agent_process import (
-    AgentProcess,
-    AgentProcessQueue,
-)
-
-__all__ = ["AgentProcess", "AgentProcessQueue"]
diff --git a/swarms/structs/__init__.py b/swarms/structs/__init__.py
index f1090b06..18999e9f 100644
--- a/swarms/structs/__init__.py
+++ b/swarms/structs/__init__.py
@@ -75,6 +75,11 @@ from swarms.structs.utils import (
     parse_tasks,
 )
 from swarms.structs.auto_swarm import AutoSwarm, AutoSwarmRouter
+from swarms.structs.agent_process import (
+    AgentProcess,
+    AgentProcessQueue,
+)
+
 
 __all__ = [
     "Agent",
@@ -142,4 +147,6 @@ __all__ = [
     "AgentJob",
     "AutoSwarm",
     "AutoSwarmRouter",
+    "AgentProcess",
+    "AgentProcessQueue",
 ]
diff --git a/swarms/schedulers/agent_process.py b/swarms/structs/agent_process.py
similarity index 100%
rename from swarms/schedulers/agent_process.py
rename to swarms/structs/agent_process.py