commit 997fd1e143 (parent f4c6692671)

swarms/chunkers/__init__.py
@@ -1,5 +0,0 @@
from swarms.chunkers.base_chunker import BaseChunker
from swarms.chunkers.chunk_seperator import ChunkSeparator
from swarms.chunkers.text_chunker import TextChunker

__all__ = ["ChunkSeparator", "BaseChunker", "TextChunker"]
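
These re-exports expose the chunker classes at the package level, so callers could write, for example:

from swarms.chunkers import BaseChunker, ChunkSeparator, TextChunker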

swarms/chunkers/base_chunker.py
@@ -1,163 +0,0 @@
from __future__ import annotations

from abc import ABC
from dataclasses import dataclass, field

from swarms.artifacts.text_artifact import TextArtifact
from swarms.chunkers.chunk_seperator import ChunkSeparator
from swarms.tokenizers.base_tokenizer import BaseTokenizer
from swarms.tokenizers.openai_tokenizers import OpenAITokenizer


@dataclass
class BaseChunker(ABC):
    """
    Base class for chunking text into smaller chunks.
    """

    DEFAULT_SEPARATORS = [ChunkSeparator(" ")]

    separators: list[ChunkSeparator] | None = None
    tokenizer: BaseTokenizer = field(
        default_factory=lambda: OpenAITokenizer(
            model=OpenAITokenizer.DEFAULT_OPENAI_GPT_3_CHAT_MODEL
        )
    )
    max_tokens: int | None = None

    def __post_init__(self) -> None:
        # Resolve defaults from the concrete class and its tokenizer:
        # subclasses may override DEFAULT_SEPARATORS (e.g. TextChunker),
        # and max_tokens defaults to the tokenizer's own limit.
        if self.separators is None:
            self.separators = type(self).DEFAULT_SEPARATORS
        if self.max_tokens is None:
            self.max_tokens = self.tokenizer.max_tokens

    def chunk(self, text: TextArtifact | str) -> list[TextArtifact]:
        """
        Chunk the given text into smaller chunks.

        Args:
            text (TextArtifact | str): The text to be chunked.

        Returns:
            list[TextArtifact]: The list of chunked text artifacts.
        """
        text = text.value if isinstance(text, TextArtifact) else text

        return [
            TextArtifact(c) for c in self._chunk_recursively(text)
        ]

    def _chunk_recursively(
        self,
        chunk: str,
        current_separator: ChunkSeparator | None = None,
    ) -> list[str]:
        """
        Recursively chunk the given chunk into smaller subchunks.

        Args:
            chunk (str): The chunk to be recursively chunked.
            current_separator (ChunkSeparator | None, optional): The current separator to be used. Defaults to None.

        Returns:
            list[str]: The list of recursively chunked subchunks.
        """
        token_count = self.tokenizer.count_tokens(chunk)

        if token_count <= self.max_tokens:
            return [chunk]
        else:
            balance_index = -1
            balance_diff = float("inf")
            tokens_count = 0
            half_token_count = token_count // 2

            # If a separator is provided, only use separators after it.
            if current_separator:
                separators = self.separators[
                    self.separators.index(current_separator) :
                ]
            else:
                separators = self.separators

            # Loop through available separators to find the best split.
            for separator in separators:
                # Split the chunk into subchunks using the current separator.
                subchunks = list(
                    filter(None, chunk.split(separator.value))
                )

                # Check if the split resulted in more than one subchunk.
                if len(subchunks) > 1:
                    # Iterate through the subchunks and calculate token counts.
                    for index, subchunk in enumerate(subchunks):
                        # Re-attach the separator removed by the split so
                        # token counts reflect the original text.
                        if separator.is_prefix:
                            subchunk = separator.value + subchunk
                        elif index < len(subchunks) - 1:
                            subchunk = subchunk + separator.value

                        tokens_count += self.tokenizer.count_tokens(
                            subchunk
                        )

                        # Update the best split if the current one is more balanced.
                        if (
                            abs(tokens_count - half_token_count)
                            < balance_diff
                        ):
                            balance_index = index
                            balance_diff = abs(
                                tokens_count - half_token_count
                            )

                    # Create the two subchunks based on the best separator.
                    if separator.is_prefix:
                        # A prefix separator is prepended to each half.
                        first_subchunk = (
                            separator.value
                            + separator.value.join(
                                subchunks[: balance_index + 1]
                            )
                        )
                        second_subchunk = (
                            separator.value
                            + separator.value.join(
                                subchunks[balance_index + 1 :]
                            )
                        )
                    else:
                        # A suffix separator is appended to the first half only.
                        first_subchunk = (
                            separator.value.join(
                                subchunks[: balance_index + 1]
                            )
                            + separator.value
                        )
                        second_subchunk = separator.value.join(
                            subchunks[balance_index + 1 :]
                        )

                    # Continue recursively chunking the subchunks.
                    first_subchunk_rec = self._chunk_recursively(
                        first_subchunk.strip(), separator
                    )
                    second_subchunk_rec = self._chunk_recursively(
                        second_subchunk.strip(), separator
                    )

                    # Return the concatenated results if both are non-empty.
                    if first_subchunk_rec and second_subchunk_rec:
                        return (
                            first_subchunk_rec + second_subchunk_rec
                        )
                    # If only one subchunk is non-empty, return it.
                    elif first_subchunk_rec:
                        return first_subchunk_rec
                    elif second_subchunk_rec:
                        return second_subchunk_rec
                    else:
                        return []

            # If no separator produced a split, fall back to halving the chunk.
            midpoint = len(chunk) // 2
            return self._chunk_recursively(
                chunk[:midpoint]
            ) + self._chunk_recursively(chunk[midpoint:])
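
For reference, a minimal sketch of driving BaseChunker directly. The word-counting tokenizer is a hypothetical stand-in (the chunker only calls count_tokens() and reads max_tokens), not a tokenizer swarms ships:

from swarms.chunkers.base_chunker import BaseChunker
from swarms.chunkers.chunk_seperator import ChunkSeparator


class WordTokenizer:
    """Hypothetical stand-in: one token per whitespace-delimited word."""

    max_tokens = 8

    def count_tokens(self, text: str) -> int:
        return len(text.split())


chunker = BaseChunker(
    separators=[ChunkSeparator(". "), ChunkSeparator(" ")],
    tokenizer=WordTokenizer(),
    max_tokens=8,
)

# The 10-word input exceeds the 8-token budget, so it is split at the
# sentence separator closest to the halfway point, yielding two artifacts.
for artifact in chunker.chunk(
    "one two three. four five six. seven eight nine ten."
):
    print(artifact.value)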

swarms/chunkers/chunk_seperator.py
@@ -1,7 +0,0 @@
from dataclasses import dataclass


@dataclass
class ChunkSeparator:
    value: str
    is_prefix: bool = False
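
is_prefix controls which side of a split the separator is re-attached to during rebalancing: suffix separators (the default) stay with the text to their left, prefix separators with the text to their right. A small illustration; the heading separator is hypothetical, not one swarms defines:

from swarms.chunkers.chunk_seperator import ChunkSeparator

# ". " ends a sentence, so it is re-appended to the left-hand piece.
sentence_sep = ChunkSeparator(". ")

# A heading marker opens a section, so is_prefix keeps it attached to the
# text that follows the split.
heading_sep = ChunkSeparator("\n# ", is_prefix=True)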

swarms/chunkers/text_chunker.py
@@ -1,13 +0,0 @@
from swarms.chunkers.base_chunker import BaseChunker
from swarms.chunkers.chunk_seperator import ChunkSeparator


class TextChunker(BaseChunker):
    DEFAULT_SEPARATORS = [
        ChunkSeparator("\n\n"),
        ChunkSeparator("\n"),
        ChunkSeparator(". "),
        ChunkSeparator("! "),
        ChunkSeparator("? "),
        ChunkSeparator(" "),
    ]
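
A short usage sketch, assuming the default OpenAITokenizer can count tokens without further configuration (the input file name is illustrative):

from swarms.chunkers.text_chunker import TextChunker

chunker = TextChunker(max_tokens=200)

with open("notes.md") as f:
    artifacts = chunker.chunk(f.read())

# Paragraph breaks are tried before sentence breaks, and sentence breaks
# before single spaces, per DEFAULT_SEPARATORS above.
for artifact in artifacts:
    print(len(artifact.value), artifact.value[:40])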

swarms/schedulers/__init__.py
@@ -1,6 +0,0 @@
from swarms.schedulers.agent_process import (
    AgentProcess,
    AgentProcessQueue,
)

__all__ = ["AgentProcess", "AgentProcessQueue"]