diff --git a/playground/models/custom_model_vllm.py b/playground/models/custom_model_vllm.py
index 4f7f2a9b..70a7d710 100644
--- a/playground/models/custom_model_vllm.py
+++ b/playground/models/custom_model_vllm.py
@@ -29,17 +29,17 @@ class vLLMLM(AbstractLLM):
         model_name: str = "acebook/opt-13b",
         tensor_parallel_size: int = 4,
         *args,
-        **kwargs
+        **kwargs,
     ):
         super().__init__(*args, **kwargs)
         self.model_name = model_name
         self.tensor_parallel_size = tensor_parallel_size
-        
+
         self.llm = LLM(
             model_name=self.model_name,
             tensor_parallel_size=self.tensor_parallel_size,
         )
-        
+
     def run(self, task: str, *args, **kwargs):
         """
         Runs the LLM model to generate output for the given task.
@@ -54,8 +54,8 @@ class vLLMLM(AbstractLLM):
 
         """
         return self.llm.generate(task)
-    
-    
+
+
 # Initializing the agent with the vLLMLM instance and other parameters
 model = vLLMLM(
     "facebook/opt-13b",
@@ -86,4 +86,4 @@ agent = Agent(
         docs_folder="docs",
     ),
     stopping_condition="finish",
-)
\ No newline at end of file
+)
diff --git a/pyproject.toml b/pyproject.toml
index a9954fd8..82554af5 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -56,10 +56,7 @@ chromadb = "*"
 termcolor = "2.2.0"
 torchvision = "0.16.1"
 rich = "13.5.2"
-sqlalchemy = "*"
 bitsandbytes = "*"
-pgvector = "*"
-cohere = "*"
 sentence-transformers = "*"
 peft = "*"
 psutil = "*"
diff --git a/swarms/agents/base.py b/swarms/agents/base.py
index 73066865..cfad5729 100644
--- a/swarms/agents/base.py
+++ b/swarms/agents/base.py
@@ -1,4 +1,5 @@
-from typing import Dict, List
+from abc import abstractmethod
+from typing import Dict, List, Union, Optional
 
 
 class AbstractAgent:
@@ -36,7 +37,8 @@ class AbstractAgent:
     def reset(self):
         """(Abstract method) Reset the agent."""
 
-    def run(self, task: str):
+    @abstractmethod
+    def run(self, task: str, *args, **kwargs):
         """Run the agent once"""
 
     def _arun(self, taks: str):
@@ -53,3 +55,65 @@ class AbstractAgent:
 
     def _astep(self, message: str):
         """Asynchronous step"""
+
+    def send(
+        self,
+        message: Union[Dict, str],
+        recipient,  # add AbstractWorker
+        request_reply: Optional[bool] = None,
+    ):
+        """(Abstract method) Send a message to another worker."""
+
+    async def a_send(
+        self,
+        message: Union[Dict, str],
+        recipient,  # add AbstractWorker
+        request_reply: Optional[bool] = None,
+    ):
+        """(Aabstract async method) Send a message to another worker."""
+
+    def receive(
+        self,
+        message: Union[Dict, str],
+        sender,  # add AbstractWorker
+        request_reply: Optional[bool] = None,
+    ):
+        """(Abstract method) Receive a message from another worker."""
+
+    async def a_receive(
+        self,
+        message: Union[Dict, str],
+        sender,  # add AbstractWorker
+        request_reply: Optional[bool] = None,
+    ):
+        """(Abstract async method) Receive a message from another worker."""
+
+    def generate_reply(
+        self,
+        messages: Optional[List[Dict]] = None,
+        sender=None,  # Optional["AbstractWorker"] = None,
+        **kwargs,
+    ) -> Union[str, Dict, None]:
+        """(Abstract method) Generate a reply based on the received messages.
+
+        Args:
+            messages (list[dict]): a list of messages received.
+            sender: sender of an Agent instance.
+        Returns:
+            str or dict or None: the generated reply. If None, no reply is generated.
+        """
+
+    async def a_generate_reply(
+        self,
+        messages: Optional[List[Dict]] = None,
+        sender=None,  # Optional["AbstractWorker"] = None,
+        **kwargs,
+    ) -> Union[str, Dict, None]:
+        """(Abstract async method) Generate a reply based on the received messages.
+
+        Args:
+            messages (list[dict]): a list of messages received.
+            sender: sender of an Agent instance.
+        Returns:
+            str or dict or None: the generated reply. If None, no reply is generated.
+        """
diff --git a/swarms/agents/multion_agent.py b/swarms/agents/multion_agent.py
deleted file mode 100644
index efeb5a43..00000000
--- a/swarms/agents/multion_agent.py
+++ /dev/null
@@ -1,70 +0,0 @@
-import os
-
-import multion
-from dotenv import load_dotenv
-
-from swarms.models.base_llm import AbstractLLM
-
-# Load environment variables
-load_dotenv()
-
-# Muliton key
-MULTION_API_KEY = os.getenv("MULTION_API_KEY")
-
-
-class MultiOnAgent(AbstractLLM):
-    """
-    Represents a multi-on agent that performs browsing tasks.
-
-    Args:
-        max_steps (int): The maximum number of steps to perform during browsing.
-        starting_url (str): The starting URL for browsing.
-
-    Attributes:
-        max_steps (int): The maximum number of steps to perform during browsing.
-        starting_url (str): The starting URL for browsing.
-    """
-
-    def __init__(
-        self,
-        multion_api_key: str = MULTION_API_KEY,
-        max_steps: int = 4,
-        starting_url: str = "https://www.google.com",
-        *args,
-        **kwargs,
-    ):
-        super().__init__(*args, **kwargs)
-        self.multion_api_key = multion_api_key
-        self.max_steps = max_steps
-        self.starting_url = starting_url
-
-    def run(self, task: str, *args, **kwargs):
-        """
-        Runs a browsing task.
-
-        Args:
-            task (str): The task to perform during browsing.
-            *args: Additional positional arguments.
-            **kwargs: Additional keyword arguments.
-
-        Returns:
-            dict: The response from the browsing task.
-        """
-        multion.login(
-            use_api=True,
-            multion_api_key=str(self.multion_api_key),
-            *args,
-            **kwargs,
-        )
-
-        response = multion.browse(
-            {
-                "cmd": task,
-                "url": self.starting_url,
-                "maxSteps": self.max_steps,
-            },
-            *args,
-            **kwargs,
-        )
-
-        return response.result, response.status, response.lastUrl
diff --git a/swarms/loaders/__init__.py b/swarms/loaders/__init__.py
deleted file mode 100644
index e69de29b..00000000
diff --git a/swarms/memory/__init__.py b/swarms/memory/__init__.py
index 72318d28..0dafab08 100644
--- a/swarms/memory/__init__.py
+++ b/swarms/memory/__init__.py
@@ -4,22 +4,18 @@ from swarms.memory.base_vectordb import AbstractVectorDatabase
 from swarms.memory.chroma_db import ChromaDB
 from swarms.memory.dict_internal_memory import DictInternalMemory
 from swarms.memory.dict_shared_memory import DictSharedMemory
-from swarms.memory.lanchain_chroma import LangchainChromaVectorMemory
 from swarms.memory.short_term_memory import ShortTermMemory
 from swarms.memory.sqlite import SQLiteDB
 from swarms.memory.visual_memory import VisualShortTermMemory
-from swarms.memory.weaviate_db import WeaviateDB
 
 __all__ = [
     "AbstractVectorDatabase",
     "AbstractDatabase",
     "ShortTermMemory",
     "SQLiteDB",
-    "WeaviateDB",
     "VisualShortTermMemory",
     "ActionSubtaskEntry",
     "ChromaDB",
     "DictInternalMemory",
     "DictSharedMemory",
-    "LangchainChromaVectorMemory",
 ]
diff --git a/swarms/models/__init__.py b/swarms/models/__init__.py
index 3f8fb3e2..4c2e9ac5 100644
--- a/swarms/models/__init__.py
+++ b/swarms/models/__init__.py
@@ -2,17 +2,8 @@ from swarms.models.anthropic import Anthropic  # noqa: E402
 from swarms.models.base_embedding_model import BaseEmbeddingModel
 from swarms.models.base_llm import AbstractLLM  # noqa: E402
 from swarms.models.base_multimodal_model import BaseMultiModalModel
-
-# noqa: E402
 from swarms.models.biogpt import BioGPT  # noqa: E402
 from swarms.models.clipq import CLIPQ  # noqa: E402
-
-# from swarms.models.dalle3 import Dalle3
-# from swarms.models.distilled_whisperx import DistilWhisperModel # noqa: E402
-# from swarms.models.whisperx_model import WhisperX  # noqa: E402
-# from swarms.models.kosmos_two import Kosmos  # noqa: E402
-# from swarms.models.cog_agent import CogAgent  # noqa: E402
-## Function calling models
 from swarms.models.fire_function import FireFunctionCaller
 from swarms.models.fuyu import Fuyu  # noqa: E402
 from swarms.models.gemini import Gemini  # noqa: E402
@@ -22,20 +13,11 @@ from swarms.models.huggingface import HuggingfaceLLM  # noqa: E402
 from swarms.models.idefics import Idefics  # noqa: E402
 from swarms.models.kosmos_two import Kosmos  # noqa: E402
 from swarms.models.layoutlm_document_qa import LayoutLMDocumentQA
-
-# noqa: E402
 from swarms.models.llava import LavaMultiModal  # noqa: E402
 from swarms.models.mistral import Mistral  # noqa: E402
 from swarms.models.mixtral import Mixtral  # noqa: E402
 from swarms.models.mpt import MPT7B  # noqa: E402
 from swarms.models.nougat import Nougat  # noqa: E402
-from swarms.models.openai_models import (
-    AzureOpenAI,
-    OpenAI,
-    OpenAIChat,
-)
-
-# noqa: E402
 from swarms.models.openai_tts import OpenAITTS  # noqa: E402
 from swarms.models.petals import Petals  # noqa: E402
 from swarms.models.qwen import QwenVLMultiModal  # noqa: E402
@@ -43,14 +25,7 @@ from swarms.models.roboflow_model import RoboflowMultiModal
 from swarms.models.sam_supervision import SegmentAnythingMarkGenerator
 from swarms.models.sampling_params import SamplingParams, SamplingType
 from swarms.models.timm import TimmModel  # noqa: E402
-
-# from swarms.models.modelscope_pipeline import ModelScopePipeline
-# from swarms.models.modelscope_llm import (
-#     ModelScopeAutoModel,
-# )  # noqa: E402
 from swarms.models.together import TogetherLLM  # noqa: E402
-
-############## Types
 from swarms.models.types import (  # noqa: E402
     AudioModality,
     ImageModality,
@@ -59,60 +34,49 @@ from swarms.models.types import (  # noqa: E402
     VideoModality,
 )
 from swarms.models.ultralytics_model import UltralyticsModel
-
-# noqa: E402
 from swarms.models.vilt import Vilt  # noqa: E402
 from swarms.models.wizard_storytelling import WizardLLMStoryTeller
-
-# noqa: E402
-# from swarms.models.vllm import vLLM  # noqa: E402
 from swarms.models.zephyr import Zephyr  # noqa: E402
 from swarms.models.zeroscope import ZeroscopeTTV  # noqa: E402
 
 __all__ = [
-    "AbstractLLM",
     "Anthropic",
-    "Petals",
-    "Mistral",
-    "OpenAI",
-    "AzureOpenAI",
-    "OpenAIChat",
-    "Zephyr",
+    "AbstractLLM",
+    "BaseEmbeddingModel",
     "BaseMultiModalModel",
-    "Idefics",
-    "Vilt",
-    "Nougat",
-    "LayoutLMDocumentQA",
     "BioGPT",
+    "CLIPQ",
+    "FireFunctionCaller",
+    "Fuyu",
+    "Gigabind",
+    "GPT4VisionAPI",
     "HuggingfaceLLM",
+    "Idefics",
+    "Kosmos",
+    "LavaMultiModal",
+    "LayoutLMDocumentQA",
+    "Mistral",
+    "Mixtral",
     "MPT7B",
-    "WizardLLMStoryTeller",
-    # "Dalle3",
-    # "DistilWhisperModel",
-    "GPT4VisionAPI",
-    # "vLLM",
-    "OpenAITTS",
+    "Nougat",
     "Gemini",
-    "Gigabind",
-    "Mixtral",
-    "ZeroscopeTTV",
-    "TextModality",
-    "ImageModality",
-    "AudioModality",
-    "VideoModality",
-    "MultimodalData",
-    "TogetherLLM",
-    "TimmModel",
-    "UltralyticsModel",
-    "LavaMultiModal",
+    "OpenAITTS",
+    "Petals",
     "QwenVLMultiModal",
-    "CLIPQ",
-    "Kosmos",
-    "Fuyu",
-    "BaseEmbeddingModel",
     "RoboflowMultiModal",
     "SegmentAnythingMarkGenerator",
-    "SamplingType",
     "SamplingParams",
-    "FireFunctionCaller",
+    "SamplingType",
+    "TimmModel",
+    "TogetherLLM",
+    "UltralyticsModel",
+    "Vilt",
+    "WizardLLMStoryTeller",
+    "Zephyr",
+    "ZeroscopeTTV",
+    "AudioModality",
+    "ImageModality",
+    "MultimodalData",
+    "TextModality",
+    "VideoModality",
 ]
diff --git a/swarms/models/anthropic.py b/swarms/models/anthropic.py
deleted file mode 100644
index 5193a6bc..00000000
--- a/swarms/models/anthropic.py
+++ /dev/null
@@ -1,575 +0,0 @@
-import contextlib
-import datetime
-import functools
-import importlib
-import re
-import warnings
-from importlib.metadata import version
-from typing import (
-    Any,
-    AsyncIterator,
-    Callable,
-    Dict,
-    Iterator,
-    List,
-    Mapping,
-    Optional,
-    Set,
-    Tuple,
-    Union,
-)
-
-from langchain.callbacks.manager import (
-    AsyncCallbackManagerForLLMRun,
-    CallbackManagerForLLMRun,
-)
-from langchain.llms.base import LLM
-from langchain.schema.language_model import BaseLanguageModel
-from langchain.schema.output import GenerationChunk
-from langchain.schema.prompt import PromptValue
-from langchain.utils import get_from_dict_or_env
-from packaging.version import parse
-from pydantic import Field, SecretStr, root_validator
-from requests import HTTPError, Response
-
-
-def xor_args(*arg_groups: Tuple[str, ...]) -> Callable:
-    """Validate specified keyword args are mutually exclusive."""
-
-    def decorator(func: Callable) -> Callable:
-        @functools.wraps(func)
-        def wrapper(*args: Any, **kwargs: Any) -> Any:
-            """Validate exactly one arg in each group is not None."""
-            counts = [
-                sum(
-                    1
-                    for arg in arg_group
-                    if kwargs.get(arg) is not None
-                )
-                for arg_group in arg_groups
-            ]
-            invalid_groups = [
-                i for i, count in enumerate(counts) if count != 1
-            ]
-            if invalid_groups:
-                invalid_group_names = [
-                    ", ".join(arg_groups[i]) for i in invalid_groups
-                ]
-                raise ValueError(
-                    "Exactly one argument in each of the following"
-                    " groups must be defined:"
-                    f" {', '.join(invalid_group_names)}"
-                )
-            return func(*args, **kwargs)
-
-        return wrapper
-
-    return decorator
-
-
-def raise_for_status_with_text(response: Response) -> None:
-    """Raise an error with the response text."""
-    try:
-        response.raise_for_status()
-    except HTTPError as e:
-        raise ValueError(response.text) from e
-
-
-@contextlib.contextmanager
-def mock_now(dt_value):  # type: ignore
-    """Context manager for mocking out datetime.now() in unit tests.
-
-    Example:
-    with mock_now(datetime.datetime(2011, 2, 3, 10, 11)):
-        assert datetime.datetime.now() == datetime.datetime(2011, 2, 3, 10, 11)
-    """
-
-    class MockDateTime(datetime.datetime):
-        """Mock datetime.datetime.now() with a fixed datetime."""
-
-        @classmethod
-        def now(cls):  # type: ignore
-            # Create a copy of dt_value.
-            return datetime.datetime(
-                dt_value.year,
-                dt_value.month,
-                dt_value.day,
-                dt_value.hour,
-                dt_value.minute,
-                dt_value.second,
-                dt_value.microsecond,
-                dt_value.tzinfo,
-            )
-
-    real_datetime = datetime.datetime
-    datetime.datetime = MockDateTime
-    try:
-        yield datetime.datetime
-    finally:
-        datetime.datetime = real_datetime
-
-
-def guard_import(
-    module_name: str,
-    *,
-    pip_name: Optional[str] = None,
-    package: Optional[str] = None,
-) -> Any:
-    """Dynamically imports a module and raises a helpful exception if the module is not
-    installed."""
-    try:
-        module = importlib.import_module(module_name, package)
-    except ImportError:
-        raise ImportError(
-            f"Could not import {module_name} python package. Please"
-            " install it with `pip install"
-            f" {pip_name or module_name}`."
-        )
-    return module
-
-
-def check_package_version(
-    package: str,
-    lt_version: Optional[str] = None,
-    lte_version: Optional[str] = None,
-    gt_version: Optional[str] = None,
-    gte_version: Optional[str] = None,
-) -> None:
-    """Check the version of a package."""
-    imported_version = parse(version(package))
-    if lt_version is not None and imported_version >= parse(
-        lt_version
-    ):
-        raise ValueError(
-            f"Expected {package} version to be < {lt_version}."
-            f" Received {imported_version}."
-        )
-    if lte_version is not None and imported_version > parse(
-        lte_version
-    ):
-        raise ValueError(
-            f"Expected {package} version to be <= {lte_version}."
-            f" Received {imported_version}."
-        )
-    if gt_version is not None and imported_version <= parse(
-        gt_version
-    ):
-        raise ValueError(
-            f"Expected {package} version to be > {gt_version}."
-            f" Received {imported_version}."
-        )
-    if gte_version is not None and imported_version < parse(
-        gte_version
-    ):
-        raise ValueError(
-            f"Expected {package} version to be >= {gte_version}."
-            f" Received {imported_version}."
-        )
-
-
-def get_pydantic_field_names(pydantic_cls: Any) -> Set[str]:
-    """Get field names, including aliases, for a pydantic class.
-
-    Args:
-        pydantic_cls: Pydantic class."""
-    all_required_field_names = set()
-    for field in pydantic_cls.__fields__.values():
-        all_required_field_names.add(field.name)
-        if field.has_alias:
-            all_required_field_names.add(field.alias)
-    return all_required_field_names
-
-
-def build_extra_kwargs(
-    extra_kwargs: Dict[str, Any],
-    values: Dict[str, Any],
-    all_required_field_names: Set[str],
-) -> Dict[str, Any]:
-    """Build extra kwargs from values and extra_kwargs.
-
-    Args:
-        extra_kwargs: Extra kwargs passed in by user.
-        values: Values passed in by user.
-        all_required_field_names: All required field names for the pydantic class.
-    """
-    for field_name in list(values):
-        if field_name in extra_kwargs:
-            raise ValueError(f"Found {field_name} supplied twice.")
-        if field_name not in all_required_field_names:
-            warnings.warn(
-                f"""WARNING! {field_name} is not default parameter.
-                {field_name} was transferred to model_kwargs.
-                Please confirm that {field_name} is what you intended."""
-            )
-            extra_kwargs[field_name] = values.pop(field_name)
-
-    invalid_model_kwargs = all_required_field_names.intersection(
-        extra_kwargs.keys()
-    )
-    if invalid_model_kwargs:
-        raise ValueError(
-            f"Parameters {invalid_model_kwargs} should be specified"
-            " explicitly. Instead they were passed in as part of"
-            " `model_kwargs` parameter."
-        )
-
-    return extra_kwargs
-
-
-def convert_to_secret_str(value: Union[SecretStr, str]) -> SecretStr:
-    """Convert a string to a SecretStr if needed."""
-    if isinstance(value, SecretStr):
-        return value
-    return SecretStr(value)
-
-
-class _AnthropicCommon(BaseLanguageModel):
-    client: Any = None  #: :meta private:
-    async_client: Any = None  #: :meta private:
-    model: str = Field(default="claude-2", alias="model_name")
-    """Model name to use."""
-
-    max_tokens_to_sample: int = Field(default=256, alias="max_tokens")
-    """Denotes the number of tokens to predict per generation."""
-
-    temperature: Optional[float] = None
-    """A non-negative float that tunes the degree of randomness in generation."""
-
-    top_k: Optional[int] = None
-    """Number of most likely tokens to consider at each step."""
-
-    top_p: Optional[float] = None
-    """Total probability mass of tokens to consider at each step."""
-
-    streaming: bool = False
-    """Whether to stream the results."""
-
-    default_request_timeout: Optional[float] = None
-    """Timeout for requests to Anthropic Completion API. Default is 600 seconds."""
-
-    anthropic_api_url: Optional[str] = None
-
-    anthropic_api_key: Optional[SecretStr] = None
-
-    HUMAN_PROMPT: Optional[str] = None
-    AI_PROMPT: Optional[str] = None
-    count_tokens: Optional[Callable[[str], int]] = None
-    model_kwargs: Dict[str, Any] = Field(default_factory=dict)
-
-    @root_validator(pre=True)
-    def build_extra(cls, values: Dict) -> Dict:
-        extra = values.get("model_kwargs", {})
-        all_required_field_names = get_pydantic_field_names(cls)
-        values["model_kwargs"] = build_extra_kwargs(
-            extra, values, all_required_field_names
-        )
-        return values
-
-    @root_validator()
-    def validate_environment(cls, values: Dict) -> Dict:
-        """Validate that api key and python package exists in environment."""
-        values["anthropic_api_key"] = convert_to_secret_str(
-            get_from_dict_or_env(
-                values, "anthropic_api_key", "ANTHROPIC_API_KEY"
-            )
-        )
-        # Get custom api url from environment.
-        values["anthropic_api_url"] = get_from_dict_or_env(
-            values,
-            "anthropic_api_url",
-            "ANTHROPIC_API_URL",
-            default="https://api.anthropic.com",
-        )
-
-        try:
-            import anthropic
-
-            check_package_version("anthropic", gte_version="0.3")
-            values["client"] = anthropic.Anthropic(
-                base_url=values["anthropic_api_url"],
-                api_key=values[
-                    "anthropic_api_key"
-                ].get_secret_value(),
-                timeout=values["default_request_timeout"],
-            )
-            values["async_client"] = anthropic.AsyncAnthropic(
-                base_url=values["anthropic_api_url"],
-                api_key=values[
-                    "anthropic_api_key"
-                ].get_secret_value(),
-                timeout=values["default_request_timeout"],
-            )
-            values["HUMAN_PROMPT"] = anthropic.HUMAN_PROMPT
-            values["AI_PROMPT"] = anthropic.AI_PROMPT
-            values["count_tokens"] = values["client"].count_tokens
-
-        except ImportError:
-            raise ImportError(
-                "Could not import anthropic python package. "
-                "Please it install it with `pip install anthropic`."
-            )
-        return values
-
-    @property
-    def _default_params(self) -> Mapping[str, Any]:
-        """Get the default parameters for calling Anthropic API."""
-        d = {
-            "max_tokens_to_sample": self.max_tokens_to_sample,
-            "model": self.model,
-        }
-        if self.temperature is not None:
-            d["temperature"] = self.temperature
-        if self.top_k is not None:
-            d["top_k"] = self.top_k
-        if self.top_p is not None:
-            d["top_p"] = self.top_p
-        return {**d, **self.model_kwargs}
-
-    @property
-    def _identifying_params(self) -> Mapping[str, Any]:
-        """Get the identifying parameters."""
-        return {**{}, **self._default_params}
-
-    def _get_anthropic_stop(
-        self, stop: Optional[List[str]] = None
-    ) -> List[str]:
-        if not self.HUMAN_PROMPT or not self.AI_PROMPT:
-            raise NameError(
-                "Please ensure the anthropic package is loaded"
-            )
-
-        if stop is None:
-            stop = []
-
-        # Never want model to invent new turns of Human / Assistant dialog.
-        stop.extend([self.HUMAN_PROMPT])
-
-        return stop
-
-
-class Anthropic(LLM, _AnthropicCommon):
-    """Anthropic large language models.
-
-    To use, you should have the ``anthropic`` python package installed, and the
-    environment variable ``ANTHROPIC_API_KEY`` set with your API key, or pass
-    it as a named parameter to the constructor.
-
-    Example:
-        .. code-block:: python
-
-            import anthropic
-            from langchain.llms import Anthropic
-
-            model = Anthropic(model="<model_name>", anthropic_api_key="my-api-key")
-
-            # Simplest invocation, automatically wrapped with HUMAN_PROMPT
-            # and AI_PROMPT.
-            response = model("What are the biggest risks facing humanity?")
-
-            # Or if you want to use the chat mode, build a few-shot-prompt, or
-            # put words in the Assistant's mouth, use HUMAN_PROMPT and AI_PROMPT:
-            raw_prompt = "What are the biggest risks facing humanity?"
-            prompt = f"{anthropic.HUMAN_PROMPT} {prompt}{anthropic.AI_PROMPT}"
-            response = model(prompt)
-    """
-
-    class Config:
-        """Configuration for this pydantic object."""
-
-        allow_population_by_field_name = True
-        arbitrary_types_allowed = True
-
-    @root_validator()
-    def raise_warning(cls, values: Dict) -> Dict:
-        """Raise warning that this class is deprecated."""
-        warnings.warn(
-            "There may be an updated version of"
-            f" {cls.__name__} available."
-        )
-        return values
-
-    @property
-    def _llm_type(self) -> str:
-        """Return type of llm."""
-        return "anthropic-llm"
-
-    def _wrap_prompt(self, prompt: str) -> str:
-        if not self.HUMAN_PROMPT or not self.AI_PROMPT:
-            raise NameError(
-                "Please ensure the anthropic package is loaded"
-            )
-
-        if prompt.startswith(self.HUMAN_PROMPT):
-            return prompt  # Already wrapped.
-
-        # Guard against common errors in specifying wrong number of newlines.
-        corrected_prompt, n_subs = re.subn(
-            r"^\n*Human:", self.HUMAN_PROMPT, prompt
-        )
-        if n_subs == 1:
-            return corrected_prompt
-
-        # As a last resort, wrap the prompt ourselves to emulate instruct-style.
-        return (
-            f"{self.HUMAN_PROMPT} {prompt}{self.AI_PROMPT} Sure, here"
-            " you go:\n"
-        )
-
-    def _call(
-        self,
-        prompt: str,
-        stop: Optional[List[str]] = None,
-        run_manager: Optional[CallbackManagerForLLMRun] = None,
-        **kwargs: Any,
-    ) -> str:
-        r"""Call out to Anthropic's completion endpoint.
-
-        Args:
-            prompt: The prompt to pass into the model.
-            stop: Optional list of stop words to use when generating.
-
-        Returns:
-            The string generated by the model.
-
-        Example:
-            .. code-block:: python
-
-                prompt = "What are the biggest risks facing humanity?"
-                prompt = f"\n\nHuman: {prompt}\n\nAssistant:"
-                response = model(prompt)
-
-        """
-        if self.streaming:
-            completion = ""
-            for chunk in self._stream(
-                prompt=prompt,
-                stop=stop,
-                run_manager=run_manager,
-                **kwargs,
-            ):
-                completion += chunk.text
-            return completion
-
-        stop = self._get_anthropic_stop(stop)
-        params = {**self._default_params, **kwargs}
-        response = self.client.completions.create(
-            prompt=self._wrap_prompt(prompt),
-            stop_sequences=stop,
-            **params,
-        )
-        return response.completion
-
-    def convert_prompt(self, prompt: PromptValue) -> str:
-        return self._wrap_prompt(prompt.to_string())
-
-    async def _acall(
-        self,
-        prompt: str,
-        stop: Optional[List[str]] = None,
-        run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
-        **kwargs: Any,
-    ) -> str:
-        """Call out to Anthropic's completion endpoint asynchronously."""
-        if self.streaming:
-            completion = ""
-            async for chunk in self._astream(
-                prompt=prompt,
-                stop=stop,
-                run_manager=run_manager,
-                **kwargs,
-            ):
-                completion += chunk.text
-            return completion
-
-        stop = self._get_anthropic_stop(stop)
-        params = {**self._default_params, **kwargs}
-
-        response = await self.async_client.completions.create(
-            prompt=self._wrap_prompt(prompt),
-            stop_sequences=stop,
-            **params,
-        )
-        return response.completion
-
-    def _stream(
-        self,
-        prompt: str,
-        stop: Optional[List[str]] = None,
-        run_manager: Optional[CallbackManagerForLLMRun] = None,
-        **kwargs: Any,
-    ) -> Iterator[GenerationChunk]:
-        r"""Call Anthropic completion_stream and return the resulting generator.
-
-        Args:
-            prompt: The prompt to pass into the model.
-            stop: Optional list of stop words to use when generating.
-        Returns:
-            A generator representing the stream of tokens from Anthropic.
-        Example:
-            .. code-block:: python
-
-                prompt = "Write a poem about a stream."
-                prompt = f"\n\nHuman: {prompt}\n\nAssistant:"
-                generator = anthropic.stream(prompt)
-                for token in generator:
-                    yield token
-        """
-        stop = self._get_anthropic_stop(stop)
-        params = {**self._default_params, **kwargs}
-
-        for token in self.client.completions.create(
-            prompt=self._wrap_prompt(prompt),
-            stop_sequences=stop,
-            stream=True,
-            **params,
-        ):
-            chunk = GenerationChunk(text=token.completion)
-            yield chunk
-            if run_manager:
-                run_manager.on_llm_new_token(chunk.text, chunk=chunk)
-
-    async def _astream(
-        self,
-        prompt: str,
-        stop: Optional[List[str]] = None,
-        run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
-        **kwargs: Any,
-    ) -> AsyncIterator[GenerationChunk]:
-        r"""Call Anthropic completion_stream and return the resulting generator.
-
-        Args:
-            prompt: The prompt to pass into the model.
-            stop: Optional list of stop words to use when generating.
-        Returns:
-            A generator representing the stream of tokens from Anthropic.
-        Example:
-            .. code-block:: python
-                prompt = "Write a poem about a stream."
-                prompt = f"\n\nHuman: {prompt}\n\nAssistant:"
-                generator = anthropic.stream(prompt)
-                for token in generator:
-                    yield token
-        """
-        stop = self._get_anthropic_stop(stop)
-        params = {**self._default_params, **kwargs}
-
-        async for token in await self.async_client.completions.create(
-            prompt=self._wrap_prompt(prompt),
-            stop_sequences=stop,
-            stream=True,
-            **params,
-        ):
-            chunk = GenerationChunk(text=token.completion)
-            yield chunk
-            if run_manager:
-                await run_manager.on_llm_new_token(
-                    chunk.text, chunk=chunk
-                )
-
-    def get_num_tokens(self, text: str) -> int:
-        """Calculate number of tokens."""
-        if not self.count_tokens:
-            raise NameError(
-                "Please ensure the anthropic package is loaded"
-            )
-        return self.count_tokens(text)
diff --git a/swarms/models/azure_openai_llm.py b/swarms/models/azure_openai_llm.py
deleted file mode 100644
index aebb03fb..00000000
--- a/swarms/models/azure_openai_llm.py
+++ /dev/null
@@ -1,223 +0,0 @@
-from __future__ import annotations
-
-import logging
-import os
-from typing import Any, Callable, Mapping
-
-import openai
-from langchain_core.pydantic_v1 import (
-    Field,
-    SecretStr,
-    root_validator,
-)
-from langchain_core.utils import (
-    convert_to_secret_str,
-    get_from_dict_or_env,
-)
-from langchain_openai.llms.base import BaseOpenAI
-
-logger = logging.getLogger(__name__)
-
-
-class AzureOpenAI(BaseOpenAI):
-    """Azure-specific OpenAI large language models.
-
-    To use, you should have the ``openai`` python package installed, and the
-    environment variable ``OPENAI_API_KEY`` set with your API key.
-
-    Any parameters that are valid to be passed to the openai.create call can be passed
-    in, even if not explicitly saved on this class.
-
-    Example:
-        .. code-block:: python
-
-            from swarms import AzureOpenAI
-
-            openai = AzureOpenAI(model_name="gpt-3.5-turbo-instruct")
-    """
-
-    azure_endpoint: str | None = None
-    """Your Azure endpoint, including the resource.
-
-        Automatically inferred from env var `AZURE_OPENAI_ENDPOINT` if not provided.
-
-        Example: `https://example-resource.azure.openai.com/`
-    """
-    deployment_name: str | None = Field(
-        default=None, alias="azure_deployment"
-    )
-    """A model deployment. 
-
-        If given sets the base client URL to include `/deployments/{azure_deployment}`.
-        Note: this means you won't be able to use non-deployment endpoints.
-    """
-    openai_api_version: str = Field(default="", alias="api_version")
-    """Automatically inferred from env var `OPENAI_API_VERSION` if not provided."""
-    openai_api_key: SecretStr | None = Field(
-        default=None, alias="api_key"
-    )
-    """Automatically inferred from env var `AZURE_OPENAI_API_KEY` if not provided."""
-    azure_ad_token: SecretStr | None = None
-    """Your Azure Active Directory token.
-
-        Automatically inferred from env var `AZURE_OPENAI_AD_TOKEN` if not provided.
-
-        For more: 
-        https://www.microsoft.com/en-us/security/business/identity-access/microsoft-entra-id.
-    """  # noqa: E501
-    azure_ad_token_provider: Callable[[], str] | None = None
-    """A function that returns an Azure Active Directory token.
-
-        Will be invoked on every request.
-    """
-    openai_api_type: str = ""
-    """Legacy, for openai<1.0.0 support."""
-    validate_base_url: bool = True
-    """For backwards compatibility. If legacy val openai_api_base is passed in, try to 
-        infer if it is a base_url or azure_endpoint and update accordingly.
-    """
-
-    @classmethod
-    def get_lc_namespace(cls) -> list[str]:
-        """Get the namespace of the langchain object."""
-        return ["langchain", "llms", "openai"]
-
-    @root_validator()
-    def validate_environment(cls, values: dict) -> dict:
-        """Validate that api key and python package exists in environment."""
-        if values["n"] < 1:
-            raise ValueError("n must be at least 1.")
-        if values["streaming"] and values["n"] > 1:
-            raise ValueError("Cannot stream results when n > 1.")
-        if values["streaming"] and values["best_of"] > 1:
-            raise ValueError(
-                "Cannot stream results when best_of > 1."
-            )
-
-        # Check OPENAI_KEY for backwards compatibility.
-        # TODO: Remove OPENAI_API_KEY support to avoid possible conflict when using
-        # other forms of azure credentials.
-        openai_api_key = (
-            values["openai_api_key"]
-            or os.getenv("AZURE_OPENAI_API_KEY")
-            or os.getenv("OPENAI_API_KEY")
-        )
-        values["openai_api_key"] = (
-            convert_to_secret_str(openai_api_key)
-            if openai_api_key
-            else None
-        )
-
-        values["azure_endpoint"] = values[
-            "azure_endpoint"
-        ] or os.getenv("AZURE_OPENAI_ENDPOINT")
-        azure_ad_token = values["azure_ad_token"] or os.getenv(
-            "AZURE_OPENAI_AD_TOKEN"
-        )
-        values["azure_ad_token"] = (
-            convert_to_secret_str(azure_ad_token)
-            if azure_ad_token
-            else None
-        )
-        values["openai_api_base"] = values[
-            "openai_api_base"
-        ] or os.getenv("OPENAI_API_BASE")
-        values["openai_proxy"] = get_from_dict_or_env(
-            values,
-            "openai_proxy",
-            "OPENAI_PROXY",
-            default="",
-        )
-        values["openai_organization"] = (
-            values["openai_organization"]
-            or os.getenv("OPENAI_ORG_ID")
-            or os.getenv("OPENAI_ORGANIZATION")
-        )
-        values["openai_api_version"] = values[
-            "openai_api_version"
-        ] or os.getenv("OPENAI_API_VERSION")
-        values["openai_api_type"] = get_from_dict_or_env(
-            values,
-            "openai_api_type",
-            "OPENAI_API_TYPE",
-            default="azure",
-        )
-        # For backwards compatibility. Before openai v1, no distinction was made
-        # between azure_endpoint and base_url (openai_api_base).
-        openai_api_base = values["openai_api_base"]
-        if openai_api_base and values["validate_base_url"]:
-            if "/openai" not in openai_api_base:
-                values["openai_api_base"] = (
-                    values["openai_api_base"].rstrip("/") + "/openai"
-                )
-                raise ValueError(
-                    "As of openai>=1.0.0, Azure endpoints should be"
-                    " specified via the `azure_endpoint` param not"
-                    " `openai_api_base` (or alias `base_url`)."
-                )
-            if values["deployment_name"]:
-                raise ValueError(
-                    "As of openai>=1.0.0, if `deployment_name` (or"
-                    " alias `azure_deployment`) is specified then"
-                    " `openai_api_base` (or alias `base_url`) should"
-                    " not be. Instead use `deployment_name` (or alias"
-                    " `azure_deployment`) and `azure_endpoint`."
-                )
-                values["deployment_name"] = None
-        client_params = {
-            "api_version": values["openai_api_version"],
-            "azure_endpoint": values["azure_endpoint"],
-            "azure_deployment": values["deployment_name"],
-            "api_key": (
-                values["openai_api_key"].get_secret_value()
-                if values["openai_api_key"]
-                else None
-            ),
-            "azure_ad_token": (
-                values["azure_ad_token"].get_secret_value()
-                if values["azure_ad_token"]
-                else None
-            ),
-            "azure_ad_token_provider": values[
-                "azure_ad_token_provider"
-            ],
-            "organization": values["openai_organization"],
-            "base_url": values["openai_api_base"],
-            "timeout": values["request_timeout"],
-            "max_retries": values["max_retries"],
-            "default_headers": values["default_headers"],
-            "default_query": values["default_query"],
-            "http_client": values["http_client"],
-        }
-        values["client"] = openai.AzureOpenAI(
-            **client_params
-        ).completions
-        values["async_client"] = openai.AsyncAzureOpenAI(
-            **client_params
-        ).completions
-
-        return values
-
-    @property
-    def _identifying_params(self) -> Mapping[str, Any]:
-        return {
-            **{"deployment_name": self.deployment_name},
-            **super()._identifying_params,
-        }
-
-    @property
-    def _invocation_params(self) -> dict[str, Any]:
-        openai_params = {"model": self.deployment_name}
-        return {**openai_params, **super()._invocation_params}
-
-    @property
-    def _llm_type(self) -> str:
-        """Return type of llm."""
-        return "azure"
-
-    @property
-    def lc_attributes(self) -> dict[str, Any]:
-        return {
-            "openai_api_type": self.openai_api_type,
-            "openai_api_version": self.openai_api_version,
-        }
diff --git a/swarms/models/base_vision_model.py b/swarms/models/base_vision_model.py
deleted file mode 100644
index e69de29b..00000000
diff --git a/swarms/models/cohere_chat.py b/swarms/models/cohere_chat.py
deleted file mode 100644
index 98cc30bb..00000000
--- a/swarms/models/cohere_chat.py
+++ /dev/null
@@ -1,258 +0,0 @@
-import logging
-from typing import Any, Callable, Dict, List, Optional
-
-from langchain.callbacks.manager import (
-    AsyncCallbackManagerForLLMRun,
-    CallbackManagerForLLMRun,
-)
-from langchain.llms.base import LLM
-from langchain.llms.utils import enforce_stop_tokens
-from langchain.load.serializable import Serializable
-from langchain.utils import get_from_dict_or_env
-from pydantic import Extra, Field, root_validator
-from tenacity import (
-    before_sleep_log,
-    retry,
-    retry_if_exception_type,
-    stop_after_attempt,
-    wait_exponential,
-)
-
-logger = logging.getLogger(__name__)
-
-
-def _create_retry_decorator(llm) -> Callable[[Any], Any]:
-    import cohere
-
-    min_seconds = 4
-    max_seconds = 10
-    # Wait 2^x * 1 second between each retry starting with
-    # 4 seconds, then up to 10 seconds, then 10 seconds afterwards
-    return retry(
-        reraise=True,
-        stop=stop_after_attempt(llm.max_retries),
-        wait=wait_exponential(
-            multiplier=1, min=min_seconds, max=max_seconds
-        ),
-        retry=retry_if_exception_type(cohere.error.CohereError),
-        before_sleep=before_sleep_log(logger, logging.WARNING),
-    )
-
-
-def completion_with_retry(llm, **kwargs: Any) -> Any:
-    """Use tenacity to retry the completion call."""
-    retry_decorator = _create_retry_decorator(llm)
-
-    @retry_decorator
-    def _completion_with_retry(**kwargs: Any) -> Any:
-        return llm.client.generate(**kwargs)
-
-    return _completion_with_retry(**kwargs)
-
-
-def acompletion_with_retry(llm, **kwargs: Any) -> Any:
-    """Use tenacity to retry the completion call."""
-    retry_decorator = _create_retry_decorator(llm)
-
-    @retry_decorator
-    async def _completion_with_retry(**kwargs: Any) -> Any:
-        return await llm.async_client.generate(**kwargs)
-
-    return _completion_with_retry(**kwargs)
-
-
-class BaseCohere(Serializable):
-    """Base class for Cohere models."""
-
-    client: Any  #: :meta private:
-    async_client: Any  #: :meta private:
-    model: Optional[str] = Field(
-        default=None, description="Model name to use."
-    )
-    """Model name to use."""
-
-    temperature: float = 0.75
-    """A non-negative float that tunes the degree of randomness in generation."""
-
-    cohere_api_key: Optional[str] = None
-
-    stop: Optional[List[str]] = None
-
-    streaming: bool = Field(default=False)
-    """Whether to stream the results."""
-
-    user_agent: str = "langchain"
-    """Identifier for the application making the request."""
-
-    @root_validator()
-    def validate_environment(cls, values: Dict) -> Dict:
-        """Validate that api key and python package exists in environment."""
-        try:
-            import cohere
-        except ImportError:
-            raise ImportError(
-                "Could not import cohere python package. "
-                "Please install it with `pip install cohere`."
-            )
-        else:
-            cohere_api_key = get_from_dict_or_env(
-                values, "cohere_api_key", "COHERE_API_KEY"
-            )
-            client_name = values["user_agent"]
-            values["client"] = cohere.Client(
-                cohere_api_key, client_name=client_name
-            )
-            values["async_client"] = cohere.AsyncClient(
-                cohere_api_key, client_name=client_name
-            )
-        return values
-
-
-class Cohere(LLM, BaseCohere):
-    """Cohere large language models.
-
-    To use, you should have the ``cohere`` python package installed, and the
-    environment variable ``COHERE_API_KEY`` set with your API key, or pass
-    it as a named parameter to the constructor.
-
-    Example:
-        .. code-block:: python
-
-            from langchain.llms import Cohere
-
-            cohere = Cohere(model="gptd-instruct-tft", cohere_api_key="my-api-key")
-    """
-
-    max_tokens: int = 256
-    """Denotes the number of tokens to predict per generation."""
-
-    k: int = 0
-    """Number of most likely tokens to consider at each step."""
-
-    p: int = 1
-    """Total probability mass of tokens to consider at each step."""
-
-    frequency_penalty: float = 0.0
-    """Penalizes repeated tokens according to frequency. Between 0 and 1."""
-
-    presence_penalty: float = 0.0
-    """Penalizes repeated tokens. Between 0 and 1."""
-
-    truncate: Optional[str] = None
-    """Specify how the client handles inputs longer than the maximum token
-    length: Truncate from START, END or NONE"""
-
-    max_retries: int = 10
-    """Maximum number of retries to make when generating."""
-
-    class Config:
-        """Configuration for this pydantic object."""
-
-        extra = Extra.forbid
-
-    @property
-    def _default_params(self) -> Dict[str, Any]:
-        """Get the default parameters for calling Cohere API."""
-        return {
-            "max_tokens": self.max_tokens,
-            "temperature": self.temperature,
-            "k": self.k,
-            "p": self.p,
-            "frequency_penalty": self.frequency_penalty,
-            "presence_penalty": self.presence_penalty,
-            "truncate": self.truncate,
-        }
-
-    @property
-    def lc_secrets(self) -> Dict[str, str]:
-        return {"cohere_api_key": "COHERE_API_KEY"}
-
-    @property
-    def _identifying_params(self) -> Dict[str, Any]:
-        """Get the identifying parameters."""
-        return {**{"model": self.model}, **self._default_params}
-
-    @property
-    def _llm_type(self) -> str:
-        """Return type of llm."""
-        return "cohere"
-
-    def _invocation_params(
-        self, stop: Optional[List[str]], **kwargs: Any
-    ) -> dict:
-        params = self._default_params
-        if self.stop is not None and stop is not None:
-            raise ValueError(
-                "`stop` found in both the input and default params."
-            )
-        elif self.stop is not None:
-            params["stop_sequences"] = self.stop
-        else:
-            params["stop_sequences"] = stop
-        return {**params, **kwargs}
-
-    def _process_response(
-        self, response: Any, stop: Optional[List[str]]
-    ) -> str:
-        text = response.generations[0].text
-        # If stop tokens are provided, Cohere's endpoint returns them.
-        # In order to make this consistent with other endpoints, we strip them.
-        if stop:
-            text = enforce_stop_tokens(text, stop)
-        return text
-
-    def _call(
-        self,
-        prompt: str,
-        stop: Optional[List[str]] = None,
-        run_manager: Optional[CallbackManagerForLLMRun] = None,
-        **kwargs: Any,
-    ) -> str:
-        """Call out to Cohere's generate endpoint.
-
-        Args:
-            prompt: The prompt to pass into the model.
-            stop: Optional list of stop words to use when generating.
-
-        Returns:
-            The string generated by the model.
-
-        Example:
-            .. code-block:: python
-
-                response = cohere("Tell me a joke.")
-        """
-        params = self._invocation_params(stop, **kwargs)
-        response = completion_with_retry(
-            self, model=self.model, prompt=prompt, **params
-        )
-        _stop = params.get("stop_sequences")
-        return self._process_response(response, _stop)
-
-    async def _acall(
-        self,
-        prompt: str,
-        stop: Optional[List[str]] = None,
-        run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
-        **kwargs: Any,
-    ) -> str:
-        """Async call out to Cohere's generate endpoint.
-
-        Args:
-            prompt: The prompt to pass into the model.
-            stop: Optional list of stop words to use when generating.
-
-        Returns:
-            The string generated by the model.
-
-        Example:
-            .. code-block:: python
-
-                response = await cohere("Tell me a joke.")
-        """
-        params = self._invocation_params(stop, **kwargs)
-        response = await acompletion_with_retry(
-            self, model=self.model, prompt=prompt, **params
-        )
-        _stop = params.get("stop_sequences")
-        return self._process_response(response, _stop)
diff --git a/swarms/models/diffusers_general.py b/swarms/models/diffusers_general.py
deleted file mode 100644
index 9d7ea250..00000000
--- a/swarms/models/diffusers_general.py
+++ /dev/null
@@ -1 +0,0 @@
-# Base implementation for the diffusers library
diff --git a/swarms/models/eleven_labs.py b/swarms/models/eleven_labs.py
deleted file mode 100644
index 759c65bb..00000000
--- a/swarms/models/eleven_labs.py
+++ /dev/null
@@ -1,114 +0,0 @@
-import tempfile
-from enum import Enum
-from typing import Any, Dict, Union
-
-from langchain.utils import get_from_dict_or_env
-from pydantic import model_validator
-
-from swarms.tools.tool import BaseTool
-
-
-def _import_elevenlabs() -> Any:
-    try:
-        import elevenlabs
-    except ImportError as e:
-        raise ImportError(
-            "Cannot import elevenlabs, please install `pip install"
-            " elevenlabs`."
-        ) from e
-    return elevenlabs
-
-
-class ElevenLabsModel(str, Enum):
-    """Models available for Eleven Labs Text2Speech."""
-
-    MULTI_LINGUAL = "eleven_multilingual_v1"
-    MONO_LINGUAL = "eleven_monolingual_v1"
-
-
-class ElevenLabsText2SpeechTool(BaseTool):
-    """Tool that queries the Eleven Labs Text2Speech API.
-
-    In order to set this up, follow instructions at:
-    https://docs.elevenlabs.io/welcome/introduction
-
-    Attributes:
-        model (ElevenLabsModel): The model to use for text to speech.
-            Defaults to ElevenLabsModel.MULTI_LINGUAL.
-        name (str): The name of the tool. Defaults to "eleven_labs_text2speech".
-        description (str): The description of the tool.
-            Defaults to "A wrapper around Eleven Labs Text2Speech. Useful for when you need to convert text to speech. It supports multiple languages, including English, German, Polish, Spanish, Italian, French, Portuguese, and Hindi."
-
-
-    Usage:
-    >>> from swarms.models import ElevenLabsText2SpeechTool
-    >>> stt = ElevenLabsText2SpeechTool()
-    >>> speech_file = stt.run("Hello world!")
-    >>> stt.play(speech_file)
-    >>> stt.stream_speech("Hello world!")
-
-    """
-
-    model: Union[ElevenLabsModel, str] = ElevenLabsModel.MULTI_LINGUAL
-
-    name: str = "eleven_labs_text2speech"
-    description: str = (
-        "A wrapper around Eleven Labs Text2Speech. Useful for when"
-        " you need to convert text to speech. It supports multiple"
-        " languages, including English, German, Polish, Spanish,"
-        " Italian, French, Portuguese, and Hindi. "
-    )
-
-    @model_validator(mode="before")
-    @classmethod
-    def validate_environment(cls, values: Dict) -> Dict:
-        """Validate that api key exists in environment."""
-        _ = get_from_dict_or_env(
-            values, "eleven_api_key", "ELEVEN_API_KEY"
-        )
-
-        return values
-
-    def _run(
-        self,
-        task: str,
-    ) -> str:
-        """Use the tool."""
-        elevenlabs = _import_elevenlabs()
-        try:
-            speech = elevenlabs.generate(text=task, model=self.model)
-            with tempfile.NamedTemporaryFile(
-                mode="bx", suffix=".wav", delete=False
-            ) as f:
-                f.write(speech)
-            return f.name
-        except Exception as e:
-            raise RuntimeError(
-                f"Error while running ElevenLabsText2SpeechTool: {e}"
-            )
-
-    def play(self, speech_file: str) -> None:
-        """Play the text as speech."""
-        elevenlabs = _import_elevenlabs()
-        with open(speech_file, mode="rb") as f:
-            speech = f.read()
-
-        elevenlabs.play(speech)
-
-    def stream_speech(self, query: str) -> None:
-        """Stream the text as speech as it is generated.
-        Play the text in your speakers."""
-        elevenlabs = _import_elevenlabs()
-        speech_stream = elevenlabs.generate(
-            text=query, model=self.model, stream=True
-        )
-        elevenlabs.stream(speech_stream)
-
-    def save(self, speech_file: str, path: str) -> None:
-        """Save the speech file to a path."""
-        raise NotImplementedError(
-            "Saving not implemented for this tool."
-        )
-
-    def __str__(self):
-        return "ElevenLabsText2SpeechTool"
diff --git a/swarms/models/inference_engine.py b/swarms/models/inference_engine.py
deleted file mode 100644
index e69de29b..00000000
diff --git a/swarms/models/mistral_function_caller.py b/swarms/models/mistral_function_caller.py
deleted file mode 100644
index f3b0d32f..00000000
--- a/swarms/models/mistral_function_caller.py
+++ /dev/null
@@ -1 +0,0 @@
-""""""
diff --git a/swarms/models/model_registry.py b/swarms/models/model_registry.py
deleted file mode 100644
index ee5bab81..00000000
--- a/swarms/models/model_registry.py
+++ /dev/null
@@ -1,82 +0,0 @@
-import inspect
-import pkgutil
-
-
-class ModelRegistry:
-    """
-    A registry for storing and querying models.
-
-    Attributes:
-        models (dict): A dictionary of model names and corresponding model classes.
-
-    Methods:
-        __init__(): Initializes the ModelRegistry object and retrieves all available models.
-        _get_all_models(): Retrieves all available models from the models package.
-        query(text): Queries the models based on the given text and returns a dictionary of matching models.
-    """
-
-    def __init__(self):
-        self.models = self._get_all_models()
-
-    def _get_all_models(self):
-        """
-        Retrieves all available models from the models package.
-
-        Returns:
-            dict: A dictionary of model names and corresponding model classes.
-        """
-        models = {}
-        for importer, modname, ispkg in pkgutil.iter_modules(
-            models.__path__
-        ):
-            module = importer.find_module(modname).load_module(
-                modname
-            )
-            for name, obj in inspect.getmembers(module):
-                if inspect.isclass(obj):
-                    models[name] = obj
-        return models
-
-    def query(self, text):
-        """
-        Queries the models based on the given text and returns a dictionary of matching models.
-
-        Args:
-            text (str): The text to search for in the model names.
-
-        Returns:
-            dict: A dictionary of matching model names and corresponding model classes.
-        """
-        return {
-            name: model
-            for name, model in self.models.items()
-            if text in name
-        }
-
-    def run_model(
-        self, model_name: str, task: str, img: str, *args, **kwargs
-    ):
-        """
-        Runs the specified model for the given task and image.
-
-        Args:
-            model_name (str): The name of the model to run.
-            task (str): The task to perform using the model.
-            img (str): The image to process.
-            *args: Additional positional arguments to pass to the model's run method.
-            **kwargs: Additional keyword arguments to pass to the model's run method.
-
-        Returns:
-            The result of running the model.
-
-        Raises:
-            ValueError: If the specified model is not found in the model registry.
-        """
-        if model_name not in self.models:
-            raise ValueError(f"Model {model_name} not found")
-
-        # Get the model
-        model = self.models[model_name]
-
-        # Run the model
-        return model.run(task, img, *args, **kwargs)
diff --git a/swarms/models/modelscope_llm.py b/swarms/models/modelscope_llm.py
deleted file mode 100644
index 03cd978d..00000000
--- a/swarms/models/modelscope_llm.py
+++ /dev/null
@@ -1,83 +0,0 @@
-from typing import Optional
-
-from modelscope import AutoModelForCausalLM, AutoTokenizer
-
-from swarms.models.base_llm import AbstractLLM
-
-
-class ModelScopeAutoModel(AbstractLLM):
-    """
-    ModelScopeAutoModel is a class that represents a model for generating text using the ModelScope framework.
-
-    Args:
-        model_name (str): The name or path of the pre-trained model.
-        tokenizer_name (str, optional): The name or path of the tokenizer to use. Defaults to None.
-        device (str, optional): The device to use for model inference. Defaults to "cuda".
-        device_map (str, optional): The device mapping for multi-GPU setups. Defaults to "auto".
-        max_new_tokens (int, optional): The maximum number of new tokens to generate. Defaults to 500.
-        skip_special_tokens (bool, optional): Whether to skip special tokens during decoding. Defaults to True.
-        *args: Additional positional arguments.
-        **kwargs: Additional keyword arguments.
-
-    Attributes:
-        tokenizer (AutoTokenizer): The tokenizer used for tokenizing input text.
-        model (AutoModelForCausalLM): The pre-trained model for generating text.
-
-    Methods:
-        run(task, *args, **kwargs): Generates text based on the given task.
-
-    Examples:
-    >>> from swarms.models import ModelScopeAutoModel
-    >>> mp = ModelScopeAutoModel(
-    ...     model_name="gpt2",
-    ... )
-    >>> mp.run("Generate a 10,000 word blog on health and wellness.")
-    """
-
-    def __init__(
-        self,
-        model_name: str,
-        tokenizer_name: Optional[str] = None,
-        device: str = "cuda",
-        device_map: str = "auto",
-        max_new_tokens: int = 500,
-        skip_special_tokens: bool = True,
-        *args,
-        **kwargs,
-    ):
-        super().__init__(*args, **kwargs)
-        self.model_name = model_name
-        self.tokenizer_name = tokenizer_name
-        self.device = device
-        self.device_map = device_map
-        self.max_new_tokens = max_new_tokens
-        self.skip_special_tokens = skip_special_tokens
-
-        self.tokenizer = AutoTokenizer.from_pretrained(
-            self.tokenizer_name
-        )
-        self.model = AutoModelForCausalLM.from_pretrained(
-            self.model_name, device_map=device_map * args, **kwargs
-        )
-
-    def run(self, task: str, *args, **kwargs):
-        """
-        Run the model on the given task.
-
-        Parameters:
-            task (str): The input task to be processed.
-            *args: Additional positional arguments.
-            **kwargs: Additional keyword arguments.
-
-        Returns:
-            str: The generated output from the model.
-        """
-        text = self.tokenizer(task, return_tensors="pt")
-
-        outputs = self.model.generate(
-            **text, max_new_tokens=self.max_new_tokens, **kwargs
-        )
-
-        return self.tokenizer.decode(
-            outputs[0], skip_special_tokens=self.skip_special_tokens
-        )
diff --git a/swarms/models/modelscope_pipeline.py b/swarms/models/modelscope_pipeline.py
deleted file mode 100644
index ed75b33b..00000000
--- a/swarms/models/modelscope_pipeline.py
+++ /dev/null
@@ -1,58 +0,0 @@
-from modelscope.pipelines import pipeline
-
-from swarms.models.base_llm import AbstractLLM
-
-
-class ModelScopePipeline(AbstractLLM):
-    """
-    A class representing a ModelScope pipeline.
-
-    Args:
-        type_task (str): The type of task for the pipeline.
-        model_name (str): The name of the model for the pipeline.
-        *args: Variable length argument list.
-        **kwargs: Arbitrary keyword arguments.
-
-    Attributes:
-        type_task (str): The type of task for the pipeline.
-        model_name (str): The name of the model for the pipeline.
-        model: The pipeline model.
-
-    Methods:
-        run: Runs the pipeline for a given task.
-
-    Examples:
-    >>> from swarms.models import ModelScopePipeline
-    >>> mp = ModelScopePipeline(
-    ...     type_task="text-generation",
-    ...     model_name="gpt2",
-    ... )
-    >>> mp.run("Generate a 10,000 word blog on health and wellness.")
-
-    """
-
-    def __init__(
-        self, type_task: str, model_name: str, *args, **kwargs
-    ):
-        super().__init__(*args, **kwargs)
-        self.type_task = type_task
-        self.model_name = model_name
-
-        self.model = pipeline(
-            self.type_task, model=self.model_name, *args, **kwargs
-        )
-
-    def run(self, task: str, *args, **kwargs):
-        """
-        Runs the pipeline for a given task.
-
-        Args:
-            task (str): The task to be performed by the pipeline.
-            *args: Variable length argument list.
-            **kwargs: Arbitrary keyword arguments.
-
-        Returns:
-            The result of running the pipeline on the given task.
-
-        """
-        return self.model(task, *args, **kwargs)
diff --git a/swarms/models/openai_function_caller.py b/swarms/models/openai_function_caller.py
deleted file mode 100644
index e6822793..00000000
--- a/swarms/models/openai_function_caller.py
+++ /dev/null
@@ -1,262 +0,0 @@
-from typing import Any, Dict, List, Optional, Union
-
-import openai
-import requests
-from pydantic import BaseModel, validator
-from tenacity import (
-    retry,
-    stop_after_attempt,
-    wait_random_exponential,
-)
-from termcolor import colored
-
-
-class FunctionSpecification(BaseModel):
-    """
-    Defines the specification for a function including its parameters and metadata.
-
-    Attributes:
-    -----------
-    name: str
-        The name of the function.
-    description: str
-        A brief description of what the function does.
-    parameters: Dict[str, Any]
-        The parameters required by the function, with their details.
-    required: Optional[List[str]]
-        List of required parameter names.
-
-    Methods:
-    --------
-    validate_params(params: Dict[str, Any]) -> None:
-        Validates the parameters against the function's specification.
-
-
-
-    Example:
-
-    # Example Usage
-    def get_current_weather(location: str, format: str) -> str:
-    ``'
-    Example function to get current weather.
-
-    Args:
-        location (str): The city and state, e.g. San Francisco, CA.
-        format (str): The temperature unit, e.g. celsius or fahrenheit.
-
-    Returns:
-        str: Weather information.
-    '''
-    # Implementation goes here
-    return "Sunny, 23°C"
-
-
-    weather_function_spec = FunctionSpecification(
-        name="get_current_weather",
-        description="Get the current weather",
-        parameters={
-            "location": {"type": "string", "description": "The city and state"},
-            "format": {
-                "type": "string",
-                "enum": ["celsius", "fahrenheit"],
-                "description": "The temperature unit",
-            },
-        },
-        required=["location", "format"],
-    )
-
-    # Validating parameters for the function
-    params = {"location": "San Francisco, CA", "format": "celsius"}
-    weather_function_spec.validate_params(params)
-
-    # Calling the function
-    print(get_current_weather(**params))
-    """
-
-    name: str
-    description: str
-    parameters: Dict[str, Any]
-    required: Optional[List[str]] = None
-
-    @validator("parameters")
-    def check_parameters(cls, params):
-        if not isinstance(params, dict):
-            raise ValueError("Parameters must be a dictionary.")
-        return params
-
-    def validate_params(self, params: Dict[str, Any]) -> None:
-        """
-        Validates the parameters against the function's specification.
-
-        Args:
-            params (Dict[str, Any]): The parameters to validate.
-
-        Raises:
-            ValueError: If any required parameter is missing or if any parameter is invalid.
-        """
-        for key, value in params.items():
-            if key in self.parameters:
-                self.parameters[key]
-                # Perform specific validation based on param_spec
-                # This can include type checking, range validation, etc.
-            else:
-                raise ValueError(f"Unexpected parameter: {key}")
-
-        for req_param in self.required or []:
-            if req_param not in params:
-                raise ValueError(
-                    f"Missing required parameter: {req_param}"
-                )
-
-
-class OpenAIFunctionCaller:
-    def __init__(
-        self,
-        openai_api_key: str,
-        model: str = "text-davinci-003",
-        max_tokens: int = 3000,
-        temperature: float = 0.5,
-        top_p: float = 1.0,
-        n: int = 1,
-        stream: bool = False,
-        stop: Optional[str] = None,
-        echo: bool = False,
-        frequency_penalty: float = 0.0,
-        presence_penalty: float = 0.0,
-        logprobs: Optional[int] = None,
-        best_of: int = 1,
-        logit_bias: Dict[str, float] = None,
-        user: str = None,
-        messages: List[Dict] = None,
-        timeout_sec: Union[float, None] = None,
-    ):
-        self.openai_api_key = openai_api_key
-        self.model = model
-        self.max_tokens = max_tokens
-        self.temperature = temperature
-        self.top_p = top_p
-        self.n = n
-        self.stream = stream
-        self.stop = stop
-        self.echo = echo
-        self.frequency_penalty = frequency_penalty
-        self.presence_penalty = presence_penalty
-        self.logprobs = logprobs
-        self.best_of = best_of
-        self.logit_bias = logit_bias
-        self.user = user
-        self.messages = messages if messages is not None else []
-        self.timeout_sec = timeout_sec
-
-    def add_message(self, role: str, content: str):
-        self.messages.append({"role": role, "content": content})
-
-    @retry(
-        wait=wait_random_exponential(multiplier=1, max=40),
-        stop=stop_after_attempt(3),
-    )
-    def chat_completion_request(
-        self,
-        messages,
-        tools=None,
-        tool_choice=None,
-    ):
-        headers = {
-            "Content-Type": "application/json",
-            "Authorization": "Bearer " + openai.api_key,
-        }
-        json_data = {"model": self.model, "messages": messages}
-        if tools is not None:
-            json_data.update({"tools": tools})
-        if tool_choice is not None:
-            json_data.update({"tool_choice": tool_choice})
-        try:
-            response = requests.post(
-                "https://api.openai.com/v1/chat/completions",
-                headers=headers,
-                json=json_data,
-            )
-            return response
-        except Exception as e:
-            print("Unable to generate ChatCompletion response")
-            print(f"Exception: {e}")
-            return e
-
-    def pretty_print_conversation(self, messages):
-        role_to_color = {
-            "system": "red",
-            "user": "green",
-            "assistant": "blue",
-            "tool": "magenta",
-        }
-
-        for message in messages:
-            if message["role"] == "system":
-                print(
-                    colored(
-                        f"system: {message['content']}\n",
-                        role_to_color[message["role"]],
-                    )
-                )
-            elif message["role"] == "user":
-                print(
-                    colored(
-                        f"user: {message['content']}\n",
-                        role_to_color[message["role"]],
-                    )
-                )
-            elif message["role"] == "assistant" and message.get(
-                "function_call"
-            ):
-                print(
-                    colored(
-                        f"assistant: {message['function_call']}\n",
-                        role_to_color[message["role"]],
-                    )
-                )
-            elif message["role"] == "assistant" and not message.get(
-                "function_call"
-            ):
-                print(
-                    colored(
-                        f"assistant: {message['content']}\n",
-                        role_to_color[message["role"]],
-                    )
-                )
-            elif message["role"] == "tool":
-                print(
-                    colored(
-                        (
-                            f"function ({message['name']}):"
-                            f" {message['content']}\n"
-                        ),
-                        role_to_color[message["role"]],
-                    )
-                )
-
-    def call(self, task: str, *args, **kwargs) -> Dict:
-        return openai.Completion.create(
-            engine=self.model,
-            prompt=task,
-            max_tokens=self.max_tokens,
-            temperature=self.temperature,
-            top_p=self.top_p,
-            n=self.n,
-            stream=self.stream,
-            stop=self.stop,
-            echo=self.echo,
-            frequency_penalty=self.frequency_penalty,
-            presence_penalty=self.presence_penalty,
-            logprobs=self.logprobs,
-            best_of=self.best_of,
-            logit_bias=self.logit_bias,
-            user=self.user,
-            messages=self.messages,
-            timeout_sec=self.timeout_sec,
-            *args,
-            **kwargs,
-        )
-
-    def run(self, task: str, *args, **kwargs) -> str:
-        response = self.call(task, *args, **kwargs)
-        return response["choices"][0]["text"].strip()
diff --git a/swarms/models/openai_models.py b/swarms/models/openai_models.py
deleted file mode 100644
index f5273e88..00000000
--- a/swarms/models/openai_models.py
+++ /dev/null
@@ -1,1165 +0,0 @@
-from __future__ import annotations
-
-import asyncio
-import functools
-import logging
-import sys
-from importlib.metadata import version
-from typing import (
-    AbstractSet,
-    Any,
-    AsyncIterator,
-    Callable,
-    Collection,
-    Iterator,
-    Literal,
-    Mapping,
-)
-
-from langchain.callbacks.manager import (
-    AsyncCallbackManagerForLLMRun,
-    CallbackManagerForLLMRun,
-)
-from langchain.llms.base import BaseLLM
-from langchain.pydantic_v1 import Field, root_validator
-from langchain.schema import Generation, LLMResult
-from langchain.schema.output import GenerationChunk
-from langchain.utils import (
-    get_from_dict_or_env,
-    get_pydantic_field_names,
-)
-from langchain.utils.utils import build_extra_kwargs
-from packaging.version import parse
-from tenacity import (
-    RetryCallState,
-    before_sleep_log,
-    retry,
-    retry_base,
-    retry_if_exception_type,
-    stop_after_attempt,
-    wait_exponential,
-)
-
-logger = logging.getLogger(__name__)
-
-
-@functools.lru_cache
-def _log_error_once(msg: str) -> None:
-    """Log an error once."""
-    logger.error(msg)
-
-
-def create_base_retry_decorator(
-    error_types: list[type[BaseException]],
-    max_retries: int = 1,
-    run_manager: (
-        AsyncCallbackManagerForLLMRun | CallbackManagerForLLMRun
-    )
-    | None = None,
-) -> Callable[[Any], Any]:
-    """Create a retry decorator for a given LLM and provided list of error types."""
-
-    _logging = before_sleep_log(logger, logging.WARNING)
-
-    def _before_sleep(retry_state: RetryCallState) -> None:
-        _logging(retry_state)
-        if run_manager:
-            if isinstance(run_manager, AsyncCallbackManagerForLLMRun):
-                coro = run_manager.on_retry(retry_state)
-                try:
-                    loop = asyncio.get_event_loop()
-                    if loop.is_running():
-                        loop.create_task(coro)
-                    else:
-                        asyncio.run(coro)
-                except Exception as e:
-                    _log_error_once(f"Error in on_retry: {e}")
-            else:
-                run_manager.on_retry(retry_state)
-        return None
-
-    min_seconds = 4
-    max_seconds = 10
-    # Wait 2^x * 1 second between each retry starting with
-    # 4 seconds, then up to 10 seconds, then 10 seconds afterwards
-    retry_instance: retry_base = retry_if_exception_type(
-        error_types[0]
-    )
-    for error in error_types[1:]:
-        retry_instance = retry_instance | retry_if_exception_type(
-            error
-        )
-    return retry(
-        reraise=True,
-        stop=stop_after_attempt(max_retries),
-        wait=wait_exponential(
-            multiplier=1, min=min_seconds, max=max_seconds
-        ),
-        retry=retry_instance,
-        before_sleep=_before_sleep,
-    )
-
-
-def is_openai_v1() -> bool:
-    _version = parse(version("openai"))
-    return _version.major >= 1
-
-
-def update_token_usage(
-    keys: set[str],
-    response: dict[str, Any],
-    token_usage: dict[str, Any],
-) -> None:
-    """Update token usage."""
-    _keys_to_use = keys.intersection(response["usage"])
-    for _key in _keys_to_use:
-        if _key not in token_usage:
-            token_usage[_key] = response["usage"][_key]
-        else:
-            token_usage[_key] += response["usage"][_key]
-
-
-def _stream_response_to_generation_chunk(
-    stream_response: dict[str, Any],
-) -> GenerationChunk:
-    """Convert a stream response to a generation chunk."""
-    return GenerationChunk(
-        text=stream_response["choices"][0]["text"],
-        generation_info=dict(
-            finish_reason=stream_response["choices"][0].get(
-                "finish_reason", None
-            ),
-            logprobs=stream_response["choices"][0].get(
-                "logprobs", None
-            ),
-        ),
-    )
-
-
-def _update_response(
-    response: dict[str, Any], stream_response: dict[str, Any]
-) -> None:
-    """Update response from the stream response."""
-    response["choices"][0]["text"] += stream_response["choices"][0][
-        "text"
-    ]
-    response["choices"][0]["finish_reason"] = stream_response[
-        "choices"
-    ][0].get("finish_reason", None)
-    response["choices"][0]["logprobs"] = stream_response["choices"][
-        0
-    ]["logprobs"]
-
-
-def _streaming_response_template() -> dict[str, Any]:
-    return {
-        "choices": [
-            {
-                "text": "",
-                "finish_reason": None,
-                "logprobs": None,
-            }
-        ]
-    }
-
-
-def _create_retry_decorator(
-    llm: BaseOpenAI | OpenAIChat,
-    run_manager: (
-        AsyncCallbackManagerForLLMRun | CallbackManagerForLLMRun
-    )
-    | None = None,
-) -> Callable[[Any], Any]:
-    import openai
-
-    errors = [
-        openai.error.Timeout,
-        openai.error.APIError,
-        openai.error.APIConnectionError,
-        openai.error.RateLimitError,
-        openai.error.ServiceUnavailableError,
-    ]
-    return create_base_retry_decorator(
-        error_types=errors,
-        max_retries=llm.max_retries,
-        run_manager=run_manager,
-    )
-
-
-def completion_with_retry(
-    llm: BaseOpenAI | OpenAIChat,
-    run_manager: CallbackManagerForLLMRun | None = None,
-    **kwargs: Any,
-) -> Any:
-    """Use tenacity to retry the completion call."""
-    retry_decorator = _create_retry_decorator(
-        llm, run_manager=run_manager
-    )
-
-    @retry_decorator
-    def _completion_with_retry(**kwargs: Any) -> Any:
-        return llm.client.create(**kwargs)
-
-    return _completion_with_retry(**kwargs)
-
-
-async def acompletion_with_retry(
-    llm: BaseOpenAI | OpenAIChat,
-    run_manager: AsyncCallbackManagerForLLMRun | None = None,
-    **kwargs: Any,
-) -> Any:
-    """Use tenacity to retry the async completion call."""
-    retry_decorator = _create_retry_decorator(
-        llm, run_manager=run_manager
-    )
-
-    @retry_decorator
-    async def _completion_with_retry(**kwargs: Any) -> Any:
-        # Use OpenAI's async api https://github.com/openai/openai-python#async-api
-        return await llm.client.acreate(**kwargs)
-
-    return await _completion_with_retry(**kwargs)
-
-
-class BaseOpenAI(BaseLLM):
-    """Base OpenAI large language model class."""
-
-    @property
-    def lc_secrets(self) -> dict[str, str]:
-        return {"openai_api_key": "OPENAI_API_KEY"}
-
-    @property
-    def lc_attributes(self) -> dict[str, Any]:
-        attributes: dict[str, Any] = {}
-        if self.openai_api_base != "":
-            attributes["openai_api_base"] = self.openai_api_base
-
-        if self.openai_organization != "":
-            attributes["openai_organization"] = (
-                self.openai_organization
-            )
-
-        if self.openai_proxy != "":
-            attributes["openai_proxy"] = self.openai_proxy
-
-        return attributes
-
-    @classmethod
-    def is_lc_serializable(cls) -> bool:
-        return True
-
-    client: Any = None  #: :meta private:
-    model_name: str = Field(
-        default="gpt-4-1106-preview", alias="model"
-    )
-    """Model name to use."""
-    temperature: float = 0.7
-    """What sampling temperature to use."""
-    max_tokens: int = 256
-    """The maximum number of tokens to generate in the completion.
-    -1 returns as many tokens as possible given the prompt and
-    the models maximal context size."""
-    top_p: float = 1
-    """Total probability mass of tokens to consider at each step."""
-    frequency_penalty: float = 0
-    """Penalizes repeated tokens according to frequency."""
-    presence_penalty: float = 0
-    """Penalizes repeated tokens."""
-    n: int = 1
-    """How many completions to generate for each prompt."""
-    best_of: int = 1
-    """Generates best_of completions server-side and returns the "best"."""
-    model_kwargs: dict[str, Any] = Field(default_factory=dict)
-    """Holds any model parameters valid for `create` call not explicitly specified."""
-    openai_api_key: str | None = None  # | None = None
-    openai_api_base: str | None = None
-    openai_organization: str | None = None
-    # to support explicit proxy for OpenAI
-    openai_proxy: str | None = None
-    batch_size: int = 20
-    """Batch size to use when passing multiple documents to generate."""
-    request_timeout: float | tuple[float, float] | None = None
-    """Timeout for requests to OpenAI completion API. Default is 600 seconds."""
-    logit_bias: dict[str, float] = Field(default_factory=dict)
-    """Adjust the probability of specific tokens being generated."""
-    max_retries: int = 6
-    """Maximum number of retries to make when generating."""
-    streaming: bool = False
-    """Whether to stream the results or not."""
-    allowed_special: Literal["all"] | AbstractSet[str] = set()
-    """Set of special tokens that are allowed。"""
-    disallowed_special: Literal["all"] | Collection[str] = "all"
-    """Set of special tokens that are not allowed。"""
-    tiktoken_model_name: str | None = None
-    """The model name to pass to tiktoken when using this class.
-    Tiktoken is used to count the number of tokens in documents to constrain
-    them to be under a certain limit. By default, when set to None, this will
-    be the same as the embedding model name. However, there are some cases
-    where you may want to use this Embedding class with a model name not
-    supported by tiktoken. This can include when using Azure embeddings or
-    when using one of the many model providers that expose an OpenAI-like
-    API but with different models. In those cases, in order to avoid erroring
-    when tiktoken is called, you can specify a model name to use here."""
-
-    def __new__(cls, **data: Any) -> OpenAIChat | BaseOpenAI:  # type: ignore
-        """Initialize the OpenAI object."""
-        data.get("model_name", "")
-        return super().__new__(cls)
-
-    class Config:
-        """Configuration for this pydantic object."""
-
-        allow_population_by_field_name = True
-
-    @root_validator(pre=True)
-    def build_extra(cls, values: dict[str, Any]) -> dict[str, Any]:
-        """Build extra kwargs from additional params that were passed in."""
-        all_required_field_names = get_pydantic_field_names(cls)
-        extra = values.get("model_kwargs", {})
-        values["model_kwargs"] = build_extra_kwargs(
-            extra, values, all_required_field_names
-        )
-        return values
-
-    @root_validator()
-    def validate_environment(cls, values: dict) -> dict:
-        """Validate that api key and python package exists in environment."""
-        values["openai_api_key"] = get_from_dict_or_env(
-            values, "openai_api_key", "OPENAI_API_KEY"
-        )
-        values["openai_api_base"] = get_from_dict_or_env(
-            values,
-            "openai_api_base",
-            "OPENAI_API_BASE",
-            default="",
-        )
-        values["openai_proxy"] = get_from_dict_or_env(
-            values,
-            "openai_proxy",
-            "OPENAI_PROXY",
-            default="",
-        )
-        values["openai_organization"] = get_from_dict_or_env(
-            values,
-            "openai_organization",
-            "OPENAI_ORGANIZATION",
-            default="",
-        )
-        try:
-            import openai
-
-            values["client"] = openai.Completion
-        except ImportError:
-            raise ImportError(
-                "Could not import openai python package. "
-                "Please install it with `pip install openai`."
-            )
-        if values["streaming"] and values["n"] > 1:
-            raise ValueError("Cannot stream results when n > 1.")
-        if values["streaming"] and values["best_of"] > 1:
-            raise ValueError(
-                "Cannot stream results when best_of > 1."
-            )
-        return values
-
-    @property
-    def _default_params(self) -> dict[str, Any]:
-        """Get the default parameters for calling OpenAI API."""
-        normal_params = {
-            "temperature": self.temperature,
-            "max_tokens": self.max_tokens,
-            "top_p": self.top_p,
-            "frequency_penalty": self.frequency_penalty,
-            "presence_penalty": self.presence_penalty,
-            "n": self.n,
-            "request_timeout": self.request_timeout,
-            "logit_bias": self.logit_bias,
-        }
-
-        # Azure gpt-35-turbo doesn't support best_of
-        # don't specify best_of if it is 1
-        if self.best_of > 1:
-            normal_params["best_of"] = self.best_of
-
-        return {**normal_params, **self.model_kwargs}
-
-    def _stream(
-        self,
-        prompt: str,
-        stop: list[str] | None = None,
-        run_manager: CallbackManagerForLLMRun | None = None,
-        **kwargs: Any,
-    ) -> Iterator[GenerationChunk]:
-        params = {**self._invocation_params, **kwargs, "stream": True}
-        self.get_sub_prompts(
-            params, [prompt], stop
-        )  # this mutates params
-        for stream_resp in completion_with_retry(
-            self, prompt=prompt, run_manager=run_manager, **params
-        ):
-            chunk = _stream_response_to_generation_chunk(stream_resp)
-            yield chunk
-            if run_manager:
-                run_manager.on_llm_new_token(
-                    chunk.text,
-                    chunk=chunk,
-                    verbose=self.verbose,
-                    logprobs=(
-                        chunk.generation_info["logprobs"]
-                        if chunk.generation_info
-                        else None
-                    ),
-                )
-
-    async def _astream(
-        self,
-        prompt: str,
-        stop: list[str] | None = None,
-        run_manager: AsyncCallbackManagerForLLMRun | None = None,
-        **kwargs: Any,
-    ) -> AsyncIterator[GenerationChunk]:
-        params = {**self._invocation_params, **kwargs, "stream": True}
-        self.get_sub_prompts(
-            params, [prompt], stop
-        )  # this mutate params
-        async for stream_resp in await acompletion_with_retry(
-            self, prompt=prompt, run_manager=run_manager, **params
-        ):
-            chunk = _stream_response_to_generation_chunk(stream_resp)
-            yield chunk
-            if run_manager:
-                await run_manager.on_llm_new_token(
-                    chunk.text,
-                    chunk=chunk,
-                    verbose=self.verbose,
-                    logprobs=(
-                        chunk.generation_info["logprobs"]
-                        if chunk.generation_info
-                        else None
-                    ),
-                )
-
-    def _generate(
-        self,
-        prompts: list[str],
-        stop: list[str] | None = None,
-        run_manager: CallbackManagerForLLMRun | None = None,
-        **kwargs: Any,
-    ) -> LLMResult:
-        """Call out to OpenAI's endpoint with k unique prompts.
-
-        Args:
-            prompts: The prompts to pass into the model.
-            stop: Optional list of stop words to use when generating.
-
-        Returns:
-            The full LLM output.
-
-        Example:
-            .. code-block:: python
-
-                response = openai.generate(["Tell me a joke."])
-        """
-        # TODO: write a unit test for this
-        params = self._invocation_params
-        params = {**params, **kwargs}
-        sub_prompts = self.get_sub_prompts(params, prompts, stop)
-        choices = []
-        token_usage: dict[str, int] = {}
-        # Get the token usage from the response.
-        # Includes prompt, completion, and total tokens used.
-        _keys = {"completion_tokens", "prompt_tokens", "total_tokens"}
-        for _prompts in sub_prompts:
-            if self.streaming:
-                if len(_prompts) > 1:
-                    raise ValueError(
-                        "Cannot stream results with multiple prompts."
-                    )
-
-                generation: GenerationChunk | None = None
-                for chunk in self._stream(
-                    _prompts[0], stop, run_manager, **kwargs
-                ):
-                    if generation is None:
-                        generation = chunk
-                    else:
-                        generation += chunk
-                assert generation is not None
-                choices.append(
-                    {
-                        "text": generation.text,
-                        "finish_reason": (
-                            generation.generation_info.get(
-                                "finish_reason"
-                            )
-                            if generation.generation_info
-                            else None
-                        ),
-                        "logprobs": (
-                            generation.generation_info.get("logprobs")
-                            if generation.generation_info
-                            else None
-                        ),
-                    }
-                )
-            else:
-                response = completion_with_retry(
-                    self,
-                    prompt=_prompts,
-                    run_manager=run_manager,
-                    **params,
-                )
-                choices.extend(response["choices"])
-                update_token_usage(_keys, response, token_usage)
-        return self.create_llm_result(choices, prompts, token_usage)
-
-    async def _agenerate(
-        self,
-        prompts: list[str],
-        stop: list[str] | None = None,
-        run_manager: AsyncCallbackManagerForLLMRun | None = None,
-        **kwargs: Any,
-    ) -> LLMResult:
-        """Call out to OpenAI's endpoint async with k unique prompts."""
-        params = self._invocation_params
-        params = {**params, **kwargs}
-        sub_prompts = self.get_sub_prompts(params, prompts, stop)
-        choices = []
-        token_usage: dict[str, int] = {}
-        # Get the token usage from the response.
-        # Includes prompt, completion, and total tokens used.
-        _keys = {"completion_tokens", "prompt_tokens", "total_tokens"}
-        for _prompts in sub_prompts:
-            if self.streaming:
-                if len(_prompts) > 1:
-                    raise ValueError(
-                        "Cannot stream results with multiple prompts."
-                    )
-
-                generation: GenerationChunk | None = None
-                async for chunk in self._astream(
-                    _prompts[0], stop, run_manager, **kwargs
-                ):
-                    if generation is None:
-                        generation = chunk
-                    else:
-                        generation += chunk
-                assert generation is not None
-                choices.append(
-                    {
-                        "text": generation.text,
-                        "finish_reason": (
-                            generation.generation_info.get(
-                                "finish_reason"
-                            )
-                            if generation.generation_info
-                            else None
-                        ),
-                        "logprobs": (
-                            generation.generation_info.get("logprobs")
-                            if generation.generation_info
-                            else None
-                        ),
-                    }
-                )
-            else:
-                response = await acompletion_with_retry(
-                    self,
-                    prompt=_prompts,
-                    run_manager=run_manager,
-                    **params,
-                )
-                choices.extend(response["choices"])
-                update_token_usage(_keys, response, token_usage)
-        return self.create_llm_result(choices, prompts, token_usage)
-
-    def get_sub_prompts(
-        self,
-        params: dict[str, Any],
-        prompts: list[str],
-        stop: list[str] | None = None,
-    ) -> list[list[str]]:
-        """Get the sub prompts for llm call."""
-        if stop is not None:
-            if "stop" in params:
-                raise ValueError(
-                    "`stop` found in both the input and default"
-                    " params."
-                )
-            params["stop"] = stop
-        if params["max_tokens"] == -1:
-            if len(prompts) != 1:
-                raise ValueError(
-                    "max_tokens set to -1 not supported for multiple"
-                    " inputs."
-                )
-            params["max_tokens"] = self.max_tokens_for_prompt(
-                prompts[0]
-            )
-        sub_prompts = [
-            prompts[i : i + self.batch_size]
-            for i in range(0, len(prompts), self.batch_size)
-        ]
-        return sub_prompts
-
-    def create_llm_result(
-        self,
-        choices: Any,
-        prompts: list[str],
-        token_usage: dict[str, int],
-    ) -> LLMResult:
-        """Create the LLMResult from the choices and prompts."""
-        generations = []
-        for i, _ in enumerate(prompts):
-            sub_choices = choices[i * self.n : (i + 1) * self.n]
-            generations.append(
-                [
-                    Generation(
-                        text=choice["text"],
-                        generation_info=dict(
-                            finish_reason=choice.get("finish_reason"),
-                            logprobs=choice.get("logprobs"),
-                        ),
-                    )
-                    for choice in sub_choices
-                ]
-            )
-        llm_output = {
-            "token_usage": token_usage,
-            "model_name": self.model_name,
-        }
-        return LLMResult(
-            generations=generations, llm_output=llm_output
-        )
-
-    @property
-    def _invocation_params(self) -> dict[str, Any]:
-        """Get the parameters used to invoke the model."""
-        openai_creds: dict[str, Any] = {
-            "api_key": self.openai_api_key,
-            "api_base": self.openai_api_base,
-            "organization": self.openai_organization,
-        }
-        if self.openai_proxy:
-            import openai
-
-            openai.proxy = {"http": self.openai_proxy, "https": self.openai_proxy}  # type: ignore[assignment]  # noqa: E501
-        return {**openai_creds, **self._default_params}
-
-    @property
-    def _identifying_params(self) -> Mapping[str, Any]:
-        """Get the identifying parameters."""
-        return {
-            **{"model_name": self.model_name},
-            **self._default_params,
-        }
-
-    @property
-    def _llm_type(self) -> str:
-        """Return type of llm."""
-        return "openai"
-
-    def get_token_ids(self, text: str) -> list[int]:
-        """Get the token IDs using the tiktoken package."""
-        # tiktoken NOT supported for Python < 3.8
-        if sys.version_info[1] < 8:
-            return super().get_num_tokens(text)
-        try:
-            import tiktoken
-        except ImportError:
-            raise ImportError(
-                "Could not import tiktoken python package. This is"
-                " needed in order to calculate get_num_tokens. Please"
-                " install it with `pip install tiktoken`."
-            )
-
-        model_name = self.tiktoken_model_name or self.model_name
-        try:
-            enc = tiktoken.encoding_for_model(model_name)
-        except KeyError:
-            logger.warning(
-                "Warning: model not found. Using cl100k_base"
-                " encoding."
-            )
-            model = "cl100k_base"
-            enc = tiktoken.get_encoding(model)
-
-        return enc.encode(
-            text,
-            allowed_special=self.allowed_special,
-            disallowed_special=self.disallowed_special,
-        )
-
-    @staticmethod
-    def modelname_to_contextsize(modelname: str) -> int:
-        """Calculate the maximum number of tokens possible to generate for a model.
-
-        Args:
-            modelname: The modelname we want to know the context size for.
-
-        Returns:
-            The maximum context size
-
-        Example:
-            .. code-block:: python
-
-                max_tokens = openai.modelname_to_contextsize("text-davinci-003")
-        """
-        model_token_mapping = {
-            "gpt-4": 8192,
-            "gpt-4-0314": 8192,
-            "gpt-4-0613": 8192,
-            "gpt-4-32k": 32768,
-            "gpt-4-32k-0314": 32768,
-            "gpt-4-32k-0613": 32768,
-            "gpt-3.5-turbo": 4096,
-            "gpt-3.5-turbo-0301": 4096,
-            "gpt-3.5-turbo-0613": 4096,
-            "gpt-3.5-turbo-16k": 16385,
-            "gpt-3.5-turbo-16k-0613": 16385,
-            "gpt-3.5-turbo-instruct": 4096,
-            "text-ada-001": 2049,
-            "ada": 2049,
-            "text-babbage-001": 2040,
-            "babbage": 2049,
-            "text-curie-001": 2049,
-            "curie": 2049,
-            "davinci": 2049,
-            "text-davinci-003": 4097,
-            "text-davinci-002": 4097,
-            "code-davinci-002": 8001,
-            "code-davinci-001": 8001,
-            "code-cushman-002": 2048,
-            "code-cushman-001": 2048,
-        }
-
-        # handling finetuned models
-        if "ft-" in modelname:
-            modelname = modelname.split(":")[0]
-
-        context_size = model_token_mapping.get(modelname, None)
-
-        if context_size is None:
-            raise ValueError(
-                f"Unknown model: {modelname}. Please provide a valid"
-                " OpenAI model name.Known models are: "
-                + ", ".join(model_token_mapping.keys())
-            )
-
-        return context_size
-
-    @property
-    def max_context_size(self) -> int:
-        """Get max context size for this model."""
-        return self.modelname_to_contextsize(self.model_name)
-
-    def max_tokens_for_prompt(self, prompt: str) -> int:
-        """Calculate the maximum number of tokens possible to generate for a prompt.
-
-        Args:
-            prompt: The prompt to pass into the model.
-
-        Returns:
-            The maximum number of tokens to generate for a prompt.
-
-        Example:
-            .. code-block:: python
-
-                max_tokens = openai.max_token_for_prompt("Tell me a joke.")
-        """
-        num_tokens = self.get_num_tokens(prompt)
-        return self.max_context_size - num_tokens
-
-
-class OpenAI(BaseOpenAI):
-    """OpenAI large language models.
-
-    To use, you should have the ``openai`` python package installed, and the
-    environment variable ``OPENAI_API_KEY`` set with your API key.
-
-    Any parameters that are valid to be passed to the openai.create call can be passed
-    in, even if not explicitly saved on this class.
-
-    Example:
-        .. code-block:: python
-
-            from langchain.llms import OpenAI
-
-            openai = OpenAI(model_name="text-davinci-003")
-    """
-
-    @property
-    def _invocation_params(self) -> dict[str, Any]:
-        return {
-            **{"model": self.model_name},
-            **super()._invocation_params,
-        }
-
-
-class AzureOpenAI(BaseOpenAI):
-    """Azure-specific OpenAI large language models.
-
-    To use, you should have the ``openai`` python package installed, and the
-    environment variable ``OPENAI_API_KEY`` set with your API key.
-
-    Any parameters that are valid to be passed to the openai.create call can be passed
-    in, even if not explicitly saved on this class.
-
-    Example:
-        .. code-block:: python
-
-            from langchain.llms import AzureOpenAI
-
-            openai = AzureOpenAI(model_name="text-davinci-003")
-    """
-
-    deployment_name: str = ""
-    """Deployment name to use."""
-    openai_api_type: str = ""
-    openai_api_version: str = ""
-
-    @root_validator()
-    def validate_azure_settings(cls, values: dict) -> dict:
-        values["openai_api_version"] = get_from_dict_or_env(
-            values,
-            "openai_api_version",
-            "OPENAI_API_VERSION",
-        )
-        values["openai_api_type"] = get_from_dict_or_env(
-            values, "openai_api_type", "OPENAI_API_TYPE", "azure"
-        )
-        return values
-
-    @property
-    def _identifying_params(self) -> Mapping[str, Any]:
-        return {
-            **{"deployment_name": self.deployment_name},
-            **super()._identifying_params,
-        }
-
-    @property
-    def _invocation_params(self) -> dict[str, Any]:
-        openai_params = {
-            "engine": self.deployment_name,
-            "api_type": self.openai_api_type,
-            "api_version": self.openai_api_version,
-        }
-        return {**openai_params, **super()._invocation_params}
-
-    @property
-    def _llm_type(self) -> str:
-        """Return type of llm."""
-        return "azure"
-
-    @property
-    def lc_attributes(self) -> dict[str, Any]:
-        return {
-            "openai_api_type": self.openai_api_type,
-            "openai_api_version": self.openai_api_version,
-        }
-
-
-class OpenAIChat(BaseLLM):
-    """OpenAI Chat large language models.
-
-    To use, you should have the ``openai`` python package installed, and the
-    environment variable ``OPENAI_API_KEY`` set with your API key.
-
-    Any parameters that are valid to be passed to the openai.create call can be passed
-    in, even if not explicitly saved on this class.
-
-    Args:
-
-        model_name: The model name to use.
-        model_kwargs: Any additional kwargs to pass to the model.
-        openai_api_key: The OpenAI API key to use.
-        openai_api_base: The OpenAI API base to use.
-        openai_proxy: The OpenAI proxy to use.
-        max_retries: The maximum number of retries to make when generating.
-        prefix_messages: The prefix messages to use.
-        streaming: Whether to stream the results or not.
-        allowed_special: Set of special tokens that are allowed。
-        disallowed_special: Set of special tokens that are not allowed。
-
-
-
-    Example:
-        .. code-block:: python
-
-            from langchain.llms import OpenAIChat
-
-            openaichat = OpenAIChat(model_name="gpt-3.5-turbo")
-    """
-
-    client: Any  #: :meta private:
-    model_name: str = "gpt-4-1106-preview"
-    model_kwargs: dict[str, Any] = Field(default_factory=dict)
-    openai_api_key: str | None = None
-    openai_api_base: str | None = None
-    openai_proxy: str | None = None
-    max_retries: int = 6
-    """Maximum number of retries to make when generating."""
-    prefix_messages: list = Field(default_factory=list)
-    """Series of messages for Chat input."""
-    streaming: bool = False
-    """Whether to stream the results or not."""
-    allowed_special: Literal["all"] | AbstractSet[str] = set()
-    """Set of special tokens that are allowed。"""
-    disallowed_special: Literal["all"] | Collection[str] = "all"
-    """Set of special tokens that are not allowed。"""
-
-    @root_validator(pre=True)
-    def build_extra(cls, values: dict[str, Any]) -> dict[str, Any]:
-        """Build extra kwargs from additional params that were passed in."""
-        all_required_field_names = {
-            field.alias for field in cls.__fields__.values()
-        }
-
-        extra = values.get("model_kwargs", {})
-        for field_name in list(values):
-            if field_name not in all_required_field_names:
-                if field_name in extra:
-                    raise ValueError(
-                        f"Found {field_name} supplied twice."
-                    )
-                extra[field_name] = values.pop(field_name)
-        values["model_kwargs"] = extra
-        return values
-
-    @root_validator()
-    def validate_environment(cls, values: dict) -> dict:
-        """Validate that api key and python package exists in environment."""
-        openai_api_key = get_from_dict_or_env(
-            values, "openai_api_key", "OPENAI_API_KEY"
-        )
-        openai_api_base = get_from_dict_or_env(
-            values,
-            "openai_api_base",
-            "OPENAI_API_BASE",
-            default="",
-        )
-        openai_proxy = get_from_dict_or_env(
-            values,
-            "openai_proxy",
-            "OPENAI_PROXY",
-            default="",
-        )
-        openai_organization = get_from_dict_or_env(
-            values,
-            "openai_organization",
-            "OPENAI_ORGANIZATION",
-            default="",
-        )
-        try:
-            import openai
-
-            openai.api_key = openai_api_key
-            if openai_api_base:
-                openai.api_base = openai_api_base
-            if openai_organization:
-                openai.organization = openai_organization
-            if openai_proxy:
-                openai.proxy = {"http": openai_proxy, "https": openai_proxy}  # type: ignore[assignment]  # noqa: E501
-        except ImportError:
-            raise ImportError(
-                "Could not import openai python package. "
-                "Please install it with `pip install openai`."
-            )
-        try:
-            values["client"] = openai.ChatCompletion
-        except AttributeError:
-            raise ValueError(
-                "`openai` has no `ChatCompletion` attribute, this is"
-                " likely due to an old version of the openai package."
-                " Try upgrading it with `pip install --upgrade"
-                " openai`."
-            )
-        return values
-
-    @property
-    def _default_params(self) -> dict[str, Any]:
-        """Get the default parameters for calling OpenAI API."""
-        return self.model_kwargs
-
-    def _get_chat_params(
-        self, prompts: list[str], stop: list[str] | None = None
-    ) -> tuple:
-        if len(prompts) > 1:
-            raise ValueError(
-                "OpenAIChat currently only supports single prompt,"
-                f" got {prompts}"
-            )
-        messages = self.prefix_messages + [
-            {"role": "user", "content": prompts[0]}
-        ]
-        params: dict[str, Any] = {
-            **{"model": self.model_name},
-            **self._default_params,
-        }
-        if stop is not None:
-            if "stop" in params:
-                raise ValueError(
-                    "`stop` found in both the input and default"
-                    " params."
-                )
-            params["stop"] = stop
-        if params.get("max_tokens") == -1:
-            # for ChatGPT api, omitting max_tokens is equivalent to having no limit
-            del params["max_tokens"]
-        return messages, params
-
-    def _stream(
-        self,
-        prompt: str,
-        stop: list[str] | None = None,
-        run_manager: CallbackManagerForLLMRun | None = None,
-        **kwargs: Any,
-    ) -> Iterator[GenerationChunk]:
-        messages, params = self._get_chat_params([prompt], stop)
-        params = {**params, **kwargs, "stream": True}
-        for stream_resp in completion_with_retry(
-            self, messages=messages, run_manager=run_manager, **params
-        ):
-            token = stream_resp["choices"][0]["delta"].get(
-                "content", ""
-            )
-            chunk = GenerationChunk(text=token)
-            yield chunk
-            if run_manager:
-                run_manager.on_llm_new_token(token, chunk=chunk)
-
-    async def _astream(
-        self,
-        prompt: str,
-        stop: list[str] | None = None,
-        run_manager: AsyncCallbackManagerForLLMRun | None = None,
-        **kwargs: Any,
-    ) -> AsyncIterator[GenerationChunk]:
-        messages, params = self._get_chat_params([prompt], stop)
-        params = {**params, **kwargs, "stream": True}
-        async for stream_resp in await acompletion_with_retry(
-            self, messages=messages, run_manager=run_manager, **params
-        ):
-            token = stream_resp["choices"][0]["delta"].get(
-                "content", ""
-            )
-            chunk = GenerationChunk(text=token)
-            yield chunk
-            if run_manager:
-                await run_manager.on_llm_new_token(token, chunk=chunk)
-
-    def _generate(
-        self,
-        prompts: list[str],
-        stop: list[str] | None = None,
-        run_manager: CallbackManagerForLLMRun | None = None,
-        **kwargs: Any,
-    ) -> LLMResult:
-        if self.streaming:
-            generation: GenerationChunk | None = None
-            for chunk in self._stream(
-                prompts[0], stop, run_manager, **kwargs
-            ):
-                if generation is None:
-                    generation = chunk
-                else:
-                    generation += chunk
-            assert generation is not None
-            return LLMResult(generations=[[generation]])
-
-        messages, params = self._get_chat_params(prompts, stop)
-        params = {**params, **kwargs}
-        full_response = completion_with_retry(
-            self, messages=messages, run_manager=run_manager, **params
-        )
-        llm_output = {
-            "token_usage": full_response["usage"],
-            "model_name": self.model_name,
-        }
-        return LLMResult(
-            generations=[
-                [
-                    Generation(
-                        text=full_response["choices"][0]["message"][
-                            "content"
-                        ]
-                    )
-                ]
-            ],
-            llm_output=llm_output,
-        )
-
-    async def _agenerate(
-        self,
-        prompts: list[str],
-        stop: list[str] | None = None,
-        run_manager: AsyncCallbackManagerForLLMRun | None = None,
-        **kwargs: Any,
-    ) -> LLMResult:
-        if self.streaming:
-            generation: GenerationChunk | None = None
-            async for chunk in self._astream(
-                prompts[0], stop, run_manager, **kwargs
-            ):
-                if generation is None:
-                    generation = chunk
-                else:
-                    generation += chunk
-            assert generation is not None
-            return LLMResult(generations=[[generation]])
-
-        messages, params = self._get_chat_params(prompts, stop)
-        params = {**params, **kwargs}
-        full_response = await acompletion_with_retry(
-            self, messages=messages, run_manager=run_manager, **params
-        )
-        llm_output = {
-            "token_usage": full_response["usage"],
-            "model_name": self.model_name,
-        }
-        return LLMResult(
-            generations=[
-                [
-                    Generation(
-                        text=full_response["choices"][0]["message"][
-                            "content"
-                        ]
-                    )
-                ]
-            ],
-            llm_output=llm_output,
-        )
-
-    @property
-    def _identifying_params(self) -> Mapping[str, Any]:
-        """Get the identifying parameters."""
-        return {
-            **{"model_name": self.model_name},
-            **self._default_params,
-        }
-
-    @property
-    def _llm_type(self) -> str:
-        """Return type of llm."""
-        return "openai-chat"
-
-    def get_token_ids(self, text: str) -> list[int]:
-        """Get the token IDs using the tiktoken package."""
-        # tiktoken NOT supported for Python < 3.8
-        if sys.version_info[1] < 8:
-            return super().get_token_ids(text)
-        try:
-            import tiktoken
-        except ImportError:
-            raise ImportError(
-                "Could not import tiktoken python package. This is"
-                " needed in order to calculate get_num_tokens. Please"
-                " install it with `pip install tiktoken`."
-            )
-
-        enc = tiktoken.encoding_for_model(self.model_name)
-        return enc.encode(
-            text,
-            allowed_special=self.allowed_special,
-            disallowed_special=self.disallowed_special,
-        )
diff --git a/swarms/models/phi.py b/swarms/models/phi.py
deleted file mode 100644
index 90fca08e..00000000
--- a/swarms/models/phi.py
+++ /dev/null
@@ -1 +0,0 @@
-"""Phi by Microsoft written by Kye"""
diff --git a/swarms/models/test_fire_function.py b/swarms/models/test_fire_function.py
deleted file mode 100644
index 082d954d..00000000
--- a/swarms/models/test_fire_function.py
+++ /dev/null
@@ -1,44 +0,0 @@
-from unittest.mock import MagicMock
-
-from swarms.models.fire_function import FireFunctionCaller
-
-
-def test_fire_function_caller_run(mocker):
-    # Create mock model and tokenizer
-    model = MagicMock()
-    tokenizer = MagicMock()
-    mocker.patch.object(FireFunctionCaller, "model", model)
-    mocker.patch.object(FireFunctionCaller, "tokenizer", tokenizer)
-
-    # Create mock task and arguments
-    task = "Add 2 and 3"
-    args = (2, 3)
-    kwargs = {}
-
-    # Create mock generated_ids and decoded output
-    generated_ids = [1, 2, 3]
-    decoded_output = "5"
-    model.generate.return_value = generated_ids
-    tokenizer.batch_decode.return_value = [decoded_output]
-
-    # Create FireFunctionCaller instance
-    fire_function_caller = FireFunctionCaller()
-
-    # Run the function
-    fire_function_caller.run(task, *args, **kwargs)
-
-    # Assert model.generate was called with the correct inputs
-    model.generate.assert_called_once_with(
-        tokenizer.apply_chat_template.return_value,
-        max_new_tokens=fire_function_caller.max_tokens,
-        *args,
-        **kwargs,
-    )
-
-    # Assert tokenizer.batch_decode was called with the correct inputs
-    tokenizer.batch_decode.assert_called_once_with(generated_ids)
-
-    # Assert the decoded output is printed
-    assert decoded_output in mocker.patch.object(
-        print, "call_args_list"
-    )
diff --git a/swarms/models/vllm.py b/swarms/models/vllm.py
deleted file mode 100644
index cf9cda45..00000000
--- a/swarms/models/vllm.py
+++ /dev/null
@@ -1,97 +0,0 @@
-import torch
-
-from swarms.models.base_llm import AbstractLLM
-
-if torch.cuda.is_available() or torch.cuda.device_count() > 0:
-    # Download vllm with pip
-    try:
-        from vllm import LLM, SamplingParams
-    except ImportError as error:
-        print(f"[ERROR] [vLLM] {error}")
-        raise error
-else:
-    from swarms.models.huggingface import HuggingfaceLLM as LLM
-
-    SamplingParams = None
-
-
-class vLLM(AbstractLLM):
-    """vLLM model
-
-
-    Args:
-        model_name (str, optional): _description_. Defaults to "facebook/opt-13b".
-        tensor_parallel_size (int, optional): _description_. Defaults to 4.
-        trust_remote_code (bool, optional): _description_. Defaults to False.
-        revision (str, optional): _description_. Defaults to None.
-        temperature (float, optional): _description_. Defaults to 0.5.
-        top_p (float, optional): _description_. Defaults to 0.95.
-        *args: _description_.
-        **kwargs: _description_.
-
-    Methods:
-        run: run the vLLM model
-
-    Raises:
-        error: _description_
-
-    Examples:
-    >>> from swarms.models.vllm import vLLM
-    >>> vllm = vLLM()
-    >>> vllm.run("Hello world!")
-
-
-    """
-
-    def __init__(
-        self,
-        model_name: str = "facebook/opt-13b",
-        tensor_parallel_size: int = 4,
-        trust_remote_code: bool = False,
-        revision: str = None,
-        temperature: float = 0.5,
-        top_p: float = 0.95,
-        *args,
-        **kwargs,
-    ):
-        super().__init__(*args, **kwargs)
-        self.model_name = model_name
-        self.tensor_parallel_size = tensor_parallel_size
-        self.trust_remote_code = trust_remote_code
-        self.revision = revision
-        self.top_p = top_p
-
-        # LLM model
-        self.llm = LLM(
-            model_name=self.model_name,
-            tensor_parallel_size=self.tensor_parallel_size,
-            trust_remote_code=self.trust_remote_code,
-            revision=self.revision,
-            *args,
-            **kwargs,
-        )
-
-        # Sampling parameters
-        self.sampling_params = SamplingParams(
-            temperature=temperature, top_p=top_p, *args, **kwargs
-        )
-
-    def run(self, task: str = None, *args, **kwargs):
-        """Run the vLLM model
-
-        Args:
-            task (str, optional): _description_. Defaults to None.
-
-        Raises:
-            error: _description_
-
-        Returns:
-            _type_: _description_
-        """
-        try:
-            return self.llm.generate(
-                task, self.sampling_params, *args, **kwargs
-            )
-        except Exception as error:
-            print(f"[ERROR] [vLLM] [run] {error}")
-            raise error
diff --git a/swarms/tokenizers/__init__.py b/swarms/tokenizers/__init__.py
index d62146ca..5d82440b 100644
--- a/swarms/tokenizers/__init__.py
+++ b/swarms/tokenizers/__init__.py
@@ -3,7 +3,6 @@ from swarms.tokenizers.anthropic_tokenizer import (
     import_optional_dependency,
 )
 from swarms.tokenizers.base_tokenizer import BaseTokenizer
-from swarms.tokenizers.cohere_tokenizer import CohereTokenizer
 from swarms.tokenizers.openai_tokenizers import OpenAITokenizer
 from swarms.tokenizers.r_tokenizers import (
     HuggingFaceTokenizer,
@@ -19,5 +18,4 @@ __all__ = [
     "OpenAITokenizer",
     "import_optional_dependency",
     "AnthropicTokenizer",
-    "CohereTokenizer",
 ]
diff --git a/swarms/tokenizers/cohere_tokenizer.py b/swarms/tokenizers/cohere_tokenizer.py
deleted file mode 100644
index e6164f5b..00000000
--- a/swarms/tokenizers/cohere_tokenizer.py
+++ /dev/null
@@ -1,36 +0,0 @@
-from __future__ import annotations
-
-from dataclasses import dataclass
-
-from cohere import Client
-
-
-@dataclass
-class CohereTokenizer:
-    """
-    A tokenizer class for Cohere models.
-    """
-
-    model: str
-    client: Client
-    DEFAULT_MODEL: str = "command"
-    DEFAULT_MAX_TOKENS: int = 2048
-    max_tokens: int = DEFAULT_MAX_TOKENS
-
-    def count_tokens(self, text: str | list) -> int:
-        """
-        Count the number of tokens in the given text.
-
-        Args:
-            text (str | list): The input text to tokenize.
-
-        Returns:
-            int: The number of tokens in the text.
-
-        Raises:
-            ValueError: If the input text is not a string.
-        """
-        if isinstance(text, str):
-            return len(self.client.tokenize(text=text).tokens)
-        else:
-            raise ValueError("Text must be a string.")
diff --git a/swarms/workers/__init__.py b/swarms/workers/__init__.py
deleted file mode 100644
index e69de29b..00000000
diff --git a/swarms/workers/base.py b/swarms/workers/base.py
deleted file mode 100644
index 358810bd..00000000
--- a/swarms/workers/base.py
+++ /dev/null
@@ -1,93 +0,0 @@
-from typing import Dict, List, Optional, Union
-
-
-class AbstractWorker:
-    """(In preview) An abstract class for AI worker.
-
-    An worker can communicate with other workers and perform actions.
-    Different workers can differ in what actions they perform in the `receive` method.
-    """
-
-    def __init__(
-        self,
-        name: str,
-    ):
-        """
-        Args:
-            name (str): name of the worker.
-        """
-        # a dictionary of conversations, default value is list
-        self._name = name
-
-    @property
-    def name(self):
-        """Get the name of the worker."""
-        return self._name
-
-    def run(self, task: str):
-        """Run the worker agent once"""
-
-    def send(
-        self,
-        message: Union[Dict, str],
-        recipient,  # add AbstractWorker
-        request_reply: Optional[bool] = None,
-    ):
-        """(Abstract method) Send a message to another worker."""
-
-    async def a_send(
-        self,
-        message: Union[Dict, str],
-        recipient,  # add AbstractWorker
-        request_reply: Optional[bool] = None,
-    ):
-        """(Aabstract async method) Send a message to another worker."""
-
-    def receive(
-        self,
-        message: Union[Dict, str],
-        sender,  # add AbstractWorker
-        request_reply: Optional[bool] = None,
-    ):
-        """(Abstract method) Receive a message from another worker."""
-
-    async def a_receive(
-        self,
-        message: Union[Dict, str],
-        sender,  # add AbstractWorker
-        request_reply: Optional[bool] = None,
-    ):
-        """(Abstract async method) Receive a message from another worker."""
-
-    def reset(self):
-        """(Abstract method) Reset the worker."""
-
-    def generate_reply(
-        self,
-        messages: Optional[List[Dict]] = None,
-        sender=None,  # Optional["AbstractWorker"] = None,
-        **kwargs,
-    ) -> Union[str, Dict, None]:
-        """(Abstract method) Generate a reply based on the received messages.
-
-        Args:
-            messages (list[dict]): a list of messages received.
-            sender: sender of an Agent instance.
-        Returns:
-            str or dict or None: the generated reply. If None, no reply is generated.
-        """
-
-    async def a_generate_reply(
-        self,
-        messages: Optional[List[Dict]] = None,
-        sender=None,  # Optional["AbstractWorker"] = None,
-        **kwargs,
-    ) -> Union[str, Dict, None]:
-        """(Abstract async method) Generate a reply based on the received messages.
-
-        Args:
-            messages (list[dict]): a list of messages received.
-            sender: sender of an Agent instance.
-        Returns:
-            str or dict or None: the generated reply. If None, no reply is generated.
-        """