diff --git a/pyproject.toml b/pyproject.toml
index 8112def8..07bd584e 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -29,46 +29,26 @@ torch = ">=2.1.1,<3.0"
 transformers = "4.39.0"
 asyncio = ">=3.4.3,<4.0"
 einops = "0.7.0"
-google-generativeai = "0.3.1"
-langchain = "0.1.13"
-langchain-core = "0.1.34"
+langchain-core = "0.1.33"
 langchain-community = "0.0.29"
 langchain-experimental = "0.0.55"
-faiss-cpu = "1.7.4"
 backoff = "2.2.1"
-datasets = "*"
-optimum = "1.15.0"
-supervision = "0.19.0"
-opencv-python = "4.9.0.80"
-diffusers = "*"
-anthropic = "0.21.3"
 toml = "*"
 pypdf = "4.1.0"
-accelerate = "*"
-sentencepiece = "0.1.98"
 httpx = "0.24.1"
-tiktoken = "0.5.2"
 ratelimit = "2.2.1"
 loguru = "0.7.2"
-huggingface-hub = "*"
 pydantic = "2.6.4"
 tenacity = "8.2.3"
 Pillow = "10.2.0"
-chromadb = "0.4.24"
 termcolor = "2.2.0"
-torchvision = "0.16.1"
 rich = "13.5.2"
-bitsandbytes = "*"
-sentence-transformers = "*"
-peft = "*"
 psutil = "*"
-timm = "*"
 sentry-sdk = "*"

 [tool.poetry.dev-dependencies]
 black = "23.3.0"
-
 [tool.poetry.group.lint.dependencies]
 ruff = ">=0.0.249,<0.3.5"
 types-toml = "^0.10.8.1"
diff --git a/requirements.txt b/requirements.txt
index 68ad9d8a..072e5c9d 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,40 +1,23 @@
-torch==2.1.1
-transformers
-pandas
-langchain==0.1.13
+mkdocs
+mkdocs-material
+mkdocs-glightbox
+torch>=2.1.1,<3.0
+transformers==4.39.0
+asyncio>=3.4.3,<4.0
+einops==0.7.0
 langchain-core==0.1.33
 langchain-community==0.0.29
-langsmith==0.1.17
-langchain-openai==0.0.5
-httpx==0.24.1
-Pillow==9.4.0
-datasets==2.14.5
-pydantic==2.6.4
-huggingface-hub
-requests_mock
-pypdf==4.0.1
-accelerate==0.22.0
-loguru==0.7.2
-optimum
-diffusers
-toml
-tiktoken==0.5.2
-colored
-addict
+langchain-experimental==0.0.55
 backoff==2.2.1
+toml
+pypdf==4.1.0
+httpx==0.24.1
 ratelimit==2.2.1
+loguru==0.7.2
+pydantic==2.6.4
+tenacity==8.2.3
+Pillow==10.2.0
 termcolor==2.2.0
-opencv-python==4.9.0.80
-timm
-torchvision==0.16.1
 rich==13.5.2
-mkdocs
-mkdocs-material
-anthropic==0.2.5
-mkdocs-glightbox
-pre-commit==3.6.2
 psutil
-black
-tenacity
-supervision
 sentry-sdk
\ No newline at end of file
diff --git a/swarms/__init__.py b/swarms/__init__.py
index b9eb1426..db28200e 100644
--- a/swarms/__init__.py
+++ b/swarms/__init__.py
@@ -17,6 +17,6 @@ from swarms.models import *  # noqa: E402, F403
 from swarms.prompts import *  # noqa: E402, F403
 from swarms.structs import *  # noqa: E402, F403
 from swarms.telemetry import *  # noqa: E402, F403
-from swarms.tokenizers import *  # noqa: E402, F403
 from swarms.tools import *  # noqa: E402, F403
 from swarms.utils import *  # noqa: E402, F403
+from swarms.schedulers import *  # noqa: E402, F403
diff --git a/swarms/models/__init__.py b/swarms/models/__init__.py
index 92b0e929..8400073f 100644
--- a/swarms/models/__init__.py
+++ b/swarms/models/__init__.py
@@ -44,7 +44,6 @@ from swarms.models.qwen import QwenVLMultiModal  # noqa: E402
 # from swarms.models.roboflow_model import RoboflowMultiModal
 from swarms.models.sam_supervision import SegmentAnythingMarkGenerator
 from swarms.models.sampling_params import SamplingParams, SamplingType
-from swarms.models.timm import TimmModel  # noqa: E402
 from swarms.models.together import TogetherLLM  # noqa: E402
 from swarms.models.types import (  # noqa: E402
     AudioModality,
diff --git a/swarms/schedulers/__init__.py b/swarms/schedulers/__init__.py
new file mode 100644
index 00000000..803b2278
--- /dev/null
+++ b/swarms/schedulers/__init__.py
@@ -0,0 +1,6 @@
+from swarms.schedulers.agent_process import (
+    AgentProcess,
+    AgentProcessQueue,
+)
+
+__all__ = ["AgentProcess", "AgentProcessQueue"]
diff --git a/swarms/schedulers/agent_process.py b/swarms/schedulers/agent_process.py
new file mode 100644
index 00000000..cd9ca6e3
--- /dev/null
+++ b/swarms/schedulers/agent_process.py
@@ -0,0 +1,115 @@
+from datetime import datetime
+from typing import Optional
+
+from pydantic import BaseModel, Field
+
+from swarms.structs.omni_agent_types import agents
+from swarms.utils.loguru_logger import logger
+
+
+class AgentProcess(BaseModel):
+    agent_id: int
+    agent_name: str
+    prompt: str
+    response: Optional[str] = None
+    time: str = Field(
+        default_factory=lambda: datetime.now().strftime(
+            "%Y-%m-%d %H:%M:%S"
+        )
+    )
+    priority: int = 0
+    status: str = "Waiting"
+    pid: Optional[int] = None
+
+    def set_pid(self, pid: int):
+        self.pid = pid
+
+    def get_pid(self):
+        return self.pid
+
+    def set_time(self, time: str):
+        self.time = time
+
+    def get_time(self):
+        return self.time
+
+    def set_status(self, status: str):
+        self.status = status
+
+    def get_status(self):
+        return self.status
+
+
+class AgentProcessQueue:
+    """
+    A class representing a queue of agent processes.
+
+    Attributes:
+        MAX_PID (int): The maximum process ID.
+        pid_pool (list): A list representing the availability of process IDs.
+        agent_process_queue (list): A list representing the queue of agent processes.
+
+    Methods:
+        add(agents): Creates and enqueues an AgentProcess for each agent.
+        print(): Prints the details of all agent processes in the queue.
+
+    Private Methods:
+        _get_available_pid(): Returns an available process ID from the pool.
+    """
+
+    def __init__(self, max_pid: int = 1024):
+        self.MAX_PID = max_pid
+        self.pid_pool = [False for _ in range(self.MAX_PID)]
+        self.agent_process_queue = (
+            []
+        )  # Currently use a list to simulate a queue
+
+    def add(self, agents: agents):
+        """
+        Creates an AgentProcess for each agent and adds it to the queue.
+
+        Args:
+            agents: The agents to enqueue as processes.
+
+        Returns:
+            None
+        """
+        for agent in agents:
+            agent_process = AgentProcess(
+                agent_id=agent.id,
+                agent_name=agent.agent_name,
+                prompt=agent.short_memory.return_history_as_string(),
+            )
+            pid = self._get_available_pid()
+            if pid is None:
+                logger.warning("No available PID")
+                return
+            agent_process.set_pid(pid)
+            self.pid_pool[pid] = True  # mark the PID as in use
+            agent_process.set_status("Waiting")
+            self.agent_process_queue.append(agent_process)
+
+    def print(self):
+        """
+        Prints the details of all agent processes in the queue.
+
+        Returns:
+            None
+        """
+        for agent_process in self.agent_process_queue:
+            logger.info(
+                f"| Agent-process ID: {agent_process.get_pid()} |"
+                f" Status: {agent_process.get_status()} |"
+            )
+
+    def _get_available_pid(self):
+        """
+        Returns an available process ID from the pool.
+
+        Returns:
+            int or None: The available process ID, or None if no ID is available.
+ """ + for i, used in enumerate(self.pid_pool): + if not used: + return i + return None diff --git a/swarms/tokenizers/__init__.py b/swarms/tokenizers/__init__.py deleted file mode 100644 index 895c14bc..00000000 --- a/swarms/tokenizers/__init__.py +++ /dev/null @@ -1,22 +0,0 @@ -# from swarms.tokenizers.anthropic_tokenizer import ( -# AnthropicTokenizer, -# import_optional_dependency, -# ) -from swarms.tokenizers.base_tokenizer import BaseTokenizer -from swarms.tokenizers.openai_tokenizers import OpenAITokenizer -from swarms.tokenizers.r_tokenizers import ( - HuggingFaceTokenizer, - SentencePieceTokenizer, - Tokenizer, -) - - -__all__ = [ - "SentencePieceTokenizer", - "HuggingFaceTokenizer", - "Tokenizer", - "BaseTokenizer", - "OpenAITokenizer", - # "import_optional_dependency", - # "AnthropicTokenizer", -] diff --git a/swarms/tokenizers/anthropic_tokenizer.py b/swarms/tokenizers/anthropic_tokenizer.py deleted file mode 100644 index 77cd07c3..00000000 --- a/swarms/tokenizers/anthropic_tokenizer.py +++ /dev/null @@ -1,95 +0,0 @@ -from __future__ import annotations - -from dataclasses import dataclass -from importlib import import_module -from types import ModuleType - -from anthropic import Anthropic - -from swarms.tokenizers.base_tokenizer import BaseTokenizer - -INSTALL_MAPPING = { - "huggingface_hub": "huggingface-hub", - "pinecone": "pinecone-client", - "opensearchpy": "opensearch-py", -} - - -def import_optional_dependency(name: str) -> ModuleType | None: - """Import an optional dependency. - - If a dependency is missing, an ImportError with a nice message will be raised. - - Args: - name: The module name. - Returns: - The imported module, when found. - None is returned when the package is not found and `errors` is False. - """ - - package_name = INSTALL_MAPPING.get(name) - install_name = package_name if package_name is not None else name - - msg = ( - f"Missing optional dependency: '{install_name}'. " - f"Use poetry or pip to install '{install_name}'." - ) - try: - module = import_module(name) - except ImportError: - raise ImportError(msg) - - return module - - -@dataclass -class AnthropicTokenizer(BaseTokenizer): - """ - Tokenizer class for Anthropic models.] - """ - - max_tokens: int = 500 - client: Anthropic = None - model: str = "claude-2.1" - - def __post_init__(self): - self.DEFAULT_MODEL: str = "claude-2.1" - self.MODEL_PREFIXES_TO_MAX_TOKENS: dict[str, int] = { - "claude-2.1": 200000, - "claude": 100000, - } - self.model = self.model # or self.DEFAULT_MODEL - self.max_tokens = self.max_tokens or self.default_max_tokens() - self.client = ( - self.client - or import_optional_dependency("anthropic").Anthropic() - ) - - def default_max_tokens(self) -> int: - """ - Returns the default maximum number of tokens based on the model prefix. - """ - tokens = next( - v - for k, v in self.MODEL_PREFIXES_TO_MAX_TOKENS.items() - if self.model.startswith(k) - ) - return tokens - - def count_tokens(self, text: str | list) -> int: - """ - Counts the number of tokens in the given text. - - Args: - text: The input text. - - Returns: - The number of tokens in the text. - - Raises: - ValueError: If the input text is not a string. 
- """ - if isinstance(text, str): - return self.client.count_tokens(text) - else: - raise ValueError("Text must be a string.") diff --git a/swarms/tokenizers/base_tokenizer.py b/swarms/tokenizers/base_tokenizer.py deleted file mode 100644 index fd1bc339..00000000 --- a/swarms/tokenizers/base_tokenizer.py +++ /dev/null @@ -1,55 +0,0 @@ -from __future__ import annotations - -from abc import ABC, abstractmethod -from dataclasses import dataclass, field - - -@dataclass -class BaseTokenizer(ABC): - """ - Base class for tokenizers. - - Attributes: - stop_sequences (List[str]): List of stop sequences. - max_tokens (int): Maximum number of tokens. - stop_token (str): Stop token. - """ - - max_tokens: int - stop_token: str = "<|Response|>" - - def __post_init__(self): - self.stop_sequences: list[str] = field( - default_factory=lambda: ["<|Response|>"], - init=False, - ) - - def count_tokens_left(self, text: str | list[dict]) -> int: - """ - Counts the number of tokens left based on the given text. - - Args: - text (Union[str, List[dict]]): The text to count tokens from. - - Returns: - int: The number of tokens left. - """ - diff = self.max_tokens - self.count_tokens(text) - - if diff > 0: - return diff - else: - return 0 - - @abstractmethod - def count_tokens(self, text: str | list[dict]) -> int: - """ - Counts the number of tokens in the given text. - - Args: - text (Union[str, List[dict]]): The text to count tokens from. - - Returns: - int: The number of tokens. - """ - ... diff --git a/swarms/tokenizers/openai_tokenizers.py b/swarms/tokenizers/openai_tokenizers.py deleted file mode 100644 index 9b02943b..00000000 --- a/swarms/tokenizers/openai_tokenizers.py +++ /dev/null @@ -1,181 +0,0 @@ -from __future__ import annotations - -import logging -from dataclasses import dataclass, field - -import tiktoken -from tiktoken import Encoding - -from swarms.tokenizers.base_tokenizer import BaseTokenizer - - -@dataclass -class OpenAITokenizer(BaseTokenizer): - """ - A class representing an OpenAI tokenizer. - - Attributes: - - DEFAULT_OPENAI_GPT_3_COMPLETION_MODEL (str): The default OpenAI GPT-3 completion model. - - DEFAULT_OPENAI_GPT_3_CHAT_MODEL (str): The default OpenAI GPT-3 chat model. - - DEFAULT_OPENAI_GPT_4_MODEL (str): The default OpenAI GPT-4 model. - - DEFAULT_ENCODING (str): The default encoding. - - DEFAULT_MAX_TOKENS (int): The default maximum number of tokens. - - TOKEN_OFFSET (int): The token offset. - - MODEL_PREFIXES_TO_MAX_TOKENS (dict): A dictionary mapping model prefixes to maximum tokens. - - EMBEDDING_MODELS (list): A list of embedding models. - - model (str): The model name. - - Methods: - - __post_init__(): Initializes the OpenAITokenizer object. - - encoding(): Returns the encoding for the model. - - default_max_tokens(): Returns the default maximum number of tokens. - - count_tokens(text, model): Counts the number of tokens in the given text. - - len(text, model): Returns the length of the text in tokens. - """ - - model: str = "gpt-2" - - def __post_init__(self): - """ - Initializes the OpenAITokenizer object. - Sets the default maximum number of tokens. 
- """ - self.max_tokens: int = field( - default_factory=self.default_max_tokens - ) - - self.DEFAULT_OPENAI_GPT_3_COMPLETION_MODEL = ( - "text-davinci-003" - ) - self.DEFAULT_OPENAI_GPT_3_CHAT_MODEL = "gpt-3.5-turbo" - self.DEFAULT_OPENAI_GPT_4_MODEL = "gpt-4" - self.DEFAULT_ENCODING = "cl100k_base" - self.EFAULT_MAX_TOKENS = 2049 - self.TOKEN_OFFSET = 8 - - self.MODEL_PREFIXES_TO_MAX_TOKENS = { - "gpt-4-1106": 128000, - "gpt-4-32k": 32768, - "gpt-4": 8192, - "gpt-3.5-turbo-16k": 16384, - "gpt-3.5-turbo": 4096, - "gpt-35-turbo-16k": 16384, - "gpt-35-turbo": 4096, - "text-davinci-003": 4097, - "text-davinci-002": 4097, - "code-davinci-002": 8001, - "text-embedding-ada-002": 8191, - "text-embedding-ada-001": 2046, - } - - self.EMBEDDING_MODELS = [ - "text-embedding-ada-002", - "text-embedding-ada-001", - ] - - @property - def encoding(self) -> Encoding: - """ - Returns the encoding for the model. - If the model is not found, returns the default encoding. - """ - try: - return tiktoken.encoding_for_model(self.model) - except KeyError: - return tiktoken.get_encoding(self.DEFAULT_ENCODING) - - def default_max_tokens(self) -> int: - """ - Returns the default maximum number of tokens based on the model. - """ - tokens = next( - v - for k, v in self.MODEL_PREFIXES_TO_MAX_TOKENS.items() - if self.model.startswith(k) - ) - offset = ( - 0 - if self.model in self.EMBEDDING_MODELS - else self.TOKEN_OFFSET - ) - - return ( - tokens if tokens else self.DEFAULT_MAX_TOKENS - ) - offset - - def count_tokens( - self, text: str | list[dict], model: str | None = None - ) -> int: - """ - Counts the number of tokens in the given text. - If the text is a list of messages, counts the tokens for each message. - If a model is provided, uses that model for encoding. - """ - if isinstance(text, list): - model = model if model else self.model - - try: - encoding = tiktoken.encoding_for_model(model) - except KeyError: - logging.warning( - "model not found. Using cl100k_base encoding." - ) - encoding = tiktoken.get_encoding("cl100k_base") - - if model in { - "gpt-3.5-turbo-0613", - "gpt-3.5-turbo-16k-0613", - "gpt-4-0314", - "gpt-4-32k-0314", - "gpt-4-0613", - "gpt-4-32k-0613", - }: - tokens_per_message = 3 - tokens_per_name = 1 - elif model == "gpt-3.5-turbo-0301": - tokens_per_message = 4 - tokens_per_name = -1 - elif "gpt-3.5-turbo" in model or "gpt-35-turbo" in model: - logging.info( - "gpt-3.5-turbo may update over time. Returning" - " num tokens assuming gpt-3.5-turbo-0613." - ) - return self.count_tokens( - text, model="gpt-3.5-turbo-0613" - ) - elif "gpt-4" in model: - logging.info( - "gpt-4 may update over time. Returning num tokens" - " assuming gpt-4-0613." - ) - return self.count_tokens(text, model="gpt-4-0613") - else: - raise NotImplementedError( - "token_count() is not implemented for model" - f" {model}. See" - " https://github.com/openai/openai-python/blob/main/chatml.md" - " for information on how messages are converted" - " to tokens." - ) - - num_tokens = 0 - - for message in text: - num_tokens += tokens_per_message - for key, value in message.items(): - num_tokens += len(encoding.encode(value)) - if key == "name": - num_tokens += tokens_per_name - - num_tokens += 3 - - return num_tokens - else: - return len(self.encoding.encode(text)) - - def len(self, text: str | list[dict], model: str | None): - """ - Returns the length of the text in tokens. - If a model is provided, uses that model for encoding. 
- """ - return self.count_tokens(text, model) diff --git a/swarms/tokenizers/r_tokenizers.py b/swarms/tokenizers/r_tokenizers.py deleted file mode 100644 index f807b6ff..00000000 --- a/swarms/tokenizers/r_tokenizers.py +++ /dev/null @@ -1,422 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -import json -import os -import os.path as osp -from collections import deque -from typing import List, Optional, Sequence, Union - -import torch - -from swarms.utils.get_logger import get_logger - - -class SentencePieceTokenizer: - """Tokenizer of sentencepiece. - - Args: - model_file (str): the path of the tokenizer model - """ - - def __init__(self, model_file: str): - from sentencepiece import SentencePieceProcessor - - self.model = SentencePieceProcessor(model_file=model_file) - self._prefix_space_tokens = None - # for stop words - self._maybe_decode_bytes: bool = None - # TODO maybe lack a constant.py - self._indexes_tokens_deque = deque(maxlen=10) - self.max_indexes_num = 5 - self.logger = get_logger("lmdeploy") - - @property - def vocab_size(self): - """vocabulary size.""" - return self.model.vocab_size() - - @property - def bos_token_id(self): - """begine of the sentence token id.""" - return self.model.bos_id() - - @property - def eos_token_id(self): - """end of the sentence token id.""" - return self.model.eos_id() - - @property - def prefix_space_tokens(self): - """tokens without prefix space.""" - if self._prefix_space_tokens is None: - vocab = self.model.IdToPiece(list(range(self.vocab_size))) - self._prefix_space_tokens = { - i - for i, tok in enumerate(vocab) - if tok.startswith("▁") - } - return self._prefix_space_tokens - - def _maybe_add_prefix_space(self, tokens, decoded): - """maybe add prefix space for incremental decoding.""" - if ( - tokens - and not decoded.startswith(" ") - and tokens[0] in self.prefix_space_tokens - ): - return " " + decoded - else: - return decoded - - def indexes_containing_token(self, token: str): - """Return all the possible indexes, whose decoding output may contain - the input token.""" - # traversing vocab is time consuming, can not be accelerated with - # multi threads (computation) or multi process (can't pickle tokenizer) - # so, we maintain latest 10 stop words and return directly if matched - for _token, _indexes in self._indexes_tokens_deque: - if token == _token: - return _indexes - if token == " ": # ' ' is special - token = "▁" - vocab = self.model.IdToPiece(list(range(self.vocab_size))) - indexes = [i for i, voc in enumerate(vocab) if token in voc] - if len(indexes) > self.max_indexes_num: - indexes = self.encode(token, add_bos=False)[-1:] - self.logger.warning( - f"There are too many(>{self.max_indexes_num})" - f" possible indexes may decoding {token}, we will use" - f" {indexes} only" - ) - self._indexes_tokens_deque.append((token, indexes)) - return indexes - - def encode(self, s: str, add_bos: bool = True, **kwargs): - """Tokenize a prompt. - - Args: - s (str): a prompt - Returns: - list[int]: token ids - """ - return self.model.Encode(s, add_bos=add_bos, **kwargs) - - def decode(self, t: Sequence[int], offset: Optional[int] = None): - """De-tokenize. - - Args: - t (List[int]): a list of token ids - offset (int): for incrementally decoding. Default to None, which - means not applied. 
- Returns: - str: text of decoding tokens - """ - if isinstance(t, torch.Tensor): - t = t.tolist() - t = t[offset:] - out_string = self.model.Decode(t) - if offset: - out_string = self._maybe_add_prefix_space(t, out_string) - return out_string - - def __call__(self, s: Union[str, Sequence[str]]): - """Tokenize prompts. - - Args: - s (str): prompts - Returns: - list[int]: token ids - """ - import addict - - add_bos = False - add_eos = False - - input_ids = self.model.Encode( - s, add_bos=add_bos, add_eos=add_eos - ) - return addict.Addict(input_ids=input_ids) - - -class HuggingFaceTokenizer: - """Tokenizer of sentencepiece. - - Args: - model_dir (str): the directory of the tokenizer model - """ - - def __init__(self, model_dir: str): - from transformers import AutoTokenizer - - model_file = osp.join(model_dir, "tokenizer.model") - backend_tokenizer_file = osp.join(model_dir, "tokenizer.json") - model_file_exists = osp.exists(model_file) - self.logger = get_logger("lmdeploy") - if ( - not osp.exists(backend_tokenizer_file) - and model_file_exists - ): - self.logger.warning( - "Can not find tokenizer.json. " - "It may take long time to initialize the tokenizer." - ) - self.model = AutoTokenizer.from_pretrained( - model_dir, trust_remote_code=True - ) - self._prefix_space_tokens = None - # save tokenizer.json to reuse - if ( - not osp.exists(backend_tokenizer_file) - and model_file_exists - ): - if hasattr(self.model, "backend_tokenizer"): - if os.access(model_dir, os.W_OK): - self.model.backend_tokenizer.save( - backend_tokenizer_file - ) - - if self.model.eos_token_id is None: - generation_config_file = osp.join( - model_dir, "generation_config.json" - ) - if osp.exists(generation_config_file): - with open(generation_config_file) as f: - cfg = json.load(f) - self.model.eos_token_id = cfg["eos_token_id"] - elif hasattr(self.model, "eod_id"): # Qwen remote - self.model.eos_token_id = self.model.eod_id - - # for stop words - self._maybe_decode_bytes: bool = None - # TODO maybe lack a constant.py - self._indexes_tokens_deque = deque(maxlen=10) - self.max_indexes_num = 5 - self.token2id = {} - - @property - def vocab_size(self): - """vocabulary size.""" - return self.model.vocab_size - - @property - def bos_token_id(self): - """begine of the sentence token id.""" - return self.model.bos_token_id - - @property - def eos_token_id(self): - """end of the sentence token id.""" - return self.model.eos_token_id - - @property - def prefix_space_tokens(self): - """tokens without prefix space.""" - if self._prefix_space_tokens is None: - vocab = self.model.convert_ids_to_tokens( - list(range(self.vocab_size)) - ) - self._prefix_space_tokens = { - i - for i, tok in enumerate(vocab) - if tok.startswith( - "▁" if isinstance(tok, str) else b" " - ) - } - return self._prefix_space_tokens - - def _maybe_add_prefix_space( - self, tokens: List[int], decoded: str - ): - """maybe add prefix space for incremental decoding.""" - if ( - tokens - and not decoded.startswith(" ") - and tokens[0] in self.prefix_space_tokens - ): - return " " + decoded - else: - return decoded - - @property - def maybe_decode_bytes(self): - """Check if self.model.convert_ids_to_tokens return not a str value.""" - if self._maybe_decode_bytes is None: - self._maybe_decode_bytes = False - vocab = self.model.convert_ids_to_tokens( - list(range(self.vocab_size)) - ) - for tok in vocab: - if not isinstance(tok, str): - self._maybe_decode_bytes = True - break - return self._maybe_decode_bytes - - def indexes_containing_token(self, token: str): - 
"""Return all the possible indexes, whose decoding output may contain - the input token.""" - # traversing vocab is time consuming, can not be accelerated with - # multi threads (computation) or multi process (can't pickle tokenizer) - # so, we maintain latest 10 stop words and return directly if matched - for _token, _indexes in self._indexes_tokens_deque: - if token == _token: - return _indexes - - if self.token2id == {}: - # decode is slower than convert_ids_to_tokens - if self.maybe_decode_bytes: - self.token2id = { - self.model.decode(i): i - for i in range(self.vocab_size) - } - else: - self.token2id = { - self.model.convert_ids_to_tokens(i): i - for i in range(self.vocab_size) - } - if token == " ": # ' ' is special - token = "▁" - indexes = [ - i - for _token, i in self.token2id.items() - if token in _token - ] - if len(indexes) > self.max_indexes_num: - indexes = self.encode(token, add_bos=False)[-1:] - self.logger.warning( - f"There are too many(>{self.max_indexes_num})" - f" possible indexes may decoding {token}, we will use" - f" {indexes} only" - ) - self._indexes_tokens_deque.append((token, indexes)) - return indexes - - def encode(self, s: str, add_bos: bool = True, **kwargs): - """Tokenize a prompt. - - Args: - s (str): a prompt - Returns: - list[int]: token ids - """ - encoded = self.model.encode(s, **kwargs) - if not add_bos: - # in the middle of a session - if encoded and encoded[0] == self.bos_token_id: - encoded = encoded[1:] - return encoded - - def decode(self, t: Sequence[int], offset: Optional[int] = None): - """De-tokenize. - - Args: - t (List[int]): a list of token ids - offset (int): for incrementally decoding. Default to None, which - means not applied. - Returns: - str: text of decoding tokens - """ - skip_special_tokens = True - t = t[offset:] - out_string = self.model.decode( - t, skip_special_tokens=skip_special_tokens - ) - if offset: - out_string = self._maybe_add_prefix_space(t, out_string) - return out_string - - def __call__(self, s: Union[str, Sequence[str]]): - """Tokenize prompts. - - Args: - s (str): prompts - Returns: - list[int]: token ids - """ - add_special_tokens = False - return self.model(s, add_special_tokens=add_special_tokens) - - -class Tokenizer: - """Tokenize prompts or de-tokenize tokens into texts. - - Args: - model_file (str): the path of the tokenizer model - """ - - def __init__(self, model_file: str): - if model_file.endswith(".model"): - model_folder = osp.split(model_file)[0] - else: - model_folder = model_file - model_file = osp.join(model_folder, "tokenizer.model") - tokenizer_config_file = osp.join( - model_folder, "tokenizer_config.json" - ) - - model_file_exists = osp.exists(model_file) - config_exists = osp.exists(tokenizer_config_file) - use_hf_model = config_exists or not model_file_exists - self.logger = get_logger("lmdeploy") - if not use_hf_model: - self.model = SentencePieceTokenizer(model_file) - else: - self.model = HuggingFaceTokenizer(model_folder) - - @property - def vocab_size(self): - """vocabulary size.""" - return self.model.vocab_size - - @property - def bos_token_id(self): - """begine of the sentence token id.""" - return self.model.bos_token_id - - @property - def eos_token_id(self): - """end of the sentence token id.""" - return self.model.eos_token_id - - def encode(self, s: str, add_bos: bool = True, **kwargs): - """Tokenize a prompt. 
- - Args: - s (str): a prompt - Returns: - list[int]: token ids - """ - return self.model.encode(s, add_bos, **kwargs) - - def decode(self, t: Sequence[int], offset: Optional[int] = None): - """De-tokenize. - - Args: - t (List[int]): a list of token ids - offset (int): for incrementally decoding. Default to None, which - means not applied. - Returns: - str: text of decoding tokens - """ - return self.model.decode(t, offset) - - def __call__(self, s: Union[str, Sequence[str]]): - """Tokenize prompts. - - Args: - s (str): prompts - Returns: - list[int]: token ids - """ - return self.model(s) - - def indexes_containing_token(self, token): - """Return all the possible indexes, whose decoding output may contain - the input token.""" - encoded = self.encode(token, add_bos=False) - if len(encoded) > 1: - self.logger.warning( - f"The token {token}, its length of indexes" - f" {encoded} is over than 1. Currently, it can not be" - " used as stop words" - ) - return [] - return self.model.indexes_containing_token(token)
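
For context, here is a minimal usage sketch of the new `AgentProcessQueue` added above. It assumes only what the diff shows the scheduler reading from an agent (`id`, `agent_name`, and `short_memory.return_history_as_string()`), so a hypothetical stub agent stands in for a fully configured swarms `Agent`:

```python
from dataclasses import dataclass, field

from swarms.schedulers import AgentProcessQueue


class _StubMemory:
    """Hypothetical stand-in for an agent's short-term memory."""

    def __init__(self, history: str):
        self._history = history

    def return_history_as_string(self) -> str:
        return self._history


@dataclass
class StubAgent:
    """Hypothetical agent exposing only what AgentProcessQueue reads."""

    id: int
    agent_name: str
    short_memory: _StubMemory = field(
        default_factory=lambda: _StubMemory("system: plan the next step")
    )


queue = AgentProcessQueue(max_pid=8)
queue.add([StubAgent(1, "researcher"), StubAgent(2, "writer")])
# Logs one "| Agent-process ID: ... | Status: ... |" line per queued process.
queue.print()
```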
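Separately, removing `swarms.tokenizers` (and dropping `tiktoken` from the pinned dependencies) leaves callers of the old `OpenAITokenizer.count_tokens` without a helper. A rough replacement sketch, assuming `tiktoken` is installed on its own, mirroring the deleted class's fallback to `cl100k_base` for unknown models:

```python
import tiktoken


def count_tokens(text: str, model: str = "gpt-4") -> int:
    """Approximate stand-in for the removed OpenAITokenizer.count_tokens."""
    try:
        encoding = tiktoken.encoding_for_model(model)
    except KeyError:
        # Same fallback the deleted tokenizer used for unrecognized models.
        encoding = tiktoken.get_encoding("cl100k_base")
    return len(encoding.encode(text))


print(count_tokens("Hello, swarm!"))
```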