[FEAT][AgentProcess] [AgentProcessQueue]

2 years ago · 0a48ca05cc
parent fec511c630
commit 0a48ca05cc
11 changed files with 126 additions and 830 deletions
--- a/pyproject.toml
+++ b/pyproject.toml
@ -29,46 +29,26 @@ torch = ">=2.1.1,<3.0"
 transformers = "4.39.0"
 asyncio = ">=3.4.3,<4.0"
 einops = "0.7.0"
-google-generativeai = "0.3.1"
+langchain-core = "0.1.33"
 langchain = "0.1.13"
 langchain-core = "0.1.34"
 langchain-community = "0.0.29"
 langchain-experimental = "0.0.55"
 faiss-cpu = "1.7.4"
 backoff = "2.2.1"
 datasets = "*"
 optimum = "1.15.0"
 supervision = "0.19.0"
 opencv-python = "4.9.0.80"
 diffusers = "*"
 anthropic = "0.21.3"
 toml = "*"
 pypdf = "4.1.0"
 accelerate = "*"
 sentencepiece = "0.1.98"
 httpx = "0.24.1"
 tiktoken = "0.5.2"
 ratelimit = "2.2.1"
 loguru = "0.7.2"
 huggingface-hub = "*"
 pydantic = "2.6.4"
 tenacity = "8.2.3"
 Pillow = "10.2.0"
 chromadb = "0.4.24"
 termcolor = "2.2.0"
 torchvision = "0.16.1"
 rich = "13.5.2"
 bitsandbytes = "*"
 sentence-transformers = "*"
 peft = "*"
 psutil = "*"
 timm = "*"
 sentry-sdk = "*"
 [tool.poetry.dev-dependencies]
 black = "23.3.0"
 [tool.poetry.group.lint.dependencies]
 ruff = ">=0.0.249,<0.3.5"
 types-toml = "^0.10.8.1"
--- a/requirements.txt
+++ b/requirements.txt
@ -1,40 +1,23 @@
-torch==2.1.1
+mkdocs
-transformers
+mkdocs-material
-pandas
+mkdocs-glightbox
-langchain==0.1.13
+torch>=2.1.1,<3.0
 transformers==4.39.0
 asyncio>=3.4.3,<4.0
 einops==0.7.0
 langchain-core==0.1.33
 langchain-community==0.0.29
-langsmith==0.1.17
+langchain-experimental==0.0.55
 langchain-openai==0.0.5
 httpx==0.24.1
 Pillow==9.4.0
 datasets==2.14.5
 pydantic==2.6.4
 huggingface-hub
 requests_mock
 pypdf==4.0.1
 accelerate==0.22.0
 loguru==0.7.2
 optimum
 diffusers
 toml
 tiktoken==0.5.2
 colored
 addict
 backoff==2.2.1
 toml
 pypdf==4.1.0
 httpx==0.24.1
 ratelimit==2.2.1
 loguru==0.7.2
 pydantic==2.6.4
 tenacity==8.2.3
 Pillow==10.2.0
 termcolor==2.2.0
 opencv-python==4.9.0.80
 timm
 torchvision==0.16.1
 rich==13.5.2
 mkdocs
 mkdocs-material
 anthropic==0.2.5
 mkdocs-glightbox
 pre-commit==3.6.2
 psutil
 black
 tenacity
 supervision
 sentry-sdk
--- a/swarms/init.py
+++ b/swarms/init.py
@ -17,6 +17,6 @@ from swarms.models import *  # noqa: E402, F403
 from swarms.prompts import *  # noqa: E402, F403
 from swarms.structs import *  # noqa: E402, F403
 from swarms.telemetry import *  # noqa: E402, F403
 from swarms.tokenizers import *  # noqa: E402, F403
 from swarms.tools import *  # noqa: E402, F403
 from swarms.utils import *  # noqa: E402, F403
 from swarms.schedulers import *  # noqa: E402, F403
--- a/swarms/models/init.py
+++ b/swarms/models/init.py
@ -44,7 +44,6 @@ from swarms.models.qwen import QwenVLMultiModal  # noqa: E402
 # from swarms.models.roboflow_model import RoboflowMultiModal
 from swarms.models.sam_supervision import SegmentAnythingMarkGenerator
 from swarms.models.sampling_params import SamplingParams, SamplingType
 from swarms.models.timm import TimmModel  # noqa: E402
 from swarms.models.together import TogetherLLM  # noqa: E402
 from swarms.models.types import (  # noqa: E402
    AudioModality,
--- a/swarms/schedulers/init.py
+++ b/swarms/schedulers/init.py
@ -0,0 +1,6 @@
 from swarms.schedulers.agent_process import (
    AgentProcess,
    AgentProcessQueue,
 )
 __all__ = ["AgentProcess", "AgentProcessQueue"]
--- a/swarms/schedulers/agent_process.py
+++ b/swarms/schedulers/agent_process.py
@ -0,0 +1,103 @@
 from datetime import datetime
 from typing import Optional
 from pydantic import BaseModel, Field
 from swarms.structs.omni_agent_types import agents
 from swarms.utils.loguru_logger import logger
 class AgentProcess(BaseModel):
    """
    Record describing one agent's entry in the scheduler queue.

    Attributes:
        agent_id (int): Identifier of the agent that owns this process.
        agent_name (str): Name of the agent.
        prompt (str): Prompt text associated with the process.
        response (Optional[str]): Response text; None until one is stored.
        time (str): Creation timestamp formatted as "%Y-%m-%d %H:%M:%S".
        priority (int): Scheduling priority, 0 by default.
        status (str): Lifecycle status, "Waiting" by default.
        pid (Optional[int]): Process ID; None until one is assigned.
    """

    agent_id: int
    agent_name: str
    prompt: str
    response: Optional[str] = None
    # A default_factory stamps every instance when it is created; a plain
    # default expression would be evaluated only once, at import time.
    time: str = Field(
        default_factory=lambda: datetime.now().strftime(
            "%Y-%m-%d %H:%M:%S"
        )
    )
    priority: int = 0
    status: str = "Waiting"
    pid: Optional[int] = None

    def set_pid(self, pid: int):
        """Store the process ID assigned to this process."""
        self.pid = pid

    def get_pid(self):
        """Return the assigned process ID, or None if none was assigned."""
        return self.pid

    def set_time(self, time: str):
        """Replace the stored timestamp."""
        self.time = time

    def get_time(self):
        """Return the stored timestamp."""
        return self.time

    def set_status(self, status: str):
        """Replace the lifecycle status (e.g. "Waiting")."""
        self.status = status

    def get_status(self):
        """Return the current lifecycle status."""
        return self.status
 class AgentProcessQueue:
    """
    A class representing a queue of agent processes.

    Attributes:
        MAX_PID (int): The number of process IDs in the pool.
        pid_pool (list): One flag per process ID; True once the ID is in use.
        agent_process_queue (list): The queued agent processes, in insertion order.

    Methods:
        add(agents): Creates one agent process per agent and queues it.
        print(): Logs the PID and status of every queued agent process.

    Private Methods:
        _get_available_pid(): Returns the lowest free process ID without reserving it.
        _reserve_pid(): Returns the lowest free process ID and marks it as used.
    """

    def __init__(self, max_pid: int = 1024):
        self.MAX_PID = max_pid
        self.pid_pool = [False] * self.MAX_PID
        # Currently use list to simulate queue
        self.agent_process_queue = []

    # The annotation is quoted because the parameter shares its name with
    # the imported `agents` type alias.
    def add(self, agents: "agents"):
        """
        Creates an agent process for every given agent and appends it to the queue.

        Each process receives its own PID from the pool and the status
        "Waiting". When the pool is exhausted a warning is logged and the
        remaining agents are not queued.

        Args:
            agents: Iterable of agents exposing `id`, `agent_name` and
                `short_memory.return_history_as_string()`.

        Returns:
            None
        """
        for agent in agents:
            agent_process = AgentProcess(
                agent_id=agent.id,
                agent_name=agent.agent_name,
                prompt=agent.short_memory.return_history_as_string(),
            )
            pid = self._reserve_pid()
            if pid is None:
                logger.warning("No available PID")
                return
            # Fields are assigned directly so this class relies only on the
            # declared attributes of AgentProcess.
            agent_process.pid = pid
            agent_process.status = "Waiting"
            self.agent_process_queue.append(agent_process)

    def print(self):
        """
        Logs the PID and status of all agent processes in the queue.

        Returns:
            None
        """
        for agent_process in self.agent_process_queue:
            logger.info(
                f"| Agent-process ID: {agent_process.pid} |"
                f" Status: {agent_process.status} |"
            )

    def _get_available_pid(self):
        """
        Returns the lowest free process ID without reserving it.

        Returns:
            int or None: The available process ID, or None if no ID is available.
        """
        for i, used in enumerate(self.pid_pool):
            if not used:
                return i
        return None

    def _reserve_pid(self):
        """
        Returns the lowest free process ID and marks it as used in the pool.

        Returns:
            int or None: The reserved process ID, or None if the pool is exhausted.
        """
        pid = self._get_available_pid()
        if pid is not None:
            # Without this flag every caller would be handed the same PID.
            self.pid_pool[pid] = True
        return pid
--- a/swarms/tokenizers/init.py
+++ b/swarms/tokenizers/init.py
@ -1,22 +0,0 @@
 # from swarms.tokenizers.anthropic_tokenizer import (
 #     AnthropicTokenizer,
 #     import_optional_dependency,
 # )
 from swarms.tokenizers.base_tokenizer import BaseTokenizer
 from swarms.tokenizers.openai_tokenizers import OpenAITokenizer
 from swarms.tokenizers.r_tokenizers import (
    HuggingFaceTokenizer,
    SentencePieceTokenizer,
    Tokenizer,
 )
 __all__ = [
    "SentencePieceTokenizer",
    "HuggingFaceTokenizer",
    "Tokenizer",
    "BaseTokenizer",
    "OpenAITokenizer",
    # "import_optional_dependency",
    # "AnthropicTokenizer",
 ]
--- a/swarms/tokenizers/anthropic_tokenizer.py
+++ b/swarms/tokenizers/anthropic_tokenizer.py
@ -1,95 +0,0 @@
 from __future__ import annotations
 from dataclasses import dataclass
 from importlib import import_module
 from types import ModuleType
 from anthropic import Anthropic
 from swarms.tokenizers.base_tokenizer import BaseTokenizer
 # Maps an importable module name to the distribution name to install when
 # the two differ.
 INSTALL_MAPPING = {
    "huggingface_hub": "huggingface-hub",
    "pinecone": "pinecone-client",
    "opensearchpy": "opensearch-py",
 }


 def import_optional_dependency(name: str) -> ModuleType | None:
    """Import an optional dependency by module name.

    Args:
        name: The module name to import.

    Returns:
        The imported module.

    Raises:
        ImportError: If the module cannot be imported. The message names the
            package to install (taken from INSTALL_MAPPING when the
            distribution name differs from the module name) and the original
            import error is attached as the cause.
    """
    try:
        return import_module(name)
    except ImportError as err:
        install_name = INSTALL_MAPPING.get(name)
        if install_name is None:
            install_name = name
        msg = (
            f"Missing optional dependency: '{install_name}'. "
            f"Use poetry or pip to install '{install_name}'."
        )
        # Chain the original error so the real failure stays visible.
        raise ImportError(msg) from err
@dataclass
class AnthropicTokenizer(BaseTokenizer):
    """
    Tokenizer class for Anthropic models.

    Attributes:
        max_tokens (int): Token budget; when falsy it is replaced by the
            model's default from MODEL_PREFIXES_TO_MAX_TOKENS.
        client (Anthropic): Client used for counting; created on demand
            when not supplied.
        model (str): Model name used to look up the default token budget.
    """

    max_tokens: int = 500
    client: Anthropic = None
    model: str = "claude-2.1"

    def __post_init__(self):
        self.DEFAULT_MODEL: str = "claude-2.1"
        # Order matters: more specific prefixes must come before "claude",
        # because the first matching prefix wins.
        self.MODEL_PREFIXES_TO_MAX_TOKENS: dict[str, int] = {
            "claude-2.1": 200000,
            "claude": 100000,
        }
        self.max_tokens = self.max_tokens or self.default_max_tokens()
        self.client = (
            self.client
            or import_optional_dependency("anthropic").Anthropic()
        )

    def default_max_tokens(self) -> int:
        """
        Returns the default maximum number of tokens based on the model prefix.

        Raises:
            ValueError: If the model name matches no known prefix.
        """
        for prefix, limit in self.MODEL_PREFIXES_TO_MAX_TOKENS.items():
            if self.model.startswith(prefix):
                return limit
        # A bare next() would leak StopIteration here, which is easy to
        # swallow silently; raise an explicit error instead.
        raise ValueError(
            f"No default max_tokens known for model '{self.model}'."
        )

    def count_tokens(self, text: str | list) -> int:
        """
        Counts the number of tokens in the given text.

        Args:
            text: The input text.

        Returns:
            The number of tokens in the text, as reported by the client.

        Raises:
            ValueError: If the input text is not a string.
        """
        if not isinstance(text, str):
            raise ValueError("Text must be a string.")
        return self.client.count_tokens(text)
--- a/swarms/tokenizers/base_tokenizer.py
+++ b/swarms/tokenizers/base_tokenizer.py
@ -1,55 +0,0 @@
 from __future__ import annotations
 from abc import ABC, abstractmethod
 from dataclasses import dataclass, field
@dataclass
class BaseTokenizer(ABC):
    """
    Base class for tokenizers.

    Attributes:
        stop_sequences (List[str]): List of stop sequences; set in
            __post_init__.
        max_tokens (int): Maximum number of tokens.
        stop_token (str): Stop token.
    """

    max_tokens: int
    stop_token: str = "<|Response|>"

    def __post_init__(self):
        # dataclasses.field() only has meaning inside a class body; calling
        # it here would store a Field object instead of the list.
        self.stop_sequences: list[str] = ["<|Response|>"]

    def count_tokens_left(self, text: str | list[dict]) -> int:
        """
        Counts the number of tokens left based on the given text.

        Args:
            text (Union[str, List[dict]]): The text to count tokens from.

        Returns:
            int: max_tokens minus the token count of the text, never below 0.
        """
        return max(self.max_tokens - self.count_tokens(text), 0)

    @abstractmethod
    def count_tokens(self, text: str | list[dict]) -> int:
        """
        Counts the number of tokens in the given text.

        Args:
            text (Union[str, List[dict]]): The text to count tokens from.

        Returns:
            int: The number of tokens.
        """
        ...
--- a/swarms/tokenizers/openai_tokenizers.py
+++ b/swarms/tokenizers/openai_tokenizers.py
@ -1,181 +0,0 @@
 from __future__ import annotations
 import logging
 from dataclasses import dataclass, field
 import tiktoken
 from tiktoken import Encoding
 from swarms.tokenizers.base_tokenizer import BaseTokenizer
@dataclass
class OpenAITokenizer(BaseTokenizer):
    """
    A class representing an OpenAI tokenizer.

    Attributes:
    - DEFAULT_OPENAI_GPT_3_COMPLETION_MODEL (str): The default OpenAI GPT-3 completion model.
    - DEFAULT_OPENAI_GPT_3_CHAT_MODEL (str): The default OpenAI GPT-3 chat model.
    - DEFAULT_OPENAI_GPT_4_MODEL (str): The default OpenAI GPT-4 model.
    - DEFAULT_ENCODING (str): The default encoding.
    - DEFAULT_MAX_TOKENS (int): The default maximum number of tokens.
    - TOKEN_OFFSET (int): The token offset.
    - MODEL_PREFIXES_TO_MAX_TOKENS (dict): A dictionary mapping model prefixes to maximum tokens.
    - EMBEDDING_MODELS (list): A list of embedding models.
    - max_tokens (int | None): Token budget; derived from the model when not given.
    - model (str): The model name.

    Methods:
    - __post_init__(): Fills in max_tokens when the caller did not supply it.
    - encoding(): Returns the encoding for the model.
    - default_max_tokens(): Returns the default maximum number of tokens.
    - count_tokens(text, model): Counts the number of tokens in the given text.
    - len(text, model): Returns the length of the text in tokens.
    """

    # Unannotated class attributes are not dataclass fields. Defining the
    # constants here makes them available before __post_init__ runs, and
    # they remain reachable through `self`.
    DEFAULT_OPENAI_GPT_3_COMPLETION_MODEL = "text-davinci-003"
    DEFAULT_OPENAI_GPT_3_CHAT_MODEL = "gpt-3.5-turbo"
    DEFAULT_OPENAI_GPT_4_MODEL = "gpt-4"
    DEFAULT_ENCODING = "cl100k_base"
    DEFAULT_MAX_TOKENS = 2049
    # Legacy misspelling, kept so existing references keep working.
    EFAULT_MAX_TOKENS = DEFAULT_MAX_TOKENS
    TOKEN_OFFSET = 8
    # Order matters: the first matching prefix wins, so longer prefixes
    # must precede the shorter ones they start with.
    MODEL_PREFIXES_TO_MAX_TOKENS = {
        "gpt-4-1106": 128000,
        "gpt-4-32k": 32768,
        "gpt-4": 8192,
        "gpt-3.5-turbo-16k": 16384,
        "gpt-3.5-turbo": 4096,
        "gpt-35-turbo-16k": 16384,
        "gpt-35-turbo": 4096,
        "text-davinci-003": 4097,
        "text-davinci-002": 4097,
        "code-davinci-002": 8001,
        "text-embedding-ada-002": 8191,
        "text-embedding-ada-001": 2046,
    }
    EMBEDDING_MODELS = [
        "text-embedding-ada-002",
        "text-embedding-ada-001",
    ]

    # Redeclared with a default so the budget can be derived from the model.
    max_tokens: int | None = None
    model: str = "gpt-2"

    def __post_init__(self):
        """
        Initializes the OpenAITokenizer object.

        Keeps a caller-supplied max_tokens and otherwise falls back to the
        model's default maximum number of tokens.
        """
        if not self.max_tokens:
            self.max_tokens = self.default_max_tokens()

    @property
    def encoding(self) -> Encoding:
        """
        Returns the encoding for the model.

        If the model is not found, returns the default encoding.
        """
        try:
            return tiktoken.encoding_for_model(self.model)
        except KeyError:
            return tiktoken.get_encoding(self.DEFAULT_ENCODING)

    def default_max_tokens(self) -> int:
        """
        Returns the default maximum number of tokens based on the model.

        Models matching no known prefix use DEFAULT_MAX_TOKENS. TOKEN_OFFSET
        is subtracted for every model that is not an embedding model.
        """
        limit = next(
            (
                v
                for k, v in self.MODEL_PREFIXES_TO_MAX_TOKENS.items()
                if self.model.startswith(k)
            ),
            None,
        )
        offset = (
            0
            if self.model in self.EMBEDDING_MODELS
            else self.TOKEN_OFFSET
        )
        return (limit if limit else self.DEFAULT_MAX_TOKENS) - offset

    def count_tokens(
        self, text: str | list[dict], model: str | None = None
    ) -> int:
        """
        Counts the number of tokens in the given text.

        If the text is a list of messages, counts the tokens for each
        message using `model` (or the tokenizer's own model when `model` is
        not given). A plain string is always encoded with the tokenizer's
        own encoding; `model` is not consulted in that case.

        Raises:
            NotImplementedError: If a message list is given for a model
                whose message format is not known.
        """
        if not isinstance(text, list):
            return len(self.encoding.encode(text))

        model = model if model else self.model
        try:
            encoding = tiktoken.encoding_for_model(model)
        except KeyError:
            logging.warning(
                "model not found. Using cl100k_base encoding."
            )
            encoding = tiktoken.get_encoding("cl100k_base")

        if model in {
            "gpt-3.5-turbo-0613",
            "gpt-3.5-turbo-16k-0613",
            "gpt-4-0314",
            "gpt-4-32k-0314",
            "gpt-4-0613",
            "gpt-4-32k-0613",
        }:
            tokens_per_message = 3
            tokens_per_name = 1
        elif model == "gpt-3.5-turbo-0301":
            tokens_per_message = 4
            tokens_per_name = -1
        elif "gpt-3.5-turbo" in model or "gpt-35-turbo" in model:
            logging.info(
                "gpt-3.5-turbo may update over time. Returning"
                " num tokens assuming gpt-3.5-turbo-0613."
            )
            return self.count_tokens(
                text, model="gpt-3.5-turbo-0613"
            )
        elif "gpt-4" in model:
            logging.info(
                "gpt-4 may update over time. Returning num tokens"
                " assuming gpt-4-0613."
            )
            return self.count_tokens(text, model="gpt-4-0613")
        else:
            raise NotImplementedError(
                "token_count() is not implemented for model"
                f" {model}. See"
                " https://github.com/openai/openai-python/blob/main/chatml.md"
                " for information on how messages are converted"
                " to tokens."
            )

        num_tokens = 0
        for message in text:
            num_tokens += tokens_per_message
            for key, value in message.items():
                num_tokens += len(encoding.encode(value))
                if key == "name":
                    num_tokens += tokens_per_name
        # Fixed overhead added once per message list.
        num_tokens += 3
        return num_tokens

    def len(self, text: str | list[dict], model: str | None = None):
        """
        Returns the length of the text in tokens.

        Delegates to count_tokens(text, model).
        """
        return self.count_tokens(text, model)
--- a/swarms/tokenizers/r_tokenizers.py
+++ b/swarms/tokenizers/r_tokenizers.py
@ -1,422 +0,0 @@
 # Copyright (c) OpenMMLab. All rights reserved.
 import json
 import os
 import os.path as osp
 from collections import deque
 from typing import List, Optional, Sequence, Union
 import torch
 from swarms.utils.get_logger import get_logger
 class SentencePieceTokenizer:
    """Tokenizer of sentencepiece.

    Args:
        model_file (str): the path of the tokenizer model
    """

    def __init__(self, model_file: str):
        from sentencepiece import SentencePieceProcessor

        self.model = SentencePieceProcessor(model_file=model_file)
        self._prefix_space_tokens = None
        # for stop words
        self._maybe_decode_bytes: bool = None
        # TODO maybe lack a constant.py
        self._indexes_tokens_deque = deque(maxlen=10)
        self.max_indexes_num = 5
        self.logger = get_logger("lmdeploy")

    @property
    def vocab_size(self):
        """vocabulary size."""
        return self.model.vocab_size()

    @property
    def bos_token_id(self):
        """beginning of the sentence token id."""
        return self.model.bos_id()

    @property
    def eos_token_id(self):
        """end of the sentence token id."""
        return self.model.eos_id()

    @property
    def prefix_space_tokens(self):
        """ids of the vocabulary pieces that start with "▁" (computed once)."""
        if self._prefix_space_tokens is None:
            vocab = self.model.IdToPiece(list(range(self.vocab_size)))
            self._prefix_space_tokens = {
                i
                for i, tok in enumerate(vocab)
                if tok.startswith("▁")
            }
        return self._prefix_space_tokens

    def _maybe_add_prefix_space(self, tokens, decoded):
        """maybe add prefix space for incremental decoding."""
        needs_space = (
            tokens
            and not decoded.startswith(" ")
            and tokens[0] in self.prefix_space_tokens
        )
        return " " + decoded if needs_space else decoded

    def indexes_containing_token(self, token: str):
        """Return all the possible indexes, whose decoding output may contain
        the input token."""
        # traversing vocab is time consuming, can not be accelerated with
        # multi threads (computation) or multi process (can't pickle tokenizer)
        # so, we maintain latest 10 stop words and return directly if matched
        for cached_token, cached_indexes in self._indexes_tokens_deque:
            if token == cached_token:
                return cached_indexes
        # ' ' is special: it is searched for as '▁'
        piece = "▁" if token == " " else token
        vocab = self.model.IdToPiece(list(range(self.vocab_size)))
        indexes = [i for i, voc in enumerate(vocab) if piece in voc]
        if len(indexes) > self.max_indexes_num:
            indexes = self.encode(piece, add_bos=False)[-1:]
            self.logger.warning(
                f"There are too many(>{self.max_indexes_num})"
                f" possible indexes may decoding {piece}, we will use"
                f" {indexes} only"
            )
        # Cache under the token the caller passed so the lookup above can
        # match it again; keying by the translated piece never hit for ' '.
        self._indexes_tokens_deque.append((token, indexes))
        return indexes

    def encode(self, s: str, add_bos: bool = True, **kwargs):
        """Tokenize a prompt.

        Args:
            s (str): a prompt
            add_bos (bool): forwarded to the sentencepiece model
        Returns:
            list[int]: token ids
        """
        return self.model.Encode(s, add_bos=add_bos, **kwargs)

    def decode(self, t: Sequence[int], offset: Optional[int] = None):
        """De-tokenize.

        Args:
            t (List[int]): a list of token ids
            offset (int): for incrementally decoding. Default to None, which
                means not applied.
        Returns:
            str: text of decoding tokens
        """
        if isinstance(t, torch.Tensor):
            t = t.tolist()
        t = t[offset:]
        out_string = self.model.Decode(t)
        if offset:
            out_string = self._maybe_add_prefix_space(t, out_string)
        return out_string

    def __call__(self, s: Union[str, Sequence[str]]):
        """Tokenize prompts.

        Args:
            s (str): prompts
        Returns:
            addict.Addict: object whose `input_ids` holds the token ids
        """
        import addict

        input_ids = self.model.Encode(s, add_bos=False, add_eos=False)
        return addict.Addict(input_ids=input_ids)
 class HuggingFaceTokenizer:
    """Tokenizer backed by a HuggingFace `AutoTokenizer`.

    Args:
        model_dir (str): the directory of the tokenizer model
    """

    def __init__(self, model_dir: str):
        from transformers import AutoTokenizer

        model_file = osp.join(model_dir, "tokenizer.model")
        backend_tokenizer_file = osp.join(model_dir, "tokenizer.json")
        model_file_exists = osp.exists(model_file)
        self.logger = get_logger("lmdeploy")
        if (
            not osp.exists(backend_tokenizer_file)
            and model_file_exists
        ):
            self.logger.warning(
                "Can not find tokenizer.json. "
                "It may take long time to initialize the tokenizer."
            )
        # NOTE(security): trust_remote_code=True allows code shipped with
        # the model directory to run; only use with trusted model sources.
        self.model = AutoTokenizer.from_pretrained(
            model_dir, trust_remote_code=True
        )
        self._prefix_space_tokens = None
        # save tokenizer.json to reuse
        if (
            not osp.exists(backend_tokenizer_file)
            and model_file_exists
            and hasattr(self.model, "backend_tokenizer")
            and os.access(model_dir, os.W_OK)
        ):
            self.model.backend_tokenizer.save(backend_tokenizer_file)

        if self.model.eos_token_id is None:
            generation_config_file = osp.join(
                model_dir, "generation_config.json"
            )
            if osp.exists(generation_config_file):
                with open(
                    generation_config_file, encoding="utf-8"
                ) as f:
                    cfg = json.load(f)
                    self.model.eos_token_id = cfg["eos_token_id"]
            elif hasattr(self.model, "eod_id"):  # Qwen remote
                self.model.eos_token_id = self.model.eod_id

        # for stop words
        self._maybe_decode_bytes: bool = None
        # TODO maybe lack a constant.py
        self._indexes_tokens_deque = deque(maxlen=10)
        self.max_indexes_num = 5
        self.token2id = {}

    @property
    def vocab_size(self):
        """vocabulary size."""
        return self.model.vocab_size

    @property
    def bos_token_id(self):
        """beginning of the sentence token id."""
        return self.model.bos_token_id

    @property
    def eos_token_id(self):
        """end of the sentence token id."""
        return self.model.eos_token_id

    @property
    def prefix_space_tokens(self):
        """ids of the vocabulary tokens that start with "▁" (or b" " for
        bytes tokens); computed once."""
        if self._prefix_space_tokens is None:
            vocab = self.model.convert_ids_to_tokens(
                list(range(self.vocab_size))
            )
            self._prefix_space_tokens = {
                i
                for i, tok in enumerate(vocab)
                if tok.startswith(
                    "▁" if isinstance(tok, str) else b" "
                )
            }
        return self._prefix_space_tokens

    def _maybe_add_prefix_space(
        self, tokens: List[int], decoded: str
    ):
        """maybe add prefix space for incremental decoding."""
        needs_space = (
            tokens
            and not decoded.startswith(" ")
            and tokens[0] in self.prefix_space_tokens
        )
        return " " + decoded if needs_space else decoded

    @property
    def maybe_decode_bytes(self):
        """Check if self.model.convert_ids_to_tokens return not a str value."""
        if self._maybe_decode_bytes is None:
            vocab = self.model.convert_ids_to_tokens(
                list(range(self.vocab_size))
            )
            self._maybe_decode_bytes = any(
                not isinstance(tok, str) for tok in vocab
            )
        return self._maybe_decode_bytes

    def indexes_containing_token(self, token: str):
        """Return all the possible indexes, whose decoding output may contain
        the input token."""
        # traversing vocab is time consuming, can not be accelerated with
        # multi threads (computation) or multi process (can't pickle tokenizer)
        # so, we maintain latest 10 stop words and return directly if matched
        for cached_token, cached_indexes in self._indexes_tokens_deque:
            if token == cached_token:
                return cached_indexes
        if not self.token2id:
            # decode is slower than convert_ids_to_tokens
            if self.maybe_decode_bytes:
                self.token2id = {
                    self.model.decode(i): i
                    for i in range(self.vocab_size)
                }
            else:
                self.token2id = {
                    self.model.convert_ids_to_tokens(i): i
                    for i in range(self.vocab_size)
                }
        # ' ' is special: it is searched for as '▁'
        piece = "▁" if token == " " else token
        indexes = [
            i
            for vocab_token, i in self.token2id.items()
            if piece in vocab_token
        ]
        if len(indexes) > self.max_indexes_num:
            indexes = self.encode(piece, add_bos=False)[-1:]
            self.logger.warning(
                f"There are too many(>{self.max_indexes_num})"
                f" possible indexes may decoding {piece}, we will use"
                f" {indexes} only"
            )
        # Cache under the token the caller passed so the lookup above can
        # match it again; keying by the translated piece never hit for ' '.
        self._indexes_tokens_deque.append((token, indexes))
        return indexes

    def encode(self, s: str, add_bos: bool = True, **kwargs):
        """Tokenize a prompt.

        Args:
            s (str): a prompt
            add_bos (bool): when False, a leading bos token id produced by
                the model is removed
        Returns:
            list[int]: token ids
        """
        encoded = self.model.encode(s, **kwargs)
        # in the middle of a session the leading bos token is dropped
        if (
            not add_bos
            and encoded
            and encoded[0] == self.bos_token_id
        ):
            encoded = encoded[1:]
        return encoded

    def decode(self, t: Sequence[int], offset: Optional[int] = None):
        """De-tokenize.

        Args:
            t (List[int]): a list of token ids
            offset (int): for incrementally decoding. Default to None, which
                means not applied.
        Returns:
            str: text of decoding tokens
        """
        t = t[offset:]
        out_string = self.model.decode(t, skip_special_tokens=True)
        if offset:
            out_string = self._maybe_add_prefix_space(t, out_string)
        return out_string

    def __call__(self, s: Union[str, Sequence[str]]):
        """Tokenize prompts.

        Args:
            s (str): prompts
        Returns:
            the result of calling the underlying tokenizer with
            add_special_tokens=False
        """
        return self.model(s, add_special_tokens=False)
 class Tokenizer:
    """Facade that tokenizes prompts or de-tokenizes ids through either a
    sentencepiece or a HuggingFace backend.

    Args:
        model_file (str): path of a `.model` tokenizer file, or of the
            folder that contains `tokenizer.model`
    """

    def __init__(self, model_file: str):
        if not model_file.endswith(".model"):
            # a folder was given: look for the sentencepiece file inside it
            model_folder = model_file
            model_file = osp.join(model_folder, "tokenizer.model")
        else:
            model_folder = osp.dirname(model_file)

        has_model_file = osp.exists(model_file)
        has_config = osp.exists(
            osp.join(model_folder, "tokenizer_config.json")
        )

        self.logger = get_logger("lmdeploy")
        # sentencepiece is used only when its model file exists and no
        # HuggingFace tokenizer config sits next to it
        if has_model_file and not has_config:
            self.model = SentencePieceTokenizer(model_file)
        else:
            self.model = HuggingFaceTokenizer(model_folder)

    @property
    def vocab_size(self):
        """Vocabulary size reported by the backend."""
        backend = self.model
        return backend.vocab_size

    @property
    def bos_token_id(self):
        """Beginning-of-sentence token id reported by the backend."""
        backend = self.model
        return backend.bos_token_id

    @property
    def eos_token_id(self):
        """End-of-sentence token id reported by the backend."""
        backend = self.model
        return backend.eos_token_id

    def encode(self, s: str, add_bos: bool = True, **kwargs):
        """Turn a prompt into token ids.

        Args:
            s (str): a prompt
            add_bos (bool): forwarded to the backend's encode
        Returns:
            list[int]: token ids
        """
        token_ids = self.model.encode(s, add_bos, **kwargs)
        return token_ids

    def decode(self, t: Sequence[int], offset: Optional[int] = None):
        """Turn token ids back into text.

        Args:
            t (List[int]): a list of token ids
            offset (int): for incrementally decoding. Default to None, which
                means not applied.
        Returns:
            str: text of decoding tokens
        """
        text = self.model.decode(t, offset)
        return text

    def __call__(self, s: Union[str, Sequence[str]]):
        """Tokenize prompts by calling the backend directly.

        Args:
            s (str): prompts
        Returns:
            whatever the backend returns for the prompts
        """
        backend = self.model
        return backend(s)

    def indexes_containing_token(self, token):
        """Return the indexes whose decoding output may contain the token;
        an empty list when the token encodes to more than one id."""
        encoded = self.encode(token, add_bos=False)
        if len(encoded) <= 1:
            return self.model.indexes_containing_token(token)
        self.logger.warning(
            f"The token {token}, its length of indexes"
            f" {encoded} is over than 1. Currently, it can not be"
            " used as stop words"
        )
        return []