pull/430/head
Kye 10 months ago
parent 43661cc845
commit f4c6692671

@ -26,7 +26,7 @@ classifiers = [
[tool.poetry.dependencies]
python = ">=3.9,<4.0"
torch = ">=2.1.1,<3.0"
# Widened from the exact pin "4.39.0" to any 4.x at or above 4.39.0;
# the diff accidentally kept both lines, which is an invalid duplicate key.
transformers = ">= 4.39.0, <5.0.0"
asyncio = ">=3.4.3,<4.0"
einops = "0.7.0"
langchain-core = "0.1.33"

@ -1,21 +1,17 @@
from swarms.memory.action_subtask import ActionSubtaskEntry
from swarms.memory.base_db import AbstractDatabase
from swarms.memory.base_vectordb import AbstractVectorDatabase
from swarms.memory.chroma_db import ChromaDB
from swarms.memory.dict_internal_memory import DictInternalMemory
from swarms.memory.dict_shared_memory import DictSharedMemory
from swarms.memory.short_term_memory import ShortTermMemory
from swarms.memory.sqlite import SQLiteDB
from swarms.memory.visual_memory import VisualShortTermMemory
# Public API of swarms.memory: one entry per symbol imported above,
# alphabetized and deduplicated (the previous list repeated
# "AbstractVectorDatabase", "ShortTermMemory" and "VisualShortTermMemory"
# and contained a stray closing bracket mid-list).
__all__ = [
    "AbstractDatabase",
    "AbstractVectorDatabase",
    "ActionSubtaskEntry",
    "ChromaDB",
    "DictInternalMemory",
    "DictSharedMemory",
    "SQLiteDB",
    "ShortTermMemory",
    "VisualShortTermMemory",
]

@ -1,8 +1,6 @@
from swarms.models.base_embedding_model import BaseEmbeddingModel
from swarms.models.base_llm import AbstractLLM # noqa: E402
from swarms.models.base_multimodal_model import BaseMultiModalModel
from swarms.models.biogpt import BioGPT # noqa: E402
from swarms.models.clipq import CLIPQ # noqa: E402
from swarms.models.fire_function import FireFunctionCaller
from swarms.models.fuyu import Fuyu # noqa: E402
from swarms.models.gemini import Gemini # noqa: E402
@ -52,12 +50,7 @@ from swarms.models.types import ( # noqa: E402
TextModality,
VideoModality,
)
# from swarms.models.ultralytics_model import UltralyticsModel
from swarms.models.vilt import Vilt # noqa: E402
from swarms.models.wizard_storytelling import WizardLLMStoryTeller
from swarms.models.zephyr import Zephyr # noqa: E402
from swarms.models.zeroscope import ZeroscopeTTV # noqa: E402
# Public API of swarms.models, alphabetized (ASCII order) and deduplicated.
# The previous list repeated "GPT4VisionAPI", "MPT7B", "Replicate",
# "MultimodalData" and "VideoModality" and contained a stray closing
# bracket mid-list.
# NOTE(review): reconstructed from a garbled diff — confirm each name is
# still imported above (in particular BioGPT and CLIPQ).
__all__ = [
    "AbstractLLM",
    "AudioModality",
    "AzureOpenAI",
    "BaseEmbeddingModel",
    "BaseMultiModalModel",
    "BioGPT",
    "CLIPQ",
    "Cohere",
    "FireFunctionCaller",
    "Fuyu",
    "GPT4VisionAPI",
    "Gemini",
    "HuggingfaceLLM",
    "Idefics",
    "ImageModality",
    "Kosmos",
    "LavaMultiModal",
    "LayoutLMDocumentQA",
    "MPT7B",
    "Mistral",
    "Mixtral",
    "MosaicML",
    "MultimodalData",
    "Nougat",
    "OpenAI",
    "OpenAIChat",
    "OpenAITTS",
    "Petals",
    "QwenVLMultiModal",
    "Replicate",
    "SamplingParams",
    "SamplingType",
    "SegmentAnythingMarkGenerator",
    "TextModality",
    "TimmModel",
    "TogetherLLM",
    "VideoModality",
    "Vilt",
    "WizardLLMStoryTeller",
    "Zephyr",
    "ZeroscopeTTV",
]

@ -1,7 +1,7 @@
from transformers import AutoModelForCausalLM, AutoTokenizer
from swarms.models.base_llm import AbstractLLM
class Petals:
class Petals(AbstractLLM):
"""Petals Bloom models."""
def __init__(

@ -1,40 +0,0 @@
# AnthropicTokenizer
import pytest
from swarms.tokenizers.anthropic_tokenizer import AnthropicTokenizer
def test_post_init():
    """Default construction selects claude-2.1 with a 200k token limit."""
    tok = AnthropicTokenizer()
    assert tok.model == "claude-2.1"
    assert tok.max_tokens == 200000
def test_default_max_tokens():
    """A bare "claude" model reports a 100k default token budget."""
    tok = AnthropicTokenizer(model="claude")
    assert tok.default_max_tokens() == 100000
@pytest.mark.parametrize(
    "model,tokens", [("claude-2.1", 200000), ("claude", 100000)]
)
def test_default_max_tokens_models(model, tokens):
    """default_max_tokens returns the per-model budget for each model name."""
    assert AnthropicTokenizer(model=model).default_max_tokens() == tokens
def test_count_tokens_string():
    """count_tokens on a plain string returns the token total (5 here).

    NOTE(review): the original comment asks for a mocked anthropic client;
    as written this appears to hit the real count_tokens — confirm before CI.
    """
    sample = "This is a test string."
    assert AnthropicTokenizer().count_tokens(sample) == 5
def test_count_tokens_list():
    """A list input is rejected by count_tokens with ValueError."""
    # NOTE(review): original comment asked for a mocked anthropic client.
    words = ["This", "is", "a", "test", "string."]
    tok = AnthropicTokenizer()
    with pytest.raises(ValueError):
        tok.count_tokens(words)

@ -1,46 +0,0 @@
# BaseTokenizer
import pytest
from swarms.tokenizers.base_tokenizer import BaseTokenizer
# 1. Fixture for BaseTokenizer instance.
@pytest.fixture
def base_tokenizer():
    """Provide a BaseTokenizer capped at 100 tokens."""
    return BaseTokenizer(max_tokens=100)
# 2. Tests for __post_init__.
def test_post_init(base_tokenizer):
    """__post_init__ seeds the stop token and stop-sequence defaults."""
    assert base_tokenizer.stop_token == "<|Response|>"
    assert base_tokenizer.stop_sequences == ["<|Response|>"]
# 3. Tests for count_tokens_left with different inputs.
def test_count_tokens_left_with_positive_diff(
    base_tokenizer, monkeypatch
):
    """max_tokens (100) minus a mocked count of 50 leaves 50 tokens."""
    # NOTE(review): the patch target omits the submodule
    # (swarms.tokenizers.base_tokenizer) — confirm it resolves.
    monkeypatch.setattr(
        "swarms.tokenizers.BaseTokenizer.count_tokens",
        lambda self, text: 50,
    )
    assert base_tokenizer.count_tokens_left("some text") == 50
def test_count_tokens_left_with_zero_diff(
    base_tokenizer, monkeypatch
):
    """When the mocked count equals max_tokens, zero tokens remain."""
    monkeypatch.setattr(
        "swarms.tokenizers.BaseTokenizer.count_tokens",
        lambda self, text: 100,
    )
    assert base_tokenizer.count_tokens_left("some text") == 0
# 4. Add tests for count_tokens. This method is an abstract one, so testing it
# will be dependent on the actual implementation in the subclass. Here is just
# a general idea how to test it (we assume that test_count_tokens is implemented in some subclass).
# NOTE(review): no `subclass_tokenizer_instance` fixture is defined in this
# file — pytest will error at collection unless a conftest supplies one; confirm.
def test_count_tokens(subclass_tokenizer_instance):
    assert subclass_tokenizer_instance.count_tokens("some text") == 6

@ -1,37 +0,0 @@
# CohereTokenizer
from unittest.mock import MagicMock
import pytest
from swarms.tokenizers.cohere_tokenizer import CohereTokenizer
@pytest.fixture
def cohere_tokenizer():
    """CohereTokenizer wired to a stub client that tokenizes to 3 tokens."""
    stub_client = MagicMock()
    stub_client.tokenize.return_value.tokens = [
        "token1",
        "token2",
        "token3",
    ]
    return CohereTokenizer(model="<model-name>", client=stub_client)
def test_count_tokens_with_string(cohere_tokenizer):
    """A string input yields the stub client's 3-token count."""
    counted = cohere_tokenizer.count_tokens("valid string")
    assert counted == 3
def test_count_tokens_with_non_string(cohere_tokenizer):
    """Non-string input (a list) is rejected with ValueError."""
    with pytest.raises(ValueError):
        cohere_tokenizer.count_tokens(["invalid", "input"])
def test_count_tokens_with_different_length(cohere_tokenizer):
    """The count tracks however many tokens the client returns."""
    stub = cohere_tokenizer.client
    stub.tokenize.return_value.tokens = [
        "token1",
        "token2",
    ]
    assert cohere_tokenizer.count_tokens("valid string") == 2

@ -1,68 +0,0 @@
# HuggingFaceTokenizer
import os
from unittest.mock import patch
import pytest
from swarms.tokenizers.r_tokenizers import HuggingFaceTokenizer
# Test class setup
@pytest.fixture
def hftokenizer():
    """HuggingFaceTokenizer rooted at ./modeldir under the current CWD."""
    model_dir = os.path.join(os.getcwd(), "modeldir")
    return HuggingFaceTokenizer(model_dir)
# testing __init__
# NOTE(review): the `hftokenizer` fixture constructs the tokenizer BEFORE
# these patches take effect, so the mocked os.path / get_logger cannot have
# influenced __init__; the asserts below look unreliable — confirm intent.
@patch("os.path")
@patch("swarms.tokenizers.get_logger")
def test___init__(mock_get_logger, mock_path, hftokenizer):
    mock_path.exists.return_value = False
    mock_path.join.return_value = "dummy_path"
    mock_get_logger.return_value = "dummy_logger"
    assert hftokenizer.model_dir == "dummy_path"
    assert hftokenizer.logger == "dummy_logger"
    assert hftokenizer._maybe_decode_bytes is False
    assert hftokenizer._prefix_space_tokens is None
# testing vocab_size property
def test_vocab_size(hftokenizer):
    """Vocabulary size matches the expected value for the model dir."""
    expected_vocab = 30522
    assert hftokenizer.vocab_size == expected_vocab
# testing bos_token_id property
def test_bos_token_id(hftokenizer):
    """Beginning-of-sequence token id is reported as 101."""
    expected_bos = 101
    assert hftokenizer.bos_token_id == expected_bos
# testing eos_token_id property
def test_eos_token_id(hftokenizer):
    """End-of-sequence token id is reported as 102."""
    expected_eos = 102
    assert hftokenizer.eos_token_id == expected_eos
# testing prefix_space_tokens property
def test_prefix_space_tokens(hftokenizer):
    """At least one prefix-space token is reported."""
    tokens = hftokenizer.prefix_space_tokens
    assert len(tokens) > 0
# testing _maybe_add_prefix_space method
def test__maybe_add_prefix_space(hftokenizer):
    """Text already starting with a space is kept; unspaced text gains one."""
    already_spaced = hftokenizer._maybe_add_prefix_space(
        [101, 2003, 2010, 2050, 2001, 2339], " is why"
    )
    assert already_spaced == " is why"
    unspaced = hftokenizer._maybe_add_prefix_space(
        [2003, 2010, 2050, 2001, 2339], "is why"
    )
    assert unspaced == " is why"
# continuing tests for other methods...

@ -1,48 +0,0 @@
# OpenAITokenizer
import pytest
import swarms.tokenizers.openai_tokenizers as tokenizers
@pytest.fixture()
def openai_tokenizer():
    """OpenAITokenizer bound to the "gpt-3" model name."""
    return tokenizers.OpenAITokenizer("gpt-3")
def test_init(openai_tokenizer):
    """The model name passed at construction is retained."""
    expected_model = "gpt-3"
    assert openai_tokenizer.model == expected_model
def test_default_max_tokens(openai_tokenizer):
    """gpt-3 reports a 4096-token default budget."""
    budget = openai_tokenizer.default_max_tokens()
    assert budget == 4096
@pytest.mark.parametrize(
    "text, expected_output", [("Hello, world!", 3), (["Hello"], 4)]
)
def test_count_tokens_single(openai_tokenizer, text, expected_output):
    """count_tokens handles a single string or a one-element list."""
    counted = openai_tokenizer.count_tokens(text, "gpt-3")
    assert counted == expected_output
@pytest.mark.parametrize(
    "texts, expected_output",
    [(["Hello, world!", "This is a test"], 6), (["Hello"], 4)],
)
def test_count_tokens_multiple(
    openai_tokenizer, texts, expected_output
):
    """count_tokens sums tokens across a list of strings."""
    counted = openai_tokenizer.count_tokens(texts, "gpt-3")
    assert counted == expected_output
@pytest.mark.parametrize(
    "text, expected_output", [("Hello, world!", 3), (["Hello"], 4)]
)
def test_len(openai_tokenizer, text, expected_output):
    """len() mirrors count_tokens for the same input."""
    length = openai_tokenizer.len(text, "gpt-3")
    assert length == expected_output

@ -1,82 +0,0 @@
# Tokenizer
from unittest.mock import patch
from swarms.tokenizers.r_tokenizers import Tokenizer
def test_initializer_existing_model_file():
    """A path that exists is wrapped as a SentencePiece model."""
    with patch("os.path.exists", return_value=True), patch(
        "swarms.tokenizers.SentencePieceTokenizer"
    ) as sp_cls:
        tokenizer = Tokenizer("tokenizers/my_model.model")
    sp_cls.assert_called_with("tokenizers/my_model.model")
    assert tokenizer.model == sp_cls.return_value
def test_initializer_model_folder():
    """A missing file but existing folder falls back to HuggingFace."""
    with patch("os.path.exists", side_effect=[False, True]), patch(
        "swarms.tokenizers.HuggingFaceTokenizer"
    ) as hf_cls:
        tokenizer = Tokenizer("my_model_directory")
    hf_cls.assert_called_with("my_model_directory")
    assert tokenizer.model == hf_cls.return_value
def test_vocab_size():
    """vocab_size is forwarded from the wrapped model."""
    with patch(
        "swarms.tokenizers.SentencePieceTokenizer"
    ) as sp_cls:
        wrapped = sp_cls.return_value
        tokenizer = Tokenizer("tokenizers/my_model.model")
        assert tokenizer.vocab_size == wrapped.vocab_size
def test_bos_token_id():
    """bos_token_id is forwarded from the wrapped model."""
    with patch(
        "swarms.tokenizers.SentencePieceTokenizer"
    ) as sp_cls:
        wrapped = sp_cls.return_value
        tokenizer = Tokenizer("tokenizers/my_model.model")
        assert tokenizer.bos_token_id == wrapped.bos_token_id
def test_encode():
    """encode delegates to the wrapped model's encode."""
    with patch(
        "swarms.tokenizers.SentencePieceTokenizer"
    ) as sp_cls:
        tokenizer = Tokenizer("tokenizers/my_model.model")
        encoded = tokenizer.encode("hello")
        assert encoded == sp_cls.return_value.encode.return_value
def test_decode():
    """decode delegates to the wrapped model's decode."""
    with patch(
        "swarms.tokenizers.SentencePieceTokenizer"
    ) as sp_cls:
        tokenizer = Tokenizer("tokenizers/my_model.model")
        decoded = tokenizer.decode([1, 2, 3])
        assert decoded == sp_cls.return_value.decode.return_value
def test_call():
    """Calling the tokenizer delegates to the wrapped model's __call__."""
    with patch(
        "swarms.tokenizers.SentencePieceTokenizer"
    ) as sp_cls:
        tokenizer = Tokenizer("tokenizers/my_model.model")
        result = tokenizer("hello")
        assert result == sp_cls.return_value.__call__.return_value
# More tests can be added here
Loading…
Cancel
Save