parent
43661cc845
commit
f4c6692671
@ -1,21 +1,17 @@
|
|||||||
from swarms.memory.action_subtask import ActionSubtaskEntry
|
from swarms.memory.action_subtask import ActionSubtaskEntry
|
||||||
from swarms.memory.base_db import AbstractDatabase
|
from swarms.memory.base_db import AbstractDatabase
|
||||||
from swarms.memory.base_vectordb import AbstractVectorDatabase
|
from swarms.memory.base_vectordb import AbstractVectorDatabase
|
||||||
from swarms.memory.chroma_db import ChromaDB
|
|
||||||
from swarms.memory.dict_internal_memory import DictInternalMemory
|
from swarms.memory.dict_internal_memory import DictInternalMemory
|
||||||
from swarms.memory.dict_shared_memory import DictSharedMemory
|
from swarms.memory.dict_shared_memory import DictSharedMemory
|
||||||
from swarms.memory.short_term_memory import ShortTermMemory
|
from swarms.memory.short_term_memory import ShortTermMemory
|
||||||
from swarms.memory.sqlite import SQLiteDB
|
|
||||||
from swarms.memory.visual_memory import VisualShortTermMemory
|
from swarms.memory.visual_memory import VisualShortTermMemory
|
||||||
|
|
||||||
__all__ = [
|
__all__ = [
|
||||||
"AbstractVectorDatabase",
|
|
||||||
"AbstractDatabase",
|
"AbstractDatabase",
|
||||||
"ShortTermMemory",
|
"AbstractVectorDatabase",
|
||||||
"SQLiteDB",
|
|
||||||
"VisualShortTermMemory",
|
|
||||||
"ActionSubtaskEntry",
|
"ActionSubtaskEntry",
|
||||||
"ChromaDB",
|
|
||||||
"DictInternalMemory",
|
"DictInternalMemory",
|
||||||
"DictSharedMemory",
|
"DictSharedMemory",
|
||||||
]
|
"ShortTermMemory",
|
||||||
|
"VisualShortTermMemory",
|
||||||
|
]
|
@ -1,40 +0,0 @@
|
|||||||
# AnthropicTokenizer
|
|
||||||
|
|
||||||
import pytest
|
|
||||||
|
|
||||||
from swarms.tokenizers.anthropic_tokenizer import AnthropicTokenizer
|
|
||||||
|
|
||||||
|
|
||||||
def test_post_init():
|
|
||||||
tokenizer = AnthropicTokenizer()
|
|
||||||
assert tokenizer.model == "claude-2.1"
|
|
||||||
assert tokenizer.max_tokens == 200000
|
|
||||||
|
|
||||||
|
|
||||||
def test_default_max_tokens():
|
|
||||||
tokenizer = AnthropicTokenizer(model="claude")
|
|
||||||
assert tokenizer.default_max_tokens() == 100000
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize(
|
|
||||||
"model,tokens", [("claude-2.1", 200000), ("claude", 100000)]
|
|
||||||
)
|
|
||||||
def test_default_max_tokens_models(model, tokens):
|
|
||||||
tokenizer = AnthropicTokenizer(model=model)
|
|
||||||
assert tokenizer.default_max_tokens() == tokens
|
|
||||||
|
|
||||||
|
|
||||||
def test_count_tokens_string():
|
|
||||||
# Insert mock instantiation of anthropic client and its count_tokens function
|
|
||||||
text = "This is a test string."
|
|
||||||
tokenizer = AnthropicTokenizer()
|
|
||||||
tokens = tokenizer.count_tokens(text)
|
|
||||||
assert tokens == 5
|
|
||||||
|
|
||||||
|
|
||||||
def test_count_tokens_list():
|
|
||||||
# Insert mock instantiation of anthropic client and its count_tokens function
|
|
||||||
text = ["This", "is", "a", "test", "string."]
|
|
||||||
tokenizer = AnthropicTokenizer()
|
|
||||||
with pytest.raises(ValueError):
|
|
||||||
tokenizer.count_tokens(text)
|
|
@ -1,46 +0,0 @@
|
|||||||
# BaseTokenizer
|
|
||||||
|
|
||||||
import pytest
|
|
||||||
|
|
||||||
from swarms.tokenizers.base_tokenizer import BaseTokenizer
|
|
||||||
|
|
||||||
|
|
||||||
# 1. Fixture for BaseTokenizer instance.
|
|
||||||
@pytest.fixture
|
|
||||||
def base_tokenizer():
|
|
||||||
return BaseTokenizer(max_tokens=100)
|
|
||||||
|
|
||||||
|
|
||||||
# 2. Tests for __post_init__.
|
|
||||||
def test_post_init(base_tokenizer):
|
|
||||||
assert base_tokenizer.stop_sequences == ["<|Response|>"]
|
|
||||||
assert base_tokenizer.stop_token == "<|Response|>"
|
|
||||||
|
|
||||||
|
|
||||||
# 3. Tests for count_tokens_left with different inputs.
|
|
||||||
def test_count_tokens_left_with_positive_diff(
|
|
||||||
base_tokenizer, monkeypatch
|
|
||||||
):
|
|
||||||
# Mocking count_tokens to return a specific value
|
|
||||||
monkeypatch.setattr(
|
|
||||||
"swarms.tokenizers.BaseTokenizer.count_tokens",
|
|
||||||
lambda x, y: 50,
|
|
||||||
)
|
|
||||||
assert base_tokenizer.count_tokens_left("some text") == 50
|
|
||||||
|
|
||||||
|
|
||||||
def test_count_tokens_left_with_zero_diff(
|
|
||||||
base_tokenizer, monkeypatch
|
|
||||||
):
|
|
||||||
monkeypatch.setattr(
|
|
||||||
"swarms.tokenizers.BaseTokenizer.count_tokens",
|
|
||||||
lambda x, y: 100,
|
|
||||||
)
|
|
||||||
assert base_tokenizer.count_tokens_left("some text") == 0
|
|
||||||
|
|
||||||
|
|
||||||
# 4. Add tests for count_tokens. This method is an abstract one, so testing it
|
|
||||||
# will be dependent on the actual implementation in the subclass. Here is just
|
|
||||||
# a general idea how to test it (we assume that test_count_tokens is implemented in some subclass).
|
|
||||||
def test_count_tokens(subclass_tokenizer_instance):
|
|
||||||
assert subclass_tokenizer_instance.count_tokens("some text") == 6
|
|
@ -1,37 +0,0 @@
|
|||||||
# CohereTokenizer
|
|
||||||
|
|
||||||
from unittest.mock import MagicMock
|
|
||||||
|
|
||||||
import pytest
|
|
||||||
|
|
||||||
from swarms.tokenizers.cohere_tokenizer import CohereTokenizer
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture
|
|
||||||
def cohere_tokenizer():
|
|
||||||
mock_client = MagicMock()
|
|
||||||
mock_client.tokenize.return_value.tokens = [
|
|
||||||
"token1",
|
|
||||||
"token2",
|
|
||||||
"token3",
|
|
||||||
]
|
|
||||||
return CohereTokenizer(model="<model-name>", client=mock_client)
|
|
||||||
|
|
||||||
|
|
||||||
def test_count_tokens_with_string(cohere_tokenizer):
|
|
||||||
tokens_count = cohere_tokenizer.count_tokens("valid string")
|
|
||||||
assert tokens_count == 3
|
|
||||||
|
|
||||||
|
|
||||||
def test_count_tokens_with_non_string(cohere_tokenizer):
|
|
||||||
with pytest.raises(ValueError):
|
|
||||||
cohere_tokenizer.count_tokens(["invalid", "input"])
|
|
||||||
|
|
||||||
|
|
||||||
def test_count_tokens_with_different_length(cohere_tokenizer):
|
|
||||||
cohere_tokenizer.client.tokenize.return_value.tokens = [
|
|
||||||
"token1",
|
|
||||||
"token2",
|
|
||||||
]
|
|
||||||
tokens_count = cohere_tokenizer.count_tokens("valid string")
|
|
||||||
assert tokens_count == 2
|
|
@ -1,68 +0,0 @@
|
|||||||
# HuggingFaceTokenizer
|
|
||||||
|
|
||||||
import os
|
|
||||||
from unittest.mock import patch
|
|
||||||
|
|
||||||
import pytest
|
|
||||||
|
|
||||||
from swarms.tokenizers.r_tokenizers import HuggingFaceTokenizer
|
|
||||||
|
|
||||||
|
|
||||||
# Test class setup
|
|
||||||
@pytest.fixture
|
|
||||||
def hftokenizer():
|
|
||||||
dir_path = os.path.join(os.getcwd(), "modeldir")
|
|
||||||
tokenizer = HuggingFaceTokenizer(dir_path)
|
|
||||||
return tokenizer
|
|
||||||
|
|
||||||
|
|
||||||
# testing __init__
|
|
||||||
@patch("os.path")
|
|
||||||
@patch("swarms.tokenizers.get_logger")
|
|
||||||
def test___init__(mock_get_logger, mock_path, hftokenizer):
|
|
||||||
mock_path.exists.return_value = False
|
|
||||||
mock_path.join.return_value = "dummy_path"
|
|
||||||
mock_get_logger.return_value = "dummy_logger"
|
|
||||||
assert hftokenizer.model_dir == "dummy_path"
|
|
||||||
assert hftokenizer.logger == "dummy_logger"
|
|
||||||
assert hftokenizer._maybe_decode_bytes is False
|
|
||||||
assert hftokenizer._prefix_space_tokens is None
|
|
||||||
|
|
||||||
|
|
||||||
# testing vocab_size property
|
|
||||||
def test_vocab_size(hftokenizer):
|
|
||||||
assert hftokenizer.vocab_size == 30522
|
|
||||||
|
|
||||||
|
|
||||||
# testing bos_token_id property
|
|
||||||
def test_bos_token_id(hftokenizer):
|
|
||||||
assert hftokenizer.bos_token_id == 101
|
|
||||||
|
|
||||||
|
|
||||||
# testing eos_token_id property
|
|
||||||
def test_eos_token_id(hftokenizer):
|
|
||||||
assert hftokenizer.eos_token_id == 102
|
|
||||||
|
|
||||||
|
|
||||||
# testing prefix_space_tokens property
|
|
||||||
def test_prefix_space_tokens(hftokenizer):
|
|
||||||
assert len(hftokenizer.prefix_space_tokens) > 0
|
|
||||||
|
|
||||||
|
|
||||||
# testing _maybe_add_prefix_space method
|
|
||||||
def test__maybe_add_prefix_space(hftokenizer):
|
|
||||||
assert (
|
|
||||||
hftokenizer._maybe_add_prefix_space(
|
|
||||||
[101, 2003, 2010, 2050, 2001, 2339], " is why"
|
|
||||||
)
|
|
||||||
== " is why"
|
|
||||||
)
|
|
||||||
assert (
|
|
||||||
hftokenizer._maybe_add_prefix_space(
|
|
||||||
[2003, 2010, 2050, 2001, 2339], "is why"
|
|
||||||
)
|
|
||||||
== " is why"
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
# continuing tests for other methods...
|
|
@ -1,48 +0,0 @@
|
|||||||
# OpenAITokenizer
|
|
||||||
|
|
||||||
import pytest
|
|
||||||
|
|
||||||
import swarms.tokenizers.openai_tokenizers as tokenizers
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture()
|
|
||||||
def openai_tokenizer():
|
|
||||||
return tokenizers.OpenAITokenizer("gpt-3")
|
|
||||||
|
|
||||||
|
|
||||||
def test_init(openai_tokenizer):
|
|
||||||
assert openai_tokenizer.model == "gpt-3"
|
|
||||||
|
|
||||||
|
|
||||||
def test_default_max_tokens(openai_tokenizer):
|
|
||||||
assert openai_tokenizer.default_max_tokens() == 4096
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize(
|
|
||||||
"text, expected_output", [("Hello, world!", 3), (["Hello"], 4)]
|
|
||||||
)
|
|
||||||
def test_count_tokens_single(openai_tokenizer, text, expected_output):
|
|
||||||
assert (
|
|
||||||
openai_tokenizer.count_tokens(text, "gpt-3")
|
|
||||||
== expected_output
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize(
|
|
||||||
"texts, expected_output",
|
|
||||||
[(["Hello, world!", "This is a test"], 6), (["Hello"], 4)],
|
|
||||||
)
|
|
||||||
def test_count_tokens_multiple(
|
|
||||||
openai_tokenizer, texts, expected_output
|
|
||||||
):
|
|
||||||
assert (
|
|
||||||
openai_tokenizer.count_tokens(texts, "gpt-3")
|
|
||||||
== expected_output
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize(
|
|
||||||
"text, expected_output", [("Hello, world!", 3), (["Hello"], 4)]
|
|
||||||
)
|
|
||||||
def test_len(openai_tokenizer, text, expected_output):
|
|
||||||
assert openai_tokenizer.len(text, "gpt-3") == expected_output
|
|
@ -1 +0,0 @@
|
|||||||
# SentencePieceTokenizer
|
|
@ -1,82 +0,0 @@
|
|||||||
# Tokenizer
|
|
||||||
|
|
||||||
from unittest.mock import patch
|
|
||||||
|
|
||||||
from swarms.tokenizers.r_tokenizers import Tokenizer
|
|
||||||
|
|
||||||
|
|
||||||
def test_initializer_existing_model_file():
|
|
||||||
with patch("os.path.exists", return_value=True):
|
|
||||||
with patch(
|
|
||||||
"swarms.tokenizers.SentencePieceTokenizer"
|
|
||||||
) as mock_model:
|
|
||||||
tokenizer = Tokenizer("tokenizers/my_model.model")
|
|
||||||
mock_model.assert_called_with("tokenizers/my_model.model")
|
|
||||||
assert tokenizer.model == mock_model.return_value
|
|
||||||
|
|
||||||
|
|
||||||
def test_initializer_model_folder():
|
|
||||||
with patch("os.path.exists", side_effect=[False, True]):
|
|
||||||
with patch(
|
|
||||||
"swarms.tokenizers.HuggingFaceTokenizer"
|
|
||||||
) as mock_model:
|
|
||||||
tokenizer = Tokenizer("my_model_directory")
|
|
||||||
mock_model.assert_called_with("my_model_directory")
|
|
||||||
assert tokenizer.model == mock_model.return_value
|
|
||||||
|
|
||||||
|
|
||||||
def test_vocab_size():
|
|
||||||
with patch(
|
|
||||||
"swarms.tokenizers.SentencePieceTokenizer"
|
|
||||||
) as mock_model:
|
|
||||||
tokenizer = Tokenizer("tokenizers/my_model.model")
|
|
||||||
assert (
|
|
||||||
tokenizer.vocab_size == mock_model.return_value.vocab_size
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def test_bos_token_id():
|
|
||||||
with patch(
|
|
||||||
"swarms.tokenizers.SentencePieceTokenizer"
|
|
||||||
) as mock_model:
|
|
||||||
tokenizer = Tokenizer("tokenizers/my_model.model")
|
|
||||||
assert (
|
|
||||||
tokenizer.bos_token_id
|
|
||||||
== mock_model.return_value.bos_token_id
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def test_encode():
|
|
||||||
with patch(
|
|
||||||
"swarms.tokenizers.SentencePieceTokenizer"
|
|
||||||
) as mock_model:
|
|
||||||
tokenizer = Tokenizer("tokenizers/my_model.model")
|
|
||||||
assert (
|
|
||||||
tokenizer.encode("hello")
|
|
||||||
== mock_model.return_value.encode.return_value
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def test_decode():
|
|
||||||
with patch(
|
|
||||||
"swarms.tokenizers.SentencePieceTokenizer"
|
|
||||||
) as mock_model:
|
|
||||||
tokenizer = Tokenizer("tokenizers/my_model.model")
|
|
||||||
assert (
|
|
||||||
tokenizer.decode([1, 2, 3])
|
|
||||||
== mock_model.return_value.decode.return_value
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def test_call():
|
|
||||||
with patch(
|
|
||||||
"swarms.tokenizers.SentencePieceTokenizer"
|
|
||||||
) as mock_model:
|
|
||||||
tokenizer = Tokenizer("tokenizers/my_model.model")
|
|
||||||
assert (
|
|
||||||
tokenizer("hello")
|
|
||||||
== mock_model.return_value.__call__.return_value
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
# More tests can be added here
|
|
Loading…
Reference in new issue