parent 43661cc845
commit f4c6692671
@@ -1,21 +1,17 @@
from swarms.memory.action_subtask import ActionSubtaskEntry
from swarms.memory.base_db import AbstractDatabase
from swarms.memory.base_vectordb import AbstractVectorDatabase
from swarms.memory.chroma_db import ChromaDB
from swarms.memory.dict_internal_memory import DictInternalMemory
from swarms.memory.dict_shared_memory import DictSharedMemory
from swarms.memory.short_term_memory import ShortTermMemory
from swarms.memory.sqlite import SQLiteDB
from swarms.memory.visual_memory import VisualShortTermMemory

__all__ = [
    "AbstractVectorDatabase",
    "AbstractDatabase",
    "ShortTermMemory",
    "SQLiteDB",
    "VisualShortTermMemory",
    "AbstractVectorDatabase",
    "ActionSubtaskEntry",
    "ChromaDB",
    "DictInternalMemory",
    "DictSharedMemory",
]
    "ShortTermMemory",
    "VisualShortTermMemory",
]
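Rendered flat, this hunk interleaves the removed and added lines of the export list, which is why several names appear twice and two closing brackets show up. For reference only, a deduplicated `__all__` covering each of the nine imports exactly once might look as follows; the ordering is illustrative and not taken from the commit:

__all__ = [
    "ActionSubtaskEntry",
    "AbstractDatabase",
    "AbstractVectorDatabase",
    "ChromaDB",
    "DictInternalMemory",
    "DictSharedMemory",
    "ShortTermMemory",
    "SQLiteDB",
    "VisualShortTermMemory",
]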
@@ -1,40 +0,0 @@
# AnthropicTokenizer

import pytest

from swarms.tokenizers.anthropic_tokenizer import AnthropicTokenizer


def test_post_init():
    tokenizer = AnthropicTokenizer()
    assert tokenizer.model == "claude-2.1"
    assert tokenizer.max_tokens == 200000


def test_default_max_tokens():
    tokenizer = AnthropicTokenizer(model="claude")
    assert tokenizer.default_max_tokens() == 100000


@pytest.mark.parametrize(
    "model,tokens", [("claude-2.1", 200000), ("claude", 100000)]
)
def test_default_max_tokens_models(model, tokens):
    tokenizer = AnthropicTokenizer(model=model)
    assert tokenizer.default_max_tokens() == tokens


def test_count_tokens_string():
    # Insert mock instantiation of anthropic client and its count_tokens function
    text = "This is a test string."
    tokenizer = AnthropicTokenizer()
    tokens = tokenizer.count_tokens(text)
    assert tokens == 5


def test_count_tokens_list():
    # Insert mock instantiation of anthropic client and its count_tokens function
    text = ["This", "is", "a", "test", "string."]
    tokenizer = AnthropicTokenizer()
    with pytest.raises(ValueError):
        tokenizer.count_tokens(text)
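The two count_tokens tests above end with a placeholder comment instead of an actual mock, so as written they would call the real Anthropic API. A minimal sketch of the missing piece, assuming the tokenizer stores its Anthropic client in a `client` attribute whose `count_tokens` method returns the count (both names are assumptions, not taken from this diff):

from unittest.mock import MagicMock

from swarms.tokenizers.anthropic_tokenizer import AnthropicTokenizer


def test_count_tokens_string_mocked():
    tokenizer = AnthropicTokenizer()
    # Assumed attribute: swap the underlying Anthropic client for a mock so no
    # network call is made and the count is deterministic.
    tokenizer.client = MagicMock()
    tokenizer.client.count_tokens.return_value = 5
    assert tokenizer.count_tokens("This is a test string.") == 5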
@@ -1,46 +0,0 @@
# BaseTokenizer

import pytest

from swarms.tokenizers.base_tokenizer import BaseTokenizer


# 1. Fixture for BaseTokenizer instance.
@pytest.fixture
def base_tokenizer():
    return BaseTokenizer(max_tokens=100)


# 2. Tests for __post_init__.
def test_post_init(base_tokenizer):
    assert base_tokenizer.stop_sequences == ["<|Response|>"]
    assert base_tokenizer.stop_token == "<|Response|>"


# 3. Tests for count_tokens_left with different inputs.
def test_count_tokens_left_with_positive_diff(
    base_tokenizer, monkeypatch
):
    # Mocking count_tokens to return a specific value
    monkeypatch.setattr(
        "swarms.tokenizers.BaseTokenizer.count_tokens",
        lambda x, y: 50,
    )
    assert base_tokenizer.count_tokens_left("some text") == 50


def test_count_tokens_left_with_zero_diff(
    base_tokenizer, monkeypatch
):
    monkeypatch.setattr(
        "swarms.tokenizers.BaseTokenizer.count_tokens",
        lambda x, y: 100,
    )
    assert base_tokenizer.count_tokens_left("some text") == 0


# 4. Add tests for count_tokens. This method is an abstract one, so testing it
# will be dependent on the actual implementation in the subclass. Here is just
# a general idea how to test it (we assume that test_count_tokens is implemented in some subclass).
def test_count_tokens(subclass_tokenizer_instance):
    assert subclass_tokenizer_instance.count_tokens("some text") == 6
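The last test above relies on a `subclass_tokenizer_instance` fixture that the file never defines, since `count_tokens` is abstract on `BaseTokenizer`. A minimal sketch of one way to make it concrete, assuming `count_tokens` is the only method a subclass must override (an assumption):

from swarms.tokenizers.base_tokenizer import BaseTokenizer


class WordCountTokenizer(BaseTokenizer):
    # Hypothetical subclass for testing: a "token" is a whitespace-separated word.
    def count_tokens(self, text: str) -> int:
        return len(text.split())


def test_count_tokens_subclass():
    tokenizer = WordCountTokenizer(max_tokens=100)
    # Six words -> six tokens under this toy scheme.
    assert tokenizer.count_tokens("some text split into six words") == 6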
@@ -1,37 +0,0 @@
# CohereTokenizer

from unittest.mock import MagicMock

import pytest

from swarms.tokenizers.cohere_tokenizer import CohereTokenizer


@pytest.fixture
def cohere_tokenizer():
    mock_client = MagicMock()
    mock_client.tokenize.return_value.tokens = [
        "token1",
        "token2",
        "token3",
    ]
    return CohereTokenizer(model="<model-name>", client=mock_client)


def test_count_tokens_with_string(cohere_tokenizer):
    tokens_count = cohere_tokenizer.count_tokens("valid string")
    assert tokens_count == 3


def test_count_tokens_with_non_string(cohere_tokenizer):
    with pytest.raises(ValueError):
        cohere_tokenizer.count_tokens(["invalid", "input"])


def test_count_tokens_with_different_length(cohere_tokenizer):
    cohere_tokenizer.client.tokenize.return_value.tokens = [
        "token1",
        "token2",
    ]
    tokens_count = cohere_tokenizer.count_tokens("valid string")
    assert tokens_count == 2
@@ -1,68 +0,0 @@
# HuggingFaceTokenizer

import os
from unittest.mock import patch

import pytest

from swarms.tokenizers.r_tokenizers import HuggingFaceTokenizer


# Test class setup
@pytest.fixture
def hftokenizer():
    dir_path = os.path.join(os.getcwd(), "modeldir")
    tokenizer = HuggingFaceTokenizer(dir_path)
    return tokenizer


# testing __init__
@patch("os.path")
@patch("swarms.tokenizers.get_logger")
def test___init__(mock_get_logger, mock_path, hftokenizer):
    mock_path.exists.return_value = False
    mock_path.join.return_value = "dummy_path"
    mock_get_logger.return_value = "dummy_logger"
    assert hftokenizer.model_dir == "dummy_path"
    assert hftokenizer.logger == "dummy_logger"
    assert hftokenizer._maybe_decode_bytes is False
    assert hftokenizer._prefix_space_tokens is None


# testing vocab_size property
def test_vocab_size(hftokenizer):
    assert hftokenizer.vocab_size == 30522


# testing bos_token_id property
def test_bos_token_id(hftokenizer):
    assert hftokenizer.bos_token_id == 101


# testing eos_token_id property
def test_eos_token_id(hftokenizer):
    assert hftokenizer.eos_token_id == 102


# testing prefix_space_tokens property
def test_prefix_space_tokens(hftokenizer):
    assert len(hftokenizer.prefix_space_tokens) > 0


# testing _maybe_add_prefix_space method
def test__maybe_add_prefix_space(hftokenizer):
    assert (
        hftokenizer._maybe_add_prefix_space(
            [101, 2003, 2010, 2050, 2001, 2339], " is why"
        )
        == " is why"
    )
    assert (
        hftokenizer._maybe_add_prefix_space(
            [2003, 2010, 2050, 2001, 2339], "is why"
        )
        == " is why"
    )


# continuing tests for other methods...
@@ -1,48 +0,0 @@
# OpenAITokenizer

import pytest

import swarms.tokenizers.openai_tokenizers as tokenizers


@pytest.fixture()
def openai_tokenizer():
    return tokenizers.OpenAITokenizer("gpt-3")


def test_init(openai_tokenizer):
    assert openai_tokenizer.model == "gpt-3"


def test_default_max_tokens(openai_tokenizer):
    assert openai_tokenizer.default_max_tokens() == 4096


@pytest.mark.parametrize(
    "text, expected_output", [("Hello, world!", 3), (["Hello"], 4)]
)
def test_count_tokens_single(openai_tokenizer, text, expected_output):
    assert (
        openai_tokenizer.count_tokens(text, "gpt-3")
        == expected_output
    )


@pytest.mark.parametrize(
    "texts, expected_output",
    [(["Hello, world!", "This is a test"], 6), (["Hello"], 4)],
)
def test_count_tokens_multiple(
    openai_tokenizer, texts, expected_output
):
    assert (
        openai_tokenizer.count_tokens(texts, "gpt-3")
        == expected_output
    )


@pytest.mark.parametrize(
    "text, expected_output", [("Hello, world!", 3), (["Hello"], 4)]
)
def test_len(openai_tokenizer, text, expected_output):
    assert openai_tokenizer.len(text, "gpt-3") == expected_output
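The fixed expected counts above are what the test suite asserts for the "gpt-3" model string; if OpenAITokenizer wraps tiktoken, as its per-model argument suggests (an assumption, not confirmed by this diff), equivalent counts can be checked directly:

import tiktoken

# "gpt-3" itself may not be recognized by tiktoken; a concrete model name such
# as "gpt-3.5-turbo" is used here purely for illustration.
encoding = tiktoken.encoding_for_model("gpt-3.5-turbo")
print(len(encoding.encode("Hello, world!")))  # number of BPE tokens in the string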
@@ -1 +0,0 @@
# SentencePieceTokenizer
@@ -1,82 +0,0 @@
# Tokenizer

from unittest.mock import patch

from swarms.tokenizers.r_tokenizers import Tokenizer


def test_initializer_existing_model_file():
    with patch("os.path.exists", return_value=True):
        with patch(
            "swarms.tokenizers.SentencePieceTokenizer"
        ) as mock_model:
            tokenizer = Tokenizer("tokenizers/my_model.model")
            mock_model.assert_called_with("tokenizers/my_model.model")
            assert tokenizer.model == mock_model.return_value


def test_initializer_model_folder():
    with patch("os.path.exists", side_effect=[False, True]):
        with patch(
            "swarms.tokenizers.HuggingFaceTokenizer"
        ) as mock_model:
            tokenizer = Tokenizer("my_model_directory")
            mock_model.assert_called_with("my_model_directory")
            assert tokenizer.model == mock_model.return_value


def test_vocab_size():
    with patch(
        "swarms.tokenizers.SentencePieceTokenizer"
    ) as mock_model:
        tokenizer = Tokenizer("tokenizers/my_model.model")
        assert (
            tokenizer.vocab_size == mock_model.return_value.vocab_size
        )


def test_bos_token_id():
    with patch(
        "swarms.tokenizers.SentencePieceTokenizer"
    ) as mock_model:
        tokenizer = Tokenizer("tokenizers/my_model.model")
        assert (
            tokenizer.bos_token_id
            == mock_model.return_value.bos_token_id
        )


def test_encode():
    with patch(
        "swarms.tokenizers.SentencePieceTokenizer"
    ) as mock_model:
        tokenizer = Tokenizer("tokenizers/my_model.model")
        assert (
            tokenizer.encode("hello")
            == mock_model.return_value.encode.return_value
        )


def test_decode():
    with patch(
        "swarms.tokenizers.SentencePieceTokenizer"
    ) as mock_model:
        tokenizer = Tokenizer("tokenizers/my_model.model")
        assert (
            tokenizer.decode([1, 2, 3])
            == mock_model.return_value.decode.return_value
        )


def test_call():
    with patch(
        "swarms.tokenizers.SentencePieceTokenizer"
    ) as mock_model:
        tokenizer = Tokenizer("tokenizers/my_model.model")
        assert (
            tokenizer("hello")
            == mock_model.return_value.__call__.return_value
        )


# More tests can be added here
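The two initializer tests above pin down the routing behaviour of Tokenizer: a path to an existing .model file is handed to SentencePieceTokenizer, while a model directory falls through to HuggingFaceTokenizer. A minimal sketch of that kind of dispatch, reconstructed from the tests rather than from the actual r_tokenizers source (the exact checks are assumptions):

import os

from swarms.tokenizers import HuggingFaceTokenizer, SentencePieceTokenizer


class DispatchingTokenizer:
    # Hypothetical wrapper mirroring the routing the tests above exercise.
    def __init__(self, model_path: str):
        if model_path.endswith(".model") and os.path.exists(model_path):
            # A SentencePiece checkpoint on disk -> sentencepiece backend.
            self.model = SentencePieceTokenizer(model_path)
        else:
            # Otherwise treat the argument as a Hugging Face model directory.
            self.model = HuggingFaceTokenizer(model_path)

    def encode(self, text: str):
        return self.model.encode(text)

    def decode(self, token_ids):
        return self.model.decode(token_ids)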