You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
swarms/tests/tokenizers/test_basetokenizer.py

47 lines
1.4 KiB

# BaseTokenizer
import pytest
11 months ago
from swarms.tokenizers.base_tokenizer import BaseTokenizer
# 1. Fixture for BaseTokenizer instance.
@pytest.fixture
def base_tokenizer():
    """Provide a BaseTokenizer constructed with max_tokens=100."""
    tokenizer = BaseTokenizer(max_tokens=100)
    return tokenizer
# 2. Tests for __post_init__.
def test_post_init(base_tokenizer):
    """Check the stop marker attributes on the fixture-built tokenizer."""
    expected_marker = "<|Response|>"
    # Same order as before: the sequence list first, then the single token.
    assert base_tokenizer.stop_sequences == [expected_marker]
    assert base_tokenizer.stop_token == expected_marker
# 3. Tests for count_tokens_left with different inputs.
def test_count_tokens_left_with_positive_diff(
    base_tokenizer, monkeypatch
):
    """Verify count_tokens_left when fewer tokens are used than allowed.

    count_tokens is stubbed to return 50; the test expects
    count_tokens_left("some text") to report 50.
    """
    # Patch the class object imported at the top of this module instead of
    # the dotted string "swarms.tokenizers.BaseTokenizer.count_tokens". The
    # string form only resolves if the package re-exports the class, while
    # the object form always targets the class this module actually uses.
    monkeypatch.setattr(
        BaseTokenizer,
        "count_tokens",
        lambda self, text: 50,
    )
    assert base_tokenizer.count_tokens_left("some text") == 50
def test_count_tokens_left_with_zero_diff(
    base_tokenizer, monkeypatch
):
    """Verify count_tokens_left when the stubbed count is 100.

    count_tokens is stubbed to return 100; the test expects
    count_tokens_left("some text") to report 0.
    """
    # Patch the class object imported at the top of this module instead of
    # the dotted string "swarms.tokenizers.BaseTokenizer.count_tokens". The
    # string form only resolves if the package re-exports the class, while
    # the object form always targets the class this module actually uses.
    monkeypatch.setattr(
        BaseTokenizer,
        "count_tokens",
        lambda self, text: 100,
    )
    assert base_tokenizer.count_tokens_left("some text") == 0
# 4. Tests for count_tokens. This method is abstract, so testing it depends on
# the concrete implementation in a subclass. The test below is only a general
# sketch of how to do it (we assume that count_tokens is implemented in some subclass).
def test_count_tokens(subclass_tokenizer_instance):
    """Check the token count a concrete tokenizer reports for "some text".

    NOTE(review): the subclass_tokenizer_instance fixture is not defined in
    the visible part of this file; confirm it is provided elsewhere.
    """
    token_count = subclass_tokenizer_instance.count_tokens("some text")
    assert token_count == 6