# You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
# swarms/tests/tokenizers/test_huggingfacetokenizer.py
#
# 67 lines
# 1.7 KiB
# HuggingFaceTokenizer
import pytest
import os
from unittest.mock import patch
from swarms.tokenizers.r_tokenizers import HuggingFaceTokenizer
# Test class setup
@pytest.fixture
def hftokenizer():
    """Build a HuggingFaceTokenizer rooted at ./modeldir for each test."""
    model_dir = os.path.join(os.getcwd(), "modeldir")
    return HuggingFaceTokenizer(model_dir)
# testing __init__
@patch("os.path")
@patch("swarms.tokenizers.get_logger")
def test___init__(mock_get_logger, mock_path, hftokenizer):
mock_path.exists.return_value = False
mock_path.join.return_value = "dummy_path"
mock_get_logger.return_value = "dummy_logger"
assert hftokenizer.model_dir == "dummy_path"
assert hftokenizer.logger == "dummy_logger"
assert hftokenizer._maybe_decode_bytes is False
assert hftokenizer._prefix_space_tokens is None
# testing vocab_size property
def test_vocab_size(hftokenizer):
    """The tokenizer should report the expected vocabulary size."""
    expected_vocab = 30522
    assert hftokenizer.vocab_size == expected_vocab
# testing bos_token_id property
def test_bos_token_id(hftokenizer):
    """The beginning-of-sequence token id should match the expected value."""
    expected_bos = 101
    assert hftokenizer.bos_token_id == expected_bos
# testing eos_token_id property
def test_eos_token_id(hftokenizer):
    """The end-of-sequence token id should match the expected value."""
    expected_eos = 102
    assert hftokenizer.eos_token_id == expected_eos
# testing prefix_space_tokens property
def test_prefix_space_tokens(hftokenizer):
    """The prefix-space token collection should be non-empty."""
    tokens = hftokenizer.prefix_space_tokens
    assert len(tokens) > 0
# testing _maybe_add_prefix_space method
def test__maybe_add_prefix_space(hftokenizer):
    """Decoded text gains a leading space only when the token ids require it.

    Both cases expect " is why": the first input already carries the space,
    the second (presumably lacking the leading token id 101) gets one added.
    """
    cases = [
        ([101, 2003, 2010, 2050, 2001, 2339], " is why"),
        ([2003, 2010, 2050, 2001, 2339], "is why"),
    ]
    for token_ids, decoded in cases:
        result = hftokenizer._maybe_add_prefix_space(token_ids, decoded)
        assert result == " is why"
# continuing tests for other methods...