You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
swarms/tests/tools/test_toon_formatter.py

373 lines
10 KiB

"""
Tests for TOON Formatter
This test suite ensures the TOON formatter correctly encodes,
decodes, and compresses JSON data while maintaining data integrity.
Coverage Areas:
- Basic encode/decode operations
- Compression ratio calculations
- Edge cases and error handling
- Schema-aware operations
- Abbreviation system
"""
import json
import pytest
from swarms.utils.toon_formatter import (
TOONFormatter,
toon_encode,
toon_decode,
optimize_for_llm,
)
class TestTOONFormatterBasic:
    """Core encode/decode expectations for ``TOONFormatter``."""

    def test_simple_encode(self):
        """A flat dict encodes to a string holding ``key:value`` pairs."""
        payload = {"user": "Alice", "age": 30}
        encoded = TOONFormatter().encode(payload)

        assert isinstance(encoded, str)
        # Either the abbreviated or the full key spelling is accepted.
        assert any(
            pair in encoded for pair in ("usr:Alice", "user:Alice")
        )
        assert "age:30" in encoded

    def test_simple_decode(self):
        """A space-separated TOON string decodes to the expected dict."""
        expected = {"user": "Alice", "age": 30}
        plain_formatter = TOONFormatter(compact_keys=False)

        result = plain_formatter.decode("user:Alice age:30")

        assert result == expected

    def test_roundtrip(self):
        """Encoding then decoding yields the original mapping."""
        original = {
            "name": "Alice",
            "age": 30,
            "email": "alice@example.com",
            "active": True,
        }
        codec = TOONFormatter(compact_keys=False)

        restored = codec.decode(codec.encode(original))

        # The boolean may come back as 1 or "1"; fold it back to True
        # before comparing.
        if "active" in restored and restored["active"] in (1, "1"):
            restored["active"] = True
        assert restored == original

    def test_null_omission(self):
        """With ``omit_null=True`` the None-valued key is absent."""
        record = {
            "name": "Alice",
            "age": None,
            "email": "alice@test.com",
        }
        encoded = TOONFormatter(omit_null=True).encode(record)

        # The null-valued "age" entry must not appear at all.
        assert "age" not in encoded
        assert any(key in encoded for key in ("name", "nm"))

    def test_boolean_compression(self):
        """True/False are expected to show up as ``:1`` / ``:0``."""
        flags = {"active": True, "verified": False}
        encoded = TOONFormatter().encode(flags)

        # ":1" stands for True, ":0" for False.
        for marker in (":1", ":0"):
            assert marker in encoded
class TestTOONFormatterAbbreviations:
    """Expectations around shortening and restoring key names."""

    def test_common_abbreviations(self):
        """``compact_keys=True`` emits the short forms of common keys."""
        record = {
            "user": "Alice",
            "email": "alice@test.com",
            "status": "active",
        }
        encoded = TOONFormatter(compact_keys=True).encode(record)

        # Every abbreviated key prefix must be present.
        for short_key in ("usr:", "eml:", "sts:"):
            assert short_key in encoded

    def test_reverse_abbreviations(self):
        """Decoding short keys restores the full key names."""
        compact = TOONFormatter(compact_keys=True)

        result = compact.decode("usr:Alice eml:alice@test.com sts:active")

        for full_key in ("user", "email", "status"):
            assert full_key in result

    def test_no_abbreviation_mode(self):
        """``compact_keys=False`` leaves key names untouched."""
        record = {"user": "Alice", "email": "alice@test.com"}
        encoded = TOONFormatter(compact_keys=False).encode(record)

        # Full keys are present ...
        for full_key in ("user:", "email:"):
            assert full_key in encoded
        # ... and their abbreviated forms are not.
        for short_key in ("usr:", "eml:"):
            assert short_key not in encoded
class TestTOONFormatterCompression:
    """Checks on the size of TOON output relative to JSON."""

    def test_compression_ratio(self):
        """The estimated ratio is a float within [0.2, 0.8]."""
        record = {
            "username": "Alice Johnson",
            "email": "alice@example.com",
            "status": "active",
            "created_at": "2025-01-15",
        }
        compact = TOONFormatter(compact_keys=True, omit_null=True)

        estimate = compact.estimate_compression_ratio(record)

        # The band below is what this suite treats as meaningful compression.
        assert 0.2 <= estimate <= 0.8
        assert isinstance(estimate, float)

    def test_compression_effectiveness(self):
        """The TOON string is strictly shorter than ``json.dumps`` output."""
        record = {"user": "Alice", "age": 30, "email": "alice@test.com"}

        json_size = len(json.dumps(record))
        toon_size = len(TOONFormatter().encode(record))

        assert toon_size < json_size
class TestTOONFormatterEdgeCases:
    """Boundary inputs: empty, nested, list-valued, and unusual values."""

    def test_empty_dict(self):
        """An empty dict encodes to the empty string."""
        encoded = TOONFormatter().encode({})

        assert encoded == ""

    def test_nested_dict(self):
        """Keys of both the outer and inner dict appear in the output."""
        record = {
            "user": {"name": "Alice", "age": 30},
            "status": "active",
        }
        encoded = TOONFormatter().encode(record)

        # Each key may show up in full or abbreviated form.
        assert any(key in encoded for key in ("user:", "usr:"))
        assert any(key in encoded for key in ("name:", "nm:"))

    def test_array_encoding(self):
        """A list value is rendered with brackets and its items."""
        record = {"users": ["Alice", "Bob", "Charlie"]}
        encoded = TOONFormatter().encode(record)

        for fragment in ("[", "]", "Alice"):
            assert fragment in encoded

    def test_special_characters(self):
        """A value containing a colon survives encoding."""
        record = {"name": "Alice:Smith", "description": "A test user"}
        encoded = TOONFormatter().encode(record)

        # The check passes whether the colon is backslash-escaped or not.
        accepted_forms = ("Alice\\:Smith", "Alice:Smith")
        assert any(form in encoded for form in accepted_forms)

    def test_numeric_values(self):
        """Integer, float, and negative values appear in the output."""
        record = {"int": 42, "float": 3.14, "negative": -10}
        encoded = TOONFormatter().encode(record)

        for literal in ("42", "3.14", "-10"):
            assert literal in encoded

    def test_max_depth_handling(self):
        """Nesting deeper than ``max_depth`` still yields a string."""
        shallow = TOONFormatter(max_depth=2)
        # Four levels of nesting, i.e. more than the configured limit.
        deeply_nested = {"a": {"b": {"c": {"d": "deep"}}}}

        # encode() must return normally here; only the result type is checked.
        encoded = shallow.encode(deeply_nested)

        assert isinstance(encoded, str)
class TestConvenienceFunctions:
    """Module-level helpers exposed next to ``TOONFormatter``."""

    def test_toon_encode_function(self):
        """``toon_encode`` returns a string containing the value."""
        encoded = toon_encode({"user": "Alice", "age": 30})

        assert isinstance(encoded, str)
        assert "Alice" in encoded

    def test_toon_decode_function(self):
        """``toon_decode`` returns a dict with at least one known key."""
        result = toon_decode("user:Alice age:30")

        assert isinstance(result, dict)
        assert any(key in result for key in ("user", "age"))

    def test_optimize_for_llm_toon(self):
        """``format="toon"`` produces a non-empty string."""
        record = {"user": "Alice", "email": "alice@test.com"}

        rendered = optimize_for_llm(record, format="toon")

        assert isinstance(rendered, str)
        assert len(rendered) > 0

    def test_optimize_for_llm_json(self):
        """``format="json"`` produces JSON that parses back to the input."""
        record = {"user": "Alice", "age": 30}

        rendered = optimize_for_llm(record, format="json")

        assert isinstance(rendered, str)
        # json.loads doubles as the validity check.
        assert json.loads(rendered) == record

    def test_optimize_for_llm_compact(self):
        """``format="compact"`` produces a string with few spaces."""
        record = {"user": "Alice", "age": 30}

        rendered = optimize_for_llm(record, format="compact")

        assert isinstance(rendered, str)
        # No spaces at all is the ideal; fewer than five is tolerated.
        space_count = rendered.count(" ")
        assert space_count < 5
class TestTOONFormatterIntegration:
    """Larger, more realistic encoding scenarios."""

    def test_large_dataset(self):
        """One hundred user records encode to less than their JSON size."""
        user_rows = [
            {
                "id": index,
                "name": f"User{index}",
                "email": f"user{index}@test.com",
                "active": index % 2 == 0,
            }
            for index in range(100)
        ]
        dataset = {"users": user_rows}

        encoded = TOONFormatter().encode(dataset)

        # Only "shorter than JSON" is asserted, not a specific ratio.
        json_size = len(json.dumps(dataset))
        toon_size = len(encoded)
        assert toon_size < json_size

    def test_schema_aware_encoding(self):
        """Passing ``schema=`` to ``encode`` still returns a string."""
        object_schema = {
            "type": "object",
            "properties": {
                "id": {"type": "integer"},
                "name": {"type": "string"},
            },
        }
        record = {"id": 1, "name": "Alice"}

        # The call itself must not raise when a schema is supplied.
        encoded = TOONFormatter().encode(record, schema=object_schema)

        assert isinstance(encoded, str)
# Coarse timing smoke tests. They rely on plain elapsed-time assertions and do
# not use the pytest-benchmark fixture.
class TestTOONFormatterPerformance:
    """Elapsed-time guards for ``TOONFormatter.encode`` and ``decode``.

    Durations are measured with ``time.perf_counter()``, a monotonic
    high-resolution clock, so a system clock adjustment during the run
    cannot distort (or make negative) the measured interval.
    """

    def test_encode_performance(self):
        """Ten encodes of a 50-record payload finish in under one second."""
        import time

        formatter = TOONFormatter()
        data = {
            "users": [
                {"id": i, "name": f"User{i}", "active": True}
                for i in range(50)
            ]
        }

        start = time.perf_counter()
        for _ in range(10):
            formatter.encode(data)
        duration = time.perf_counter() - start

        # Budget: less than 1 second for all 10 iterations combined.
        assert duration < 1.0

    def test_decode_performance(self):
        """Ten decodes of a 50-group TOON string finish in under one second."""
        import time

        formatter = TOONFormatter(compact_keys=False)
        # Fifty "id/name/active" groups joined into one flat TOON string.
        toon_str = " ".join(
            f"id:{i} name:User{i} active:1" for i in range(50)
        )

        start = time.perf_counter()
        for _ in range(10):
            formatter.decode(toon_str)
        duration = time.perf_counter() - start

        # Same 1-second budget as the encode test.
        assert duration < 1.0
if __name__ == "__main__":
    # pytest.main() returns the run's exit code; pass it to the interpreter
    # so that running this file directly exits non-zero when a test fails.
    raise SystemExit(pytest.main([__file__, "-v"]))