""" Tests for TOON Formatter This test suite ensures the TOON formatter correctly encodes, decodes, and compresses JSON data while maintaining data integrity. Coverage Areas: - Basic encode/decode operations - Compression ratio calculations - Edge cases and error handling - Schema-aware operations - Abbreviation system """ import json import pytest from swarms.utils.toon_formatter import ( TOONFormatter, toon_encode, toon_decode, optimize_for_llm, ) class TestTOONFormatterBasic: """Test basic TOON formatter operations.""" def test_simple_encode(self): """Test encoding simple dictionary.""" formatter = TOONFormatter() data = {"user": "Alice", "age": 30} toon_str = formatter.encode(data) assert isinstance(toon_str, str) assert "usr:Alice" in toon_str or "user:Alice" in toon_str assert "age:30" in toon_str def test_simple_decode(self): """Test decoding simple TOON string.""" formatter = TOONFormatter(compact_keys=False) toon_str = "user:Alice age:30" decoded = formatter.decode(toon_str) assert decoded == {"user": "Alice", "age": 30} def test_roundtrip(self): """Test encode-decode roundtrip preserves data.""" formatter = TOONFormatter(compact_keys=False) data = { "name": "Alice", "age": 30, "email": "alice@example.com", "active": True, } toon_str = formatter.encode(data) decoded = formatter.decode(toon_str) # Normalize boolean representation if "active" in decoded and decoded["active"] in [1, "1"]: decoded["active"] = True assert decoded == data def test_null_omission(self): """Test that null values are omitted when configured.""" formatter = TOONFormatter(omit_null=True) data = {"name": "Alice", "age": None, "email": "alice@test.com"} toon_str = formatter.encode(data) # Should not contain the null age assert "age" not in toon_str assert "name" in toon_str or "nm" in toon_str def test_boolean_compression(self): """Test boolean compression to 1/0.""" formatter = TOONFormatter() data = {"active": True, "verified": False} toon_str = formatter.encode(data) assert ":1" in toon_str # True -> 1 assert ":0" in toon_str # False -> 0 class TestTOONFormatterAbbreviations: """Test key abbreviation system.""" def test_common_abbreviations(self): """Test that common keys are abbreviated.""" formatter = TOONFormatter(compact_keys=True) data = { "user": "Alice", "email": "alice@test.com", "status": "active", } toon_str = formatter.encode(data) # Check for abbreviated keys assert "usr:" in toon_str assert "eml:" in toon_str assert "sts:" in toon_str def test_reverse_abbreviations(self): """Test decoding abbreviated keys back to full names.""" formatter = TOONFormatter(compact_keys=True) toon_str = "usr:Alice eml:alice@test.com sts:active" decoded = formatter.decode(toon_str) assert "user" in decoded assert "email" in decoded assert "status" in decoded def test_no_abbreviation_mode(self): """Test that compact_keys=False preserves original keys.""" formatter = TOONFormatter(compact_keys=False) data = {"user": "Alice", "email": "alice@test.com"} toon_str = formatter.encode(data) assert "user:" in toon_str assert "email:" in toon_str assert "usr:" not in toon_str assert "eml:" not in toon_str class TestTOONFormatterCompression: """Test compression metrics and calculations.""" def test_compression_ratio(self): """Test compression ratio calculation.""" formatter = TOONFormatter(compact_keys=True, omit_null=True) data = { "username": "Alice Johnson", "email": "alice@example.com", "status": "active", "created_at": "2025-01-15", } ratio = formatter.estimate_compression_ratio(data) # Should have meaningful compression assert 0.2 <= ratio <= 0.8 assert isinstance(ratio, float) def test_compression_effectiveness(self): """Test that TOON is shorter than JSON.""" formatter = TOONFormatter() data = {"user": "Alice", "age": 30, "email": "alice@test.com"} json_str = json.dumps(data) toon_str = formatter.encode(data) assert len(toon_str) < len(json_str) class TestTOONFormatterEdgeCases: """Test edge cases and error handling.""" def test_empty_dict(self): """Test encoding empty dictionary.""" formatter = TOONFormatter() data = {} toon_str = formatter.encode(data) assert toon_str == "" def test_nested_dict(self): """Test encoding nested dictionary.""" formatter = TOONFormatter() data = { "user": {"name": "Alice", "age": 30}, "status": "active", } toon_str = formatter.encode(data) # Should contain nested structure assert "user:" in toon_str or "usr:" in toon_str assert "name:" in toon_str or "nm:" in toon_str def test_array_encoding(self): """Test encoding arrays.""" formatter = TOONFormatter() data = {"users": ["Alice", "Bob", "Charlie"]} toon_str = formatter.encode(data) assert "[" in toon_str assert "]" in toon_str assert "Alice" in toon_str def test_special_characters(self): """Test handling of special characters.""" formatter = TOONFormatter() data = {"name": "Alice:Smith", "description": "A test user"} toon_str = formatter.encode(data) # Should escape colons assert "Alice\\:Smith" in toon_str or "Alice:Smith" in toon_str def test_numeric_values(self): """Test encoding various numeric types.""" formatter = TOONFormatter() data = {"int": 42, "float": 3.14, "negative": -10} toon_str = formatter.encode(data) assert "42" in toon_str assert "3.14" in toon_str assert "-10" in toon_str def test_max_depth_handling(self): """Test max depth limit for nested structures.""" formatter = TOONFormatter(max_depth=2) # Create deeply nested structure data = {"a": {"b": {"c": {"d": "deep"}}}} # Should not raise error, may fall back to JSON toon_str = formatter.encode(data) assert isinstance(toon_str, str) class TestConvenienceFunctions: """Test convenience functions.""" def test_toon_encode_function(self): """Test toon_encode convenience function.""" data = {"user": "Alice", "age": 30} toon_str = toon_encode(data) assert isinstance(toon_str, str) assert "Alice" in toon_str def test_toon_decode_function(self): """Test toon_decode convenience function.""" toon_str = "user:Alice age:30" data = toon_decode(toon_str) assert isinstance(data, dict) assert "user" in data or "age" in data def test_optimize_for_llm_toon(self): """Test optimize_for_llm with TOON format.""" data = {"user": "Alice", "email": "alice@test.com"} optimized = optimize_for_llm(data, format="toon") assert isinstance(optimized, str) assert len(optimized) > 0 def test_optimize_for_llm_json(self): """Test optimize_for_llm with JSON format.""" data = {"user": "Alice", "age": 30} optimized = optimize_for_llm(data, format="json") assert isinstance(optimized, str) # Should be valid JSON parsed = json.loads(optimized) assert parsed == data def test_optimize_for_llm_compact(self): """Test optimize_for_llm with compact format.""" data = {"user": "Alice", "age": 30} optimized = optimize_for_llm(data, format="compact") assert isinstance(optimized, str) # Should be compact (no spaces) assert " " not in optimized or optimized.count(" ") < 5 class TestTOONFormatterIntegration: """Test integration scenarios.""" def test_large_dataset(self): """Test encoding large dataset.""" formatter = TOONFormatter() # Create large dataset data = { "users": [ { "id": i, "name": f"User{i}", "email": f"user{i}@test.com", "active": i % 2 == 0, } for i in range(100) ] } toon_str = formatter.encode(data) # Should compress significantly json_len = len(json.dumps(data)) toon_len = len(toon_str) assert toon_len < json_len def test_schema_aware_encoding(self): """Test schema-aware encoding (basic).""" formatter = TOONFormatter() schema = { "type": "object", "properties": { "id": {"type": "integer"}, "name": {"type": "string"}, }, } data = {"id": 1, "name": "Alice"} # Should not raise error with schema toon_str = formatter.encode(data, schema=schema) assert isinstance(toon_str, str) # Performance benchmarks (optional, can be run with pytest-benchmark) class TestTOONFormatterPerformance: """Performance benchmarks for TOON formatter.""" def test_encode_performance(self): """Test encoding performance.""" formatter = TOONFormatter() data = { "users": [ {"id": i, "name": f"User{i}", "active": True} for i in range(50) ] } import time start = time.time() for _ in range(10): formatter.encode(data) duration = time.time() - start # Should be reasonably fast (< 1 second for 10 iterations) assert duration < 1.0 def test_decode_performance(self): """Test decoding performance.""" formatter = TOONFormatter(compact_keys=False) toon_str = " ".join([f"id:{i} name:User{i} active:1" for i in range(50)]) import time start = time.time() for _ in range(10): formatter.decode(toon_str) duration = time.time() - start # Should be reasonably fast assert duration < 1.0 if __name__ == "__main__": pytest.main([__file__, "-v"])