From 75049e82a38c59034b638d8347d0518098158469 Mon Sep 17 00:00:00 2001
From: Kye Gomez
Date: Tue, 7 Oct 2025 22:04:09 -0700
Subject: [PATCH] [TEST SUITE][requirements.txt] [and readme]

---
 tests/README.md                  | 230 +++++++++++++
 tests/requirements.txt           |   4 +
 tests/structs/test_results.md    | 172 ----------
 tests/utils/test_conversation.py | 560 ------------------------------
 4 files changed, 234 insertions(+), 732 deletions(-)
 create mode 100644 tests/README.md
 create mode 100644 tests/requirements.txt
 delete mode 100644 tests/structs/test_results.md
 delete mode 100644 tests/utils/test_conversation.py

diff --git a/tests/README.md b/tests/README.md
new file mode 100644
index 00000000..1b1dcddd
--- /dev/null
+++ b/tests/README.md
@@ -0,0 +1,230 @@
+# Swarms Test Suite
+
+This directory contains comprehensive tests for the Swarms framework, covering all major components: agents, workflows, tools, utilities, and more.
+
+## 📁 Directory Structure
+
+### Core Test Files
+- **`test_comprehensive_test.py`** - Main comprehensive test suite that exercises all major Swarms components
+- **`test___init__.py`** - Package initialization tests
+- **`requirements.txt`** - Test dependencies (swarms, pytest, matplotlib, loguru)
+
+### Test Categories
+
+#### 🤖 Agent Tests (`/agent/`)
+Tests for individual agent functionality and behavior; a minimal example test is sketched after these lists.
+
+**`/agents/`** - Core agent functionality
+- `test_agent_logging.py` - Agent logging and monitoring capabilities
+- `test_create_agents_from_yaml.py` - YAML-based agent creation
+- `test_litellm_args_kwargs.py` - LiteLLM argument handling
+- `test_llm_args.py` - LLM argument processing
+- `test_llm_handling_args.py` - LLM argument management
+- `test_tool_agent.py` - Tool-enabled agent functionality
+
+**`/benchmark_agent/`** - Agent performance and benchmarking
+- `test_agent_benchmark_init.py` - Agent benchmark initialization
+- `test_agent_exec_benchmark.py` - Agent execution benchmarking
+- `test_auto_test_eval.py` - Automated test evaluation
+- `test_github_summarizer_agent.py` - GitHub summarization agent
+- `test_profiling_agent.py` - Agent performance profiling
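+
+As a rough illustration, a test in this category might look like the minimal sketch below. The `Agent` constructor arguments shown here are assumptions (names vary across Swarms versions), and a real run needs `OPENAI_API_KEY` set:
+
+```python
+# Minimal sketch of an agent test. Constructor arguments are
+# assumptions and may differ across Swarms versions.
+from swarms import Agent
+
+
+def test_agent_basic_run():
+    agent = Agent(
+        agent_name="test-agent",
+        system_prompt="You are a helpful assistant.",
+        model_name="gpt-4o-mini",  # assumed LiteLLM-style model id
+        max_loops=1,
+    )
+    output = agent.run("Reply with one word: hello")
+    # A successful run should produce non-empty text
+    assert isinstance(output, str)
+    assert len(output) > 0
+```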
+
+#### 🏗️ Structure Tests (`/structs/`)
+Tests for Swarms structural components and workflows:
+
+- `test_agent.py` - Core Agent class functionality
+- `test_agent_features.py` - Agent feature testing
+- `test_agent_rearrange.py` - Agent rearrangement capabilities
+- `test_agentrearrange.py` - Alternative agent rearrangement tests
+- `test_airflow_swarm.py` - Airflow integration
+- `test_auto_swarm_builder_fix.py` - Auto swarm builder fixes
+- `test_auto_swarms_builder.py` - Automated swarm construction
+- `test_base_workflow.py` - Base workflow functionality
+- `test_base.py` - Base class implementations
+- `test_board_of_directors_swarm.py` - Board of directors swarm pattern
+- `test_concurrent_workflow.py` - Concurrent workflow execution
+- `test_conversation.py` - Conversation management
+- `test_forest_swarm.py` - Forest swarm architecture
+- `test_graph_workflow_comprehensive.py` - Graph-based workflows
+- `test_groupchat.py` - Group chat functionality
+- `test_majority_voting.py` - Majority voting mechanisms
+- `test_moa.py` - Mixture of Agents (MoA) testing
+- `test_multi_agent_collab.py` - Multi-agent collaboration
+- `test_multi_agent_orchestrator.py` - Multi-agent orchestration
+- `test_reasoning_agent_router_all.py` - Reasoning agent routing
+- `test_recursive_workflow.py` - Recursive workflow patterns
+- `test_round_robin_swarm.py` - Round-robin swarm scheduling
+- `test_sequential_workflow.py` - Sequential workflow execution
+- `test_spreadsheet.py` - Spreadsheet swarm functionality
+- `test_swarm_architectures.py` - Various swarm architectures
+- `test_yaml_model.py` - YAML model configuration
+
+#### 🔧 Tools Tests (`/tools/`)
+Tests for tool integration and functionality:
+
+- `test_base_tool.py` - Base tool class functionality
+- `test_output_str_fix.py` - Output string formatting fixes
+- `test_parse_tools.py` - Tool parsing and execution
+- `test_support_mcp.py` - MCP (Model Context Protocol) support
+
+#### 🛠️ Utilities Tests (`/utils/`)
+Tests for utility functions and helpers:
+
+- `test_acompletions.py` - Async completion handling
+- `test_auto_check_download.py` - Automatic download checking
+- `test_display_markdown_message.py` - Markdown message display
+- `test_docstring_parser.py` - Docstring parsing utilities
+- `test_extract_code_from_markdown.py` - Code extraction from markdown
+- `test_formatter.py` - Text formatting utilities
+- `test_litellm_wrapper.py` - LiteLLM wrapper functionality
+- `test_math_eval.py` - Mathematical expression evaluation
+- `test_md_output.py` - Markdown output handling
+- `test_metrics_decorator.py` - Metrics collection decorators
+- `test_pdf_to_text.py` - PDF to text conversion
+- `test_try_except_wrapper.py` - Error handling wrappers
+
+#### 🎨 Artifacts Tests (`/artifacts/`)
+Tests for artifact management and versioning:
+
+- `test_artifact_main.py` - Core artifact functionality
+- `test_artifact_output_types.py` - Artifact output type handling
+
+#### 💬 Communication Tests (`/communication/`)
+Tests for communication and conversation management:
+
+- `test_conversation.py` - Conversation handling and persistence
+
+#### 📊 AOP (Aspect-Oriented Programming) Tests (`/aop/`)
+Advanced testing with benchmarking and performance analysis:
+
+- `aop_benchmark.py` - Comprehensive AOP benchmarking suite
+- `test_data/` - Benchmark data and results
+  - `aop_benchmark_data/` - Benchmark results and visualizations
+  - `image1.jpg`, `image2.png` - Test images
+
+#### 📈 Telemetry Tests (`/telemetry/`)
+Tests for telemetry and monitoring:
+
+- `test_user_utils.py` - User utility telemetry
+
+## 🚀 Running Tests
+
+### Prerequisites
+Install test dependencies:
+```bash
+pip install -r requirements.txt
+```
+
+### Running All Tests
+```bash
+pytest
+```
+
+### Running Specific Test Categories
+```bash
+# Run agent tests
+pytest agent/
+
+# Run structure tests
+pytest structs/
+
+# Run utility tests
+pytest utils/
+
+# Run tool tests
+pytest tools/
+```
+
+### Running Individual Test Files
+```bash
+# Run the comprehensive test suite
+pytest test_comprehensive_test.py
+
+# Run a specific test file
+pytest structs/test_agent.py
+```
+
+### Running with Coverage
+```bash
+pytest --cov=swarms --cov-report=html
+```
+
+## 📋 Test Features
+
+### Comprehensive Testing
+- **Agent Functionality**: Complete testing of agent creation, execution, and management
+- **Workflow Testing**: Various workflow patterns, including sequential, concurrent, and recursive
+- **Tool Integration**: Testing of tool parsing, execution, and MCP support
+- **Performance Benchmarking**: AOP benchmarking with multiple LLM providers
+- **Error Handling**: Comprehensive error handling and recovery testing
+
+### Test Data
+- Benchmark results with CSV and Excel exports
+- Performance visualizations (PNG charts)
+- Test images for multimodal testing
+- Conversation cache files for persistence testing
+
+### Supported LLM Providers
+The AOP benchmark tests support multiple LLM providers; a sketch of how providers are typically selected follows this list:
+- OpenAI (GPT-4o, GPT-4o-mini, GPT-4-turbo)
+- Anthropic (Claude 3.5 Sonnet, Claude 3 Haiku, Claude 3 Sonnet)
+- Google (Gemini 1.5 Pro, Gemini 1.5 Flash)
+- Meta (Llama 3.1 8B, Llama 3.1 70B)
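+
+Provider selection in LiteLLM-based stacks like Swarms is usually a matter of passing a model identifier string. The identifiers below are illustrative assumptions, not the benchmark's exact configuration:
+
+```python
+# Hypothetical provider sweep. The model ids follow common LiteLLM
+# naming conventions and are assumptions, not the benchmark's list.
+models = [
+    "gpt-4o-mini",                 # OpenAI
+    "claude-3-5-sonnet-20240620",  # Anthropic
+    "gemini/gemini-1.5-flash",     # Google
+    "groq/llama-3.1-8b-instant",   # Meta Llama via a hosting provider
+]
+
+for model_name in models:
+    # In the benchmark, each id would be handed to the agents under
+    # test, e.g. Agent(model_name=model_name, ...).
+    print(f"benchmarking {model_name}")
+```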
+
+## 🔧 Configuration
+
+### Environment Variables
+Tests require the following environment variables:
+- `OPENAI_API_KEY` - OpenAI API key for testing
+- Additional API keys for other providers (optional)
+
+### Test Configuration
+- Maximum agents: 20 (configurable in the AOP benchmark)
+- Requests per test: 20
+- Concurrent requests: 5
+- Timeout settings: Configurable per test type
+
+## 📊 Benchmarking
+
+The AOP benchmark suite provides:
+- Performance metrics across multiple LLM providers
+- Memory usage tracking
+- Response time analysis
+- Throughput measurements
+- Visual performance reports
+
+## 🐛 Debugging
+
+### Verbose Output
+```bash
+pytest -v
+```
+
+### Debug Mode
+```bash
+pytest --pdb
+```
+
+### Logging
+Tests use Loguru for comprehensive logging. Check the console output for detailed test execution logs.
+
+## 📝 Contributing
+
+When adding new tests (a skeleton following these conventions is sketched below):
+1. Follow the existing directory structure
+2. Use descriptive test names
+3. Include proper docstrings
+4. Add appropriate fixtures and mocks
+5. Update this README if adding new test categories
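+
+A skeleton that follows these conventions might look like the sketch below; the file name, fixture, and constructor arguments are illustrative only:
+
+```python
+# tests/structs/test_my_feature.py (hypothetical skeleton)
+import pytest
+
+from swarms import Agent  # or whichever component is under test
+
+
+@pytest.fixture
+def agent():
+    """Provide a minimal agent for the tests below (arguments assumed)."""
+    return Agent(agent_name="fixture-agent", model_name="gpt-4o-mini")
+
+
+def test_my_feature_returns_text(agent):
+    """Descriptive name: states exactly the behavior being verified."""
+    result = agent.run("ping")
+    assert isinstance(result, str)
+```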
+
+## 🔍 Test Coverage
+
+The test suite aims for comprehensive coverage of:
+- ✅ Agent creation and execution
+- ✅ Workflow patterns and orchestration
+- ✅ Tool integration and execution
+- ✅ Utility functions and helpers
+- ✅ Error handling and edge cases
+- ✅ Performance and benchmarking
+- ✅ Communication and conversation management
+- ✅ Artifact management and versioning
diff --git a/tests/requirements.txt b/tests/requirements.txt
new file mode 100644
index 00000000..16e1cafe
--- /dev/null
+++ b/tests/requirements.txt
@@ -0,0 +1,4 @@
+swarms
+pytest
+matplotlib
+loguru
\ No newline at end of file
diff --git a/tests/structs/test_results.md b/tests/structs/test_results.md
deleted file mode 100644
index c4a06189..00000000
--- a/tests/structs/test_results.md
+++ /dev/null
@@ -1,172 +0,0 @@
-# Test Results Report
-
-Test Run Date: 2024-03-21 00:00:00
-
-## Summary
-
-- Total Tests: 31
-- Passed: 31
-- Failed: 0
-- Errors: 0
-
-## Detailed Results
-
-| Test Name | Result | Duration (s) | Error |
-|-----------|---------|--------------|-------|
-| test_add_message | PASS | 0.0010 | |
-| test_add_message_with_time | PASS | 0.0008 | |
-| test_delete_message | PASS | 0.0007 | |
-| test_delete_message_out_of_bounds | PASS | 0.0006 | |
-| test_update_message | PASS | 0.0009 | |
-| test_update_message_out_of_bounds | PASS | 0.0006 | |
-| test_return_history_as_string | PASS | 0.0012 | |
-| test_search | PASS | 0.0011 | |
-| test_conversation_cache_creation | PASS | 0.0150 | |
-| test_conversation_cache_loading | PASS | 0.0180 | |
-| test_add_multiple_messages | PASS | 0.0009 | |
-| test_query | PASS | 0.0007 | |
-| test_display_conversation | PASS | 0.0008 | |
-| test_count_messages_by_role | PASS | 0.0010 | |
-| test_get_str | PASS | 0.0007 | |
-| test_to_json | PASS | 0.0008 | |
-| test_to_dict | PASS | 0.0006 | |
-| test_to_yaml | PASS | 0.0007 | |
-| test_get_last_message_as_string | PASS | 0.0008 | |
-| test_return_messages_as_list | PASS | 0.0009 | |
-| test_return_messages_as_dictionary | PASS | 0.0007 | |
-| test_add_tool_output_to_agent | PASS | 0.0008 | |
-| test_get_final_message | PASS | 0.0007 | |
-| test_get_final_message_content | PASS | 0.0006 | |
-| test_return_all_except_first | PASS | 0.0009 | |
-| test_return_all_except_first_string | PASS | 0.0008 | |
-| test_batch_add | PASS | 0.0010 | |
-| test_get_cache_stats | PASS | 0.0012 | |
-| test_list_cached_conversations | PASS | 0.0150 | |
-| test_clear | PASS | 0.0007 | |
-| test_save_and_load_json | PASS | 0.0160 | |
-
-## Test Details
-
-### test_add_message
-- Verifies that messages can be added to the conversation
-- Checks message role and content are stored correctly
-
-### test_add_message_with_time
-- Verifies timestamp functionality when adding messages
-- Ensures timestamp is present in message metadata
-
-### test_delete_message
-- Verifies messages can be deleted from conversation
-- Checks conversation length after deletion
-
-### test_delete_message_out_of_bounds
-- Verifies proper error handling for invalid deletion index
-- Ensures IndexError is raised for out of bounds access
-
-### test_update_message
-- Verifies messages can be updated in the conversation
-- Checks that role and content are updated correctly
-
-### test_update_message_out_of_bounds
-- Verifies proper error handling for invalid update index
-- Ensures IndexError is raised for out of bounds access
-
-### test_return_history_as_string
-- Verifies conversation history string formatting
-- Checks that messages are properly formatted with roles
-
-### test_search
-- Verifies search functionality in conversation history
-- Checks that search returns correct matching messages
-
-### test_conversation_cache_creation
-- Verifies conversation cache file creation
-- Ensures cache file is created in correct location
-
-### test_conversation_cache_loading
-- Verifies loading conversation from cache
-- Ensures conversation state is properly restored
-
-### test_add_multiple_messages
-- Verifies multiple messages can be added at once
-- Checks that all messages are added with correct roles and content
-
-### test_query
-- Verifies querying specific messages by index
-- Ensures correct message content and role are returned
-
-### test_display_conversation
-- Verifies conversation display functionality
-- Checks that messages are properly formatted for display
-
-### test_count_messages_by_role
-- Verifies message counting by role
-- Ensures accurate counts for each role type
-
-### test_get_str
-- Verifies string representation of conversation
-- Checks proper formatting of conversation as string
-
-### test_to_json
-- Verifies JSON serialization of conversation
-- Ensures proper JSON formatting and content preservation
-
-### test_to_dict
-- Verifies dictionary representation of conversation
-- Checks proper structure of conversation dictionary
-
-### test_to_yaml
-- Verifies YAML serialization of conversation
-- Ensures proper YAML formatting and content preservation
-
-### test_get_last_message_as_string
-- Verifies retrieval of last message as string
-- Checks proper formatting of last message
-
-### test_return_messages_as_list
-- Verifies list representation of messages
-- Ensures proper formatting of messages in list
-
-### test_return_messages_as_dictionary
-- Verifies dictionary representation of messages
-- Checks proper structure of message dictionaries
-
-### test_add_tool_output_to_agent
-- Verifies adding tool output to conversation
-- Ensures proper handling of tool output data
-
-### test_get_final_message
-- Verifies retrieval of final message
-- Checks proper formatting of final message
-
-### test_get_final_message_content
-- Verifies retrieval of final message content
-- Ensures only content is returned without role
-
-### test_return_all_except_first
-- Verifies retrieval of all messages except first
-- Checks proper exclusion of first message
-
-### test_return_all_except_first_string
-- Verifies string representation without first message
-- Ensures proper formatting of remaining messages
-
-### test_batch_add
-- Verifies batch addition of messages
-- Checks proper handling of multiple messages at once
-
-### test_get_cache_stats
-- Verifies cache statistics retrieval
-- Ensures all cache metrics are present
-
-### test_list_cached_conversations
-- Verifies listing of cached conversations
-- Checks proper retrieval of conversation names
-
-### test_clear
-- Verifies conversation clearing functionality
-- Ensures all messages are removed
-
-### test_save_and_load_json
-- Verifies saving and loading conversation to/from JSON
-- Ensures conversation state is preserved across save/load
\ No newline at end of file
diff --git a/tests/utils/test_conversation.py b/tests/utils/test_conversation.py
deleted file mode 100644
index a52d40fa..00000000
--- a/tests/utils/test_conversation.py
+++ /dev/null
@@ -1,560 +0,0 @@
-import os
-from loguru import logger
-from swarms.structs.conversation import Conversation
-
-
-def assert_equal(actual, expected, message=""):
-    """Custom assertion function for equality"""
-    if actual != expected:
-        logger.error(
-            f"Assertion failed: {message}\nExpected: {expected}\nActual: {actual}"
-        )
-        raise AssertionError(
-            f"{message}\nExpected: {expected}\nActual: {actual}"
-        )
-    logger.success(f"Assertion passed: {message}")
-
-
-def assert_true(condition, message=""):
-    """Custom assertion function for boolean conditions"""
-    if not condition:
-        logger.error(f"Assertion failed: {message}")
-        raise AssertionError(message)
-    logger.success(f"Assertion passed: {message}")
-
-
-def test_conversation_initialization():
-    """Test conversation initialization with different parameters"""
-    logger.info("Testing conversation initialization")
-
-    # Test default initialization
-    conv = Conversation()
-    assert_true(
-        isinstance(conv, Conversation),
-        "Should create Conversation instance",
-    )
-    assert_equal(
-        conv.provider,
-        "in-memory",
-        "Default provider should be in-memory",
-    )
-
-    # Test with custom parameters
-    conv = Conversation(
-        name="test-conv",
-        system_prompt="Test system prompt",
-        time_enabled=True,
-        token_count=True,
-    )
-    assert_equal(
-        conv.name, "test-conv", "Name should be set correctly"
-    )
-    assert_equal(
-        conv.system_prompt,
-        "Test system prompt",
-        "System prompt should be set",
-    )
-    assert_true(conv.time_enabled, "Time should be enabled")
-    assert_true(conv.token_count, "Token count should be enabled")
-
-
-def test_add_message():
-    """Test adding messages to conversation"""
-    logger.info("Testing add message functionality")
-
-    conv = Conversation(time_enabled=True, token_count=True)
-
-    # Test adding text message
-    conv.add("user", "Hello, world!")
-    assert_equal(
-        len(conv.conversation_history), 1, "Should have one message"
-    )
-    assert_equal(
-        conv.conversation_history[0]["role"],
-        "user",
-        "Role should be user",
-    )
-    assert_equal(
-        conv.conversation_history[0]["content"],
-        "Hello, world!",
-        "Content should match",
-    )
-
-    # Test adding dict message
-    dict_msg = {"key": "value"}
-    conv.add("assistant", dict_msg)
-    assert_equal(
-        len(conv.conversation_history), 2, "Should have two messages"
-    )
-    assert_equal(
-        conv.conversation_history[1]["role"],
-        "assistant",
-        "Role should be assistant",
-    )
-    assert_equal(
-        conv.conversation_history[1]["content"],
-        dict_msg,
-        "Content should match dict",
-    )
-
-
-def test_delete_message():
-    """Test deleting messages from conversation"""
-    logger.info("Testing delete message functionality")
-
-    conv = Conversation()
-    conv.add("user", "Message 1")
-    conv.add("user", "Message 2")
-
-    initial_length = len(conv.conversation_history)
-    conv.delete("0")  # Delete first message
-
-    assert_equal(
-        len(conv.conversation_history),
-        initial_length - 1,
-        "Conversation history should be shorter by one",
-    )
-    assert_equal(
-        conv.conversation_history[0]["content"],
-        "Message 2",
-        "Remaining message should be Message 2",
-    )
-
-
-def test_update_message():
-    """Test updating messages in conversation"""
-    logger.info("Testing update message functionality")
-
-    conv = Conversation()
-    conv.add("user", "Original message")
-
-    conv.update("0", "user", "Updated message")
-    assert_equal(
-        conv.conversation_history[0]["content"],
-        "Updated message",
-        "Message should be updated",
-    )
-
-
-def test_search_messages():
-    """Test searching messages in conversation"""
-    logger.info("Testing search functionality")
-
-    conv = Conversation()
-    conv.add("user", "Hello world")
-    conv.add("assistant", "Hello user")
-    conv.add("user", "Goodbye world")
-
-    results = conv.search("Hello")
-    assert_equal(
-        len(results), 2, "Should find two messages with 'Hello'"
-    )
-
-    results = conv.search("Goodbye")
-    assert_equal(
-        len(results), 1, "Should find one message with 'Goodbye'"
-    )
-
-
-def test_export_import():
-    """Test exporting and importing conversation"""
-    logger.info("Testing export/import functionality")
-
-    conv = Conversation(name="export-test")
-    conv.add("user", "Test message")
-
-    # Test JSON export/import
-    test_file = "test_conversation_export.json"
-    conv.export_conversation(test_file)
-
-    assert_true(os.path.exists(test_file), "Export file should exist")
-
-    new_conv = Conversation(name="import-test")
-    new_conv.import_conversation(test_file)
-
-    assert_equal(
-        len(new_conv.conversation_history),
-        len(conv.conversation_history),
-        "Imported conversation should have same number of messages",
-    )
-
-    # Cleanup
-    os.remove(test_file)
-
-
-def test_message_counting():
-    """Test message counting functionality"""
-    logger.info("Testing message counting functionality")
-
-    conv = Conversation()
-    conv.add("user", "User message")
-    conv.add("assistant", "Assistant message")
-    conv.add("system", "System message")
-
-    counts = conv.count_messages_by_role()
-    assert_equal(counts["user"], 1, "Should have one user message")
-    assert_equal(
-        counts["assistant"], 1, "Should have one assistant message"
-    )
-    assert_equal(
-        counts["system"], 1, "Should have one system message"
-    )
-
-
-def test_conversation_string_representation():
-    """Test string representation methods"""
-    logger.info("Testing string representation methods")
-
-    conv = Conversation()
-    conv.add("user", "Test message")
-
-    str_repr = conv.return_history_as_string()
-    assert_true(
-        "user: Test message" in str_repr,
-        "String representation should contain message",
-    )
-
-    json_repr = conv.to_json()
-    assert_true(
-        isinstance(json_repr, str),
-        "JSON representation should be string",
-    )
-    assert_true(
-        "Test message" in json_repr,
-        "JSON should contain message content",
-    )
-
-
-def test_memory_management():
-    """Test memory management functions"""
-    logger.info("Testing memory management functions")
-
-    conv = Conversation()
-    conv.add("user", "Message 1")
-    conv.add("assistant", "Message 2")
-
-    # Test clear
-    conv.clear()
-    assert_equal(
-        len(conv.conversation_history),
-        0,
-        "History should be empty after clear",
-    )
-
-    # Test truncate
-    conv = Conversation(context_length=100, token_count=True)
-    long_message = (
-        "This is a very long message that should be truncated " * 10
-    )
-    conv.add("user", long_message)
-    conv.truncate_memory_with_tokenizer()
-    assert_true(
-        len(conv.conversation_history[0]["content"])
-        < len(long_message),
-        "Message should be truncated",
-    )
-
-
-def test_backend_initialization():
-    """Test different backend initializations"""
-    logger.info("Testing backend initialization")
-
-    # Test Redis backend
-    conv = Conversation(
-        backend="redis",
-        redis_host="localhost",
-        redis_port=6379,
-        redis_db=0,
-        use_embedded_redis=True,
-    )
-    assert_equal(conv.backend, "redis", "Backend should be redis")
-
-    # Test SQLite backend
-    conv = Conversation(
-        backend="sqlite",
-        db_path=":memory:",
-        table_name="test_conversations",
-    )
-    assert_equal(conv.backend, "sqlite", "Backend should be sqlite")
-
-    # Test DuckDB backend
-    conv = Conversation(
-        backend="duckdb",
-        db_path=":memory:",
-        table_name="test_conversations",
-    )
-    assert_equal(conv.backend, "duckdb", "Backend should be duckdb")
-
-
-def test_conversation_with_system_prompt():
-    """Test conversation with system prompt and rules"""
-    logger.info("Testing conversation with system prompt and rules")
-
-    conv = Conversation(
-        system_prompt="You are a helpful assistant",
-        rules="Be concise and clear",
-        custom_rules_prompt="Follow these guidelines",
-        time_enabled=True,
-    )
-
-    history = conv.conversation_history
-    assert_equal(
-        len(history),
-        3,
-        "Should have system prompt, rules, and custom rules",
-    )
-    assert_equal(
-        history[0]["content"],
-        "You are a helpful assistant",
-        "System prompt should match",
-    )
-    assert_equal(
-        history[1]["content"],
-        "Be concise and clear",
-        "Rules should match",
-    )
-    assert_true(
-        "timestamp" in history[0], "Messages should have timestamps"
-    )
-
-
-def test_batch_operations():
-    """Test batch operations on conversation"""
-    logger.info("Testing batch operations")
-
-    conv = Conversation()
-
-    # Test batch add
-    roles = ["user", "assistant", "user"]
-    contents = ["Hello", "Hi there", "How are you?"]
-    conv.add_multiple_messages(roles, contents)
-
-    assert_equal(
-        len(conv.conversation_history),
-        3,
-        "Should have three messages",
-    )
-
-    # Test batch search
-    results = conv.search("Hi")
-    assert_equal(len(results), 1, "Should find one message with 'Hi'")
-
-
-def test_conversation_export_formats():
-    """Test different export formats"""
-    logger.info("Testing export formats")
-
-    conv = Conversation(name="export-test")
-    conv.add("user", "Test message")
-
-    # Test YAML export
-    conv.export_method = "yaml"
-    conv.save_filepath = "test_conversation.yaml"
-    conv.export()
-    assert_true(
-        os.path.exists("test_conversation.yaml"),
-        "YAML file should exist",
-    )
-
-    # Test JSON export
-    conv.export_method = "json"
-    conv.save_filepath = "test_conversation.json"
-    conv.export()
-    assert_true(
-        os.path.exists("test_conversation.json"),
-        "JSON file should exist",
-    )
-
-    # Cleanup
-    os.remove("test_conversation.yaml")
-    os.remove("test_conversation.json")
-
-
-def test_conversation_with_token_counting():
-    """Test conversation with token counting enabled"""
-    logger.info("Testing token counting functionality")
-
-    conv = Conversation(
-        token_count=True,
-        tokenizer_model_name="gpt-4.1",
-        context_length=1000,
-    )
-
-    conv.add("user", "This is a test message")
-    assert_true(
-        "token_count" in conv.conversation_history[0],
-        "Message should have token count",
-    )
-
-    # Test token counting with different message types
-    conv.add(
-        "assistant", {"response": "This is a structured response"}
-    )
-    assert_true(
-        "token_count" in conv.conversation_history[1],
-        "Structured message should have token count",
-    )
-
-
-def test_conversation_message_categories():
-    """Test conversation with message categories"""
-    logger.info("Testing message categories")
-
-    conv = Conversation()
-
-    # Add messages with categories
-    conv.add("user", "Input message", category="input")
-    conv.add("assistant", "Output message", category="output")
-
-    # Test category counting
-    token_counts = conv.export_and_count_categories()
-    assert_true(
-        "input_tokens" in token_counts,
-        "Should have input token count",
-    )
-    assert_true(
-        "output_tokens" in token_counts,
-        "Should have output token count",
-    )
-    assert_true(
-        "total_tokens" in token_counts,
-        "Should have total token count",
-    )
-
-
-def test_conversation_persistence():
-    """Test conversation persistence and loading"""
-    logger.info("Testing conversation persistence")
-
-    # Create and save conversation
-    conv1 = Conversation(
-        name="persistence-test",
-        system_prompt="Test prompt",
-        time_enabled=True,
-        autosave=True,
-    )
-    conv1.add("user", "Test message")
-    conv1.export()
-
-    # Load conversation
-    conv2 = Conversation.load_conversation(name="persistence-test")
-    assert_equal(
-        conv2.system_prompt,
-        "Test prompt",
-        "System prompt should persist",
-    )
-    assert_equal(
-        len(conv2.conversation_history),
-        2,
-        "Should have system prompt and message",
-    )
-
-
-def test_conversation_utilities():
-    """Test various utility methods"""
-    logger.info("Testing utility methods")
-
-    conv = Conversation(message_id_on=True)
-    conv.add("user", "First message")
-    conv.add("assistant", "Second message")
-
-    # Test getting last message
-    last_msg = conv.get_last_message_as_string()
-    assert_true(
-        "Second message" in last_msg,
-        "Should get correct last message",
-    )
-
-    # Test getting messages as list
-    msg_list = conv.return_messages_as_list()
-    assert_equal(len(msg_list), 2, "Should have two messages in list")
-
-    # Test getting messages as dictionary
-    msg_dict = conv.return_messages_as_dictionary()
-    assert_equal(
-        len(msg_dict), 2, "Should have two messages in dictionary"
-    )
-
-    # Test message IDs
-    assert_true(
-        "message_id" in conv.conversation_history[0],
-        "Messages should have IDs when enabled",
-    )
-
-
-def test_conversation_error_handling():
-    """Test error handling in conversation methods"""
-    logger.info("Testing error handling")
-
-    conv = Conversation()
-
-    # Test invalid export method
-    try:
-        conv.export_method = "invalid"
-        conv.export()
-        assert_true(
-            False, "Should raise ValueError for invalid export method"
-        )
-    except ValueError:
-        assert_true(
-            True, "Should catch ValueError for invalid export method"
-        )
-
-    # Test invalid backend
-    try:
-        Conversation(backend="invalid_backend")
-        assert_true(
-            False, "Should raise ValueError for invalid backend"
-        )
-    except ValueError:
-        assert_true(
-            True, "Should catch ValueError for invalid backend"
-        )
-
-
-def run_all_tests():
-    """Run all test functions"""
-    logger.info("Starting all tests")
-
-    test_functions = [
-        test_conversation_initialization,
-        test_add_message,
-        test_delete_message,
-        test_update_message,
-        test_search_messages,
-        test_export_import,
-        test_message_counting,
-        test_conversation_string_representation,
-        test_memory_management,
-        test_backend_initialization,
-        test_conversation_with_system_prompt,
-        test_batch_operations,
-        test_conversation_export_formats,
-        test_conversation_with_token_counting,
-        test_conversation_message_categories,
-        test_conversation_persistence,
-        test_conversation_utilities,
-        test_conversation_error_handling,
-    ]
-
-    passed = 0
-    failed = 0
-
-    for test_func in test_functions:
-        try:
-            logger.info(f"Running {test_func.__name__}")
-            test_func()
-            passed += 1
-            logger.success(f"{test_func.__name__} passed")
-        except Exception as e:
-            failed += 1
-            logger.error(f"{test_func.__name__} failed: {str(e)}")
-
-    logger.info(f"Test summary: {passed} passed, {failed} failed")
-    return passed, failed
-
-
-if __name__ == "__main__":
-    passed, failed = run_all_tests()
-    if failed > 0:
-        exit(1)