commit b00b9a55d7 (parent 654dfb7089)

@@ -0,0 +1,99 @@
# Math Evaluation Decorator Documentation

## Introduction

The Math Evaluation Decorator is a utility function that helps you compare the output of two functions, `func1` and `func2`, when given the same input. This decorator is particularly useful for validating whether a generated function produces the same results as a ground truth function. This documentation provides a detailed explanation of the Math Evaluation Decorator, its purpose, usage, and examples.

## Purpose

The Math Evaluation Decorator serves the following purposes:

1. To compare the output of two functions, `func1` and `func2`, when given the same input.

2. To log any errors that may occur during the evaluation.

3. To provide a warning if the outputs of `func1` and `func2` do not match.
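
To make the call contract concrete, here is a minimal sketch based on the implementation and tests included in this PR: the decorated test function returns a tuple containing both results, in order.

```python
from swarms.utils.math_eval import math_eval

def func1(x):
    return x + 1

def func2(x):
    return x + 1

@math_eval(func1, func2)
def test_func(x):
    return x

# The wrapper calls func1(5) and func2(5) and returns both results as a tuple
result1, result2 = test_func(5)  # (6, 6)
```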

## Decorator Definition

```python
def math_eval(func1, func2):
    """Math evaluation decorator.

    Args:
        func1 (Callable): The first function to be evaluated.
        func2 (Callable): The second function to be evaluated.

    Example:
        >>> @math_eval(ground_truth, generated_func)
        ... def test_func(x):
        ...     return x
        >>> result1, result2 = test_func(5)
        >>> print(f"Result from ground_truth: {result1}")
        >>> print(f"Result from generated_func: {result2}")
    """
```

### Parameters

| Parameter | Type     | Description                          |
|-----------|----------|--------------------------------------|
| `func1`   | Callable | The first function to be evaluated.  |
| `func2`   | Callable | The second function to be evaluated. |

## Usage

The Math Evaluation Decorator is applied to a test function whose arguments you want to feed to both `func1` and `func2`. Here's how to use it:

1. Define the two functions, `func1` and `func2`, that you want to compare.

2. Create a test function and decorate it with `@math_eval(func1, func2)`.

3. Call the decorated test function with the input(s); the decorator forwards the same input(s) to both `func1` and `func2`.

4. The decorator compares the outputs of `func1` and `func2` and returns them as a tuple `(result1, result2)`.

5. Any errors that occur during the evaluation are logged.

6. If the outputs of `func1` and `func2` do not match, a warning is logged.

Note that the wrapper calls `func1` and `func2` directly with the test function's arguments; the body of the test function itself does not affect the comparison.
## Examples

### Example 1: Comparing Two Simple Functions

```python
from swarms.utils.math_eval import math_eval

# Define the ground truth function
def ground_truth(x):
    return x * 2

# Define the generated function
def generated_func(x):
    return x - 10

# Create a test function and decorate it
@math_eval(ground_truth, generated_func)
def test_func(x):
    return x

# Evaluate the test function with an input
result1, result2 = test_func(5)

# Print the results
print(f"Result from ground_truth: {result1}")
print(f"Result from generated_func: {result2}")
```

In this example, the decorator compares the outputs of `ground_truth` and `generated_func` for the input `5`: `ground_truth(5)` returns `10` while `generated_func(5)` returns `-5`, so the outputs do not match and a warning is logged.

### Example 2: Handling Errors

If an error occurs in either `func1` or `func2`, the decorator will log the error and set the result to `None`. This ensures that the evaluation continues even if one of the functions encounters an issue.
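
Based on the implementation in this PR, here is a short sketch of the error path: `ground_truth` raises `ZeroDivisionError` for `x == 0`, which the decorator catches and logs, substituting `None` for that result.

```python
from swarms.utils.math_eval import math_eval

def ground_truth(x):
    return 10 / x  # raises ZeroDivisionError when x == 0

def generated_func(x):
    return x

@math_eval(ground_truth, generated_func)
def test_func(x):
    return x

# The decorator catches the exception, logs it, and still returns a tuple.
# A mismatch warning is also logged, since None != 0.
result1, result2 = test_func(0)
print(result1)  # None
print(result2)  # 0
```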

## Additional Information and Tips

- The Math Evaluation Decorator is a powerful tool for comparing the outputs of functions, especially when validating machine learning models or generated code.

- Ensure that the functions `func1` and `func2` take the same input(s) so that the comparison is meaningful.

- Regularly check the logs for any errors or warnings generated during the evaluation (see the logging sketch after this list).

- If the decorator logs a warning about mismatched outputs, investigate and debug the functions accordingly.
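
The decorator logs through the standard library's root logger, so a minimal configuration such as the following (an illustrative sketch, not part of this PR) adds timestamps and controls verbosity:

```python
import logging

# Show INFO and above from the root logger, with timestamps and levels
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s %(levelname)s %(message)s",
)
```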

## References and Resources

- For more information on Python decorators, refer to the [Python Decorators Documentation](https://docs.python.org/3/glossary.html#term-decorator).

- Explore advanced use cases of the Math Evaluation Decorator in your projects to ensure code correctness and reliability.

This documentation explains the Math Evaluation Decorator, its purpose, usage, and examples. Use this decorator to compare the outputs of functions and validate code effectively.

@@ -1,7 +1,4 @@
 from swarms.memory.base_vectordb import VectorDatabase
 from swarms.memory.short_term_memory import ShortTermMemory

-__all__ = [
-    "VectorDatabase",
-    "ShortTermMemory"
-]
+__all__ = ["VectorDatabase", "ShortTermMemory"]

@@ -0,0 +1,61 @@
import functools
import logging


def math_eval(func1, func2):
    """Math evaluation decorator.

    Args:
        func1 (Callable): The first function to be evaluated.
        func2 (Callable): The second function to be evaluated.

    Example:
        >>> @math_eval(ground_truth, generated_func)
        ... def test_func(x):
        ...     return x
        >>> result1, result2 = test_func(5)
        >>> print(f"Result from ground_truth: {result1}")
        >>> print(f"Result from generated_func: {result2}")
    """

    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            # Evaluate func1; on failure, log the error and record None
            try:
                result1 = func1(*args, **kwargs)
            except Exception as e:
                logging.error(f"Error in func1: {e}")
                result1 = None

            # Evaluate func2 the same way
            try:
                result2 = func2(*args, **kwargs)
            except Exception as e:
                logging.error(f"Error in func2: {e}")
                result2 = None

            # Warn when the two outputs disagree
            if result1 != result2:
                logging.warning(
                    f"Outputs do not match: {result1} != {result2}"
                )

            return result1, result2

        return wrapper

    return decorator


# def ground_truth(x):
#     return x * 2

# def generated_func(x):
#     return x - 10

# @math_eval(ground_truth, generated_func)
# def test_func(x):
#     return x

# result1, result2 = test_func(5)
# print(f"Result from ground_truth: {result1}")
# print(f"Result from generated_func: {result2}")

@@ -0,0 +1,89 @@
from swarms.utils.math_eval import math_eval


def test_math_eval_same_output():
    @math_eval(lambda x: x + 1, lambda x: x + 1)
    def func(x):
        return x

    for i in range(20):
        result1, result2 = func(i)
        assert result1 == result2
        assert result1 == i + 1


def test_math_eval_different_output():
    @math_eval(lambda x: x + 1, lambda x: x + 2)
    def func(x):
        return x

    for i in range(20):
        result1, result2 = func(i)
        assert result1 != result2
        assert result1 == i + 1
        assert result2 == i + 2


def test_math_eval_exception_in_func1():
    @math_eval(lambda x: 1 / x, lambda x: x)
    def func(x):
        return x

    # The decorator swallows the ZeroDivisionError, logs it,
    # and substitutes None for func1's result
    result1, result2 = func(0)
    assert result1 is None
    assert result2 == 0


def test_math_eval_exception_in_func2():
    @math_eval(lambda x: x, lambda x: 1 / x)
    def func(x):
        return x

    # Same as above, but the failure occurs in func2
    result1, result2 = func(0)
    assert result1 == 0
    assert result2 is None


def test_math_eval_with_multiple_arguments():
    @math_eval(lambda x, y: x + y, lambda x, y: y + x)
    def func(x, y):
        return x, y

    for i in range(10):
        for j in range(10):
            result1, result2 = func(i, j)
            assert result1 == result2
            assert result1 == i + j


def test_math_eval_with_kwargs():
    @math_eval(lambda x, y=0: x + y, lambda x, y=0: y + x)
    def func(x, y=0):
        return x, y

    for i in range(10):
        for j in range(10):
            result1, result2 = func(i, y=j)
            assert result1 == result2
            assert result1 == i + j


def test_math_eval_with_no_arguments():
    @math_eval(lambda: 1, lambda: 1)
    def func():
        return

    result1, result2 = func()
    assert result1 == result2
    assert result1 == 1


def test_math_eval_with_different_types():
    @math_eval(lambda x: str(x), lambda x: x)
    def func(x):
        return x

    for i in range(10):
        result1, result2 = func(i)
        assert result1 != result2
        assert result1 == str(i)
        assert result2 == i