[math_eval]

1 year ago · b00b9a55d7
parent 654dfb7089
commit b00b9a55d7
9 changed files with 264 additions and 8 deletions
--- a/docs/swarms/utils/math_eval.md
+++ b/docs/swarms/utils/math_eval.md
@ -0,0 +1,99 @@
+# Math Evaluation Decorator Documentation
+
+## Introduction
+The Math Evaluation Decorator is a utility function that helps you compare the output of two functions, `func1` and `func2`, when given the same input. This decorator is particularly useful for validating whether a generated function produces the same results as a ground truth function. This documentation provides a detailed explanation of the Math Evaluation Decorator, its purpose, usage, and examples.
+
+## Purpose
+The Math Evaluation Decorator serves the following purposes:
+1. To compare the output of two functions, `func1` and `func2`, when given the same input.
+2. To log any errors that may occur during the evaluation.
+3. To provide a warning if the outputs of `func1` and `func2` do not match.
+
+## Decorator Definition
+```python
+def math_eval(func1, func2):
+    """Math evaluation decorator.
+
+    Args:
+        func1 (Callable): The first function to be evaluated.
+        func2 (Callable): The second function to be evaluated.
+
+    Example:
+    >>> @math_eval(ground_truth, generated_func)
+    ... def test_func(x):
+    ...     return x
+    >>> result1, result2 = test_func(5)
+    >>> print(f"Result from ground_truth: {result1}")
+    >>> print(f"Result from generated_func: {result2}")
+
+    """
+```
+
+### Parameters
+| Parameter | Type       | Description                          |
+|-----------|------------|--------------------------------------|
+| `func1`   | `Callable` | The first function to be evaluated.  |
+| `func2`   | `Callable` | The second function to be evaluated. |
+
+## Usage
+The Math Evaluation Decorator is used as a decorator for a test function that you want to evaluate. Here's how to use it:
+
+1. Define the two functions, `func1` and `func2`, that you want to compare.
+
+2. Create a test function and decorate it with `@math_eval(func1, func2)`.
+
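+3. Call the decorated test function with the input(s) you want to evaluate. The decorator forwards the same positional and keyword arguments to both `func1` and `func2`; the body of the test function itself is not executed.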
+
+4. The decorator will compare the outputs of `func1` and `func2` when given the same input(s).
+
+5. Any errors that occur during the evaluation will be logged.
+
+6. If the outputs of `func1` and `func2` do not match, a warning is logged. In every case the call returns the tuple `(result1, result2)`.
+
+## Examples
+
+### Example 1: Comparing Two Simple Functions
+```python
+# Define the ground truth function
+def ground_truth(x):
+    return x * 2
+
+# Define the generated function
+def generated_func(x):
+    return x - 10
+
+# Create a test function and decorate it
+@math_eval(ground_truth, generated_func)
+def test_func(x):
+    return x
+
+# Evaluate the test function with an input
+result1, result2 = test_func(5)
+
+# Print the results
+print(f"Result from ground_truth: {result1}")
+print(f"Result from generated_func: {result2}")
+```
+
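+In this example, the decorator compares the outputs of `ground_truth` and `generated_func` when given the input `5`. Here `ground_truth(5)` returns `10` and `generated_func(5)` returns `-5`, so the call returns `(10, -5)` and the warning `Outputs do not match: 10 != -5` is logged.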
+
+### Example 2: Handling Errors
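+If an error occurs in either `func1` or `func2`, the decorator logs the error and sets that function's result to `None` instead of raising. This ensures that the evaluation continues even if one of the functions encounters an issue. Note that a failed call is therefore indistinguishable from a call that returned `None`: if both functions fail, both results are `None` and no mismatch warning is logged.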
+
+## Additional Information and Tips
+
+- The Math Evaluation Decorator is a powerful tool for comparing the outputs of functions, especially when validating machine learning models or generated code.
+
+- Ensure that the functions `func1` and `func2` take the same input(s) to ensure a meaningful comparison.
+
+- Regularly check the logs for any errors or warnings generated during the evaluation.
+
+- If the decorator logs a warning about mismatched outputs, investigate and debug the functions accordingly.
+
+## References and Resources
+
+- For more information on Python decorators, refer to the [Python Decorators Documentation](https://docs.python.org/3/glossary.html#term-decorator).
+
+- Explore advanced use cases of the Math Evaluation Decorator in your projects to ensure code correctness and reliability.
+
+This comprehensive documentation explains the Math Evaluation Decorator, its purpose, usage, and examples. Use this decorator to compare the outputs of functions and validate code effectively.
--- a/mkdocs.yml
+++ b/mkdocs.yml
@ -110,6 +110,7 @@ nav:
    - ShortTermMemory: "swarms/memory/short_term_memory.md"
  - swarms.utils:
    - phoenix_trace_decorator: "swarms/utils/phoenix_tracer.md"
+    - math_eval: "swarms/utils/math_eval.md"
 - Guides:
    - Overview: "examples/index.md"
    - Agents:
--- a/pyproject.toml
+++ b/pyproject.toml
@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"

 [tool.poetry]
 name = "swarms"
-version = "2.9.2"
+version = "2.9.3"
 description = "Swarms - Pytorch"
 license = "MIT"
 authors = ["Kye Gomez <kye@apac.ai>"]
--- a/swarms/memory/init.py
+++ b/swarms/memory/init.py
@ -1,7 +1,4 @@
 from swarms.memory.base_vectordb import VectorDatabase
 from swarms.memory.short_term_memory import ShortTermMemory

-__all__ = [
-    "VectorDatabase",
-    "ShortTermMemory"
-]
+__all__ = ["VectorDatabase", "ShortTermMemory"]
--- a/swarms/models/init.py
+++ b/swarms/models/init.py
@ -31,7 +31,8 @@ from swarms.models.layoutlm_document_qa import (
 )  # noqa: E402
 from swarms.models.gpt4_vision_api import GPT4VisionAPI  # noqa: E402
 from swarms.models.openai_tts import OpenAITTS  # noqa: E402
-from swarms.models.gemini import Gemini # noqa: E402
+from swarms.models.gemini import Gemini  # noqa: E402
+
 # from swarms.models.gpt4v import GPT4Vision
 # from swarms.models.dalle3 import Dalle3
 # from swarms.models.distilled_whisperx import DistilWhisperModel # noqa: E402
--- a/swarms/utils/init.py
+++ b/swarms/utils/init.py
@ -4,6 +4,7 @@ from swarms.utils.parse_code import (
    extract_code_in_backticks_in_string,
 )
 from swarms.utils.pdf_to_text import pdf_to_text
+from swarms.utils.math_eval import math_eval

 # from swarms.utils.phoenix_handler import phoenix_trace_decorator

@ -13,4 +14,5 @@ __all__ = [
    "extract_code_in_backticks_in_string",
    "pdf_to_text",
    # "phoenix_trace_decorator",
+    "math_eval",
 ]
--- a/swarms/utils/math_eval.py
+++ b/swarms/utils/math_eval.py
@ -0,0 +1,61 @@
+import functools
+import logging
+
+
+def math_eval(func1, func2):
+    """Build a decorator that runs ``func1`` and ``func2`` on the same input.
+
+    The decorated function is replaced by a wrapper that forwards its
+    positional and keyword arguments to both ``func1`` and ``func2``, logs
+    a warning when the two results differ, and returns both results. The
+    decorated function's own body is never called; it only supplies the
+    metadata copied by ``functools.wraps``.
+
+    Args:
+        func1: First callable to evaluate (e.g. the ground truth).
+        func2: Second callable to evaluate (e.g. the generated function).
+
+    Returns:
+        A decorator whose wrapper returns the tuple ``(result1, result2)``.
+        A result is ``None`` when the corresponding callable raised an
+        ``Exception``; the error is logged instead of being propagated.
+
+    Example:
+    >>> @math_eval(ground_truth, generated_func)
+    ... def test_func(x):
+    ...     return x
+    >>> result1, result2 = test_func(5)
+    >>> print(f"Result from ground_truth: {result1}")
+    >>> print(f"Result from generated_func: {result2}")
+
+    """
+
+    def decorator(func):
+        # ``func`` is only used as the source of the wrapper's metadata.
+        @functools.wraps(func)
+        def wrapper(*args, **kwargs):
+            # Each callable is evaluated in its own try block so that a
+            # failure in one does not prevent the other from running.
+            try:
+                result1 = func1(*args, **kwargs)
+            except Exception as e:
+                logging.error(f"Error in func1: {e}")
+                result1 = None
+
+            try:
+                result2 = func2(*args, **kwargs)
+            except Exception as e:
+                logging.error(f"Error in func2: {e}")
+                result2 = None
+
+            # A failed call and a call that returned None compare equal
+            # here, so two failures do not trigger the mismatch warning.
+            if result1 != result2:
+                logging.warning(
+                    f"Outputs do not match: {result1} != {result2}"
+                )
+
+            return result1, result2
+
+        return wrapper
+
+    return decorator
+
+
+# def ground_truth(x):
+#     return x * 2
+
+# def generated_func(x):
+#     return x - 10
+
+# @math_eval(ground_truth, generated_func)
+# def test_func(x):
+#     return x
+
+# result1, result2 = test_func(5)
+# print(f"Result from ground_truth: {result1}")
+# print(f"Result from generated_func: {result2}")
--- a/test_math_eval.py
+++ b/test_math_eval.py
@ -0,0 +1,89 @@
+import pytest
+from swarms.utils.math_eval import math_eval
+
+
+def test_math_eval_same_output():
+    """Two identical callables (``x + 1``) must yield equal paired results."""
+    @math_eval(lambda x: x + 1, lambda x: x + 1)
+    def func(x):
+        return x
+
+    for i in range(20):
+        result1, result2 = func(i)
+        assert result1 == result2
+        assert result1 == i + 1
+
+
+def test_math_eval_different_output():
+    """Differing callables (``x + 1`` vs ``x + 2``) return their own results."""
+    @math_eval(lambda x: x + 1, lambda x: x + 2)
+    def func(x):
+        return x
+
+    for i in range(20):
+        result1, result2 = func(i)
+        assert result1 != result2
+        assert result1 == i + 1
+        assert result2 == i + 2
+
+
+def test_math_eval_exception_in_func1():
+    @math_eval(lambda x: 1 / x, lambda x: x)
+    def func(x):
+        return x
+
+    with pytest.raises(ZeroDivisionError):
+        func(0)
+
+
+def test_math_eval_exception_in_func2():
+    @math_eval(lambda x: x, lambda x: 1 / x)
+    def func(x):
+        return x
+
+    with pytest.raises(ZeroDivisionError):
+        func(0)
+
+
+def test_math_eval_with_multiple_arguments():
+    """Multiple positional arguments are forwarded to both callables."""
+    @math_eval(lambda x, y: x + y, lambda x, y: y + x)
+    def func(x, y):
+        return x, y
+
+    for i in range(10):
+        for j in range(10):
+            result1, result2 = func(i, j)
+            assert result1 == result2
+            assert result1 == i + j
+
+
+def test_math_eval_with_kwargs():
+    """Keyword arguments are forwarded to both callables."""
+    @math_eval(lambda x, y=0: x + y, lambda x, y=0: y + x)
+    def func(x, y=0):
+        return x, y
+
+    for i in range(10):
+        for j in range(10):
+            result1, result2 = func(i, y=j)
+            assert result1 == result2
+            assert result1 == i + j
+
+
+def test_math_eval_with_no_arguments():
+    """Zero-argument callables are supported and both results are returned."""
+    @math_eval(lambda: 1, lambda: 1)
+    def func():
+        return
+
+    result1, result2 = func()
+    assert result1 == result2
+    assert result1 == 1
+
+
+def test_math_eval_with_different_types():
+    """Results of different types (``str`` vs ``int``) are returned unchanged."""
+    @math_eval(lambda x: str(x), lambda x: x)
+    def func(x):
+        return x
+
+    for i in range(10):
+        result1, result2 = func(i)
+        assert result1 != result2
+        assert result1 == str(i)
+        assert result2 == i
+        assert result2 == i
--- a/tests/memory/test_short_term_memory.py
+++ b/tests/memory/test_short_term_memory.py
@ -2,6 +2,7 @@ import pytest
 from swarms.memory.short_term_memory import ShortTermMemory
 import threading

+
 def test_init():
    memory = ShortTermMemory()
    assert memory.short_term_memory == []
@ -108,16 +109,21 @@ def test_return_medium_memory_as_str():

 def test_thread_safety():
    memory = ShortTermMemory()
+
    def add_messages():
        for _ in range(1000):
            memory.add("user", "Hello, world!")
-    threads = [threading.Thread(target=add_messages) for _ in range(10)]
+
+    threads = [
+        threading.Thread(target=add_messages) for _ in range(10)
+    ]
    for thread in threads:
        thread.start()
    for thread in threads:
        thread.join()
    assert len(memory.get_short_term()) == 10000

+
 def test_save_and_load():
    memory1 = ShortTermMemory()
    memory1.add("user", "Hello, world!")
@ -125,4 +131,4 @@ def test_save_and_load():
    memory2 = ShortTermMemory()
    memory2.load_from_file("memory.json")
    assert memory1.get_short_term() == memory2.get_short_term()
-    assert memory1.get_medium_term() == memory2.get_medium_term()
+    assert memory1.get_medium_term() == memory2.get_medium_term()