diff --git a/docs/swarms/utils/math_eval.md b/docs/swarms/utils/math_eval.md
new file mode 100644
index 00000000..21fd62cc
--- /dev/null
+++ b/docs/swarms/utils/math_eval.md
@@ -0,0 +1,99 @@
+# Math Evaluation Decorator Documentation
+
+## Introduction
+The Math Evaluation Decorator is a utility function that compares the output of two functions, `func1` and `func2`, when they are given the same input. It is particularly useful for validating whether a generated function produces the same results as a ground-truth function. This documentation explains the decorator's purpose and usage and provides examples.
+
+## Purpose
+The Math Evaluation Decorator serves the following purposes:
+1. To compare the output of two functions, `func1` and `func2`, when given the same input.
+2. To log any errors that occur during the evaluation.
+3. To emit a warning if the outputs of `func1` and `func2` do not match.
+
+## Decorator Definition
+```python
+def math_eval(func1, func2):
+    """Math evaluation decorator.
+
+    Args:
+        func1 (Callable): The first function to be evaluated.
+        func2 (Callable): The second function to be evaluated.
+
+    Example:
+    >>> @math_eval(ground_truth, generated_func)
+    ... def test_func(x):
+    ...     return x
+    >>> result1, result2 = test_func(5)
+    >>> print(f"Result from ground_truth: {result1}")
+    >>> print(f"Result from generated_func: {result2}")
+
+    """
+```
+
+### Parameters
+| Parameter | Type       | Description                          |
+|-----------|------------|--------------------------------------|
+| `func1`   | `Callable` | The first function to be evaluated.  |
+| `func2`   | `Callable` | The second function to be evaluated. |
+
+## Usage
+The Math Evaluation Decorator is applied to a test function whose inputs you want to evaluate. Here's how to use it:
+
+1. Define the two functions, `func1` and `func2`, that you want to compare.
+
+2. Create a test function and decorate it with `@math_eval(func1, func2)`.
+
+3. Call the decorated test function with the desired input(s); the decorator passes them to both `func1` and `func2`.
+
+4. The decorator compares the outputs of `func1` and `func2` for those input(s) and returns both results as a tuple.
+
+5. Any errors that occur during the evaluation are logged.
+
+6. If the outputs of `func1` and `func2` do not match, a warning is logged.
+
+## Examples
+
+### Example 1: Comparing Two Simple Functions
+```python
+# Define the ground truth function
+def ground_truth(x):
+    return x * 2
+
+# Define the generated function
+def generated_func(x):
+    return x - 10
+
+# Create a test function and decorate it
+@math_eval(ground_truth, generated_func)
+def test_func(x):
+    return x
+
+# Evaluate the test function with an input
+result1, result2 = test_func(5)
+
+# Print the results
+print(f"Result from ground_truth: {result1}")
+print(f"Result from generated_func: {result2}")
+```
+
+In this example, the decorator compares the outputs of `ground_truth` and `generated_func` for the input `5`: `ground_truth(5)` returns `10` while `generated_func(5)` returns `-5`, so a mismatch warning is logged alongside the two results.
+
+### Example 2: Handling Errors
+If an error occurs in either `func1` or `func2`, the decorator logs the error and sets the corresponding result to `None`. This ensures that the evaluation continues even if one of the functions encounters an issue. A minimal sketch of this behaviour is shown below.
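+
+The following snippet is a sketch for illustration only; `failing_func` and `safe_func` are hypothetical helpers, not part of the swarms library.
+
+```python
+from swarms.utils.math_eval import math_eval
+
+# Hypothetical example functions used only to demonstrate error handling
+def failing_func(x):
+    # Raises ZeroDivisionError when x == 0
+    return 10 / x
+
+def safe_func(x):
+    return x
+
+@math_eval(failing_func, safe_func)
+def test_func(x):
+    return x
+
+# failing_func(0) raises, so the decorator logs the error and returns None for it;
+# because None != 0, a mismatch warning is logged as well.
+result1, result2 = test_func(0)
+print(result1)  # None
+print(result2)  # 0
+```
+
+Because errors are caught and logged rather than re-raised, check the logs or the returned `None` values to detect failures.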
+
+## Additional Information and Tips
+
+- The Math Evaluation Decorator is a powerful tool for comparing the outputs of functions, especially when validating machine learning models or generated code.
+
+- Make sure that `func1` and `func2` accept the same input(s) so that the comparison is meaningful.
+
+- Regularly check the logs for any errors or warnings generated during the evaluation.
+
+- If the decorator logs a warning about mismatched outputs, investigate and debug the functions accordingly.
+
+## References and Resources
+
+- For more information on Python decorators, refer to the [Python Decorators Documentation](https://docs.python.org/3/glossary.html#term-decorator).
+
+- Explore advanced use cases of the Math Evaluation Decorator in your projects to ensure code correctness and reliability.
+
+This documentation has covered the Math Evaluation Decorator's purpose and usage, with examples. Use the decorator to compare function outputs and validate generated code effectively.
\ No newline at end of file
diff --git a/mkdocs.yml b/mkdocs.yml
index b0e34ca7..eeb64c04 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -110,6 +110,7 @@ nav:
     - ShortTermMemory: "swarms/memory/short_term_memory.md"
   - swarms.utils:
     - phoenix_trace_decorator: "swarms/utils/phoenix_tracer.md"
+    - math_eval: "swarms/utils/math_eval.md"
 - Guides:
   - Overview: "examples/index.md"
   - Agents:
diff --git a/pyproject.toml b/pyproject.toml
index 7022250b..aa9962eb 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
 
 [tool.poetry]
 name = "swarms"
-version = "2.9.2"
+version = "2.9.3"
 description = "Swarms - Pytorch"
 license = "MIT"
 authors = ["Kye Gomez "]
diff --git a/swarms/memory/__init__.py b/swarms/memory/__init__.py
index 086bd1bd..4f92880a 100644
--- a/swarms/memory/__init__.py
+++ b/swarms/memory/__init__.py
@@ -1,7 +1,4 @@
 from swarms.memory.base_vectordb import VectorDatabase
 from swarms.memory.short_term_memory import ShortTermMemory
 
-__all__ = [
-    "VectorDatabase",
-    "ShortTermMemory"
-]
+__all__ = ["VectorDatabase", "ShortTermMemory"]
diff --git a/swarms/models/__init__.py b/swarms/models/__init__.py
index 173c9487..288d4d95 100644
--- a/swarms/models/__init__.py
+++ b/swarms/models/__init__.py
@@ -31,7 +31,8 @@ from swarms.models.layoutlm_document_qa import (
 )  # noqa: E402
 from swarms.models.gpt4_vision_api import GPT4VisionAPI  # noqa: E402
 from swarms.models.openai_tts import OpenAITTS  # noqa: E402
-from swarms.models.gemini import Gemini # noqa: E402
+from swarms.models.gemini import Gemini  # noqa: E402
+
 # from swarms.models.gpt4v import GPT4Vision
 # from swarms.models.dalle3 import Dalle3
 # from swarms.models.distilled_whisperx import DistilWhisperModel  # noqa: E402
diff --git a/swarms/utils/__init__.py b/swarms/utils/__init__.py
index 7c3ef717..9ddbd324 100644
--- a/swarms/utils/__init__.py
+++ b/swarms/utils/__init__.py
@@ -4,6 +4,7 @@ from swarms.utils.parse_code import (
     extract_code_in_backticks_in_string,
 )
 from swarms.utils.pdf_to_text import pdf_to_text
+from swarms.utils.math_eval import math_eval
 
 # from swarms.utils.phoenix_handler import phoenix_trace_decorator
 
@@ -13,4 +14,5 @@ __all__ = [
     "extract_code_in_backticks_in_string",
     "pdf_to_text",
     # "phoenix_trace_decorator",
+    "math_eval",
 ]
diff --git a/swarms/utils/math_eval.py b/swarms/utils/math_eval.py
new file mode 100644
index 00000000..6dbff2b5
--- /dev/null
+++ b/swarms/utils/math_eval.py
@@ -0,0 +1,61 @@
+import functools
+import logging
+
+
+def math_eval(func1, func2):
+    """Math evaluation decorator.
+
+    Args:
+        func1 (Callable): The first function to be evaluated.
+        func2 (Callable): The second function to be evaluated.
+
+    Example:
+    >>> @math_eval(ground_truth, generated_func)
+    ... def test_func(x):
+    ...     return x
+    >>> result1, result2 = test_func(5)
+    >>> print(f"Result from ground_truth: {result1}")
+    >>> print(f"Result from generated_func: {result2}")
+
+    """
+
+    def decorator(func):
+        @functools.wraps(func)
+        def wrapper(*args, **kwargs):
+            try:
+                result1 = func1(*args, **kwargs)
+            except Exception as e:
+                logging.error(f"Error in func1: {e}")
+                result1 = None
+
+            try:
+                result2 = func2(*args, **kwargs)
+            except Exception as e:
+                logging.error(f"Error in func2: {e}")
+                result2 = None
+
+            if result1 != result2:
+                logging.warning(
+                    f"Outputs do not match: {result1} != {result2}"
+                )
+
+            return result1, result2
+
+        return wrapper
+
+    return decorator
+
+
+# def ground_truth(x):
+#     return x * 2
+
+# def generated_func(x):
+#     return x - 10
+
+# @math_eval(ground_truth, generated_func)
+# def test_func(x):
+#     return x
+
+# result1, result2 = test_func(5)
+# print(f"Result from ground_truth: {result1}")
+# print(f"Result from generated_func: {result2}")
diff --git a/test_math_eval.py b/test_math_eval.py
new file mode 100644
index 00000000..91013ae3
--- /dev/null
+++ b/test_math_eval.py
@@ -0,0 +1,89 @@
+import pytest
+from swarms.utils.math_eval import math_eval
+
+
+def test_math_eval_same_output():
+    @math_eval(lambda x: x + 1, lambda x: x + 1)
+    def func(x):
+        return x
+
+    for i in range(20):
+        result1, result2 = func(i)
+        assert result1 == result2
+        assert result1 == i + 1
+
+
+def test_math_eval_different_output():
+    @math_eval(lambda x: x + 1, lambda x: x + 2)
+    def func(x):
+        return x
+
+    for i in range(20):
+        result1, result2 = func(i)
+        assert result1 != result2
+        assert result1 == i + 1
+        assert result2 == i + 2
+
+
+def test_math_eval_exception_in_func1():
+    @math_eval(lambda x: 1 / x, lambda x: x)
+    def func(x):
+        return x
+
+    result1, result2 = func(0)
+    assert result1 is None and result2 == 0
+
+
+def test_math_eval_exception_in_func2():
+    @math_eval(lambda x: x, lambda x: 1 / x)
+    def func(x):
+        return x
+
+    result1, result2 = func(0)
+    assert result1 == 0 and result2 is None
+
+
+def test_math_eval_with_multiple_arguments():
+    @math_eval(lambda x, y: x + y, lambda x, y: y + x)
+    def func(x, y):
+        return x, y
+
+    for i in range(10):
+        for j in range(10):
+            result1, result2 = func(i, j)
+            assert result1 == result2
+            assert result1 == i + j
+
+
+def test_math_eval_with_kwargs():
+    @math_eval(lambda x, y=0: x + y, lambda x, y=0: y + x)
+    def func(x, y=0):
+        return x, y
+
+    for i in range(10):
+        for j in range(10):
+            result1, result2 = func(i, y=j)
+            assert result1 == result2
+            assert result1 == i + j
+
+
+def test_math_eval_with_no_arguments():
+    @math_eval(lambda: 1, lambda: 1)
+    def func():
+        return
+
+    result1, result2 = func()
+    assert result1 == result2
+    assert result1 == 1
+
+
+def test_math_eval_with_different_types():
+    @math_eval(lambda x: str(x), lambda x: x)
+    def func(x):
+        return x
+
+    for i in range(10):
+        result1, result2 = func(i)
+        assert result1 != result2
+        assert result1 == str(i)
+        assert result2 == i
diff --git a/tests/memory/test_short_term_memory.py b/tests/memory/test_short_term_memory.py
index 32d5d008..903c3a0e 100644
--- a/tests/memory/test_short_term_memory.py
+++ b/tests/memory/test_short_term_memory.py
@@ -2,6 +2,7 @@ import pytest
 from swarms.memory.short_term_memory import ShortTermMemory
 import threading
+
 
 def test_init():
     memory = ShortTermMemory()
     assert memory.short_term_memory == []
@@ -108,16 +109,21 @@ def test_return_medium_memory_as_str():
 
 def test_thread_safety():
     memory = ShortTermMemory()
+
     def add_messages():
         for _ in range(1000):
             memory.add("user", "Hello, world!")
-    threads = [threading.Thread(target=add_messages) for _ in range(10)]
+
+    threads = [
+        threading.Thread(target=add_messages) for _ in range(10)
+    ]
     for thread in threads:
         thread.start()
     for thread in threads:
         thread.join()
     assert len(memory.get_short_term()) == 10000
 
+
 def test_save_and_load():
     memory1 = ShortTermMemory()
     memory1.add("user", "Hello, world!")
@@ -125,4 +131,4 @@ def test_save_and_load():
     memory2 = ShortTermMemory()
     memory2.load_from_file("memory.json")
     assert memory1.get_short_term() == memory2.get_short_term()
-    assert memory1.get_medium_term() == memory2.get_medium_term()
\ No newline at end of file
+    assert memory1.get_medium_term() == memory2.get_medium_term()