diff --git a/swarms/utils/__init__.py b/swarms/utils/__init__.py index df9fe6ca..b57f818d 100644 --- a/swarms/utils/__init__.py +++ b/swarms/utils/__init__.py @@ -25,6 +25,11 @@ from swarms.utils.download_weights_from_url import ( from swarms.utils.save_logs import parse_log_file +######## +from swarms.utils.yaml_output_parser import YamlOutputParser +from swarms.utils.json_output_parser import JsonOutputParser + + __all__ = [ "SubprocessCodeInterpreter", "display_markdown_message", @@ -45,4 +50,6 @@ __all__ = [ "try_except_wrapper", "download_weights_from_url", "parse_log_file", + "YamlOutputParser", + "JsonOutputParser", ] diff --git a/swarms/utils/json_output_parser.py b/swarms/utils/json_output_parser.py new file mode 100644 index 00000000..a180e408 --- /dev/null +++ b/swarms/utils/json_output_parser.py @@ -0,0 +1,82 @@ +import json +import re +from typing import Type, TypeVar +from pydantic import BaseModel, ValidationError + +T = TypeVar("T", bound=BaseModel) + +class JsonParsingException(Exception): + """Custom exception for errors in JSON parsing.""" + +class JsonOutputParser: + """Parse JSON output using a Pydantic model. + + This parser is designed to extract JSON formatted data from a given string + and parse it using a specified Pydantic model for validation. + + Attributes: + pydantic_object: A Pydantic model class for parsing and validation. + pattern: A regex pattern to match JSON code blocks. + + Examples: + >>> from pydantic import BaseModel + >>> from swarms.utils.json_output_parser import JsonOutputParser + >>> class MyModel(BaseModel): + ... name: str + ... age: int + ... + >>> parser = JsonOutputParser(MyModel) + >>> text = "```json\n{\"name\": \"John\", \"age\": 42}\n```" + >>> model = parser.parse(text) + >>> model.name + + """ + + def __init__(self, pydantic_object: Type[T]): + self.pydantic_object = pydantic_object + self.pattern = re.compile(r"^```(?:json)?(?P[^`]*)", re.MULTILINE | re.DOTALL) + + def parse(self, text: str) -> T: + """Parse the provided text to extract and validate JSON data. + + Args: + text: A string containing potential JSON data. + + Returns: + An instance of the specified Pydantic model with parsed data. + + Raises: + JsonParsingException: If parsing or validation fails. + """ + try: + match = re.search(self.pattern, text.strip()) + json_str = match.group("json") if match else text + + json_object = json.loads(json_str) + return self.pydantic_object.parse_obj(json_object) + + except (json.JSONDecodeError, ValidationError) as e: + name = self.pydantic_object.__name__ + msg = f"Failed to parse {name} from text '{text}'. Error: {e}" + raise JsonParsingException(msg) from e + + def get_format_instructions(self) -> str: + """Generate formatting instructions based on the Pydantic model schema. + + Returns: + A string containing formatting instructions. + """ + schema = self.pydantic_object.schema() + reduced_schema = {k: v for k, v in schema.items() if k not in ['title', 'type']} + schema_str = json.dumps(reduced_schema, indent=4) + + format_instructions = f"JSON Formatting Instructions:\n{schema_str}" + return format_instructions + +# # Example usage +# class ExampleModel(BaseModel): +# field1: int +# field2: str + +# parser = JsonOutputParser(ExampleModel) +# # Use parser.parse(text) to parse JSON data diff --git a/swarms/utils/yaml_output_parser.py b/swarms/utils/yaml_output_parser.py new file mode 100644 index 00000000..c6749d8a --- /dev/null +++ b/swarms/utils/yaml_output_parser.py @@ -0,0 +1,76 @@ +import json +import re +import yaml +from typing import Type, TypeVar +from pydantic import BaseModel, ValidationError + +T = TypeVar("T", bound=BaseModel) + +class YamlParsingException(Exception): + """Custom exception for errors in YAML parsing.""" + +class YamlOutputParser: + """Parse YAML output using a Pydantic model. + + This parser is designed to extract YAML formatted data from a given string + and parse it using a specified Pydantic model for validation. + + Attributes: + pydantic_object: A Pydantic model class for parsing and validation. + pattern: A regex pattern to match YAML code blocks. + + + Examples: + >>> from pydantic import BaseModel + >>> from swarms.utils.yaml_output_parser import YamlOutputParser + >>> class MyModel(BaseModel): + ... name: str + ... age: int + ... + >>> parser = YamlOutputParser(MyModel) + >>> text = "```yaml\nname: John\nage: 42\n```" + >>> model = parser.parse(text) + >>> model.name + + """ + + def __init__(self, pydantic_object: Type[T]): + self.pydantic_object = pydantic_object + self.pattern = re.compile(r"^```(?:ya?ml)?(?P[^`]*)", re.MULTILINE | re.DOTALL) + + def parse(self, text: str) -> T: + """Parse the provided text to extract and validate YAML data. + + Args: + text: A string containing potential YAML data. + + Returns: + An instance of the specified Pydantic model with parsed data. + + Raises: + YamlParsingException: If parsing or validation fails. + """ + try: + match = re.search(self.pattern, text.strip()) + yaml_str = match.group("yaml") if match else text + + json_object = yaml.safe_load(yaml_str) + return self.pydantic_object.parse_obj(json_object) + + except (yaml.YAMLError, ValidationError) as e: + name = self.pydantic_object.__name__ + msg = f"Failed to parse {name} from text '{text}'. Error: {e}" + raise YamlParsingException(msg) from e + + def get_format_instructions(self) -> str: + """Generate formatting instructions based on the Pydantic model schema. + + Returns: + A string containing formatting instructions. + """ + schema = self.pydantic_object.schema() + reduced_schema = {k: v for k, v in schema.items() if k not in ['title', 'type']} + schema_str = json.dumps(reduced_schema, indent=4) + + format_instructions = f"YAML Formatting Instructions:\n{schema_str}" + return format_instructions