parent
cdf68c9467
commit
970da21846
@ -0,0 +1,82 @@
|
||||
import json
|
||||
import re
|
||||
from typing import Type, TypeVar
|
||||
from pydantic import BaseModel, ValidationError
|
||||
|
||||
T = TypeVar("T", bound=BaseModel)
|
||||
|
||||
class JsonParsingException(Exception):
    """Signal that text could not be turned into a validated model.

    Raised when the input is not decodable as JSON or when the decoded
    data is rejected by the target Pydantic model.
    """
|
||||
|
||||
class JsonOutputParser:
    r"""Parse JSON output using a Pydantic model.

    This parser extracts JSON formatted data from a given string, optionally
    wrapped in a Markdown code fence, and parses it using a specified
    Pydantic model for validation.

    Attributes:
        pydantic_object: A Pydantic model class for parsing and validation.
        pattern: A compiled regex that matches a fenced code block and
            exposes its body as the ``json`` group.

    Examples:
        >>> from pydantic import BaseModel
        >>> from swarms.utils.json_output_parser import JsonOutputParser
        >>> class MyModel(BaseModel):
        ...     name: str
        ...     age: int
        ...
        >>> parser = JsonOutputParser(MyModel)
        >>> text = "```json\n{\"name\": \"John\", \"age\": 42}\n```"
        >>> model = parser.parse(text)
        >>> model.name
        'John'
    """

    def __init__(self, pydantic_object: Type[T]):
        """Store the target model and compile the code-fence pattern.

        Args:
            pydantic_object: The Pydantic model class used to validate the
                decoded JSON.
        """
        self.pydantic_object = pydantic_object
        # The body is captured lazily up to the closing fence (or the end of
        # the text when the fence is never closed), so single backticks
        # inside JSON string values no longer truncate the payload. The
        # language tag is optional and matched case-insensitively.
        self.pattern = re.compile(
            r"^```(?:json)?(?P<json>.*?)(?:```|\Z)",
            re.MULTILINE | re.DOTALL | re.IGNORECASE,
        )

    def parse(self, text: str) -> T:
        """Parse the provided text to extract and validate JSON data.

        If the text contains a fenced code block, only the body of the
        first such block is decoded; otherwise the whole text is decoded.

        Args:
            text: A string containing potential JSON data.

        Returns:
            An instance of the specified Pydantic model with parsed data.

        Raises:
            JsonParsingException: If parsing or validation fails.
        """
        try:
            match = self.pattern.search(text.strip())
            payload = match.group("json") if match else text
            return self.pydantic_object.parse_obj(json.loads(payload))
        except (json.JSONDecodeError, ValidationError) as e:
            name = self.pydantic_object.__name__
            msg = f"Failed to parse {name} from text '{text}'. Error: {e}"
            raise JsonParsingException(msg) from e

    def get_format_instructions(self) -> str:
        """Generate formatting instructions based on the Pydantic model schema.

        The top-level ``title`` and ``type`` entries of the model schema are
        dropped and the remainder is rendered as indented JSON.

        Returns:
            A string containing formatting instructions.
        """
        schema = self.pydantic_object.schema()
        reduced_schema = {
            key: value
            for key, value in schema.items()
            if key not in ("title", "type")
        }
        schema_str = json.dumps(reduced_schema, indent=4)
        return f"JSON Formatting Instructions:\n{schema_str}"
|
||||
|
||||
# # Example usage
|
||||
# class ExampleModel(BaseModel):
|
||||
# field1: int
|
||||
# field2: str
|
||||
|
||||
# parser = JsonOutputParser(ExampleModel)
|
||||
# # Use parser.parse(text) to parse JSON data
|
@ -0,0 +1,76 @@
|
||||
import json
|
||||
import re
|
||||
import yaml
|
||||
from typing import Type, TypeVar
|
||||
from pydantic import BaseModel, ValidationError
|
||||
|
||||
T = TypeVar("T", bound=BaseModel)
|
||||
|
||||
class YamlParsingException(Exception):
    """Signal that text could not be turned into a validated model.

    Raised when the input is not loadable as YAML or when the loaded
    data is rejected by the target Pydantic model.
    """
|
||||
|
||||
class YamlOutputParser:
    r"""Parse YAML output using a Pydantic model.

    This parser extracts YAML formatted data from a given string, optionally
    wrapped in a Markdown code fence, and parses it using a specified
    Pydantic model for validation.

    Attributes:
        pydantic_object: A Pydantic model class for parsing and validation.
        pattern: A compiled regex that matches a fenced code block and
            exposes its body as the ``yaml`` group.

    Examples:
        >>> from pydantic import BaseModel
        >>> from swarms.utils.yaml_output_parser import YamlOutputParser
        >>> class MyModel(BaseModel):
        ...     name: str
        ...     age: int
        ...
        >>> parser = YamlOutputParser(MyModel)
        >>> text = "```yaml\nname: John\nage: 42\n```"
        >>> model = parser.parse(text)
        >>> model.name
        'John'
    """

    def __init__(self, pydantic_object: Type[T]):
        """Store the target model and compile the code-fence pattern.

        Args:
            pydantic_object: The Pydantic model class used to validate the
                loaded YAML.
        """
        self.pydantic_object = pydantic_object
        # The body is captured lazily up to the closing fence (or the end of
        # the text when the fence is never closed), so single backticks
        # inside YAML values no longer truncate the payload. The language
        # tag (yaml / yml) is optional and matched case-insensitively.
        self.pattern = re.compile(
            r"^```(?:ya?ml)?(?P<yaml>.*?)(?:```|\Z)",
            re.MULTILINE | re.DOTALL | re.IGNORECASE,
        )

    def parse(self, text: str) -> T:
        """Parse the provided text to extract and validate YAML data.

        If the text contains a fenced code block, only the body of the
        first such block is loaded; otherwise the whole text is loaded.

        Args:
            text: A string containing potential YAML data.

        Returns:
            An instance of the specified Pydantic model with parsed data.

        Raises:
            YamlParsingException: If parsing or validation fails.
        """
        try:
            match = self.pattern.search(text.strip())
            payload = match.group("yaml") if match else text
            # safe_load only builds plain Python data, never arbitrary
            # objects, which matters because the text is model output.
            data = yaml.safe_load(payload)
            return self.pydantic_object.parse_obj(data)
        except (yaml.YAMLError, ValidationError) as e:
            name = self.pydantic_object.__name__
            msg = f"Failed to parse {name} from text '{text}'. Error: {e}"
            raise YamlParsingException(msg) from e

    def get_format_instructions(self) -> str:
        """Generate formatting instructions based on the Pydantic model schema.

        The top-level ``title`` and ``type`` entries of the model schema are
        dropped. Note that the remaining schema is rendered as indented
        JSON, not YAML, beneath the instructions header.

        Returns:
            A string containing formatting instructions.
        """
        schema = self.pydantic_object.schema()
        reduced_schema = {
            key: value
            for key, value in schema.items()
            if key not in ("title", "type")
        }
        schema_str = json.dumps(reduced_schema, indent=4)
        return f"YAML Formatting Instructions:\n{schema_str}"
|
Loading…
Reference in new issue