You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
swarms/swarms/chunkers/markdown.py

25 lines
733 B

from swarms.chunkers.base import BaseChunker
from swarms.chunkers.chunk_seperator import ChunkSeparator
class MarkdownChunker(BaseChunker):
    """Chunker whose default separators target Markdown text.

    ``DEFAULT_SEPARATORS`` lists, in order, the heading markers ``##``
    through ``######`` (each created with ``is_prefix=True``), followed by a
    blank line (``"\\n\\n"``), the sentence endings ``". "``, ``"! "`` and
    ``"? "``, and finally a single space. A lone ``#`` marker is not part of
    the list. How the list is consumed is defined by ``BaseChunker``, which
    is not shown in this file.
    """

    # Heading markers (two to six '#' characters) come first, then the
    # plain-text delimiters; the resulting order matches the literal list
    # this replaces.
    DEFAULT_SEPARATORS = [
        ChunkSeparator("#" * depth, is_prefix=True) for depth in range(2, 7)
    ] + [
        ChunkSeparator(delimiter)
        for delimiter in ("\n\n", ". ", "! ", "? ", " ")
    ]
# Example: using the chunker to split a Markdown file into chunks.
# with open("README.md", "r") as file:
#     text = file.read()
# chunker = MarkdownChunker()
# chunks = chunker.chunk(text)