You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
25 lines
733 B
25 lines
733 B
from swarms.chunkers.base import BaseChunker
|
|
from swarms.chunkers.chunk_seperator import ChunkSeparator
|
|
|
|
|
|
class MarkdownChunker(BaseChunker):
    """Chunker whose default separators are oriented towards Markdown text.

    The class only overrides ``DEFAULT_SEPARATORS``; all chunking behaviour
    comes from ``BaseChunker``.
    NOTE(review): presumably ``BaseChunker`` tries the separators in list
    order — confirm against its implementation.
    """

    # Heading markers "##" through "######", each flagged ``is_prefix=True``,
    # followed by plain separators: blank line, ". ", "! ", "? " and a
    # single space.
    DEFAULT_SEPARATORS = [
        ChunkSeparator("#" * depth, is_prefix=True) for depth in range(2, 7)
    ] + [
        ChunkSeparator(token) for token in ("\n\n", ". ", "! ", "? ", " ")
    ]
|
|
|
|
|
|
# Example: using the chunker to chunk a markdown file
|
|
# file = open("README.md", "r")
|
|
# text = file.read()
|
|
# chunker = MarkdownChunker()
|
|
# chunks = chunker.chunk(text)
|