You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
20 lines
430 B
20 lines
430 B
from swarms.chunkers.base import BaseChunker
|
|
from swarms.chunkers.chunk_seperator import ChunkSeparator
|
|
|
|
|
|
class PdfChunker(BaseChunker):
    """Chunker variant that declares the separator set used for PDF text.

    The class adds no behavior of its own here; it only defines
    ``DEFAULT_SEPARATORS``. How the separators are consumed is decided by
    ``BaseChunker`` (not visible in this file) -- presumably they are tried
    in the listed order, coarsest first; confirm against the base class.
    """

    # Listed from coarsest to finest: blank-line break, the three
    # sentence-ending punctuation marks followed by a space, then a
    # single space.
    DEFAULT_SEPARATORS = [
        ChunkSeparator(separator_text)
        for separator_text in ("\n\n", ". ", "! ", "? ", " ")
    ]
|
|
|
|
|
|
# Example usage:
|
|
# pdf = "swarmdeck.pdf"
|
|
# chunker = PdfChunker()
|
|
# chunks = chunker.chunk(pdf)
|
|
# print(chunks)
|