You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
swarms/swarms/chunkers/pdf.py

13 lines
321 B

from swarms.chunkers.base import BaseChunker
from swarms.chunkers.chunk_seperator import ChunkSeparator
class PdfChunker(BaseChunker):
    """Chunker variant that supplies its own default separator list.

    The class adds no methods of its own; it only sets
    ``DEFAULT_SEPARATORS``. All chunking behaviour is inherited from
    ``BaseChunker``. Judging by the name, it is intended for text taken
    from PDF documents.
    """

    # Listed from the coarsest boundary to the finest: blank line
    # (paragraph break), sentence-ending punctuation followed by a space,
    # and finally a single space.
    # NOTE(review): presumably BaseChunker tries these in list order and
    # falls back to the next one when a chunk is still too large -- confirm
    # against BaseChunker before reordering.
    DEFAULT_SEPARATORS = [
        ChunkSeparator("\n\n"),
        ChunkSeparator(". "),
        ChunkSeparator("! "),
        ChunkSeparator("? "),
        ChunkSeparator(" "),
    ]