parent dbb085c371
commit f43e6787fd
@@ -0,0 +1,85 @@
import traceback
from typing import Optional

from loguru import logger

from swarms.utils.litellm_tokenizer import count_tokens


def dynamic_auto_chunking_(
    content: str,
    context_length: Optional[int] = 8192,
    tokenizer_model_name: Optional[str] = "gpt-4.1",
) -> str:
    """
    Dynamically trim the conversation history from the beginning so it
    fits within the context length.

    Args:
        content (str): The conversation history as a string.
        context_length (Optional[int]): The maximum number of tokens allowed.
            Defaults to 8192.
        tokenizer_model_name (Optional[str]): The name of the tokenizer model
            to use. Defaults to "gpt-4.1".

    Returns:
        str: The longest suffix of the conversation history that fits
            within context_length tokens.
    """
    total_tokens = count_tokens(
        text=content, model=tokenizer_model_name
    )

    if total_tokens <= content_length if False else context_length:
        return content

    # Remove characters from the beginning until the remainder fits.
    # Token counts grow with string length, so binary search over the
    # cut point finds the smallest removal that gets under the limit.
    target_tokens = context_length
    current_string = content

    # Binary search for the leftmost cut point whose suffix still fits
    left, right = 0, len(content)

    while left < right:
        mid = (left + right) // 2
        test_string = content[mid:]

        if not test_string:
            break

        test_tokens = count_tokens(
            text=test_string, model=tokenizer_model_name
        )

        if test_tokens <= target_tokens:
            # This suffix fits; try cutting less so more history is kept
            right = mid
            current_string = test_string
        else:
            # Still over the limit; cut more from the beginning
            left = mid + 1

    return current_string
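
# Cost note (illustrative, assumed input size): for a 100,000-character
# history the bisection above makes about log2(100000) ~= 17 calls to
# count_tokens, so trimming cost grows logarithmically with history length.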


def dynamic_auto_chunking(
    content: str,
    context_length: Optional[int] = 8192,
    tokenizer_model_name: Optional[str] = "gpt-4.1",
) -> str:
    """
    Dynamically trim the conversation history from the beginning so it
    fits within the context length.

    This is a safe wrapper around dynamic_auto_chunking_ that returns the
    original content unchanged if chunking fails for any reason.

    Args:
        content (str): The conversation history as a string.
        context_length (Optional[int]): The maximum number of tokens allowed.
            Defaults to 8192.
        tokenizer_model_name (Optional[str]): The name of the tokenizer model
            to use. Defaults to "gpt-4.1".

    Returns:
        str: The trimmed conversation history, or the original content if
            trimming failed.
    """
    try:
        return dynamic_auto_chunking_(
            content=content,
            context_length=context_length,
            tokenizer_model_name=tokenizer_model_name,
        )
    except Exception as e:
        logger.error(
            f"Dynamic auto chunking failed: {e} Traceback: {traceback.format_exc()}"
        )
        return content
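
A minimal usage sketch (hypothetical caller, not part of this commit; the
import path is an assumption and should match wherever this module lands
in swarms):

    # hypothetical_example.py
    from swarms.utils.dynamic_auto_chunking import dynamic_auto_chunking

    history = "User: hello\nAssistant: hi there!\n" * 5000  # long transcript
    trimmed = dynamic_auto_chunking(
        content=history,
        context_length=4096,
        tokenizer_model_name="gpt-4.1",
    )
    # The newest part of the history survives; the oldest part is cut away.
    print(len(history), "->", len(trimmed))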