From 932a3e7a472d79b1cb704489e06a73dd534e31cf Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E7=8E=8B=E7=A5=A5=E5=AE=87?= <625024108@qq.com>
Date: Wed, 30 Jul 2025 15:13:14 +0800
Subject: [PATCH 1/5] fix bugs in truncate_memory_with_tokenizer

---
 swarms/structs/conversation.py | 119 +++++++++++++++++++++++++++++----
 1 file changed, 105 insertions(+), 14 deletions(-)

diff --git a/swarms/structs/conversation.py b/swarms/structs/conversation.py
index 7c8d3109..3e47cae1 100644
--- a/swarms/structs/conversation.py
+++ b/swarms/structs/conversation.py
@@ -184,6 +184,7 @@ class Conversation:
         system_prompt: Optional[str] = None,
         time_enabled: bool = False,
         autosave: bool = False,  # Changed default to False
+        save_enabled: bool = False,  # New parameter to control if saving is enabled
         save_filepath: str = None,
         load_filepath: str = None,  # New parameter to specify which file to load from
         context_length: int = 8192,
@@ -222,6 +223,7 @@ class Conversation:
         self.system_prompt = system_prompt
         self.time_enabled = time_enabled
         self.autosave = autosave
+        self.save_enabled = save_enabled
         self.conversations_dir = conversations_dir
         self.tokenizer_model_name = tokenizer_model_name
         self.message_id_on = message_id_on
@@ -1019,6 +1021,13 @@ class Conversation:
             )
             return
 
+        # Don't save if saving is disabled
+        if not self.save_enabled:
+            logger.warning(
+                "An attempt to save the conversation failed: save_enabled is False. "
+                "Please set save_enabled=True when creating a Conversation object to enable saving."
+            )
+            return
         # Get the full data including metadata and conversation history
         data = self.get_init_params()
@@ -1267,39 +1276,121 @@ class Conversation:
 
     def truncate_memory_with_tokenizer(self):
         """
-        Truncates the conversation history based on the total number of tokens using a tokenizer.
-
+        Truncate the conversation history based on the total token count using a tokenizer.
+
+        This implementation is model-agnostic: it does not depend on a specific LLM and works with any model that count_tokens supports.
+        Token counts are computed per message, and a message is truncated only when necessary, so the result is still valid content.
+
+        Returns:
+            None
         """
+        from swarms.utils.litellm_tokenizer import count_tokens
+
         total_tokens = 0
         truncated_history = []
-
+
         for message in self.conversation_history:
             role = message.get("role")
             content = message.get("content")
-            tokens = count_tokens(content, self.tokenizer_model_name)
-            count = tokens  # Assign the token count
-            total_tokens += count
-
-            if total_tokens <= self.context_length:
+
+            # Convert content to string if it's not already a string
+            if not isinstance(content, str):
+                content = str(content)
+
+            # Calculate token count for this message
+            token_count = count_tokens(content, self.tokenizer_model_name)
+
+            # Check if adding this message would exceed the limit
+            if total_tokens + token_count <= self.context_length:
+                # If not exceeding limit, add the full message
                 truncated_history.append(message)
+                total_tokens += token_count
             else:
-                remaining_tokens = self.context_length - (
-                    total_tokens - count
+                # Calculate remaining tokens we can include
+                remaining_tokens = self.context_length - total_tokens
+
+                # If no token space left, break the loop
+                if remaining_tokens <= 0:
+                    break
+
+                # If we have space left, we need to truncate this message
+                # Use binary search to find content length that fits remaining token space
+                truncated_content = self._binary_search_truncate(
+                    content,
+                    remaining_tokens,
+                    self.tokenizer_model_name
                 )
-                truncated_content = content[
-                    :remaining_tokens
-                ]  # Truncate the content based on the remaining tokens
+
+                # Create the truncated message
                 truncated_message = {
                     "role": role,
                     "content": truncated_content,
                 }
+
+                # Add any other fields from the original message
+                for key, value in message.items():
+                    if key not in ["role", "content"]:
+                        truncated_message[key] = value
+
                 truncated_history.append(truncated_message)
                 break
-
+
+        # Update conversation history
         self.conversation_history = truncated_history
 
+    def _binary_search_truncate(self, text, target_tokens, model_name):
+        """
+        Use binary search to find the longest prefix of the text that fits within the target token count.
+
+        Parameters:
+            text (str): Original text to truncate
+            target_tokens (int): Target token count
+            model_name (str): Model name for token counting
+
+        Returns:
+            str: Truncated text with token count not exceeding target_tokens
+        """
+        from swarms.utils.litellm_tokenizer import count_tokens
+
+        # If text is empty or target tokens is 0, return empty string
+        if not text or target_tokens <= 0:
+            return ""
+
+        # If original text token count is already less than or equal to target, return as is
+        original_tokens = count_tokens(text, model_name)
+        if original_tokens <= target_tokens:
+            return text
+
+        # Binary search over prefix lengths
+        left, right = 0, len(text)
+        best_text = ""
+
+        while left <= right:
+            mid = (left + right) // 2
+            truncated = text[:mid]
+            tokens = count_tokens(truncated, model_name)
+
+            if tokens <= target_tokens:
+                # If current truncated text token count is less than or equal to target, try longer text
+                best_text = truncated
+                left = mid + 1
+            else:
+                # Otherwise try shorter text
+                right = mid - 1
+
+        # Try to truncate at sentence boundaries if possible
+        sentence_delimiters = ['.', '!', '?', '\n']
+        for delimiter in sentence_delimiters:
+            last_pos = best_text.rfind(delimiter)
+            # Only truncate at sentence boundary if we don't lose too much content
+            if last_pos > len(best_text) * 0.75:
+                truncated_at_sentence = best_text[:last_pos + 1]
+                if count_tokens(truncated_at_sentence, model_name) <= target_tokens:
+                    return truncated_at_sentence
+
+        return best_text
+
     def clear(self):
         """Clear the conversation history."""
         if self.backend_instance:

From 6cdf3d84c8968d08af41796597d4658428826dc2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E7=8E=8B=E7=A5=A5=E5=AE=87?= <625024108@qq.com>
Date: Thu, 31 Jul 2025 10:07:18 +0800
Subject: [PATCH 2/5] add a truncation demo to conversation_test.py

---
 examples/utils/misc/conversation_test.py | 103 +++++++++++++++++++----
 1 file changed, 86 insertions(+), 17 deletions(-)

diff --git a/examples/utils/misc/conversation_test.py b/examples/utils/misc/conversation_test.py
index ec8a0534..ae34692b 100644
--- a/examples/utils/misc/conversation_test.py
+++ b/examples/utils/misc/conversation_test.py
@@ -1,22 +1,91 @@
 from swarms.structs.conversation import Conversation
+from dotenv import load_dotenv
 
-# Create a conversation object
-conversation = Conversation(backend="in-memory")
 
-# Add a message to the conversation
-conversation.add(
-    role="user", content="Hello, how are you?", category="input"
-)
+# Load environment variables from .env file
+load_dotenv()
 
-# Add a message to the conversation
-conversation.add(
-    role="assistant",
-    content="I'm good, thank you!",
-    category="output",
-)
-
-print(
-    conversation.export_and_count_categories(
-        tokenizer_model_name="claude-3-5-sonnet-20240620"
+def demonstrate_truncation():
+    # Using a smaller context length to clearly see the truncation effect
+    context_length = 25
+    print(f"Creating a conversation instance with context length {context_length}")
+
+    # Using Claude model as the tokenizer model
+    conversation = Conversation(
+        context_length=context_length,
+        tokenizer_model_name="claude-3-7-sonnet-20250219"
     )
-)
+
+    # Adding first message - short message
+    short_message = "Hello, I am a user."
+    print(f"\nAdding short message: '{short_message}'")
+    conversation.add("user", short_message)
+
+    # Display token count
+    from swarms.utils.litellm_tokenizer import count_tokens
+    tokens = count_tokens(short_message, conversation.tokenizer_model_name)
+    print(f"Short message token count: {tokens}")
+
+    # Adding second message - long message, should be truncated
+    long_message = "I have a question about artificial intelligence. I want to understand how large language models handle long texts, especially under token constraints. This issue is important because it relates to the model's practicality and effectiveness. I hope to get a detailed answer that helps me understand this complex technical problem."
+    print(f"\nAdding long message:\n'{long_message}'")
+    conversation.add("assistant", long_message)
+
+    # Display long message token count
+    tokens = count_tokens(long_message, conversation.tokenizer_model_name)
+    print(f"Long message token count: {tokens}")
+
+    # Display current conversation total token count
+    total_tokens = sum(count_tokens(msg["content"], conversation.tokenizer_model_name)
+                      for msg in conversation.conversation_history)
+    print(f"Total token count before truncation: {total_tokens}")
+
+    # Print the complete conversation history before truncation
+    print("\nConversation history before truncation:")
+    for i, msg in enumerate(conversation.conversation_history):
+        print(f"[{i}] {msg['role']}: {msg['content']}")
+        print(f"    Token count: {count_tokens(msg['content'], conversation.tokenizer_model_name)}")
+
+    # Execute truncation
+    print("\nExecuting truncation...")
+    conversation.truncate_memory_with_tokenizer()
+
+    # Print conversation history after truncation
+    print("\nConversation history after truncation:")
+    for i, msg in enumerate(conversation.conversation_history):
+        print(f"[{i}] {msg['role']}: {msg['content']}")
+        print(f"    Token count: {count_tokens(msg['content'], conversation.tokenizer_model_name)}")
+
+    # Display total token count after truncation
+    total_tokens = sum(count_tokens(msg["content"], conversation.tokenizer_model_name)
+                      for msg in conversation.conversation_history)
+    print(f"\nTotal token count after truncation: {total_tokens}")
+    print(f"Context length limit: {context_length}")
+
+    # Verify if successfully truncated below the limit
+    if total_tokens <= context_length:
+        print("✅ Success: Total token count is now less than or equal to context length limit")
+    else:
+        print("❌ Failure: Total token count still exceeds context length limit")
+
+    # Test sentence boundary truncation
+    print("\n\nTesting sentence boundary truncation:")
+    sentence_test = Conversation(context_length=15, tokenizer_model_name="claude-3-opus-20240229")
+    test_text = "This is the first sentence. This is the second very long sentence that contains a lot of content. This is the third sentence."
+    print(f"Original text: '{test_text}'")
+    print(f"Original token count: {count_tokens(test_text, sentence_test.tokenizer_model_name)}")
+
+    # Using binary search for truncation
+    truncated = sentence_test._binary_search_truncate(test_text, 10, sentence_test.tokenizer_model_name)
+    print(f"Truncated text: '{truncated}'")
+    print(f"Truncated token count: {count_tokens(truncated, sentence_test.tokenizer_model_name)}")
+
+    # Check if truncated at period
+    if truncated.endswith("."):
+        print("✅ Success: Text was truncated at sentence boundary")
+    else:
+        print("Note: Text was not truncated at sentence boundary")
+
+
+if __name__ == "__main__":
+    demonstrate_truncation()
\ No newline at end of file

From 578452fb6c52989c0ffcb602064ed3c3be23ddf2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E7=8E=8B=E7=A5=A5=E5=AE=87?= <625024108@qq.com>
Date: Fri, 1 Aug 2025 11:47:21 +0800
Subject: [PATCH 3/5] remove the save_enabled parameter and inline tokenizer
 imports

---
 swarms/structs/conversation.py | 17 ++++-------------
 1 file changed, 4 insertions(+), 13 deletions(-)

diff --git a/swarms/structs/conversation.py b/swarms/structs/conversation.py
index 3e47cae1..6e11ebdd 100644
--- a/swarms/structs/conversation.py
+++ b/swarms/structs/conversation.py
@@ -184,7 +184,6 @@ class Conversation:
         system_prompt: Optional[str] = None,
         time_enabled: bool = False,
         autosave: bool = False,  # Changed default to False
-        save_enabled: bool = False,  # New parameter to control if saving is enabled
         save_filepath: str = None,
         load_filepath: str = None,  # New parameter to specify which file to load from
         context_length: int = 8192,
@@ -223,7 +222,6 @@ class Conversation:
         self.system_prompt = system_prompt
         self.time_enabled = time_enabled
         self.autosave = autosave
-        self.save_enabled = save_enabled
         self.conversations_dir = conversations_dir
         self.tokenizer_model_name = tokenizer_model_name
         self.message_id_on = message_id_on
@@ -1021,13 +1019,6 @@ class Conversation:
             )
             return
 
-        # Don't save if saving is disabled
-        if not self.save_enabled:
-            logger.warning(
-                "An attempt to save the conversation failed: save_enabled is False. "
-                "Please set save_enabled=True when creating a Conversation object to enable saving."
-            )
-            return
         # Get the full data including metadata and conversation history
         data = self.get_init_params()
@@ -1284,7 +1275,7 @@ class Conversation:
         Returns:
             None
         """
-        from swarms.utils.litellm_tokenizer import count_tokens
+
 
         total_tokens = 0
         truncated_history = []
@@ -1350,7 +1341,7 @@ class Conversation:
         Returns:
             str: Truncated text with token count not exceeding target_tokens
         """
-        from swarms.utils.litellm_tokenizer import count_tokens
+
 
         # If text is empty or target tokens is 0, return empty string
         if not text or target_tokens <= 0:
@@ -1390,7 +1381,7 @@ class Conversation:
             return truncated_at_sentence
 
         return best_text
-
+
     def clear(self):
         """Clear the conversation history."""
         if self.backend_instance:
@@ -1796,4 +1787,4 @@
 # # # conversation.add("assistant", "I am doing well, thanks.")
 # # # # print(conversation.to_json())
 # # print(type(conversation.to_dict()))
-# # print(conversation.to_yaml())
+# # print(conversation.to_yaml())
\ No newline at end of file

From c9ed0dba2701c78926fff7fab1c5129fbe1d38b9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E7=8E=8B=E7=A5=A5=E5=AE=87?= <625024108@qq.com>
Date: Sat, 2 Aug 2025 02:10:20 +0800
Subject: [PATCH 4/5] move the truncation demo to a new
 conversation_test_truncate.py

---
 examples/utils/misc/conversation_test.py      | 103 +++--------------
 .../utils/misc/conversation_test_truncate.py  |  91 ++++++++++++++++
 2 files changed, 108 insertions(+), 86 deletions(-)
 create mode 100644 examples/utils/misc/conversation_test_truncate.py

diff --git a/examples/utils/misc/conversation_test.py b/examples/utils/misc/conversation_test.py
index ae34692b..a7a6750e 100644
--- a/examples/utils/misc/conversation_test.py
+++ b/examples/utils/misc/conversation_test.py
@@ -1,91 +1,22 @@
 from swarms.structs.conversation import Conversation
-from dotenv import load_dotenv
+# Create a conversation object
+conversation = Conversation(backend="in-memory")
 
-
-# Load environment variables from .env file
-load_dotenv()
-
-def demonstrate_truncation():
-    # Using a smaller context length to clearly see the truncation effect
-    context_length = 25
-    print(f"Creating a conversation instance with context length {context_length}")
-
-    # Using Claude model as the tokenizer model
-    conversation = Conversation(
-        context_length=context_length,
-        tokenizer_model_name="claude-3-7-sonnet-20250219"
-    )
-
-    # Adding first message - short message
-    short_message = "Hello, I am a user."
-    print(f"\nAdding short message: '{short_message}'")
-    conversation.add("user", short_message)
-
-    # Display token count
-    from swarms.utils.litellm_tokenizer import count_tokens
-    tokens = count_tokens(short_message, conversation.tokenizer_model_name)
-    print(f"Short message token count: {tokens}")
-
-    # Adding second message - long message, should be truncated
-    long_message = "I have a question about artificial intelligence. I want to understand how large language models handle long texts, especially under token constraints. This issue is important because it relates to the model's practicality and effectiveness. I hope to get a detailed answer that helps me understand this complex technical problem."
-    print(f"\nAdding long message:\n'{long_message}'")
-    conversation.add("assistant", long_message)
-
-    # Display long message token count
-    tokens = count_tokens(long_message, conversation.tokenizer_model_name)
-    print(f"Long message token count: {tokens}")
-
-    # Display current conversation total token count
-    total_tokens = sum(count_tokens(msg["content"], conversation.tokenizer_model_name)
-                      for msg in conversation.conversation_history)
-    print(f"Total token count before truncation: {total_tokens}")
-
-    # Print the complete conversation history before truncation
-    print("\nConversation history before truncation:")
-    for i, msg in enumerate(conversation.conversation_history):
-        print(f"[{i}] {msg['role']}: {msg['content']}")
-        print(f"    Token count: {count_tokens(msg['content'], conversation.tokenizer_model_name)}")
-
-    # Execute truncation
-    print("\nExecuting truncation...")
-    conversation.truncate_memory_with_tokenizer()
-
-    # Print conversation history after truncation
-    print("\nConversation history after truncation:")
-    for i, msg in enumerate(conversation.conversation_history):
-        print(f"[{i}] {msg['role']}: {msg['content']}")
-        print(f"    Token count: {count_tokens(msg['content'], conversation.tokenizer_model_name)}")
-
-    # Display total token count after truncation
-    total_tokens = sum(count_tokens(msg["content"], conversation.tokenizer_model_name)
-                      for msg in conversation.conversation_history)
-    print(f"\nTotal token count after truncation: {total_tokens}")
-    print(f"Context length limit: {context_length}")
-
-    # Verify if successfully truncated below the limit
-    if total_tokens <= context_length:
-        print("✅ Success: Total token count is now less than or equal to context length limit")
-    else:
-        print("❌ Failure: Total token count still exceeds context length limit")
-
-    # Test sentence boundary truncation
-    print("\n\nTesting sentence boundary truncation:")
-    sentence_test = Conversation(context_length=15, tokenizer_model_name="claude-3-opus-20240229")
-    test_text = "This is the first sentence. This is the second very long sentence that contains a lot of content. This is the third sentence."
-    print(f"Original text: '{test_text}'")
-    print(f"Original token count: {count_tokens(test_text, sentence_test.tokenizer_model_name)}")
-
-    # Using binary search for truncation
-    truncated = sentence_test._binary_search_truncate(test_text, 10, sentence_test.tokenizer_model_name)
-    print(f"Truncated text: '{truncated}'")
-    print(f"Truncated token count: {count_tokens(truncated, sentence_test.tokenizer_model_name)}")
-
-    # Check if truncated at period
-    if truncated.endswith("."):
-        print("✅ Success: Text was truncated at sentence boundary")
-    else:
-        print("Note: Text was not truncated at sentence boundary")
+# Add a message to the conversation
+conversation.add(
+    role="user", content="Hello, how are you?", category="input"
+)
 
+# Add a message to the conversation
+conversation.add(
+    role="assistant",
+    content="I'm good, thank you!",
+    category="output",
+)
 
-if __name__ == "__main__":
-    demonstrate_truncation()
\ No newline at end of file
+print(
+    conversation.export_and_count_categories(
+        tokenizer_model_name="claude-3-5-sonnet-20240620"
+    )
+)
\ No newline at end of file
diff --git a/examples/utils/misc/conversation_test_truncate.py b/examples/utils/misc/conversation_test_truncate.py
new file mode 100644
index 00000000..ae34692b
--- /dev/null
+++ b/examples/utils/misc/conversation_test_truncate.py
@@ -0,0 +1,91 @@
+from swarms.structs.conversation import Conversation
+from dotenv import load_dotenv
+
+
+# Load environment variables from .env file
+load_dotenv()
+
+def demonstrate_truncation():
+    # Using a smaller context length to clearly see the truncation effect
+    context_length = 25
+    print(f"Creating a conversation instance with context length {context_length}")
+
+    # Using Claude model as the tokenizer model
+    conversation = Conversation(
+        context_length=context_length,
+        tokenizer_model_name="claude-3-7-sonnet-20250219"
+    )
+
+    # Adding first message - short message
+    short_message = "Hello, I am a user."
+    print(f"\nAdding short message: '{short_message}'")
+    conversation.add("user", short_message)
+
+    # Display token count
+    from swarms.utils.litellm_tokenizer import count_tokens
+    tokens = count_tokens(short_message, conversation.tokenizer_model_name)
+    print(f"Short message token count: {tokens}")
+
+    # Adding second message - long message, should be truncated
+    long_message = "I have a question about artificial intelligence. I want to understand how large language models handle long texts, especially under token constraints. This issue is important because it relates to the model's practicality and effectiveness. I hope to get a detailed answer that helps me understand this complex technical problem."
+    print(f"\nAdding long message:\n'{long_message}'")
+    conversation.add("assistant", long_message)
+
+    # Display long message token count
+    tokens = count_tokens(long_message, conversation.tokenizer_model_name)
+    print(f"Long message token count: {tokens}")
+
+    # Display current conversation total token count
+    total_tokens = sum(count_tokens(msg["content"], conversation.tokenizer_model_name)
+                      for msg in conversation.conversation_history)
+    print(f"Total token count before truncation: {total_tokens}")
+
+    # Print the complete conversation history before truncation
+    print("\nConversation history before truncation:")
+    for i, msg in enumerate(conversation.conversation_history):
+        print(f"[{i}] {msg['role']}: {msg['content']}")
+        print(f"    Token count: {count_tokens(msg['content'], conversation.tokenizer_model_name)}")
+
+    # Execute truncation
+    print("\nExecuting truncation...")
+    conversation.truncate_memory_with_tokenizer()
+
+    # Print conversation history after truncation
+    print("\nConversation history after truncation:")
+    for i, msg in enumerate(conversation.conversation_history):
+        print(f"[{i}] {msg['role']}: {msg['content']}")
+        print(f"    Token count: {count_tokens(msg['content'], conversation.tokenizer_model_name)}")
+
+    # Display total token count after truncation
+    total_tokens = sum(count_tokens(msg["content"], conversation.tokenizer_model_name)
+                      for msg in conversation.conversation_history)
+    print(f"\nTotal token count after truncation: {total_tokens}")
+    print(f"Context length limit: {context_length}")
+
+    # Verify if successfully truncated below the limit
+    if total_tokens <= context_length:
+        print("✅ Success: Total token count is now less than or equal to context length limit")
+    else:
+        print("❌ Failure: Total token count still exceeds context length limit")
+
+    # Test sentence boundary truncation
+    print("\n\nTesting sentence boundary truncation:")
+    sentence_test = Conversation(context_length=15, tokenizer_model_name="claude-3-opus-20240229")
+    test_text = "This is the first sentence. This is the second very long sentence that contains a lot of content. This is the third sentence."
+    print(f"Original text: '{test_text}'")
+    print(f"Original token count: {count_tokens(test_text, sentence_test.tokenizer_model_name)}")
+
+    # Using binary search for truncation
+    truncated = sentence_test._binary_search_truncate(test_text, 10, sentence_test.tokenizer_model_name)
+    print(f"Truncated text: '{truncated}'")
+    print(f"Truncated token count: {count_tokens(truncated, sentence_test.tokenizer_model_name)}")
+
+    # Check if truncated at period
+    if truncated.endswith("."):
+        print("✅ Success: Text was truncated at sentence boundary")
+    else:
+        print("Note: Text was not truncated at sentence boundary")
+
+
+if __name__ == "__main__":
+    demonstrate_truncation()
\ No newline at end of file

From 9576957adf7934e1d3bc5ddf0f8c0deef962f973 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E7=8E=8B=E7=A5=A5=E5=AE=87?= <625024108@qq.com>
Date: Sat, 2 Aug 2025 02:17:48 +0800
Subject: [PATCH 5/5] import count_tokens at module level in the truncation
 demo

---
 examples/utils/misc/conversation_test_truncate.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/examples/utils/misc/conversation_test_truncate.py b/examples/utils/misc/conversation_test_truncate.py
index ae34692b..f660ff45 100644
--- a/examples/utils/misc/conversation_test_truncate.py
+++ b/examples/utils/misc/conversation_test_truncate.py
@@ -1,6 +1,6 @@
 from swarms.structs.conversation import Conversation
 from dotenv import load_dotenv
-
+from swarms.utils.litellm_tokenizer import count_tokens
 
 # Load environment variables from .env file
 load_dotenv()
@@ -22,7 +22,7 @@ def demonstrate_truncation():
     conversation.add("user", short_message)
 
     # Display token count
-    from swarms.utils.litellm_tokenizer import count_tokens
+
     tokens = count_tokens(short_message, conversation.tokenizer_model_name)
     print(f"Short message token count: {tokens}")
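A note on verifying the series: the invariant these patches establish is that, after truncate_memory_with_tokenizer(), the whole history fits within context_length, and that _binary_search_truncate() never returns a prefix exceeding its token budget. The demo scripts print these checks but do not assert them. The sketch below is not part of the patches; it assumes the patched swarms package is importable and reuses the tokenizer model name from the demo (any model accepted by count_tokens would do):

from swarms.structs.conversation import Conversation
from swarms.utils.litellm_tokenizer import count_tokens

MODEL = "claude-3-7-sonnet-20250219"  # same tokenizer model as the demo


def test_truncation_fits_context_length():
    # Small context window so the second message must be truncated.
    conversation = Conversation(context_length=25, tokenizer_model_name=MODEL)
    conversation.add("user", "Hello, I am a user.")
    conversation.add("assistant", "This answer is deliberately long. " * 40)

    conversation.truncate_memory_with_tokenizer()

    # After truncation, the whole history must fit in the context window.
    total = sum(
        count_tokens(str(msg["content"]), MODEL)
        for msg in conversation.conversation_history
    )
    assert total <= 25


def test_binary_search_respects_token_budget():
    conversation = Conversation(context_length=25, tokenizer_model_name=MODEL)
    text = "First sentence. Second, much longer sentence with more content. Third."

    truncated = conversation._binary_search_truncate(text, 10, MODEL)

    # The result must fit the budget and be a prefix of the input.
    assert count_tokens(truncated, MODEL) <= 10
    assert text.startswith(truncated)

Because _binary_search_truncate() only ever returns a prefix of its input (best_text, or a sentence-aligned cut of it), the startswith assertion should hold for any input.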