diff --git a/docs/swarms/agents/third_party.md b/docs/swarms/agents/third_party.md
index 3ab8700f..c8f9a496 100644
--- a/docs/swarms/agents/third_party.md
+++ b/docs/swarms/agents/third_party.md
@@ -509,6 +509,13 @@ logging.basicConfig(level=logging.INFO)
 monitored_agent = MonitoredAgent("MonitoredGriptapeAgent")
 result = monitored_agent.run("Summarize the latest AI research papers")
 ```
+Additionally, the Agent class now includes built-in logging functionality and the ability to switch between JSON and string output.
+
+To switch between JSON and string output:
+- Use `output_type="str"` for string output (default)
+- Use `output_type="json"` for JSON output
+
+The `output_type` parameter determines the format of the final result returned by the `run` method. When set to "str", it returns a string representation of the agent's response. When set to "json", it returns a JSON object containing detailed information about the agent's run, including all steps and metadata.
 
 ## 6. Best Practices for Custom Agent Development
@@ -614,4 +621,4 @@ The ability to seamlessly integrate agents from libraries like Griptape, Langcha
 
 As you embark on your journey with the swarms framework, remember that the field of AI and agent-based systems is rapidly evolving. Stay curious, keep experimenting, and don't hesitate to push the boundaries of what's possible with custom agents and integrated libraries.
 
-By embracing the power of the swarms framework and the ecosystem of agent libraries it supports, you're well-positioned to create the next generation of intelligent, adaptive, and collaborative AI systems. Happy agent building!
\ No newline at end of file
+By embracing the power of the swarms framework and the ecosystem of agent libraries it supports, you're well-positioned to create the next generation of intelligent, adaptive, and collaborative AI systems. Happy agent building!
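To make the documented switch concrete, here is a minimal usage sketch. The agent names and configuration below are illustrative assumptions; only the `output_type` values and the behaviour of `run` come from the documentation change above.

```python
from swarms import Agent

# Illustrative configuration; substitute whatever model/LLM wiring your setup uses.
string_agent = Agent(agent_name="StrOutputAgent", max_loops=1, output_type="str")
json_agent = Agent(agent_name="JsonOutputAgent", max_loops=1, output_type="json")

text = string_agent.run("Summarize the latest AI research papers")    # plain string response
record = json_agent.run("Summarize the latest AI research papers")    # run record with steps and metadata

print(type(text), type(record))
```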
diff --git a/swarms/structs/agent.py b/swarms/structs/agent.py
index 2d07f106..e9ddf8b0 100644
--- a/swarms/structs/agent.py
+++ b/swarms/structs/agent.py
@@ -781,6 +781,8 @@ class Agent:
                 or loop_count < self.max_loops
             ):
                 loop_count += 1
+                # Log step start
+                current_step_id = f"step_{loop_count}_{uuid.uuid4().hex}"
                 self.loop_count_print(loop_count, self.max_loops)
                 print("\n")
@@ -814,6 +816,8 @@
                             *response_args, **kwargs
                         )
+                        # Log step metadata
+                        step_meta = self.log_step_metadata(loop_count, task_prompt, response)
 
                         # Check if response is a dictionary and has 'choices' key
                         if (
                             isinstance(response, dict)
                             and "choices" in response
@@ -832,10 +836,18 @@
 
                         # Check and execute tools
                         if self.tools is not None:
-                            print(
-                                f"self.tools is not None: {response}"
-                            )
-                            self.parse_and_execute_tools(response)
+                            tool_result = self.parse_and_execute_tools(response)
+                            if tool_result:
+                                self.update_tool_usage(
+                                    step_meta["step_id"],
+                                    tool_result["tool"],
+                                    tool_result["args"],
+                                    tool_result["response"]
+                                )
+
+                        # Update agent output history
+                        self.agent_output.full_history = self.short_memory.return_history_as_string()
 
                         # Log the step metadata
                         logged = self.log_step_metadata(
@@ -1969,13 +1981,48 @@
     def log_step_metadata(
         self, loop: int, task: str, response: str
     ) -> Step:
-        # # # Step Metadata
+        """Log metadata for each step of agent execution."""
+        # Generate unique step ID
+        step_id = f"step_{loop}_{uuid.uuid4().hex}"
+
+        # Calculate token usage
         # full_memory = self.short_memory.return_history_as_string()
         # prompt_tokens = self.tokenizer.count_tokens(full_memory)
         # completion_tokens = self.tokenizer.count_tokens(response)
-        # self.tokenizer.count_tokens(prompt_tokens + completion_tokens)
+        # total_tokens = prompt_tokens + completion_tokens
+        total_tokens = self.tokenizer.count_tokens(task) + self.tokenizer.count_tokens(response)
+
+        # Get memory responses
+        memory_responses = {
+            "short_term": self.short_memory.return_history_as_string() if self.short_memory else None,
+            "long_term": self.long_term_memory.query(task) if self.long_term_memory else None
+        }
+
+        # Get tool responses if tool was used
+        tool_response = None
+        if self.tools:
+            try:
+                tool_call_output = parse_and_execute_json(self.tools, response, parse_md=True)
+                if tool_call_output:
+                    tool_response = {
+                        "tool_name": tool_call_output.get("tool_name", "unknown"),
+                        "tool_args": tool_call_output.get("args", {}),
+                        "tool_output": str(tool_call_output.get("output", ""))
+                    }
+            except Exception as e:
+                logger.debug(f"No tool call detected in response: {e}")
+
+        # Create memory usage tracking
+        memory_usage = {
+            "short_term": len(self.short_memory.messages) if self.short_memory else 0,
+            "long_term": self.long_term_memory.count if self.long_term_memory else 0,
+            "responses": memory_responses
+        }
+
         step_log = Step(
+            step_id=step_id,
+            time=time.time(),
+            tokens=total_tokens,
             response=AgentChatCompletionResponse(
                 id=self.agent_id,
                 agent_name=self.agent_name,
@@ -1990,14 +2037,33 @@
                 ),
                 # usage=UsageInfo(
                 #     prompt_tokens=prompt_tokens,
-                #     total_tokens=total_tokens,
                 #     completion_tokens=completion_tokens,
+                #     total_tokens=total_tokens,
                 # ),
+                tool_calls=[] if tool_response is None else [tool_response],
+                memory_usage=memory_usage
             ),
         )
-
+
+        # Update total tokens if agent_output exists
+        if hasattr(self, 'agent_output'):
+            self.agent_output.total_tokens += total_tokens
+
+        # Add step to agent output tracking
         self.step_pool.append(step_log)
+        return step_log
 
+    def update_tool_usage(self, step_id: str, tool_name: str, tool_args: dict, tool_response: Any):
+        """Update tool usage information for a specific step."""
+        for step in self.agent_output.steps:
+            if step.step_id == step_id:
+                step.response.tool_calls.append({
+                    "tool": tool_name,
+                    "arguments": tool_args,
+                    "response": str(tool_response)
+                })
+                break
+
     def _serialize_callable(
         self, attr_value: Callable
     ) -> Dict[str, Any]:
diff --git a/tests/agents/test_agent_logging.py b/tests/agents/test_agent_logging.py
new file mode 100644
index 00000000..b2106dd6
--- /dev/null
+++ b/tests/agents/test_agent_logging.py
@@ -0,0 +1,98 @@
+from unittest.mock import Mock, MagicMock
+from dataclasses import dataclass, field, asdict
+from typing import List, Dict, Any
+from datetime import datetime
+import unittest
+from swarms.schemas.agent_step_schemas import ManySteps, Step
+from swarms.structs.agent import Agent
+from swarms.tools.tool_parse_exec import parse_and_execute_json
+
+# Mock parse_and_execute_json for testing
+parse_and_execute_json = MagicMock()
+parse_and_execute_json.return_value = {
+    "tool_name": "calculator",
+    "args": {"numbers": [2, 2]},
+    "output": "4"
+}
+
+class TestAgentLogging(unittest.TestCase):
+    def setUp(self):
+        self.mock_tokenizer = MagicMock()
+        self.mock_tokenizer.count_tokens.return_value = 100
+
+        self.mock_short_memory = MagicMock()
+        self.mock_short_memory.get_memory_stats.return_value = {"message_count": 2}
+
+        self.mock_long_memory = MagicMock()
+        self.mock_long_memory.get_memory_stats.return_value = {"item_count": 5}
+
+        self.agent = Agent(
+            tokenizer=self.mock_tokenizer,
+            short_memory=self.mock_short_memory,
+            long_term_memory=self.mock_long_memory
+        )
+
+    def test_log_step_metadata_basic(self):
+        log_result = self.agent.log_step_metadata(1, "Test prompt", "Test response")
+
+        self.assertIn('step_id', log_result)
+        self.assertIn('timestamp', log_result)
+        self.assertIn('tokens', log_result)
+        self.assertIn('memory_usage', log_result)
+
+        self.assertEqual(log_result['tokens']['total'], 200)
+
+    def test_log_step_metadata_no_long_term_memory(self):
+        self.agent.long_term_memory = None
+        log_result = self.agent.log_step_metadata(1, "prompt", "response")
+        self.assertEqual(log_result['memory_usage']['long_term'], {})
+
+    def test_log_step_metadata_timestamp(self):
+        log_result = self.agent.log_step_metadata(1, "prompt", "response")
+        self.assertIn('timestamp', log_result)
+
+    def test_token_counting_integration(self):
+        self.mock_tokenizer.count_tokens.side_effect = [150, 250]
+        log_result = self.agent.log_step_metadata(1, "prompt", "response")
+
+        self.assertEqual(log_result['tokens']['total'], 400)
+
+    def test_agent_output_updating(self):
+        initial_total_tokens = sum(step['tokens']['total'] for step in self.agent.agent_output.steps)
+        self.agent.log_step_metadata(1, "prompt", "response")
+
+        final_total_tokens = sum(step['tokens']['total'] for step in self.agent.agent_output.steps)
+        self.assertEqual(
+            final_total_tokens - initial_total_tokens,
+            200
+        )
+        self.assertEqual(len(self.agent.agent_output.steps), 1)
+
+class TestAgentLoggingIntegration(unittest.TestCase):
+    def setUp(self):
+        self.agent = Agent(agent_name="test-agent")
+
+    def test_full_logging_cycle(self):
+        task = "Test task"
+        max_loops = 1
+
+        result = self.agent._run(task, max_loops=max_loops)
+
+        self.assertIsInstance(result, dict)
+        self.assertIn('steps', result)
+        self.assertIsInstance(result['steps'], list)
+        self.assertEqual(len(result['steps']), max_loops)
+
+        if result['steps']:
+            step = result['steps'][0]
+            self.assertIn('step_id', step)
+            self.assertIn('timestamp', step)
+            self.assertIn('task', step)
+            self.assertIn('response', step)
+            self.assertEqual(step['task'], task)
+            self.assertEqual(step['response'], f"Response for loop 1")
+
+        self.assertTrue(len(self.agent.agent_output.steps) > 0)
+
+if __name__ == '__main__':
+    unittest.main()
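Taken together, the `agent.py` changes above mean each run-loop iteration now records a `Step` with an ID, token count, tool calls, and memory usage, accumulated on `agent_output`. The sketch below shows how that record could be inspected after a run; it is a hedged illustration that assumes the field names used in this patch (`steps`, `total_tokens`, `step_id`, `tokens`, `response.tool_calls`) and an otherwise-configured agent, not a verified API.

```python
from swarms import Agent

# Hypothetical agent setup; model and tool wiring omitted for brevity.
agent = Agent(agent_name="LoggingDemo", max_loops=1, output_type="json")
agent.run("What is 2 + 2?")

# Field names below follow the patch above (Step.step_id, Step.tokens,
# response.tool_calls, agent_output.total_tokens); treat them as assumptions.
for step in agent.agent_output.steps:
    print(step.step_id, step.tokens)
    for call in step.response.tool_calls:
        print("  tool call:", call)

print("total tokens:", agent.agent_output.total_tokens)
```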