From 8a6b8d1cf7159336cd2995a28ada091905a7a5ce Mon Sep 17 00:00:00 2001
From: Aksh Parekh
Date: Thu, 11 Dec 2025 23:01:36 -0800
Subject: [PATCH] [MOVE] files

---
 .../swarms/examples/customer_support_swarm.md | 102 +++++++++++-
 .../customer_support_swarm.py                 | 149 +++++++++++++++++-
 2 files changed, 242 insertions(+), 9 deletions(-)
 rename examples/multi_agent/{ => agent_rearrange_examples}/customer_support_swarm.py (66%)

diff --git a/docs/swarms/examples/customer_support_swarm.md b/docs/swarms/examples/customer_support_swarm.md
index 0752d4fe..976cdbe6 100644
--- a/docs/swarms/examples/customer_support_swarm.md
+++ b/docs/swarms/examples/customer_support_swarm.md
@@ -17,6 +17,7 @@ This example demonstrates a practical customer support system with:
 ✅ **No Cloud Services**: Runs completely locally
 ✅ **Free Dependencies**: ChromaDB is open-source and free
 ✅ **Semantic Search**: Vector similarity for intelligent knowledge retrieval
+✅ **Intelligent Caching**: Automatic response caching saves tokens and improves speed
 ✅ **Production-Ready**: Easy to extend and deploy
 
 ## Architecture
@@ -73,6 +74,21 @@ graph TB
 - Product information
 - Contextual responses from vector DB
 
+### 4. Response Caching System
+**Purpose**: Automatically cache and reuse responses for similar queries
+
+**How It Works** (sketched in code after the benefits list):
+1. Every customer query is first checked against cached responses
+2. If a sufficiently similar query exists (85%+ similarity by default), the cached response is returned
+3. Otherwise, the agents run and the new response is automatically saved to the cache
+4. Repeat and paraphrased queries are then answered from the cache, saving API tokens and response time
+
+**Benefits**:
+- **Token Savings**: Reuses responses instead of calling the LLM again
+- **Faster Responses**: Near-instant retrieval from the vector DB
+- **Consistent Answers**: The same question gets the same vetted answer
+- **Learning System**: Coverage improves over time as the cache grows
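+
+One possible condensation of this flow (the full version lives in `handle_support_query` in the example script; `run_support_agents` here is a hypothetical stand-in for the agent workflow, and the two cache helpers are defined later in this guide):
+
+```python
+def answer(query: str) -> str:
+    # Steps 1-2: serve from cache when a similar query was already answered
+    hit, cached = check_cached_response(query, similarity_threshold=0.85)
+    if hit:
+        return cached
+
+    # Step 3: otherwise run the agents and cache the fresh response
+    response = run_support_agents(query)  # hypothetical stand-in for the workflow
+    save_to_cache(query, response)
+    return response
+```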
+
 ## Quick Start
 
 ### Installation
@@ -191,6 +207,37 @@ for entry in KNOWLEDGE_ENTRIES:
     )
 ```
 
+### Caching System
+
+```python
+import time
+
+# Create conversation history collection for caching
+conversation_history = chroma_client.get_or_create_collection(
+    name="conversation_history",
+    metadata={"description": "Past queries and responses"}
+)
+
+def check_cached_response(query: str, similarity_threshold: float = 0.85):
+    """Check if a similar query was already answered"""
+    results = conversation_history.query(
+        query_texts=[query],
+        n_results=1
+    )
+
+    # Nothing cached yet
+    if not results["documents"][0]:
+        return False, ""
+
+    # ChromaDB returns a distance (lower = more similar); map it to a
+    # 0-1 similarity score and compare against the threshold
+    similarity = 1 - (results["distances"][0][0] / 2)
+    if similarity >= similarity_threshold:  # e.g. 0.85 ≈ distance < 0.3
+        return True, results["metadatas"][0][0]["response"]
+
+    return False, ""
+
+def save_to_cache(query: str, response: str):
+    """Save a query-response pair for future reuse"""
+    conversation_history.add(
+        ids=[f"conv_{hash(query)}_{time.time()}"],
+        documents=[query],
+        metadatas=[{"response": response, "timestamp": time.time()}]
+    )
+```
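+
+A quick sanity check of the two helpers above (run against a fresh collection; whether the paraphrase clears the 0.85 threshold depends on the embedding model, so treat the second result as illustrative):
+
+```python
+# Fresh collection: the first lookup is a cache miss
+hit, _ = check_cached_response("How do I reset my password?")
+print(hit)  # False
+
+# Store a response, then probe with a paraphrase of the same question
+save_to_cache("How do I reset my password?", "Go to Settings > Security > Reset Password.")
+hit, cached = check_cached_response("I forgot my password, how can I reset it?")
+print(hit, cached)  # True plus the cached answer, if similarity >= 0.85
+```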
+
 ### Query Vector Database
 
 ```python
@@ -307,6 +354,39 @@ The system can handle various types of customer support queries:
 - Provides refund request steps
 - Sets timeline expectations
 
+### Caching in Action
+
+**First Query:**
+```
+👤 You: I can't log into my account
+
+💾 Checking cache for similar queries...
+❌ No similar query found in cache. Processing with agents...
+🔍 Searching knowledge base...
+🤖 Processing with support agents...
+
+🤖 SUPPORT AGENT:
+I understand you're having trouble logging in...
+[Full response]
+
+💾 Saving response to cache for future queries...
+✅ Cached successfully!
+```
+
+**Similar Query Later:**
+```
+👤 You: Can't sign in to my account
+
+💾 Checking cache for similar queries...
+✅ Found cached response! (Saving tokens 🎉)
+
+🤖 SUPPORT AGENT (from cache):
+I understand you're having trouble logging in...
+[Same cached response - instant, no LLM call!]
+
+💡 This response was retrieved from cache. No tokens used! 🎉
+```
+
 ## Customization
 
 ### Adding More Knowledge
@@ -338,6 +418,26 @@ def create_expanded_support_agents():
     return [triage_agent, support_agent, escalation_agent]
 ```
 
+### Adjusting Cache Sensitivity
+
+Control how similar a query must be before a cached response is reused:
+
+```python
+# More aggressive caching (70% similarity)
+response, cached = handle_support_query(query, agents, cache_threshold=0.70)
+
+# Stricter caching (90% similarity)
+response, cached = handle_support_query(query, agents, cache_threshold=0.90)
+
+# Disable caching
+response, cached = handle_support_query(query, agents, use_cache=False)
+```
+
+**Recommended Thresholds** (distance cutoffs sketched below):
+- `0.85` (default) - Good balance, catches paraphrased questions
+- `0.90` - Strict, only near-identical questions hit the cache
+- `0.70` - Aggressive, broader matching (more token savings, less accuracy)
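+
+Because `check_cached_response` converts ChromaDB's distance to a similarity with `similarity = 1 - distance / 2`, each threshold implies a distance cutoff of `2 * (1 - threshold)`. A small sketch of the resulting cutoffs (derived from that conversion, not measured data):
+
+```python
+# similarity = 1 - distance / 2  =>  distance cutoff = 2 * (1 - threshold)
+for threshold in (0.70, 0.85, 0.90):
+    cutoff = 2 * (1 - threshold)
+    print(f"threshold {threshold:.2f} -> cache hit when distance <= {cutoff:.2f}")
+
+# threshold 0.70 -> cache hit when distance <= 0.60
+# threshold 0.85 -> cache hit when distance <= 0.30
+# threshold 0.90 -> cache hit when distance <= 0.20
+```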
+
 ### Persistent Storage
 
 For production, save ChromaDB to disk:
@@ -344,11 +444,11 @@ ### Persistent Storage
 
 ```python
 from chromadb import PersistentClient
 
 # Use persistent storage instead of in-memory
 chroma_client = PersistentClient(path="./chroma_db")
 
-# Database persists between runs
+# Database persists between runs (including cached responses!)
 ```
 
 ### Load Knowledge from Files
diff --git a/examples/multi_agent/customer_support_swarm.py b/examples/multi_agent/agent_rearrange_examples/customer_support_swarm.py
similarity index 66%
rename from examples/multi_agent/customer_support_swarm.py
rename to examples/multi_agent/agent_rearrange_examples/customer_support_swarm.py
index 761ff55d..54720f39 100644
--- a/examples/multi_agent/customer_support_swarm.py
+++ b/examples/multi_agent/agent_rearrange_examples/customer_support_swarm.py
@@ -25,6 +25,12 @@ knowledge_collection = chroma_client.get_or_create_collection(
     metadata={"description": "TechCorp product information and support knowledge"}
 )
 
+# Create collection for conversation history (query caching)
+conversation_history = chroma_client.get_or_create_collection(
+    name="conversation_history",
+    metadata={"description": "Past customer queries and responses for caching"}
+)
+
 # Company knowledge base entries to store in vector DB
 KNOWLEDGE_ENTRIES = [
     {
@@ -98,6 +104,77 @@ def query_knowledge_base(query: str, n_results: int = 3) -> str:
     return knowledge
 
 
+def check_cached_response(query: str, similarity_threshold: float = 0.85) -> tuple[bool, str]:
+    """
+    Check if a similar query was already answered.
+
+    Args:
+        query: The customer query
+        similarity_threshold: Minimum similarity score to use cached response (0-1)
+
+    Returns:
+        Tuple of (found, response) where found is True if cache hit
+    """
+    try:
+        results = conversation_history.query(
+            query_texts=[query],
+            n_results=1
+        )
+
+        if not results["documents"][0]:
+            return False, ""
+
+        # ChromaDB returns distances (lower = more similar), so convert the
+        # best match's distance into a rough 0-1 similarity score
+        distances = results.get("distances", [[1.0]])[0]
+
+        if distances and distances[0] is not None:
+            # 1 - distance / 2 is exact for cosine distance and only an
+            # approximation for ChromaDB's default L2 space
+            similarity = 1 - (distances[0] / 2)  # Normalize to 0-1 range
+
+            if similarity >= similarity_threshold:
+                cached_response = results["metadatas"][0][0].get("response", "")
+                if cached_response:
+                    return True, cached_response
+
+        return False, ""
+
+    except Exception as e:
+        print(f"Cache check error: {e}")
+        return False, ""
+
+
+def save_to_cache(query: str, response: str):
+    """
+    Save a query-response pair to the conversation history cache.
+
+    Args:
+        query: The customer query
+        response: The agent's response
+    """
+    try:
+        import time
+        import hashlib
+
+        # Create a unique, deterministic ID from the query text
+        query_id = hashlib.md5(query.encode()).hexdigest()
+
+        # Add to conversation history
+        conversation_history.add(
+            ids=[f"conv_{query_id}_{int(time.time())}"],
+            documents=[query],
+            metadatas=[{
+                "response": response,
+                "timestamp": time.time(),
+                "query_length": len(query)
+            }]
+        )
+
+    except Exception as e:
+        print(f"Cache save error: {e}")
+
+
 # Create specialized support agents that use the vector database
 def create_support_agents():
     """Create a team of specialized customer support agents with vector DB access"""
@@ -146,21 +223,44 @@ Format your response clearly with the solution and any next steps.
     return [triage_agent, support_agent]
 
 
-def handle_support_query(customer_query: str, agents: list) -> str:
-    """Handle a customer support query using vector DB + agents"""
+def handle_support_query(customer_query: str, agents: list, use_cache: bool = True,
+                         cache_threshold: float = 0.85) -> tuple[str, bool]:
+    """
+    Handle a customer support query using vector DB + agents with caching.
+
+    Args:
+        customer_query: The customer's question
+        agents: List of support agents
+        use_cache: Whether to check cache for similar queries
+        cache_threshold: Similarity threshold for cache hits (0-1, default 0.85)
+
+    Returns:
+        Tuple of (response, was_cached)
+    """
+
+    # Step 1: Check if we have a cached response for a similar query
+    if use_cache:
+        print("\n💾 Checking cache for similar queries...")
+        cache_hit, cached_response = check_cached_response(customer_query, cache_threshold)
+
+        if cache_hit:
+            print("✅ Found cached response! (Saving tokens 🎉)")
+            return cached_response, True
 
-    # Step 1: Query vector database for relevant knowledge
+        print("❌ No similar query found in cache. Processing with agents...")
+
+    # Step 2: Query vector database for relevant knowledge
     print("\n🔍 Searching knowledge base...")
     relevant_knowledge = query_knowledge_base(customer_query, n_results=3)
 
-    # Step 2: Combine query with knowledge for agent processing
+    # Step 3: Combine query with knowledge for agent processing
     enriched_query = f"""Customer Query: {customer_query}
 
 {relevant_knowledge}
 
 Based on the customer query and knowledge base information above, provide an appropriate response."""
 
-    # Step 3: Run through agent workflow
+    # Step 4: Run through agent workflow
     print("🤖 Processing with support agents...\n")
 
     workflow = SequentialWorkflow(
@@ -171,7 +271,14 @@ Based on the customer query and knowledge base information above, provide an app
     )
 
     result = workflow.run(enriched_query)
-    return result
+
+    # Step 5: Save to cache for future queries
+    if use_cache:
+        print("\n💾 Saving response to cache for future queries...")
+        save_to_cache(customer_query, result)
+        print("✅ Cached successfully!")
+
+    return result, False
 
 
 def interactive_console():
@@ -214,7 +321,20 @@ def interactive_console():
 
     # Process support query
     try:
-        handle_support_query(user_input, agents)
+        response, was_cached = handle_support_query(user_input, agents, use_cache=True)
+
+        # Print response with appropriate header
+        print("\n" + "=" * 80)
+        if was_cached:
+            print("🤖 SUPPORT AGENT (from cache):")
+        else:
+            print("🤖 SUPPORT AGENT:")
+        print("=" * 80)
+        print(f"\n{response}\n")
+        print("=" * 80)
+
+        if was_cached:
+            print("\n💡 This response was retrieved from cache. No tokens used! 🎉")
     except Exception as e:
         print(f"\n❌ Error processing query: {str(e)}")
 
@@ -241,7 +361,20 @@ def demo_mode():
         print(f"{'='*80}")
         print(f"\n👤 Customer: {query}\n")
 
-        handle_support_query(query, agents)
+        response, was_cached = handle_support_query(query, agents, use_cache=True)
+
+        # Print response
+        print("\n" + "=" * 80)
+        if was_cached:
+            print("🤖 SUPPORT AGENT (from cache):")
+        else:
+            print("🤖 SUPPORT AGENT:")
+        print("=" * 80)
+        print(f"\n{response}\n")
+        print("=" * 80)
+
+        if was_cached:
+            print("\n💡 This response was retrieved from cache. No tokens used! 🎉")
 
         if i < len(demo_queries):
             input("\nPress Enter to continue to next query...")