[MOVE] files

pull/1252/head
Aksh Parekh 2 weeks ago
parent 873504fdf6
commit 8a6b8d1cf7

@ -17,6 +17,7 @@ This example demonstrates a practical customer support system with:
**No Cloud Services**: Runs completely locally
**Free Dependencies**: ChromaDB is open-source and free
**Semantic Search**: Vector similarity for intelligent knowledge retrieval
**Intelligent Caching**: Automatic response caching saves tokens and improves speed
**Production-Ready**: Easy to extend and deploy

## Architecture
@ -73,6 +74,21 @@ graph TB
- Product information
- Contextual responses from vector DB
### 4. Response Caching System
**Purpose**: Automatically cache and reuse responses for similar queries
**How It Works** (sketched in code below):
1. Every customer query is checked against previously cached responses
2. If a sufficiently similar query exists (85%+ similarity by default), the cached response is returned
3. New responses are automatically saved to the cache
4. This saves API tokens and improves response time
**Benefits**:
- **Token Savings**: Reuses responses instead of calling LLM
- **Faster Responses**: Instant retrieval from vector DB
- **Consistent Answers**: Same questions get same quality answers
- **Learning System**: Gets better over time as cache grows
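
A minimal sketch of that flow, assuming the `check_cached_response` and `save_to_cache` helpers shown later in this README (`run_agents` is a hypothetical stand-in for the agent workflow):

```python
# Sketch of the cache-first flow described above.
# check_cached_response / save_to_cache are defined in the Caching System
# section below; run_agents is a hypothetical stand-in for the agent workflow.
def answer_query(query: str) -> str:
    hit, cached = check_cached_response(query)  # vector lookup, no LLM call
    if hit:
        return cached                           # cache hit: zero tokens spent
    response = run_agents(query)                # full agent workflow (LLM call)
    save_to_cache(query, response)              # remember for next time
    return response
```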
## Quick Start

### Installation
@ -191,6 +207,37 @@ for entry in KNOWLEDGE_ENTRIES:
    )
```
### Caching System
```python
import hashlib
import time

# Create conversation history collection for caching
conversation_history = chroma_client.get_or_create_collection(
    name="conversation_history",
    metadata={"description": "Past queries and responses"}
)

def check_cached_response(query: str, similarity_threshold: float = 0.85):
    """Check if a similar query was already answered"""
    results = conversation_history.query(
        query_texts=[query],
        n_results=1
    )
    if not results["documents"][0]:
        return False, ""

    # ChromaDB returns distances (lower = more similar); convert to a
    # rough similarity score and compare against the threshold
    similarity = 1 - (results["distances"][0][0] / 2)
    if similarity >= similarity_threshold:
        return True, results["metadatas"][0][0]["response"]
    return False, ""

def save_to_cache(query: str, response: str):
    """Save query-response pair for future reuse"""
    # hashlib (unlike built-in hash()) gives IDs that are stable across runs
    query_id = hashlib.md5(query.encode()).hexdigest()
    conversation_history.add(
        ids=[f"conv_{query_id}_{int(time.time())}"],
        documents=[query],
        metadatas=[{"response": response, "timestamp": time.time()}]
    )
```
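
A quick round trip, with hypothetical query strings, illustrates the intended behavior: a paraphrased question should land close enough in vector space to hit the cache.

```python
# Hypothetical round trip: a paraphrased query should hit the cache
save_to_cache("How do I reset my password?", "Go to Settings > Security > Reset Password.")

hit, cached = check_cached_response("how can I reset my password?")
if hit:
    print(cached)  # served from the vector DB; no LLM call needed
```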
### Query Vector Database
```python
@ -307,6 +354,39 @@ The system can handle various types of customer support queries:
- Provides refund request steps
- Sets timeline expectations
### Caching in Action
**First Query:**
```
👤 You: I can't log into my account
💾 Checking cache for similar queries...
❌ No similar query found in cache. Processing with agents...
🔍 Searching knowledge base...
🤖 Processing with support agents...
🤖 SUPPORT AGENT:
I understand you're having trouble logging in...
[Full response]
💾 Saving response to cache for future queries...
✅ Cached successfully!
```
**Similar Query Later:**
```
👤 You: Can't sign in to my account
💾 Checking cache for similar queries...
✅ Found cached response! (Saving tokens 🎉)
🤖 SUPPORT AGENT (from cache):
I understand you're having trouble logging in...
[Same cached response - instant, no LLM call!]
💡 This response was retrieved from cache. No tokens used! 🎉
```
## Customization

### Adding More Knowledge
@ -338,6 +418,26 @@ def create_expanded_support_agents():
    return [triage_agent, support_agent, escalation_agent]
```
### Adjusting Cache Sensitivity
Control when to use cached responses:
```python
# More aggressive caching (70% similarity)
response, cached = handle_support_query(query, agents, cache_threshold=0.70)

# Stricter caching (90% similarity)
response, cached = handle_support_query(query, agents, cache_threshold=0.90)

# Disable caching
response, cached = handle_support_query(query, agents, use_cache=False)
```
**Recommended Thresholds:**
- `0.85` (default) - Good balance, catches paraphrased questions
- `0.90` - Strict, only very similar questions
- `0.70` - Aggressive, broader matching (more token savings, higher risk of mismatched answers)
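
Under the `similarity = 1 - distance / 2` conversion used in `check_cached_response`, each threshold corresponds to a maximum ChromaDB distance. A small helper (hypothetical, for intuition only) makes the mapping explicit:

```python
# Hypothetical helper: the maximum ChromaDB distance that still counts
# as a cache hit, given the similarity = 1 - distance / 2 conversion
def distance_cutoff(similarity_threshold: float) -> float:
    return 2 * (1 - similarity_threshold)

print(distance_cutoff(0.85))  # ~0.3 (default)
print(distance_cutoff(0.90))  # ~0.2 (strict)
print(distance_cutoff(0.70))  # ~0.6 (aggressive)
```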
### Persistent Storage

For production, save ChromaDB to disk:
@ -348,7 +448,7 @@ from chromadb import PersistentClient
# Use persistent storage instead of in-memory
chroma_client = PersistentClient(path="./chroma_db")

# Database persists between runs (including cached responses!)
```
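
If cached answers go stale (for example, after a policy change), the cache can be cleared without touching the knowledge base. A sketch using ChromaDB's `delete_collection`:

```python
# Drop only the cached responses; the knowledge base collection is untouched
chroma_client.delete_collection("conversation_history")
conversation_history = chroma_client.get_or_create_collection(
    name="conversation_history",
    metadata={"description": "Past queries and responses"}
)
```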
### Load Knowledge from Files

@ -25,6 +25,12 @@ knowledge_collection = chroma_client.get_or_create_collection(
metadata={"description": "TechCorp product information and support knowledge"} metadata={"description": "TechCorp product information and support knowledge"}
) )
# Create collection for conversation history (query caching)
conversation_history = chroma_client.get_or_create_collection(
    name="conversation_history",
    metadata={"description": "Past customer queries and responses for caching"}
)
# Company knowledge base entries to store in vector DB
KNOWLEDGE_ENTRIES = [
    {
@ -98,6 +104,77 @@ def query_knowledge_base(query: str, n_results: int = 3) -> str:
    return knowledge
def check_cached_response(query: str, similarity_threshold: float = 0.85) -> tuple[bool, str]:
    """
    Check if a similar query was already answered.

    Args:
        query: The customer query
        similarity_threshold: Minimum similarity score to use cached response (0-1)

    Returns:
        Tuple of (found, response) where found is True if cache hit
    """
    try:
        results = conversation_history.query(
            query_texts=[query],
            n_results=1
        )

        if not results["documents"][0]:
            return False, ""

        # ChromaDB returns distances; lower distance means higher similarity
        distances = results.get("distances", [[1.0]])[0]
        if distances and distances[0] is not None:
            # Convert distance to an approximate similarity in the 0-1 range
            # (ChromaDB collections use L2 distance by default)
            similarity = 1 - (distances[0] / 2)
            if similarity >= similarity_threshold:
                cached_response = results["metadatas"][0][0].get("response", "")
                if cached_response:
                    return True, cached_response

        return False, ""
    except Exception as e:
        print(f"Cache check error: {e}")
        return False, ""
def save_to_cache(query: str, response: str):
    """
    Save a query-response pair to the conversation history cache.

    Args:
        query: The customer query
        response: The agent's response
    """
    try:
        import time
        import hashlib

        # Create a unique, run-stable ID from the query text
        query_id = hashlib.md5(query.encode()).hexdigest()

        # Add to conversation history
        conversation_history.add(
            ids=[f"conv_{query_id}_{int(time.time())}"],
            documents=[query],
            metadatas=[{
                "response": response,
                "timestamp": time.time(),
                "query_length": len(query)
            }]
        )
    except Exception as e:
        print(f"Cache save error: {e}")
# Create specialized support agents that use the vector database
def create_support_agents():
    """Create a team of specialized customer support agents with vector DB access"""
@ -146,21 +223,44 @@ Format your response clearly with the solution and any next steps.
    return [triage_agent, support_agent]
def handle_support_query(customer_query: str, agents: list, use_cache: bool = True,
                         cache_threshold: float = 0.85) -> tuple[str, bool]:
    """
    Handle a customer support query using vector DB + agents with caching.

    Args:
        customer_query: The customer's question
        agents: List of support agents
        use_cache: Whether to check cache for similar queries
        cache_threshold: Similarity threshold for cache hits (0-1, default 0.85)

    Returns:
        Tuple of (response, was_cached)
    """
    # Step 1: Check if we have a cached response for a similar query
    if use_cache:
        print("\n💾 Checking cache for similar queries...")
        cache_hit, cached_response = check_cached_response(customer_query, cache_threshold)

        if cache_hit:
            print("✅ Found cached response! (Saving tokens 🎉)")
            return cached_response, True

        print("❌ No similar query found in cache. Processing with agents...")
    # Step 2: Query vector database for relevant knowledge
    print("\n🔍 Searching knowledge base...")
    relevant_knowledge = query_knowledge_base(customer_query, n_results=3)

    # Step 3: Combine query with knowledge for agent processing
    enriched_query = f"""Customer Query: {customer_query}
{relevant_knowledge}
Based on the customer query and knowledge base information above, provide an appropriate response."""

    # Step 4: Run through agent workflow
    print("🤖 Processing with support agents...\n")
    workflow = SequentialWorkflow(
@ -171,7 +271,14 @@ Based on the customer query and knowledge base information above, provide an app
    )

    result = workflow.run(enriched_query)

    # Step 5: Save to cache for future queries
    if use_cache:
        print("\n💾 Saving response to cache for future queries...")
        save_to_cache(customer_query, result)
        print("✅ Cached successfully!")

    return result, False
def interactive_console():
@ -214,7 +321,20 @@ def interactive_console():
        # Process support query
        try:
            response, was_cached = handle_support_query(user_input, agents, use_cache=True)

            # Print response with appropriate header
            print("\n" + "=" * 80)
            if was_cached:
                print("🤖 SUPPORT AGENT (from cache):")
            else:
                print("🤖 SUPPORT AGENT:")
            print("=" * 80)
            print(f"\n{response}\n")
            print("=" * 80)

            if was_cached:
                print("\n💡 This response was retrieved from cache. No tokens used! 🎉")
        except Exception as e:
            print(f"\n❌ Error processing query: {str(e)}")
@ -241,7 +361,20 @@ def demo_mode():
print(f"{'='*80}") print(f"{'='*80}")
print(f"\n👤 Customer: {query}\n") print(f"\n👤 Customer: {query}\n")
handle_support_query(query, agents) response, was_cached = handle_support_query(query, agents, use_cache=True)
# Print response
print("\n" + "=" * 80)
if was_cached:
print("🤖 SUPPORT AGENT (from cache):")
else:
print("🤖 SUPPORT AGENT:")
print("=" * 80)
print(f"\n{response}\n")
print("=" * 80)
if was_cached:
print("\n💡 This response was retrieved from cache. No tokens used! 🎉")
        if i < len(demo_queries):
            input("\nPress Enter to continue to next query...")