diff --git a/gradio_templates/chatbot.py b/gradio_templates/chatbot.py
new file mode 100644
index 00000000..65ca67e9
--- /dev/null
+++ b/gradio_templates/chatbot.py
@@ -0,0 +1,43 @@
+import requests
+import gradio as gr
+
+def api_response(message, history):
+    # Extract the string content from the Gradio message
+    user_message = message["content"] if isinstance(message, dict) else message
+
+    url = "http://localhost:8888/chat"
+    payload = {
+        "id": "string",
+        "model": {
+            "id": "llama-2-70b.Q5_K_M",
+            "name": "llama-2-70b.Q5_K_M",
+            "maxLength": 2048,
+            "tokenLimit": 2048
+        },
+        "messages": [
+            {
+                "role": "system",
+                "content": "Hello, how may I help you? AMA!"
+            },
+            {
+                "role": "user",
+                "content": user_message  # Use the extracted message content here
+            }
+        ],
+        "maxTokens": 2048,
+        "temperature": 0,
+        "prompt": "HUMAN: \n You are a helpful AI assistant. Use the following context and chat history to answer the question at the end with a helpful answer. Get straight to the point and always think things through step-by-step before answering. If you don't know the answer, just say 'I don't know'; don't try to make up an answer. \n\n{context}\n{chat_history}\n{question}\n\nAI: Here is the most relevant sentence in the context: \n",
+        "file": {
+            "filename": "None",
+            "title": "None",
+            "username": "None",
+            "state": "Unavailable"
+        }
+    }
+    response = requests.post(url, json=payload)
+    if response.status_code == 200:
+        return response.json().get("answer", "Error: No answer returned")
+    else:
+        return f"Error: {response.status_code}"
+
+gr.ChatInterface(api_response).launch()
\ No newline at end of file
diff --git a/swarms/server/server.py b/swarms/server/server.py
index 860f0567..14e5a49d 100644
--- a/swarms/server/server.py
+++ b/swarms/server/server.py
@@ -20,7 +20,7 @@ from fastapi.routing import APIRouter
 from fastapi.staticfiles import StaticFiles
 from huggingface_hub import login
 from langchain.callbacks import StreamingStdOutCallbackHandler
-from langchain.memory import VectorStoreRetrieverMemory
+from langchain.memory import ConversationStringBufferMemory
 from langchain.memory.chat_message_histories.in_memory import ChatMessageHistory
 from langchain.prompts.prompt import PromptTemplate
 from langchain_community.chat_models import ChatOpenAI
@@ -184,27 +184,27 @@ async def create_chain(
         elif message.role == Role.SYSTEM:
             chat_memory.add_message(message)
 
-    # memory = ConversationSummaryBufferMemory(
-    #     llm=llm,
-    #     chat_memory=chat_memory,
-    #     memory_key="chat_history",
+    memory = ConversationStringBufferMemory(
+        llm=llm,
+        chat_memory=chat_memory,
+        memory_key="chat_history",
+        input_key="question",
+        output_key="answer",
+        prompt=SUMMARY_PROMPT_TEMPLATE,
+        return_messages=False,
+    )
+
+    # memory = VectorStoreRetrieverMemory(
     #     input_key="question",
     #     output_key="answer",
-    #     prompt=SUMMARY_PROMPT_TEMPLATE,
+    #     chat_memory=chat_memory,
+    #     memory_key="chat_history",
+    #     return_docs=False,  # Change this to False
+    #     retriever=retriever,
     #     return_messages=True,
+    #     prompt=SUMMARY_PROMPT_TEMPLATE
     # )
-    memory = VectorStoreRetrieverMemory(
-        input_key="question",
-        output_key="answer",
-        chat_memory=chat_memory,
-        memory_key="chat_history",
-        return_docs=True,  # Change this to False
-        retriever=retriever,
-        return_messages=True,
-        prompt=SUMMARY_PROMPT_TEMPLATE
-    )
-
     question_generator = LLMChain(
         llm=llm,
         prompt=CONDENSE_PROMPT_TEMPLATE,
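
To exercise the new template without launching the Gradio UI, the request it builds can be replayed directly against the /chat endpoint. Below is a minimal smoke-test sketch, assuming the server from swarms/server/server.py is already running locally on port 8888 and that its JSON response contains an "answer" field, as the template expects; the question text and the empty "prompt" value are placeholders, not values from this diff.

import requests

# Same payload shape that gradio_templates/chatbot.py sends to the server.
payload = {
    "id": "string",
    "model": {
        "id": "llama-2-70b.Q5_K_M",
        "name": "llama-2-70b.Q5_K_M",
        "maxLength": 2048,
        "tokenLimit": 2048,
    },
    "messages": [
        {"role": "system", "content": "Hello, how may I help you? AMA!"},
        {"role": "user", "content": "What can you help me with?"},  # placeholder question
    ],
    "maxTokens": 2048,
    "temperature": 0,
    "prompt": "",  # the template fills this with its full instruction prompt
    "file": {"filename": "None", "title": "None", "username": "None", "state": "Unavailable"},
}

# Post the request and print the returned answer, mirroring the template's handling.
response = requests.post("http://localhost:8888/chat", json=payload, timeout=120)
response.raise_for_status()
print(response.json().get("answer", "Error: No answer returned"))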