diff --git a/gradio_templates/chatbot.py b/gradio_templates/chatbot.py
new file mode 100644
index 00000000..65ca67e9
--- /dev/null
+++ b/gradio_templates/chatbot.py
@@ -0,0 +1,46 @@
+import requests
+import gradio as gr
+
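+# Minimal Gradio chat UI that forwards each user message to a local /chat endpoint and displays the returned answer.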
+def api_response(message, history):
+    # Extract the string content from the Gradio message
+    user_message = message["content"] if isinstance(message, dict) else message
+
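+    # Request payload for the /chat endpoint; adjust the model name, token limits, and prompt to match your deployment.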
+    url = "http://localhost:8888/chat"
+    payload = {
+        "id": "string",
+        "model": {
+            "id": "llama-2-70b.Q5_K_M",
+            "name": "llama-2-70b.Q5_K_M",
+            "maxLength": 2048,
+            "tokenLimit": 2048
+        },
+        "messages": [
+            {
+                "role": "system",
+                "content": "Hello, how may I help you? AMA!"
+            },
+            {
+                "role": "user",
+                "content": user_message  # Use the extracted message content here
+            }
+        ],
+        "maxTokens": 2048,
+        "temperature": 0,
+        "prompt": "HUMAN: \n You are a helpful AI assistant. Use the following context and chat history to answer the question at the end with a helpful answer. Get straight to the point and always think things through step-by-step before answering. If you don't know the answer, just say 'I don't know'; don't try to make up an answer. \n\n{context}\n{chat_history}\n{question}\n\nAI: Here is the most relevant sentence in the context: \n",
+        "file": {
+            "filename": "None",
+            "title": "None",
+            "username": "None",
+            "state": "Unavailable"
+        }
+    }
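+    # Post the request and return either the server's answer or an HTTP error message to the chat UI.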
+    response = requests.post(url, json=payload)
+    if response.status_code == 200:
+        return response.json().get("answer", "Error: No answer returned")
+    else:
+        return f"Error: {response.status_code}"
+
+gr.ChatInterface(api_response).launch()
\ No newline at end of file
diff --git a/swarms/server/server.py b/swarms/server/server.py
index 860f0567..14e5a49d 100644
--- a/swarms/server/server.py
+++ b/swarms/server/server.py
@@ -20,7 +20,7 @@ from fastapi.routing import APIRouter
from fastapi.staticfiles import StaticFiles
from huggingface_hub import login
from langchain.callbacks import StreamingStdOutCallbackHandler
-from langchain.memory import VectorStoreRetrieverMemory
+from langchain.memory import ConversationStringBufferMemory
from langchain.memory.chat_message_histories.in_memory import ChatMessageHistory
from langchain.prompts.prompt import PromptTemplate
from langchain_community.chat_models import ChatOpenAI
@@ -184,27 +184,29 @@ async def create_chain(
        elif message.role == Role.SYSTEM:
            chat_memory.add_message(message)
-    # memory = ConversationSummaryBufferMemory(
-    #     llm=llm,
-    #     chat_memory=chat_memory,
-    #     memory_key="chat_history",
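+    # Keep the full chat history in a plain string buffer instead of the vector-store retriever memory.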
+    memory = ConversationStringBufferMemory(
+        llm=llm,
+        chat_memory=chat_memory,
+        memory_key="chat_history",
+        input_key="question",
+        output_key="answer",
+        prompt=SUMMARY_PROMPT_TEMPLATE,
+        return_messages=False,
+    )
+
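+    # Previous retriever-backed memory, kept commented out for reference: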
+    # memory = VectorStoreRetrieverMemory(
    #     input_key="question",
    #     output_key="answer",
-    #     prompt=SUMMARY_PROMPT_TEMPLATE,
+    #     chat_memory=chat_memory,
+    #     memory_key="chat_history",
+    #     return_docs=False,  # Change this to False
+    #     retriever=retriever,
    #     return_messages=True,
+    #     prompt=SUMMARY_PROMPT_TEMPLATE
    # )
-    memory = VectorStoreRetrieverMemory(
-        input_key="question",
-        output_key="answer",
-        chat_memory=chat_memory,
-        memory_key="chat_history",
-        return_docs=True,  # Change this to False
-        retriever=retriever,
-        return_messages=True,
-        prompt=SUMMARY_PROMPT_TEMPLATE
-    )
-
    question_generator = LLMChain(
        llm=llm,
        prompt=CONDENSE_PROMPT_TEMPLATE,