diff --git a/gradio_templates/chatbot.py b/gradio_templates/chatbot.py
new file mode 100644
index 00000000..65ca67e9
--- /dev/null
+++ b/gradio_templates/chatbot.py
@@ -0,0 +1,43 @@
+import requests
+import gradio as gr
+
+def api_response(message, history):
+    # Extract the string content from the Gradio message
+    user_message = message["content"] if isinstance(message, dict) else message
+
+    url = "http://localhost:8888/chat"
+    payload = {
+        "id": "string",
+        "model": {
+            "id": "llama-2-70b.Q5_K_M",
+            "name": "llama-2-70b.Q5_K_M",
+            "maxLength": 2048,
+            "tokenLimit": 2048
+        },
+        "messages": [
+            {
+                "role": "system",
+                "content": "Hello, how may I help you? AMA!"
+            },
+            {
+                "role": "user",
+                "content": user_message  # Use the extracted message content here
+            }
+        ],
+        "maxTokens": 2048,
+        "temperature": 0,
+        "prompt": "HUMAN: \n You are a helpful AI assistant. Use the following context and chat history to answer the question at the end with a helpful answer. Get straight to the point and always think things through step-by-step before answering. If you don't know the answer, just say 'I don't know'; don't try to make up an answer. \n\n{context}\n{chat_history}\n{question}\n\nAI: Here is the most relevant sentence in the context: \n",
+        "file": {
+            "filename": "None",
+            "title": "None",
+            "username": "None",
+            "state": "Unavailable"
+        }
+    }
+    response = requests.post(url, json=payload)
+    if response.status_code == 200:
+        return response.json().get("answer", "Error: No answer returned")
+    else:
+        return f"Error: {response.status_code}"
+
+gr.ChatInterface(api_response).launch()
\ No newline at end of file
diff --git a/swarms/server/server.py b/swarms/server/server.py
index 860f0567..14e5a49d 100644
--- a/swarms/server/server.py
+++ b/swarms/server/server.py
@@ -20,7 +20,7 @@ from fastapi.routing import APIRouter
 from fastapi.staticfiles import StaticFiles
 from huggingface_hub import login
 from langchain.callbacks import StreamingStdOutCallbackHandler
-from langchain.memory import VectorStoreRetrieverMemory
+from langchain.memory import ConversationStringBufferMemory
 from langchain.memory.chat_message_histories.in_memory import ChatMessageHistory
 from langchain.prompts.prompt import PromptTemplate
 from langchain_community.chat_models import ChatOpenAI
@@ -184,27 +184,27 @@ async def create_chain(
         elif message.role == Role.SYSTEM:
             chat_memory.add_message(message)
 
-    # memory = ConversationSummaryBufferMemory(
-    #     llm=llm,
-    #     chat_memory=chat_memory,
-    #     memory_key="chat_history",
+    memory = ConversationStringBufferMemory(
+        llm=llm,
+        chat_memory=chat_memory,
+        memory_key="chat_history",
+        input_key="question",
+        output_key="answer",
+        prompt=SUMMARY_PROMPT_TEMPLATE,
+        return_messages=False,
+    )
+
+    # memory = VectorStoreRetrieverMemory(
     #     input_key="question",
     #     output_key="answer",
-    #     prompt=SUMMARY_PROMPT_TEMPLATE,
+    #     chat_memory=chat_memory,
+    #     memory_key="chat_history",
+    #     return_docs=False,  # Change this to False
+    #     retriever=retriever,
     #     return_messages=True,
+    #     prompt=SUMMARY_PROMPT_TEMPLATE
     # )
-    memory = VectorStoreRetrieverMemory(
-        input_key="question",
-        output_key="answer",
-        chat_memory=chat_memory,
-        memory_key="chat_history",
-        return_docs=True,  # Change this to False
-        retriever=retriever,
-        return_messages=True,
-        prompt=SUMMARY_PROMPT_TEMPLATE
-    )
-
     question_generator = LLMChain(
         llm=llm,
         prompt=CONDENSE_PROMPT_TEMPLATE,
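
To exercise the new template without launching the Gradio UI, the request it builds can be replayed directly against the /chat endpoint. Below is a minimal smoke-test sketch, assuming the server from swarms/server/server.py is already running locally on port 8888 and that its JSON response contains an "answer" field, as the template expects; the question text and the empty "prompt" value are placeholders, not values from this diff.

import requests

# Same payload shape that gradio_templates/chatbot.py sends to the server.
payload = {
    "id": "string",
    "model": {
        "id": "llama-2-70b.Q5_K_M",
        "name": "llama-2-70b.Q5_K_M",
        "maxLength": 2048,
        "tokenLimit": 2048,
    },
    "messages": [
        {"role": "system", "content": "Hello, how may I help you? AMA!"},
        {"role": "user", "content": "What can you help me with?"},  # placeholder question
    ],
    "maxTokens": 2048,
    "temperature": 0,
    "prompt": "",  # the template fills this with its full instruction prompt
    "file": {"filename": "None", "title": "None", "username": "None", "state": "Unavailable"},
}

# Post the request and print the returned answer, mirroring the template's handling.
response = requests.post("http://localhost:8888/chat", json=payload, timeout=120)
response.raise_for_status()
print(response.json().get("answer", "Error: No answer returned"))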