gradio UI support fixes

pull/570/head
Richard Anthony Hein 9 months ago
parent 96a3e46dbb
commit 4c18e8d588

@@ -0,0 +1,43 @@
import requests
import gradio as gr

def api_response(message, history):
    # Extract the string content from the Gradio message
    user_message = message["content"] if isinstance(message, dict) else message
    url = "http://localhost:8888/chat"
    payload = {
        "id": "string",
        "model": {
            "id": "llama-2-70b.Q5_K_M",
            "name": "llama-2-70b.Q5_K_M",
            "maxLength": 2048,
            "tokenLimit": 2048
        },
        "messages": [
            {
                "role": "system",
                "content": "Hello, how may I help you? AMA!"
            },
            {
                "role": "user",
                "content": user_message  # Use the extracted message content here
            }
        ],
        "maxTokens": 2048,
        "temperature": 0,
        "prompt": "HUMAN: \n You are a helpful AI assistant. Use the following context and chat history to answer the question at the end with a helpful answer. Get straight to the point and always think things through step-by-step before answering. If you don't know the answer, just say 'I don't know'; don't try to make up an answer. \n\n<context>{context}</context>\n<chat_history>{chat_history}</chat_history>\n<question>{question}</question>\n\nAI: Here is the most relevant sentence in the context: \n",
        "file": {
            "filename": "None",
            "title": "None",
            "username": "None",
            "state": "Unavailable"
        }
    }

    response = requests.post(url, json=payload)
    if response.status_code == 200:
        return response.json().get("answer", "Error: No answer returned")
    else:
        return f"Error: {response.status_code}"

gr.ChatInterface(api_response).launch()
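Note that api_response currently ignores the history argument that gr.ChatInterface passes in, so each request carries only the latest user turn. If the /chat endpoint accepts prior turns in its messages list (an assumption, not something this commit confirms), a minimal sketch like the following could forward them; the helper name is hypothetical:

def history_to_messages(history):
    # Gradio passes history either as [user, assistant] pairs or, with
    # type="messages", as dicts carrying "role"/"content"; normalise both.
    messages = []
    for turn in history or []:
        if isinstance(turn, dict):
            messages.append({"role": turn["role"], "content": turn["content"]})
        else:
            user_text, assistant_text = turn
            if user_text:
                messages.append({"role": "user", "content": user_text})
            if assistant_text:
                messages.append({"role": "assistant", "content": assistant_text})
    return messages

# Inside api_response, the prior turns could then be spliced in ahead of the
# latest user message (assuming the backend tolerates extra entries):
#     payload["messages"] = (
#         [{"role": "system", "content": "Hello, how may I help you? AMA!"}]
#         + history_to_messages(history)
#         + [{"role": "user", "content": user_message}]
#     )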

@@ -20,7 +20,7 @@ from fastapi.routing import APIRouter
from fastapi.staticfiles import StaticFiles
from huggingface_hub import login
from langchain.callbacks import StreamingStdOutCallbackHandler
from langchain.memory import VectorStoreRetrieverMemory
from langchain.memory import ConversationStringBufferMemory
from langchain.memory.chat_message_histories.in_memory import ChatMessageHistory
from langchain.prompts.prompt import PromptTemplate
from langchain_community.chat_models import ChatOpenAI
@@ -184,27 +184,27 @@ async def create_chain(
        elif message.role == Role.SYSTEM:
            chat_memory.add_message(message)
    # memory = ConversationSummaryBufferMemory(
    #     llm=llm,
    #     chat_memory=chat_memory,
    #     memory_key="chat_history",
    memory = ConversationStringBufferMemory(
        llm=llm,
        chat_memory=chat_memory,
        memory_key="chat_history",
        input_key="question",
        output_key="answer",
        prompt=SUMMARY_PROMPT_TEMPLATE,
        return_messages=False,
    )
    # memory = VectorStoreRetrieverMemory(
    #     input_key="question",
    #     output_key="answer",
    #     prompt=SUMMARY_PROMPT_TEMPLATE,
    #     chat_memory=chat_memory,
    #     memory_key="chat_history",
    #     return_docs=False,  # Change this to False
    #     retriever=retriever,
    #     return_messages=True,
    #     prompt=SUMMARY_PROMPT_TEMPLATE
    # )
    memory = VectorStoreRetrieverMemory(
        input_key="question",
        output_key="answer",
        chat_memory=chat_memory,
        memory_key="chat_history",
        return_docs=True,  # Change this to False
        retriever=retriever,
        return_messages=True,
        prompt=SUMMARY_PROMPT_TEMPLATE
    )
    question_generator = LLMChain(
        llm=llm,
        prompt=CONDENSE_PROMPT_TEMPLATE,
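For reference, VectorStoreRetrieverMemory as configured above needs a retriever built from a vector store. A minimal, self-contained sketch is shown below; the FAISS and embedding choices are assumptions for illustration only, not what this repository actually wires up:

from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.memory import VectorStoreRetrieverMemory

# Assumed embedding model and seed text, purely for illustration.
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
vectorstore = FAISS.from_texts(["seed document"], embeddings)
retriever = vectorstore.as_retriever(search_kwargs={"k": 4})

memory = VectorStoreRetrieverMemory(
    retriever=retriever,
    memory_key="chat_history",
    input_key="question",
    return_docs=False,
)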
