from langchain.llms import LlamaCpp
from langchain import PromptTemplate, LLMChain
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler

template = """Question: {question}

Answer: Let's work this out in a step by step way to be sure we have the right answer."""

prompt = PromptTemplate(template=template, input_variables=["question"])

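# For illustration (not in the original script): render the template yourself
# to see the exact string the chain will send to the model. The question
# below is just a stand-in.
print(prompt.format(question="What is 2 + 2?"))
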
# Callbacks support token-wise streaming
callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
# verbose=True is required for output to be passed to the callback manager

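# A minimal sketch of how streaming callbacks hook in (illustration only, not
# used below): a handler subclasses BaseCallbackHandler and overrides
# on_llm_new_token, which is invoked once per generated token.
from langchain.callbacks.base import BaseCallbackHandler


class TokenCounter(BaseCallbackHandler):
    """Hypothetical helper that counts tokens as they stream in."""

    def __init__(self) -> None:
        self.tokens = 0

    def on_llm_new_token(self, token: str, **kwargs) -> None:
        self.tokens += 1
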
n_gpu_layers = 1  # For Metal, setting this to 1 is enough.
n_batch = 512  # Should be between 1 and n_ctx; consider the amount of RAM on your Apple Silicon chip.

# Make sure the model path is correct for your system!
llm = LlamaCpp(
    model_path="ggml-model-q4_0.bin",
    n_gpu_layers=n_gpu_layers,
    n_ctx=2048,
    n_batch=n_batch,
    f16_kv=True,  # MUST be set to True, otherwise you will run into problems after a couple of calls
    callback_manager=callback_manager,
    verbose=True,
)

llm_chain = LLMChain(prompt=prompt, llm=llm)

question = "What NFL team won the Super Bowl in the year Justin Bieber was born?"

llm_chain.run(question)
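
# The model can also be called directly, without the chain; with the streaming
# handler attached above, tokens are printed to stdout as they are generated.
# The prompt text here is just an example.
llm("Question: Name the planets in the solar system. Answer:")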