from langchain.llms import LlamaCpp
from langchain import PromptTemplate, LLMChain
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler

template = """Question: {question}

Answer: Let's work this out in a step by step way to be sure we have the right answer."""

prompt = PromptTemplate(template=template, input_variables=["question"])

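# For illustration (not in the original script): render the template yourself
# to see the exact string the chain will send to the model. The question
# below is just a stand-in.
print(prompt.format(question="What is 2 + 2?"))
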
# Callbacks support token-wise streaming
callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
# verbose=True is required for output to be passed to the callback manager

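# A minimal sketch of how streaming callbacks hook in (illustration only, not
# used below): a handler subclasses BaseCallbackHandler and overrides
# on_llm_new_token, which is invoked once per generated token.
from langchain.callbacks.base import BaseCallbackHandler


class TokenCounter(BaseCallbackHandler):
    """Hypothetical helper that counts tokens as they stream in."""

    def __init__(self) -> None:
        self.tokens = 0

    def on_llm_new_token(self, token: str, **kwargs) -> None:
        self.tokens += 1
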
n_gpu_layers = 1  # For Metal, setting this to 1 is enough.
n_batch = 512  # Should be between 1 and n_ctx; consider the amount of RAM on your Apple Silicon chip.

# Make sure the model path is correct for your system!
llm = LlamaCpp(
    model_path="ggml-model-q4_0.bin",
    n_gpu_layers=n_gpu_layers,
    n_ctx=2048,
    n_batch=n_batch,
    f16_kv=True,  # MUST be set to True, otherwise you will run into problems after a couple of calls
    callback_manager=callback_manager,
    verbose=True,
)

llm_chain = LLMChain(prompt=prompt, llm=llm)

question = "What NFL team won the Super Bowl in the year Justin Bieber was born?"

llm_chain.run(question)
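
# The model can also be called directly, without the chain; with the streaming
# handler attached above, tokens are printed to stdout as they are generated.
# The prompt text here is just an example.
llm("Question: Name the planets in the solar system. Answer:")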