@@ -12,6 +12,7 @@ class Lumo:
     """
     A class for generating text using the Lumo model with 4-bit quantization.
     """
+
     def __init__(self):
         """
         Initializes the Lumo model with 4-bit quantization and a tokenizer.
@@ -21,7 +22,7 @@ class Lumo:
             load_in_4bit=True,
             bnb_4bit_quant_type="nf4",
             bnb_4bit_compute_dtype=torch.float16,
-            llm_int8_enable_fp32_cpu_offload=True
+            llm_int8_enable_fp32_cpu_offload=True,
         )
 
         self.model = LlamaForCausalLM.from_pretrained(
@@ -29,9 +30,11 @@ class Lumo:
             device_map="auto",
             quantization_config=bnb_config,
             use_cache=False,
-            attn_implementation="sdpa"
+            attn_implementation="sdpa",
         )
-        self.tokenizer = AutoTokenizer.from_pretrained("lumolabs-ai/Lumo-70B-Instruct")
+        self.tokenizer = AutoTokenizer.from_pretrained(
+            "lumolabs-ai/Lumo-70B-Instruct"
+        )
 
     def run(self, task: str) -> str:
         """
@@ -43,11 +46,13 @@ class Lumo:
         Returns:
             str: The generated text.
         """
-        inputs = self.tokenizer(task, return_tensors="pt").to(self.model.device)
+        inputs = self.tokenizer(task, return_tensors="pt").to(
+            self.model.device
+        )
         outputs = self.model.generate(**inputs, max_new_tokens=100)
-        return self.tokenizer.decode(outputs[0], skip_special_tokens=True)
+        return self.tokenizer.decode(
+            outputs[0], skip_special_tokens=True
+        )
 
 
 Agent(
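
For context, here is a minimal usage sketch of the class this diff reformats. It is not part of the patch: it assumes the file's existing imports (torch, plus BitsAndBytesConfig, LlamaForCausalLM, and AutoTokenizer from transformers) and a GPU with enough memory for the 4-bit 70B checkpoint; the __main__ guard and the prompt are illustrative additions, not code from the repository.

    if __name__ == "__main__":
        # Instantiating Lumo loads the NF4-quantized 70B model and its
        # tokenizer (assumption: sufficient GPU memory is available).
        lumo = Lumo()
        # run() tokenizes the prompt, generates up to 100 new tokens,
        # and decodes the output back into a plain string.
        print(lumo.run("Explain 4-bit NF4 quantization in one paragraph."))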