mirror of
https://github.com/hiyouga/LLaMA-Factory.git
synced 2025-12-29 18:20:35 +08:00
[model] temporarily support npu fused options on v0, powered by v1 kernels (#9520)
Co-authored-by: frozenleaves <frozen@Mac.local>
This commit is contained in:
@@ -174,6 +174,10 @@ class BaseModelArguments:
|
||||
default=True,
|
||||
metadata={"help": "Whether or not to use KV cache in generation."},
|
||||
)
|
||||
use_v1_kernels: bool = field(
|
||||
default=False,
|
||||
metadata={"help": "Whether or not to use high-performance kernels in training."},
|
||||
)
|
||||
infer_dtype: Literal["auto", "float16", "bfloat16", "float32"] = field(
|
||||
default="auto",
|
||||
metadata={"help": "Data type for model weights and activations at inference."},
|
||||
|
||||
Reference in New Issue
Block a user