mirror of
https://github.com/hiyouga/LLaMA-Factory.git
synced 2025-12-16 20:00:36 +08:00
[model] temporarily support npu fused options on v0, powered by v1 kernels (#9520)
Co-authored-by: frozenleaves <frozen@Mac.local>
This commit is contained in:
@@ -7,7 +7,7 @@
|
||||
prefill_device: "cuda"
|
||||
|
||||
- match:
|
||||
name: "^lm_head$" # regular expression
|
||||
name: "^lm_head$" # regular expression
|
||||
class: torch.nn.Linear # only match modules matching name and class simultaneously
|
||||
replace:
|
||||
class: ktransformers.operators.linear.KTransformersLinear # optimized Kernel on quantized data types
|
||||
@@ -18,7 +18,7 @@
|
||||
prefill_op: "KLinearTorch"
|
||||
|
||||
# - match:
|
||||
# name: "^model\\.layers\\..*$" # regular expression
|
||||
# name: "^model\\.layers\\..*$" # regular expression
|
||||
# class: torch.nn.Linear # only match modules matching name and class simultaneously
|
||||
# replace:
|
||||
# class: ktransformers.operators.linear.KTransformersLinear # optimized Kernel on quantized data types
|
||||
@@ -28,7 +28,7 @@
|
||||
# generate_op: "KLinearTorch"
|
||||
# prefill_op: "KLinearTorch"
|
||||
- match:
|
||||
name: "^model\\.layers\\.(?!.*mlp\\.shared_expert_gate).*$" # regular expression
|
||||
name: "^model\\.layers\\.(?!.*mlp\\.shared_expert_gate).*$" # regular expression
|
||||
class: torch.nn.Linear # only match modules matching name and class simultaneously
|
||||
replace:
|
||||
class: ktransformers.operators.linear.KTransformersLinear # optimized Kernel on quantized data types
|
||||
@@ -77,4 +77,4 @@
|
||||
replace:
|
||||
class: "ktransformers.operators.models.KQwen3MoeModel"
|
||||
kwargs:
|
||||
per_layer_prefill_intput_threshold: 0
|
||||
per_layer_prefill_intput_threshold: 0
|
||||
|
||||
Reference in New Issue
Block a user