diff --git a/examples/ktransformers/infer_lora/deepseek2_lora_sft_kt.yaml b/examples/ktransformers/infer_lora/deepseek2_lora_sft_kt.yaml
index d6b35ac0f..cd10c83f1 100644
--- a/examples/ktransformers/infer_lora/deepseek2_lora_sft_kt.yaml
+++ b/examples/ktransformers/infer_lora/deepseek2_lora_sft_kt.yaml
@@ -5,6 +5,6 @@ infer_backend: ktransformers # choices: [huggingface, vllm, sglang, ktransforme
 trust_remote_code: true
 use_kt: true # use KTransformers as LoRA sft backend to inference
-kt_optimize_rule: examples/kt_optimize_rules/DeepSeek-V2-Lite-Chat-sft-amx.yaml
+kt_optimize_rule: examples/ktransformers/kt_optimize_rules/DeepSeek-V2-Lite-Chat-sft-amx.yaml
 cpu_infer: 32
 chunk_size: 8192
 
diff --git a/examples/ktransformers/infer_lora/deepseek3_kt.yaml b/examples/ktransformers/infer_lora/deepseek3_kt.yaml
index 76c516b19..6534d4078 100644
--- a/examples/ktransformers/infer_lora/deepseek3_kt.yaml
+++ b/examples/ktransformers/infer_lora/deepseek3_kt.yaml
@@ -1,9 +1,9 @@
 model_name_or_path: opensourcerelease/DeepSeek-V3-bf16
-template: deepseek
+template: deepseek3
 infer_backend: ktransformers # choices: [huggingface, vllm, sglang, ktransformers]
 trust_remote_code: true
 use_kt: true # use KTransformers as LoRA sft backend to inference
-kt_optimize_rule: examples/kt_optimize_rules/DeepSeek-V3-Chat-sft-amx-multi-gpu.yaml
+kt_optimize_rule: examples/ktransformers/kt_optimize_rules/DeepSeek-V3-Chat-sft-amx-multi-gpu.yaml
 cpu_infer: 32
 chunk_size: 8192
 
diff --git a/examples/ktransformers/infer_lora/deepseek3_lora_sft_kt.yaml b/examples/ktransformers/infer_lora/deepseek3_lora_sft_kt.yaml
index 3fbc76ded..a7171eb4a 100644
--- a/examples/ktransformers/infer_lora/deepseek3_lora_sft_kt.yaml
+++ b/examples/ktransformers/infer_lora/deepseek3_lora_sft_kt.yaml
@@ -1,10 +1,10 @@
 model_name_or_path: opensourcerelease/DeepSeek-V3-bf16
 adapter_name_or_path: saves/Kllama_deepseekV3
-template: deepseek
+template: deepseek3
 infer_backend: ktransformers # choices: [huggingface, vllm, sglang, ktransformers]
 trust_remote_code: true
 use_kt: true # use KTransformers as LoRA sft backend to inference
-kt_optimize_rule: examples/kt_optimize_rules/DeepSeek-V3-Chat-sft-amx-multi-gpu.yaml
+kt_optimize_rule: examples/ktransformers/kt_optimize_rules/DeepSeek-V3-Chat-sft-amx-multi-gpu.yaml
 cpu_infer: 32
 chunk_size: 8192
 
diff --git a/examples/ktransformers/infer_lora/qwen3moe_lora_sft_kt.yaml b/examples/ktransformers/infer_lora/qwen3moe_lora_sft_kt.yaml
index a84959069..c71321c33 100644
--- a/examples/ktransformers/infer_lora/qwen3moe_lora_sft_kt.yaml
+++ b/examples/ktransformers/infer_lora/qwen3moe_lora_sft_kt.yaml
@@ -5,6 +5,6 @@ infer_backend: ktransformers # choices: [huggingface, vllm, sglang, ktransforme
 trust_remote_code: true
 use_kt: true # use KTransformers as LoRA sft backend to inference
-kt_optimize_rule: examples/kt_optimize_rules/Qwen3Moe-sft-amx.yaml
+kt_optimize_rule: examples/ktransformers/kt_optimize_rules/Qwen3Moe-sft-amx.yaml
 cpu_infer: 32
 chunk_size: 8192
 
diff --git a/examples/ktransformers/train_lora/deepseek2_lora_sft_kt.yaml b/examples/ktransformers/train_lora/deepseek2_lora_sft_kt.yaml
index eacb521ef..0d4da3dd3 100644
--- a/examples/ktransformers/train_lora/deepseek2_lora_sft_kt.yaml
+++ b/examples/ktransformers/train_lora/deepseek2_lora_sft_kt.yaml
@@ -10,7 +10,7 @@ lora_rank: 8
 lora_target: all
 
 ### dataset
-dataset: identity
+dataset: identity, alpaca_en_demo
 template: deepseek
 cutoff_len: 2048
 max_samples: 100000
@@ -40,7 +40,7 @@ resume_from_checkpoint: null
 
 ### ktransformers
 use_kt: true # use KTransformers as LoRA sft backend
-kt_optimize_rule: examples/kt_optimize_rules/DeepSeek-V2-Lite-Chat-sft-amx.yaml
+kt_optimize_rule: examples/ktransformers/kt_optimize_rules/DeepSeek-V2-Lite-Chat-sft-amx.yaml
 cpu_infer: 32
 chunk_size: 8192
 
diff --git a/examples/ktransformers/train_lora/deepseek3_lora_sft_kt.yaml b/examples/ktransformers/train_lora/deepseek3_lora_sft_kt.yaml
index 02d77661b..3ee95aaa5 100644
--- a/examples/ktransformers/train_lora/deepseek3_lora_sft_kt.yaml
+++ b/examples/ktransformers/train_lora/deepseek3_lora_sft_kt.yaml
@@ -10,8 +10,8 @@ lora_rank: 8
 lora_target: all
 
 ### dataset
-dataset: identity
-template: deepseek
+dataset: identity, alpaca_en_demo
+template: deepseek3
 cutoff_len: 2048
 max_samples: 100000
 overwrite_cache: true
@@ -40,7 +40,7 @@ resume_from_checkpoint: null
 
 ### ktransformers
 use_kt: true # use KTransformers as LoRA sft backend
-kt_optimize_rule: examples/kt_optimize_rules/DeepSeek-V3-Chat-sft-amx-multi-gpu.yaml
+kt_optimize_rule: examples/ktransformers/kt_optimize_rules/DeepSeek-V3-Chat-sft-amx-multi-gpu.yaml
 cpu_infer: 32
 chunk_size: 8192
 
diff --git a/examples/ktransformers/train_lora/qwen3moe_lora_sft_kt.yaml b/examples/ktransformers/train_lora/qwen3moe_lora_sft_kt.yaml
index 8567a35f4..ee4e6e95d 100644
--- a/examples/ktransformers/train_lora/qwen3moe_lora_sft_kt.yaml
+++ b/examples/ktransformers/train_lora/qwen3moe_lora_sft_kt.yaml
@@ -40,7 +40,7 @@ resume_from_checkpoint: null
 
 ### ktransformers
 use_kt: true # use KTransformers as LoRA sft backend
-kt_optimize_rule: examples/kt_optimize_rules/Qwen3Moe-sft-amx.yaml
+kt_optimize_rule: examples/ktransformers/kt_optimize_rules/Qwen3Moe-sft-amx.yaml
 cpu_infer: 32
 chunk_size: 8192
 