Mirror of https://github.com/hiyouga/LLaMA-Factory.git (synced 2026-01-10 08:00:36 +08:00)
Commit: [release] Bye 2025 (#9702)
@@ -1,10 +1,10 @@
 ### model
-model_name_or_path: saves/llama3-8b/full/sft
-template: llama3
+model_name_or_path: saves/qwen3-4b/full/sft
+template: qwen3_nothink
 trust_remote_code: true

 ### export
-export_dir: output/llama3_full_sft
+export_dir: saves/qwen3_sft_merged
 export_size: 5
 export_device: cpu # choices: [cpu, auto]
 export_legacy_format: false
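For readability, here is a sketch of the full-SFT export config as it reads after this change, reconstructed from the hunk above (the filename is not shown in this view; the inline comments are my annotations of the LLaMA-Factory export options):

### model
model_name_or_path: saves/qwen3-4b/full/sft  # full-parameter SFT checkpoint to export
template: qwen3_nothink                      # Qwen3 chat template variant without thinking blocks
trust_remote_code: true                      # allow model-specific code from the Hub

### export
export_dir: saves/qwen3_sft_merged           # output directory for the exported model
export_size: 5                               # max shard size of the saved weights, in GB
export_device: cpu                           # choices: [cpu, auto]
export_legacy_format: false                  # keep safetensors rather than legacy .bin files

A config like this is consumed by the export entry point, e.g. llamafactory-cli export path/to/this_config.yaml.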
@@ -1,10 +1,10 @@
 ### model
-model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
-template: llama3
+model_name_or_path: Qwen/Qwen3-4B-Instruct-2507
+template: qwen3_nothink
 trust_remote_code: true

 ### export
-export_dir: output/llama3_gptq
+export_dir: saves/qwen3_gptq
 export_quantization_bit: 4
 export_quantization_dataset: data/c4_demo.jsonl
 export_size: 5
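The GPTQ export variant after the change reads as below for its first ten lines (the hunk stops at export_size, so any remaining keys in the file are not shown here); the comments are my annotations:

### model
model_name_or_path: Qwen/Qwen3-4B-Instruct-2507  # Hub model to quantize
template: qwen3_nothink
trust_remote_code: true

### export
export_dir: saves/qwen3_gptq                     # output directory for the quantized model
export_quantization_bit: 4                       # post-training (GPTQ) quantization to 4 bits
export_quantization_dataset: data/c4_demo.jsonl  # calibration samples used during quantization
export_size: 5                                   # max shard size in GB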
@@ -1,13 +1,13 @@
 ### Note: DO NOT use quantized model or quantization_bit when merging lora adapters

 ### model
-model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
-adapter_name_or_path: saves/llama3-8b/lora/sft
-template: llama3
+model_name_or_path: Qwen/Qwen3-4B-Instruct-2507
+adapter_name_or_path: saves/qwen3-4b/lora/sft
+template: qwen3_nothink
 trust_remote_code: true

 ### export
-export_dir: output/llama3_lora_sft
+export_dir: saves/qwen3_sft_merged
 export_size: 5
 export_device: cpu # choices: [cpu, auto]
 export_legacy_format: false
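After the change, this merge config pairs the full-precision Qwen3 base model with its LoRA adapter; exporting merges the adapter into the base weights, which is why the note above forbids a quantized model or quantization_bit. The model section, reconstructed with my annotations:

### model
model_name_or_path: Qwen/Qwen3-4B-Instruct-2507  # full-precision base model
adapter_name_or_path: saves/qwen3-4b/lora/sft    # LoRA adapter to merge into the base
template: qwen3_nothink
trust_remote_code: true

The export section has the same shape as the full-SFT example above, writing the merged weights to saves/qwen3_sft_merged.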
@@ -1,13 +1,13 @@
 ### Note: DO NOT use quantized model or quantization_bit when merging lora adapters

 ### model
-model_name_or_path: Qwen/Qwen2.5-VL-7B-Instruct
-adapter_name_or_path: saves/qwen2_5vl-7b/lora/sft
-template: qwen2_vl
+model_name_or_path: Qwen/Qwen3-VL-4B-Instruct
+adapter_name_or_path: saves/qwen3-vl-4b/lora/sft
+template: qwen3_vl_nothink
 trust_remote_code: true

 ### export
-export_dir: output/qwen2_5vl_lora_sft
+export_dir: saves/qwen3_vl_sft_merged
 export_size: 5
 export_device: cpu # choices: [cpu, auto]
 export_legacy_format: false
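The vision-language example follows the same merge pattern, swapping in the multimodal base model, adapter, and template; only the values change. The model section after the change, with my annotations:

### model
model_name_or_path: Qwen/Qwen3-VL-4B-Instruct     # multimodal base model
adapter_name_or_path: saves/qwen3-vl-4b/lora/sft  # LoRA adapter trained on the VL model
template: qwen3_vl_nothink                        # Qwen3-VL template variant without thinking blocks
trust_remote_code: true

The merged model is exported to saves/qwen3_vl_sft_merged with the same export settings as above (export_size: 5, export_device: cpu, export_legacy_format: false).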