[release] Bye 2025 (#9702)

This commit is contained in:
Yaowei Zheng
2025-12-31 22:22:40 +08:00
committed by GitHub
parent 000526908a
commit 95ac3f2373
59 changed files with 154 additions and 401 deletions

View File

@@ -1,10 +1,10 @@
 ### model
-model_name_or_path: saves/llama3-8b/full/sft
-template: llama3
+model_name_or_path: saves/qwen3-4b/full/sft
+template: qwen3_nothink
 trust_remote_code: true
 ### export
-export_dir: output/llama3_full_sft
+export_dir: saves/qwen3_sft_merged
 export_size: 5
 export_device: cpu # choices: [cpu, auto]
 export_legacy_format: false

View File

@@ -1,10 +1,10 @@
 ### model
-model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
-template: llama3
+model_name_or_path: Qwen/Qwen3-4B-Instruct-2507
+template: qwen3_nothink
 trust_remote_code: true
 ### export
-export_dir: output/llama3_gptq
+export_dir: saves/qwen3_gptq
 export_quantization_bit: 4
 export_quantization_dataset: data/c4_demo.jsonl
 export_size: 5

View File

@@ -1,13 +1,13 @@
 ### Note: DO NOT use quantized model or quantization_bit when merging lora adapters
 ### model
-model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
-adapter_name_or_path: saves/llama3-8b/lora/sft
-template: llama3
+model_name_or_path: Qwen/Qwen3-4B-Instruct-2507
+adapter_name_or_path: saves/qwen3-4b/lora/sft
+template: qwen3_nothink
 trust_remote_code: true
 ### export
-export_dir: output/llama3_lora_sft
+export_dir: saves/qwen3_sft_merged
 export_size: 5
 export_device: cpu # choices: [cpu, auto]
 export_legacy_format: false

View File

@@ -1,13 +1,13 @@
 ### Note: DO NOT use quantized model or quantization_bit when merging lora adapters
 ### model
-model_name_or_path: Qwen/Qwen2.5-VL-7B-Instruct
-adapter_name_or_path: saves/qwen2_5vl-7b/lora/sft
-template: qwen2_vl
+model_name_or_path: Qwen/Qwen3-VL-4B-Instruct
+adapter_name_or_path: saves/qwen3-vl-4b/lora/sft
+template: qwen3_vl_nothink
 trust_remote_code: true
 ### export
-export_dir: output/qwen2_5vl_lora_sft
+export_dir: saves/qwen3_vl_sft_merged
 export_size: 5
 export_device: cpu # choices: [cpu, auto]
 export_legacy_format: false