[release] Bye 2025 (#9702)

This commit is contained in:
Yaowei Zheng
2025-12-31 22:22:40 +08:00
committed by GitHub
parent 000526908a
commit 95ac3f2373
59 changed files with 154 additions and 401 deletions

View File

@@ -14,7 +14,6 @@ dataset: identity,alpaca_en_demo
 template: llama3
 cutoff_len: 2048
 max_samples: 1000
-overwrite_cache: true
 preprocessing_num_workers: 16
 dataloader_num_workers: 4

View File

@@ -14,7 +14,6 @@ dataset: identity,alpaca_en_demo
 template: llama3
 cutoff_len: 2048
 max_samples: 1000
-overwrite_cache: true
 preprocessing_num_workers: 16
 dataloader_num_workers: 4

View File

@@ -14,7 +14,6 @@ dataset: identity,alpaca_en_demo
 template: llama3
 cutoff_len: 2048
 max_samples: 1000
-overwrite_cache: true
 preprocessing_num_workers: 16
 dataloader_num_workers: 4

View File

@@ -1,5 +1,5 @@
 ### model
-model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
+model_name_or_path: Qwen/Qwen3-4B-Instruct-2507
 quantization_bit: 4
 quantization_method: bnb
 double_quantization: false
@@ -14,15 +14,14 @@ lora_target: all
 ### dataset
 dataset: identity,alpaca_en_demo
-template: llama3
+template: qwen3_nothink
 cutoff_len: 2048
 max_samples: 1000
-overwrite_cache: true
 preprocessing_num_workers: 16
 dataloader_num_workers: 4
 ### output
-output_dir: saves/llama3-8b/lora/sft
+output_dir: saves/qwen3-4b/lora/sft
 logging_steps: 10
 save_steps: 500
 plot_loss: true

View File

@@ -1,5 +1,5 @@
 ### model
-model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
+model_name_or_path: Qwen/Qwen3-4B-Instruct-2507
 quantization_bit: 4 # choices: [8 (bnb/hqq/eetq), 4 (bnb/hqq), 3 (hqq), 2 (hqq)]
 quantization_method: bnb # choices: [bnb, hqq, eetq]
 trust_remote_code: true
@@ -13,15 +13,14 @@ lora_target: all
 ### dataset
 dataset: identity,alpaca_en_demo
-template: llama3
+template: qwen3_nothink
 cutoff_len: 2048
 max_samples: 1000
-overwrite_cache: true
 preprocessing_num_workers: 16
 dataloader_num_workers: 4
 ### output
-output_dir: saves/llama3-8b/lora/sft
+output_dir: saves/qwen3-4b/lora/sft
 logging_steps: 10
 save_steps: 500
 plot_loss: true