Mirror of https://github.com/hiyouga/LLaMA-Factory.git (synced 2025-08-04 20:52:59 +08:00)
tiny fix

commit 6ee32cf71c (parent 9b98790fb3)
Former-commit-id: 415bca900e5cc3afaddd5b06d35f472d9ead3263
@@ -45,7 +45,7 @@ class FinetuningArguments:
         default=None,
         metadata={"help": "Name(s) of target modules to apply LoRA. Use commas to separate multiple modules. \
                   LLaMA choices: [\"q_proj\", \"k_proj\", \"v_proj\", \"o_proj\", \"gate_proj\", \"up_proj\", \"down_proj\"], \
-                  BLOOM & Falcon & ChatGLM choices: [\"query_key_value\", \"self_attention.dense\", \"mlp.dense_h_to_4h\", \"mlp.dense_4h_to_h\"], \
+                  BLOOM & Falcon & ChatGLM choices: [\"query_key_value\", \"dense\", \"dense_h_to_4h\", \"dense_4h_to_h\"], \
                   Baichuan choices: [\"W_pack\", \"o_proj\", \"gate_proj\", \"up_proj\", \"down_proj\"], \
                   Qwen choices: [\"c_attn\", \"attn.c_proj\", \"w1\", \"w2\", \"mlp.c_proj\"], \
                   Phi-1.5 choices: [\"Wqkv\", \"out_proj\", \"fc1\", \"fc2\"], \
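For context, the comma-separated names in this help string are what PEFT ultimately receives as LoRA target modules. A minimal sketch of the equivalent LoraConfig, assuming a BLOOM-style checkpoint (the model name and hyperparameters below are illustrative, not taken from this commit):

    from peft import LoraConfig, get_peft_model
    from transformers import AutoModelForCausalLM

    # Illustrative only: target the module names from the corrected
    # BLOOM & Falcon & ChatGLM entry in the help string above.
    model = AutoModelForCausalLM.from_pretrained("bigscience/bloom-560m")
    lora_config = LoraConfig(
        task_type="CAUSAL_LM",
        r=8,
        lora_alpha=16,
        lora_dropout=0.05,
        # the CLI value "query_key_value,dense,dense_h_to_4h,dense_4h_to_h"
        # corresponds to this list of module names
        target_modules=["query_key_value", "dense", "dense_h_to_4h", "dense_4h_to_h"],
    )
    model = get_peft_model(model, lora_config)
    model.print_trainable_parameters()

PEFT matches these names against the suffix of each module path, which is why the shortened names ("dense" rather than "self_attention.dense") still resolve to the intended layers.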
@@ -50,7 +50,7 @@ class CustomDPOTrainer(DPOTrainer):
                 self.ref_model = self.accelerator.prepare_model(self.ref_model, evaluation_mode=True)
 
     def _prepare_deepspeed(self, model: "PreTrainedModelWrapper"):
-        # Adapted from accelerate: https://github.com/huggingface/accelerate/blob/739b135f8367becb67ffaada12fe76e3aa60fefd/src/accelerate/accelerator.py#L1473
+        # adapted from accelerate: https://github.com/huggingface/accelerate/blob/739b135f8367becb67ffaada12fe76e3aa60fefd/src/accelerate/accelerator.py#L1473
         deepspeed_plugin = self.accelerator.state.deepspeed_plugin
         config_kwargs = deepcopy(deepspeed_plugin.deepspeed_config)
         if model is not None:
@@ -75,7 +75,8 @@ class CustomDPOTrainer(DPOTrainer):
         # Otherwise, we assume the reference model fits in memory and is initialized on each device with ZeRO disabled (stage 0)
         if config_kwargs["zero_optimization"]["stage"] != 3:
             config_kwargs["zero_optimization"]["stage"] = 0
-        # lazy load
+
+        # Lazy load
         import deepspeed  # type: ignore
         model, *_ = deepspeed.initialize(model=model, config=config_kwargs)
         model.eval()
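Pieced together, the two hunks above frame the body of _prepare_deepspeed: the frozen DPO reference model gets its own DeepSpeed engine built from a copy of the training config. The helper below is a rough sketch of that flow under the stated assumptions (the function name and its arguments are invented for illustration; it is not the file's exact code):

    from copy import deepcopy


    def prepare_reference_model(ref_model, accelerator):
        # Hypothetical standalone helper mirroring the pattern shown in the diff.
        deepspeed_plugin = accelerator.state.deepspeed_plugin
        config_kwargs = deepcopy(deepspeed_plugin.deepspeed_config)

        # Keep ZeRO stage 3 so the reference model's parameters stay sharded like
        # the training engine's; otherwise assume it fits in memory and disable
        # ZeRO entirely (stage 0), as the source comment explains.
        if config_kwargs["zero_optimization"]["stage"] != 3:
            config_kwargs["zero_optimization"]["stage"] = 0

        # Lazy import, matching the source
        import deepspeed  # type: ignore

        engine, *_ = deepspeed.initialize(model=ref_model, config=config_kwargs)
        engine.eval()
        return engine

When DeepSpeed is not enabled, the context line in the first of these two hunks shows the fallback path instead: self.ref_model = self.accelerator.prepare_model(self.ref_model, evaluation_mode=True).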