update gradient checkpointing kwargs

Former-commit-id: 0ae9a16b9d13bc1093662aa0b9bd990400ec2646
hiyouga 2024-02-07 00:38:24 +08:00
parent caeffc780d
commit bbe5ff0570


@@ -227,7 +227,9 @@ def _prepare_model_for_training(
     if not getattr(model, "supports_gradient_checkpointing", False):
         logger.warning("Current model does not support gradient checkpointing.")
     else:
-        model.gradient_checkpointing_enable(gradient_checkpointing_kwargs={"use_reentrant": False})
+        # use_reentrant=False might increase VRAM usage (has not been empirically verified yet)
+        # According to: https://github.com/huggingface/transformers/issues/28339
+        model.gradient_checkpointing_enable(gradient_checkpointing_kwargs={"use_reentrant": True})
         model.enable_input_require_grads()
         model.config.use_cache = False  # turn off when gradient checkpointing is enabled
         logger.info("Gradient checkpointing enabled.")
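
For context, a minimal sketch of the same pattern applied standalone, assuming transformers >= 4.35 (where the gradient_checkpointing_kwargs argument was added); the "gpt2" checkpoint is only illustrative:

from transformers import AutoModelForCausalLM

model = AutoModelForCausalLM.from_pretrained("gpt2")
if getattr(model, "supports_gradient_checkpointing", False):
    # True selects the reentrant torch.utils.checkpoint path, matching this commit;
    # False selects the non-reentrant path discussed in transformers issue #28339.
    model.gradient_checkpointing_enable(gradient_checkpointing_kwargs={"use_reentrant": True})
    model.enable_input_require_grads()
    model.config.use_cache = False  # caching is incompatible with gradient checkpointing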