From 7bd4c59b7e0941fe37647a2be0d18732db982059 Mon Sep 17 00:00:00 2001 From: hiyouga Date: Sat, 12 Aug 2023 00:25:29 +0800 Subject: [PATCH] fix unusual output of 8bit models #278 #391 Former-commit-id: dd51c242032ce3f878cb191dc144536db4a2bb45 --- src/llmtuner/extras/misc.py | 3 +++ src/llmtuner/tuner/core/loader.py | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/src/llmtuner/extras/misc.py b/src/llmtuner/extras/misc.py index ee918fbb..b0a7365c 100644 --- a/src/llmtuner/extras/misc.py +++ b/src/llmtuner/extras/misc.py @@ -142,6 +142,9 @@ def dispatch_model(model: "PreTrainedModel") -> "PreTrainedModel": Dispatches a pre-trained model to GPUs with balanced memory. Borrowed from: https://github.com/huggingface/transformers/blob/v4.31.0/src/transformers/modeling_utils.py#L2803 """ + if getattr(model, "is_loaded_in_8bit", False) or getattr(model, "is_loaded_in_4bit", False): # do nothing + return model + if torch.cuda.device_count() > 1: from accelerate import dispatch_model from accelerate.utils import infer_auto_device_map, get_balanced_memory diff --git a/src/llmtuner/tuner/core/loader.py b/src/llmtuner/tuner/core/loader.py index 39bec1d8..74b4b59f 100644 --- a/src/llmtuner/tuner/core/loader.py +++ b/src/llmtuner/tuner/core/loader.py @@ -92,7 +92,7 @@ def load_model_and_tokenizer( ) is_mergeable = False - config_kwargs["device_map"] = {"": int(os.environ.get("LOCAL_RANK", "0"))} + config_kwargs["device_map"] = {"": int(os.environ.get("LOCAL_RANK", "0"))} if is_trainable else "auto" logger.info("Quantizing model to {} bit.".format(model_args.quantization_bit)) # Load and prepare pretrained models (without valuehead).