From 0d98d1a28c32f408c8b74501bd9ae506a98bba65 Mon Sep 17 00:00:00 2001
From: hiyouga
Date: Fri, 17 Nov 2023 22:21:29 +0800
Subject: [PATCH] fix quantization

Former-commit-id: ccb0f58e22f55b15531fd0e85f5935b150575bec
---
 src/llmtuner/model/loader.py | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/src/llmtuner/model/loader.py b/src/llmtuner/model/loader.py
index 4d2e1974..20b9b5d4 100644
--- a/src/llmtuner/model/loader.py
+++ b/src/llmtuner/model/loader.py
@@ -168,17 +168,12 @@ def load_model_and_tokenizer(
         config_kwargs["device_map"] = {"": get_current_device()}
         logger.info("Quantizing model to {} bit.".format(model_args.quantization_bit))
 
-    if is_deepspeed_zero3_enabled() or getattr(config, "model_type", None) == "chatglm":
-        low_cpu_mem_usage = False
-    else:
-        low_cpu_mem_usage = True
-
     # Load pre-trained models (without valuehead)
     model = AutoModelForCausalLM.from_pretrained(
         model_to_load,
         config=config,
         torch_dtype=model_args.compute_dtype,
-        low_cpu_mem_usage=low_cpu_mem_usage,
+        low_cpu_mem_usage=(not is_deepspeed_zero3_enabled()),
         **config_kwargs
     )
 