support inference of 4-bit quantized models on GPUs #3023

This commit is contained in:
hiyouga
2024-04-01 17:34:04 +08:00
parent d0842f6828
commit eb259cc573
2 changed files with 14 additions and 6 deletions

View File

@@ -53,6 +53,10 @@ class ModelArguments:
default=True,
metadata={"help": "Whether or not to use double quantization in int4 training."},
)
# Device map used when loading the model with 4-bit quantization; only the
# literal value "auto" is accepted (presumably delegating placement across
# available GPUs to the loader — confirm against the loading code). Requires
# bitsandbytes>=0.43.0 per the help text; the default None leaves the
# pre-existing placement behavior unchanged.
quantization_device_map: Optional[Literal["auto"]] = field(
default=None,
metadata={"help": "Device map used for loading the 4-bit quantized model, needs bitsandbytes>=0.43.0."},
)
rope_scaling: Optional[Literal["linear", "dynamic"]] = field(
default=None,
metadata={"help": "Which scaling strategy should be adopted for the RoPE embeddings."},