mirror of
https://github.com/hiyouga/LLaMA-Factory.git
synced 2025-08-23 14:22:51 +08:00
Merge pull request #4080 from MengqingCao/npu
Add npu option for model exporting

Former-commit-id: ca459f67eb319c262d5bab882e9cb3535a76eb69
This commit is contained in:
commit
d31c9c73c7
@@ -153,9 +153,9 @@ class ModelArguments:
|
|||||||
default=1,
|
default=1,
|
||||||
metadata={"help": "The file shard size (in GB) of the exported model."},
|
metadata={"help": "The file shard size (in GB) of the exported model."},
|
||||||
)
|
)
|
||||||
export_device: Literal["cpu", "cuda"] = field(
|
export_device: Literal["cpu", "auto"] = field(
|
||||||
default="cpu",
|
default="cpu",
|
||||||
metadata={"help": "The device used in model export, use cuda to avoid addmm errors."},
|
metadata={"help": "The device used in model export, use `auto` to accelerate exporting."},
|
||||||
)
|
)
|
||||||
export_quantization_bit: Optional[int] = field(
|
export_quantization_bit: Optional[int] = field(
|
||||||
default=None,
|
default=None,
|
||||||
|
@@ -89,7 +89,7 @@ def create_export_tab(engine: "Engine") -> Dict[str, "Component"]:
|
|||||||
export_size = gr.Slider(minimum=1, maximum=100, value=1, step=1)
|
export_size = gr.Slider(minimum=1, maximum=100, value=1, step=1)
|
||||||
export_quantization_bit = gr.Dropdown(choices=["none"] + GPTQ_BITS, value="none")
|
export_quantization_bit = gr.Dropdown(choices=["none"] + GPTQ_BITS, value="none")
|
||||||
export_quantization_dataset = gr.Textbox(value="data/c4_demo.json")
|
export_quantization_dataset = gr.Textbox(value="data/c4_demo.json")
|
||||||
export_device = gr.Radio(choices=["cpu", "cuda"], value="cpu")
|
export_device = gr.Radio(choices=["cpu", "auto"], value="cpu")
|
||||||
export_legacy_format = gr.Checkbox()
|
export_legacy_format = gr.Checkbox()
|
||||||
|
|
||||||
with gr.Row():
|
with gr.Row():
|
||||||
|
Loading…
x
Reference in New Issue
Block a user