From 15f6ab73a578d3188168d4d82e7d39b93bbc02a1 Mon Sep 17 00:00:00 2001
From: MengqingCao <cmq0113@163.com>
Date: Wed, 5 Jun 2024 07:06:40 +0000
Subject: [PATCH 1/4] add npu for model export

Former-commit-id: 07045c876a4c6025ba56905b65cce6c9faedacb5
---
 src/llamafactory/hparams/model_args.py      | 4 ++--
 src/llamafactory/webui/components/export.py | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/llamafactory/hparams/model_args.py b/src/llamafactory/hparams/model_args.py
index 7003cbee..99c02850 100644
--- a/src/llamafactory/hparams/model_args.py
+++ b/src/llamafactory/hparams/model_args.py
@@ -145,9 +145,9 @@ class ModelArguments:
         default=1,
         metadata={"help": "The file shard size (in GB) of the exported model."},
     )
-    export_device: Literal["cpu", "cuda"] = field(
+    export_device: Literal["cpu", "cuda", "npu"] = field(
         default="cpu",
-        metadata={"help": "The device used in model export, use cuda to avoid addmm errors."},
+        metadata={"help": "The device used in model export, use cuda to avoid addmm errors; use npu/cuda to speed up exporting."},
     )
     export_quantization_bit: Optional[int] = field(
         default=None,
diff --git a/src/llamafactory/webui/components/export.py b/src/llamafactory/webui/components/export.py
index 2f354011..32bedffb 100644
--- a/src/llamafactory/webui/components/export.py
+++ b/src/llamafactory/webui/components/export.py
@@ -89,7 +89,7 @@ def create_export_tab(engine: "Engine") -> Dict[str, "Component"]:
         export_size = gr.Slider(minimum=1, maximum=100, value=1, step=1)
         export_quantization_bit = gr.Dropdown(choices=["none"] + GPTQ_BITS, value="none")
         export_quantization_dataset = gr.Textbox(value="data/c4_demo.json")
-        export_device = gr.Radio(choices=["cpu", "cuda"], value="cpu")
+        export_device = gr.Radio(choices=["cpu", "cuda", "npu"], value="cpu")
         export_legacy_format = gr.Checkbox()
 
     with gr.Row():

From 71b9b87d88179fab9702e2c6410894555aef12f1 Mon Sep 17 00:00:00 2001
From: MengqingCao <cmq0113@163.com>
Date: Wed, 5 Jun 2024 09:37:36 +0000
Subject: [PATCH 2/4] modify export_device option

Former-commit-id: 2c0305266291fdb163c4445db227d4c77529f3b5
---
 src/llamafactory/webui/components/export.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/llamafactory/webui/components/export.py b/src/llamafactory/webui/components/export.py
index 32bedffb..791a833b 100644
--- a/src/llamafactory/webui/components/export.py
+++ b/src/llamafactory/webui/components/export.py
@@ -89,7 +89,7 @@ def create_export_tab(engine: "Engine") -> Dict[str, "Component"]:
         export_size = gr.Slider(minimum=1, maximum=100, value=1, step=1)
         export_quantization_bit = gr.Dropdown(choices=["none"] + GPTQ_BITS, value="none")
         export_quantization_dataset = gr.Textbox(value="data/c4_demo.json")
-        export_device = gr.Radio(choices=["cpu", "cuda", "npu"], value="cpu")
+        export_device = gr.Radio(choices=["cpu", "cuda/npu"], value="cpu")
         export_legacy_format = gr.Checkbox()
 
     with gr.Row():

From d9a372658aa51cbda7e12520a6b58df8780e711b Mon Sep 17 00:00:00 2001
From: hoshi-hiyouga <hiyouga@buaa.edu.cn>
Date: Thu, 6 Jun 2024 03:14:23 +0800
Subject: [PATCH 3/4] Update model_args.py

Former-commit-id: af2c3cbee4372724cf00379d8494f5000dc8a3df
---
 src/llamafactory/hparams/model_args.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/llamafactory/hparams/model_args.py b/src/llamafactory/hparams/model_args.py
index 99c02850..024bc2f8 100644
--- a/src/llamafactory/hparams/model_args.py
+++ b/src/llamafactory/hparams/model_args.py
@@ -145,9 +145,9 @@ class ModelArguments:
         default=1,
         metadata={"help": "The file shard size (in GB) of the exported model."},
     )
-    export_device: Literal["cpu", "cuda", "npu"] = field(
+    export_device: Literal["cpu", "auto"] = field(
         default="cpu",
-        metadata={"help": "The device used in model export, use cuda to avoid addmm errors; use npu/cuda to speed up exporting."},
+        metadata={"help": "The device used in model export, use `auto` to accelerate exporting."},
     )
     export_quantization_bit: Optional[int] = field(
         default=None,

From 0b7cf306f71b45790ce64af8c5ac470b8a87dc36 Mon Sep 17 00:00:00 2001
From: hoshi-hiyouga <hiyouga@buaa.edu.cn>
Date: Thu, 6 Jun 2024 03:14:46 +0800
Subject: [PATCH 4/4] Update export.py

Former-commit-id: feaee36c4642558c6c4e069b0fd42c20691e3ff4
---
 src/llamafactory/webui/components/export.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/llamafactory/webui/components/export.py b/src/llamafactory/webui/components/export.py
index 791a833b..7e1493c8 100644
--- a/src/llamafactory/webui/components/export.py
+++ b/src/llamafactory/webui/components/export.py
@@ -89,7 +89,7 @@ def create_export_tab(engine: "Engine") -> Dict[str, "Component"]:
         export_size = gr.Slider(minimum=1, maximum=100, value=1, step=1)
         export_quantization_bit = gr.Dropdown(choices=["none"] + GPTQ_BITS, value="none")
         export_quantization_dataset = gr.Textbox(value="data/c4_demo.json")
-        export_device = gr.Radio(choices=["cpu", "cuda/npu"], value="cpu")
+        export_device = gr.Radio(choices=["cpu", "auto"], value="cpu")
         export_legacy_format = gr.Checkbox()
 
     with gr.Row():