mirror of
https://github.com/hiyouga/LLaMA-Factory.git
synced 2025-08-02 03:32:50 +08:00
release v0.9.0 (real)
Former-commit-id: 90d6df622252c6fad985f68b97771c979357e2fc
This commit is contained in:
parent
561ae4d1af
commit
3aefdad4ec
@ -24,10 +24,12 @@ WANDB_DISABLED=
|
|||||||
WANDB_PROJECT=huggingface
|
WANDB_PROJECT=huggingface
|
||||||
WANDB_API_KEY=
|
WANDB_API_KEY=
|
||||||
# gradio ui
|
# gradio ui
|
||||||
GRADIO_SHARE=0
|
GRADIO_SHARE=False
|
||||||
GRADIO_SERVER_NAME=0.0.0.0
|
GRADIO_SERVER_NAME=0.0.0.0
|
||||||
GRADIO_SERVER_PORT=
|
GRADIO_SERVER_PORT=
|
||||||
GRADIO_ROOT_PATH=
|
GRADIO_ROOT_PATH=
|
||||||
|
# setup
|
||||||
|
ENABLE_SHORT_CONSOLE=1
|
||||||
# reserved (do not use)
|
# reserved (do not use)
|
||||||
LLAMABOARD_ENABLED=
|
LLAMABOARD_ENABLED=
|
||||||
LLAMABOARD_WORKDIR=
|
LLAMABOARD_WORKDIR=
|
||||||
|
@ -275,7 +275,7 @@ You also can add a custom chat template to [template.py](src/llamafactory/data/t
|
|||||||
- [Magpie-Pro-300K-Filtered (en)](https://huggingface.co/datasets/Magpie-Align/Magpie-Pro-300K-Filtered)
|
- [Magpie-Pro-300K-Filtered (en)](https://huggingface.co/datasets/Magpie-Align/Magpie-Pro-300K-Filtered)
|
||||||
- [Magpie-ultra-v0.1 (en)](https://huggingface.co/datasets/argilla/magpie-ultra-v0.1)
|
- [Magpie-ultra-v0.1 (en)](https://huggingface.co/datasets/argilla/magpie-ultra-v0.1)
|
||||||
- [LLaVA mixed (en&zh)](https://huggingface.co/datasets/BUAADreamer/llava-en-zh-300k)
|
- [LLaVA mixed (en&zh)](https://huggingface.co/datasets/BUAADreamer/llava-en-zh-300k)
|
||||||
- [Pokemon-gpt4o-captions](https://huggingface.co/datasets/jugg1024/pokemon-gpt4o-captions)
|
- [Pokemon-gpt4o-captions (en&zh)](https://huggingface.co/datasets/jugg1024/pokemon-gpt4o-captions)
|
||||||
- [Open Assistant (de)](https://huggingface.co/datasets/mayflowergmbh/oasst_de)
|
- [Open Assistant (de)](https://huggingface.co/datasets/mayflowergmbh/oasst_de)
|
||||||
- [Dolly 15k (de)](https://huggingface.co/datasets/mayflowergmbh/dolly-15k_de)
|
- [Dolly 15k (de)](https://huggingface.co/datasets/mayflowergmbh/dolly-15k_de)
|
||||||
- [Alpaca GPT4 (de)](https://huggingface.co/datasets/mayflowergmbh/alpaca-gpt4_de)
|
- [Alpaca GPT4 (de)](https://huggingface.co/datasets/mayflowergmbh/alpaca-gpt4_de)
|
||||||
|
@ -276,7 +276,7 @@ https://github.com/user-attachments/assets/e6ce34b0-52d5-4f3e-a830-592106c4c272
|
|||||||
- [Magpie-Pro-300K-Filtered (en)](https://huggingface.co/datasets/Magpie-Align/Magpie-Pro-300K-Filtered)
|
- [Magpie-Pro-300K-Filtered (en)](https://huggingface.co/datasets/Magpie-Align/Magpie-Pro-300K-Filtered)
|
||||||
- [Magpie-ultra-v0.1 (en)](https://huggingface.co/datasets/argilla/magpie-ultra-v0.1)
|
- [Magpie-ultra-v0.1 (en)](https://huggingface.co/datasets/argilla/magpie-ultra-v0.1)
|
||||||
- [LLaVA mixed (en&zh)](https://huggingface.co/datasets/BUAADreamer/llava-en-zh-300k)
|
- [LLaVA mixed (en&zh)](https://huggingface.co/datasets/BUAADreamer/llava-en-zh-300k)
|
||||||
- [Pokemon-gpt4o-captions](https://huggingface.co/datasets/jugg1024/pokemon-gpt4o-captions)
|
- [Pokemon-gpt4o-captions (en&zh)](https://huggingface.co/datasets/jugg1024/pokemon-gpt4o-captions)
|
||||||
- [Open Assistant (de)](https://huggingface.co/datasets/mayflowergmbh/oasst_de)
|
- [Open Assistant (de)](https://huggingface.co/datasets/mayflowergmbh/oasst_de)
|
||||||
- [Dolly 15k (de)](https://huggingface.co/datasets/mayflowergmbh/dolly-15k_de)
|
- [Dolly 15k (de)](https://huggingface.co/datasets/mayflowergmbh/dolly-15k_de)
|
||||||
- [Alpaca GPT4 (de)](https://huggingface.co/datasets/mayflowergmbh/alpaca-gpt4_de)
|
- [Alpaca GPT4 (de)](https://huggingface.co/datasets/mayflowergmbh/alpaca-gpt4_de)
|
||||||
|
@ -131,6 +131,9 @@ def calculate_mfu(
|
|||||||
"dataset": "c4_demo",
|
"dataset": "c4_demo",
|
||||||
"cutoff_len": seq_length,
|
"cutoff_len": seq_length,
|
||||||
"output_dir": os.path.join("saves", "test_mfu"),
|
"output_dir": os.path.join("saves", "test_mfu"),
|
||||||
|
"logging_strategy": "no",
|
||||||
|
"save_strategy": "no",
|
||||||
|
"save_only_model": True,
|
||||||
"overwrite_output_dir": True,
|
"overwrite_output_dir": True,
|
||||||
"per_device_train_batch_size": batch_size,
|
"per_device_train_batch_size": batch_size,
|
||||||
"max_steps": num_steps,
|
"max_steps": num_steps,
|
||||||
|
15
setup.py
15
setup.py
@ -14,11 +14,12 @@
|
|||||||
|
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
|
from typing import List
|
||||||
|
|
||||||
from setuptools import find_packages, setup
|
from setuptools import find_packages, setup
|
||||||
|
|
||||||
|
|
||||||
def get_version():
|
def get_version() -> str:
|
||||||
with open(os.path.join("src", "llamafactory", "extras", "env.py"), "r", encoding="utf-8") as f:
|
with open(os.path.join("src", "llamafactory", "extras", "env.py"), "r", encoding="utf-8") as f:
|
||||||
file_content = f.read()
|
file_content = f.read()
|
||||||
pattern = r"{}\W*=\W*\"([^\"]+)\"".format("VERSION")
|
pattern = r"{}\W*=\W*\"([^\"]+)\"".format("VERSION")
|
||||||
@ -26,13 +27,21 @@ def get_version():
|
|||||||
return version
|
return version
|
||||||
|
|
||||||
|
|
||||||
def get_requires():
|
def get_requires() -> List[str]:
|
||||||
with open("requirements.txt", "r", encoding="utf-8") as f:
|
with open("requirements.txt", "r", encoding="utf-8") as f:
|
||||||
file_content = f.read()
|
file_content = f.read()
|
||||||
lines = [line.strip() for line in file_content.strip().split("\n") if not line.startswith("#")]
|
lines = [line.strip() for line in file_content.strip().split("\n") if not line.startswith("#")]
|
||||||
return lines
|
return lines
|
||||||
|
|
||||||
|
|
||||||
|
def get_console_scripts() -> List[str]:
|
||||||
|
console_scripts = ["llamafactory-cli = llamafactory.cli:main"]
|
||||||
|
if os.environ.get("ENABLE_SHORT_CONSOLE", "1").lower() in ["true", "1"]:
|
||||||
|
console_scripts.append("lmf = llamafactory.cli:main")
|
||||||
|
|
||||||
|
return console_scripts
|
||||||
|
|
||||||
|
|
||||||
extra_require = {
|
extra_require = {
|
||||||
"torch": ["torch>=1.13.1"],
|
"torch": ["torch>=1.13.1"],
|
||||||
"torch-npu": ["torch==2.1.0", "torch-npu==2.1.0.post3", "decorator"],
|
"torch-npu": ["torch==2.1.0", "torch-npu==2.1.0.post3", "decorator"],
|
||||||
@ -72,7 +81,7 @@ def main():
|
|||||||
python_requires=">=3.8.0",
|
python_requires=">=3.8.0",
|
||||||
install_requires=get_requires(),
|
install_requires=get_requires(),
|
||||||
extras_require=extra_require,
|
extras_require=extra_require,
|
||||||
entry_points={"console_scripts": ["llamafactory-cli = llamafactory.cli:main"]},
|
entry_points={"console_scripts": get_console_scripts()},
|
||||||
classifiers=[
|
classifiers=[
|
||||||
"Development Status :: 4 - Beta",
|
"Development Status :: 4 - Beta",
|
||||||
"Intended Audience :: Developers",
|
"Intended Audience :: Developers",
|
||||||
|
@ -829,7 +829,7 @@ register_model_group(
|
|||||||
|
|
||||||
register_model_group(
|
register_model_group(
|
||||||
models={
|
models={
|
||||||
"MiniCPM3-4B": {
|
"MiniCPM3-4B-Chat": {
|
||||||
DownloadSource.DEFAULT: "openbmb/MiniCPM3-4B",
|
DownloadSource.DEFAULT: "openbmb/MiniCPM3-4B",
|
||||||
DownloadSource.MODELSCOPE: "OpenBMB/MiniCPM3-4B",
|
DownloadSource.MODELSCOPE: "OpenBMB/MiniCPM3-4B",
|
||||||
},
|
},
|
||||||
|
@ -96,38 +96,45 @@ def fix_valuehead_checkpoint(
|
|||||||
|
|
||||||
|
|
||||||
class FixValueHeadModelCallback(TrainerCallback):
|
class FixValueHeadModelCallback(TrainerCallback):
|
||||||
|
r"""
|
||||||
|
A callback for fixing the checkpoint for valuehead models.
|
||||||
|
"""
|
||||||
|
|
||||||
@override
|
@override
|
||||||
def on_save(self, args: "TrainingArguments", state: "TrainerState", control: "TrainerControl", **kwargs):
|
def on_save(self, args: "TrainingArguments", state: "TrainerState", control: "TrainerControl", **kwargs):
|
||||||
r"""
|
r"""
|
||||||
Event called after a checkpoint save.
|
Event called after a checkpoint save.
|
||||||
"""
|
"""
|
||||||
if args.should_save:
|
if args.should_save:
|
||||||
|
output_dir = os.path.join(args.output_dir, "{}-{}".format(PREFIX_CHECKPOINT_DIR, state.global_step))
|
||||||
fix_valuehead_checkpoint(
|
fix_valuehead_checkpoint(
|
||||||
model=kwargs.pop("model"),
|
model=kwargs.pop("model"), output_dir=output_dir, safe_serialization=args.save_safetensors
|
||||||
output_dir=os.path.join(args.output_dir, "{}-{}".format(PREFIX_CHECKPOINT_DIR, state.global_step)),
|
|
||||||
safe_serialization=args.save_safetensors,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class SaveProcessorCallback(TrainerCallback):
|
class SaveProcessorCallback(TrainerCallback):
|
||||||
|
r"""
|
||||||
|
A callback for saving the processor.
|
||||||
|
"""
|
||||||
|
|
||||||
def __init__(self, processor: "ProcessorMixin") -> None:
|
def __init__(self, processor: "ProcessorMixin") -> None:
|
||||||
r"""
|
|
||||||
Initializes a callback for saving the processor.
|
|
||||||
"""
|
|
||||||
self.processor = processor
|
self.processor = processor
|
||||||
|
|
||||||
|
@override
|
||||||
|
def on_save(self, args: "TrainingArguments", state: "TrainerState", control: "TrainerControl", **kwargs):
|
||||||
|
if args.should_save:
|
||||||
|
output_dir = os.path.join(args.output_dir, "{}-{}".format(PREFIX_CHECKPOINT_DIR, state.global_step))
|
||||||
|
getattr(self.processor, "image_processor").save_pretrained(output_dir)
|
||||||
|
|
||||||
@override
|
@override
|
||||||
def on_train_end(self, args: "TrainingArguments", state: "TrainerState", control: "TrainerControl", **kwargs):
|
def on_train_end(self, args: "TrainingArguments", state: "TrainerState", control: "TrainerControl", **kwargs):
|
||||||
r"""
|
|
||||||
Event called at the end of training.
|
|
||||||
"""
|
|
||||||
if args.should_save:
|
if args.should_save:
|
||||||
getattr(self.processor, "image_processor").save_pretrained(args.output_dir)
|
getattr(self.processor, "image_processor").save_pretrained(args.output_dir)
|
||||||
|
|
||||||
|
|
||||||
class PissaConvertCallback(TrainerCallback):
|
class PissaConvertCallback(TrainerCallback):
|
||||||
r"""
|
r"""
|
||||||
Initializes a callback for converting the PiSSA adapter to a normal one.
|
A callback for converting the PiSSA adapter to a normal one.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
@override
|
@override
|
||||||
@ -147,9 +154,6 @@ class PissaConvertCallback(TrainerCallback):
|
|||||||
|
|
||||||
@override
|
@override
|
||||||
def on_train_end(self, args: "TrainingArguments", state: "TrainerState", control: "TrainerControl", **kwargs):
|
def on_train_end(self, args: "TrainingArguments", state: "TrainerState", control: "TrainerControl", **kwargs):
|
||||||
r"""
|
|
||||||
Event called at the end of training.
|
|
||||||
"""
|
|
||||||
if args.should_save:
|
if args.should_save:
|
||||||
model = kwargs.pop("model")
|
model = kwargs.pop("model")
|
||||||
pissa_init_dir = os.path.join(args.output_dir, "pissa_init")
|
pissa_init_dir = os.path.join(args.output_dir, "pissa_init")
|
||||||
@ -177,21 +181,22 @@ class PissaConvertCallback(TrainerCallback):
|
|||||||
|
|
||||||
|
|
||||||
class LogCallback(TrainerCallback):
|
class LogCallback(TrainerCallback):
|
||||||
|
r"""
|
||||||
|
A callback for logging training and evaluation status.
|
||||||
|
"""
|
||||||
|
|
||||||
def __init__(self) -> None:
|
def __init__(self) -> None:
|
||||||
r"""
|
# Progress
|
||||||
Initializes a callback for logging training and evaluation status.
|
|
||||||
"""
|
|
||||||
""" Progress """
|
|
||||||
self.start_time = 0
|
self.start_time = 0
|
||||||
self.cur_steps = 0
|
self.cur_steps = 0
|
||||||
self.max_steps = 0
|
self.max_steps = 0
|
||||||
self.elapsed_time = ""
|
self.elapsed_time = ""
|
||||||
self.remaining_time = ""
|
self.remaining_time = ""
|
||||||
self.thread_pool: Optional["ThreadPoolExecutor"] = None
|
self.thread_pool: Optional["ThreadPoolExecutor"] = None
|
||||||
""" Status """
|
# Status
|
||||||
self.aborted = False
|
self.aborted = False
|
||||||
self.do_train = False
|
self.do_train = False
|
||||||
""" Web UI """
|
# Web UI
|
||||||
self.webui_mode = os.environ.get("LLAMABOARD_ENABLED", "0").lower() in ["true", "1"]
|
self.webui_mode = os.environ.get("LLAMABOARD_ENABLED", "0").lower() in ["true", "1"]
|
||||||
if self.webui_mode:
|
if self.webui_mode:
|
||||||
signal.signal(signal.SIGABRT, self._set_abort)
|
signal.signal(signal.SIGABRT, self._set_abort)
|
||||||
@ -233,9 +238,6 @@ class LogCallback(TrainerCallback):
|
|||||||
|
|
||||||
@override
|
@override
|
||||||
def on_init_end(self, args: "TrainingArguments", state: "TrainerState", control: "TrainerControl", **kwargs):
|
def on_init_end(self, args: "TrainingArguments", state: "TrainerState", control: "TrainerControl", **kwargs):
|
||||||
r"""
|
|
||||||
Event called at the end of the initialization of the `Trainer`.
|
|
||||||
"""
|
|
||||||
if (
|
if (
|
||||||
args.should_save
|
args.should_save
|
||||||
and os.path.exists(os.path.join(args.output_dir, TRAINER_LOG))
|
and os.path.exists(os.path.join(args.output_dir, TRAINER_LOG))
|
||||||
@ -246,9 +248,6 @@ class LogCallback(TrainerCallback):
|
|||||||
|
|
||||||
@override
|
@override
|
||||||
def on_train_begin(self, args: "TrainingArguments", state: "TrainerState", control: "TrainerControl", **kwargs):
|
def on_train_begin(self, args: "TrainingArguments", state: "TrainerState", control: "TrainerControl", **kwargs):
|
||||||
r"""
|
|
||||||
Event called at the beginning of training.
|
|
||||||
"""
|
|
||||||
if args.should_save:
|
if args.should_save:
|
||||||
self.do_train = True
|
self.do_train = True
|
||||||
self._reset(max_steps=state.max_steps)
|
self._reset(max_steps=state.max_steps)
|
||||||
@ -256,50 +255,32 @@ class LogCallback(TrainerCallback):
|
|||||||
|
|
||||||
@override
|
@override
|
||||||
def on_train_end(self, args: "TrainingArguments", state: "TrainerState", control: "TrainerControl", **kwargs):
|
def on_train_end(self, args: "TrainingArguments", state: "TrainerState", control: "TrainerControl", **kwargs):
|
||||||
r"""
|
|
||||||
Event called at the end of training.
|
|
||||||
"""
|
|
||||||
self._close_thread_pool()
|
self._close_thread_pool()
|
||||||
|
|
||||||
@override
|
@override
|
||||||
def on_substep_end(self, args: "TrainingArguments", state: "TrainerState", control: "TrainerControl", **kwargs):
|
def on_substep_end(self, args: "TrainingArguments", state: "TrainerState", control: "TrainerControl", **kwargs):
|
||||||
r"""
|
|
||||||
Event called at the end of an substep during gradient accumulation.
|
|
||||||
"""
|
|
||||||
if self.aborted:
|
if self.aborted:
|
||||||
control.should_epoch_stop = True
|
control.should_epoch_stop = True
|
||||||
control.should_training_stop = True
|
control.should_training_stop = True
|
||||||
|
|
||||||
@override
|
@override
|
||||||
def on_step_end(self, args: "TrainingArguments", state: "TrainerState", control: "TrainerControl", **kwargs):
|
def on_step_end(self, args: "TrainingArguments", state: "TrainerState", control: "TrainerControl", **kwargs):
|
||||||
r"""
|
|
||||||
Event called at the end of a training step.
|
|
||||||
"""
|
|
||||||
if self.aborted:
|
if self.aborted:
|
||||||
control.should_epoch_stop = True
|
control.should_epoch_stop = True
|
||||||
control.should_training_stop = True
|
control.should_training_stop = True
|
||||||
|
|
||||||
@override
|
@override
|
||||||
def on_evaluate(self, args: "TrainingArguments", state: "TrainerState", control: "TrainerControl", **kwargs):
|
def on_evaluate(self, args: "TrainingArguments", state: "TrainerState", control: "TrainerControl", **kwargs):
|
||||||
r"""
|
|
||||||
Event called after an evaluation phase.
|
|
||||||
"""
|
|
||||||
if not self.do_train:
|
if not self.do_train:
|
||||||
self._close_thread_pool()
|
self._close_thread_pool()
|
||||||
|
|
||||||
@override
|
@override
|
||||||
def on_predict(self, args: "TrainingArguments", state: "TrainerState", control: "TrainerControl", **kwargs):
|
def on_predict(self, args: "TrainingArguments", state: "TrainerState", control: "TrainerControl", **kwargs):
|
||||||
r"""
|
|
||||||
Event called after a successful prediction.
|
|
||||||
"""
|
|
||||||
if not self.do_train:
|
if not self.do_train:
|
||||||
self._close_thread_pool()
|
self._close_thread_pool()
|
||||||
|
|
||||||
@override
|
@override
|
||||||
def on_log(self, args: "TrainingArguments", state: "TrainerState", control: "TrainerControl", **kwargs):
|
def on_log(self, args: "TrainingArguments", state: "TrainerState", control: "TrainerControl", **kwargs):
|
||||||
r"""
|
|
||||||
Event called after logging the last logs.
|
|
||||||
"""
|
|
||||||
if not args.should_save:
|
if not args.should_save:
|
||||||
return
|
return
|
||||||
|
|
||||||
@ -342,9 +323,6 @@ class LogCallback(TrainerCallback):
|
|||||||
def on_prediction_step(
|
def on_prediction_step(
|
||||||
self, args: "TrainingArguments", state: "TrainerState", control: "TrainerControl", **kwargs
|
self, args: "TrainingArguments", state: "TrainerState", control: "TrainerControl", **kwargs
|
||||||
):
|
):
|
||||||
r"""
|
|
||||||
Event called after a prediction step.
|
|
||||||
"""
|
|
||||||
if self.do_train:
|
if self.do_train:
|
||||||
return
|
return
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user