From 7ccb86b215b318e1029127bf0f0f243a32bb0312 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Sun, 8 Sep 2024 00:56:56 +0800 Subject: [PATCH] add docstrings, refactor logger Former-commit-id: 54c69059379d77dc9046c144cbe2d0253de3a4da --- .env.local | 33 +++++++++++ data/README_zh.md | 4 +- scripts/cal_mfu.py | 6 +- scripts/cal_ppl.py | 4 +- src/llamafactory/__init__.py | 1 + src/llamafactory/api/app.py | 19 ++++-- src/llamafactory/api/chat.py | 5 +- src/llamafactory/chat/base_engine.py | 30 ++++++++-- src/llamafactory/chat/chat_model.py | 27 +++++++++ src/llamafactory/chat/hf_engine.py | 4 ++ src/llamafactory/chat/vllm_engine.py | 5 ++ src/llamafactory/cli.py | 2 +- src/llamafactory/data/data_utils.py | 9 ++- src/llamafactory/data/formatter.py | 28 +++++++-- src/llamafactory/data/loader.py | 14 ++++- src/llamafactory/data/mm_plugin.py | 8 +++ src/llamafactory/data/template.py | 10 +++- src/llamafactory/data/tool_utils.py | 42 +++++++++++--- src/llamafactory/eval/evaluator.py | 10 +++- src/llamafactory/extras/logging.py | 77 +++++++++++++++++++------ src/llamafactory/extras/ploting.py | 2 +- src/llamafactory/train/callbacks.py | 14 +++++ src/llamafactory/train/dpo/trainer.py | 6 ++ src/llamafactory/train/kto/trainer.py | 8 +++ src/llamafactory/train/ppo/trainer.py | 5 ++ src/llamafactory/train/pt/trainer.py | 3 + src/llamafactory/train/rm/metric.py | 4 ++ src/llamafactory/train/rm/trainer.py | 4 ++ src/llamafactory/train/sft/trainer.py | 4 ++ src/llamafactory/train/trainer_utils.py | 3 + 30 files changed, 334 insertions(+), 57 deletions(-) create mode 100644 .env.local diff --git a/.env.local b/.env.local new file mode 100644 index 00000000..d3013934 --- /dev/null +++ b/.env.local @@ -0,0 +1,33 @@ +# Note: actually we do not support .env, just for reference +# api +API_HOST=0.0.0.0 +API_PORT=8000 +API_KEY= +API_MODEL_NAME=gpt-3.5-turbo +FASTAPI_ROOT_PATH= +# general +DISABLE_VERSION_CHECK= +FORCE_CHECK_IMPORTS= +FORCE_TORCHRUN= 
+LLAMAFACTORY_VERBOSITY= +USE_MODELSCOPE_HUB= +RECORD_VRAM= +# torchrun +FORCE_TORCHRUN= +MASTER_ADDR= +MASTER_PORT= +NNODES= +RANK= +NPROC_PER_NODE= +# wandb +WANDB_DISABLED= +WANDB_PROJECT=huggingface +WANDB_API_KEY= +# gradio ui +GRADIO_SHARE=0 +GRADIO_SERVER_NAME=0.0.0.0 +GRADIO_SERVER_PORT= +GRADIO_ROOT_PATH= +# reserved (do not use) +LLAMABOARD_ENABLED= +LLAMABOARD_WORKDIR= diff --git a/data/README_zh.md b/data/README_zh.md index 7d346d4b..5cfa4b80 100644 --- a/data/README_zh.md +++ b/data/README_zh.md @@ -298,7 +298,7 @@ KTO 数据集需要额外添加一个 `kto_tag` 列,包含 bool 类型的人 多模态图像数据集需要额外添加一个 `images` 列,包含输入图像的路径。 -注意图片的数量必须和对话中 `<image>` 标记的数量严格一致。 +注意图片的数量必须与文本中所有 `<image>` 标记的数量严格一致。 ```json [ @@ -339,7 +339,7 @@ KTO 数据集需要额外添加一个 `kto_tag` 列,包含 bool 类型的人 多模态视频数据集需要额外添加一个 `videos` 列,包含输入视频的路径。 -注意视频的数量必须和对话中 `