From de509fa081707007cc45f016364977bdf6a8b57d Mon Sep 17 00:00:00 2001 From: cocktailpeanut Date: Thu, 9 May 2024 01:26:15 -0400 Subject: [PATCH 01/14] remove unnecessary environment variable usage Former-commit-id: b783673e0aa064963d9cce712378f73c9b5ff51d --- src/webui.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/webui.py b/src/webui.py index b9385259..000098d1 100644 --- a/src/webui.py +++ b/src/webui.py @@ -5,9 +5,7 @@ from llmtuner.webui.interface import create_ui def main(): server_name = os.environ.get("GRADIO_SERVER_NAME", "0.0.0.0") - server_port = int(os.environ.get("GRADIO_SERVER_PORT", "7860")) - gradio_share = bool(int(os.environ.get("GRADIO_SHARE", "0"))) - create_ui().queue().launch(share=gradio_share, server_name=server_name, server_port=server_port) + create_ui().queue().launch(server_name=server_name) if __name__ == "__main__": From 58c5a5afafde79bd854e5f66110aa6a7a2203cd1 Mon Sep 17 00:00:00 2001 From: cocktailpeanut Date: Thu, 9 May 2024 01:32:00 -0400 Subject: [PATCH 02/14] more removal of unnecessary environment variables Former-commit-id: 425b9d616622467487339d5055b5d513c0fe7814 --- src/llmtuner/webui/interface.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/llmtuner/webui/interface.py b/src/llmtuner/webui/interface.py index 969ce6bd..6cfce8aa 100644 --- a/src/llmtuner/webui/interface.py +++ b/src/llmtuner/webui/interface.py @@ -79,6 +79,4 @@ def run_web_ui() -> None: def run_web_demo() -> None: server_name = os.environ.get("GRADIO_SERVER_NAME", "0.0.0.0") - server_port = int(os.environ.get("GRADIO_SERVER_PORT", "7860")) - gradio_share = bool(int(os.environ.get("GRADIO_SHARE", "0"))) - create_web_demo().queue().launch(share=gradio_share, server_name=server_name, server_port=server_port) + create_web_demo().queue().launch(server_name=server_name) From 2370e7403f0fba55f179b8ddfb8353f06c9929ac Mon Sep 17 00:00:00 2001 From: cocktailpeanut Date: Thu, 9 May 2024 01:33:20 -0400 Subject: [PATCH 03/14] yet another removal of unnecessary environment variables Former-commit-id: 3c11157a496ae5a1e786870e4642475e53aad123 --- src/llmtuner/webui/interface.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/llmtuner/webui/interface.py b/src/llmtuner/webui/interface.py index 6cfce8aa..bbd91bb7 100644 --- a/src/llmtuner/webui/interface.py +++ b/src/llmtuner/webui/interface.py @@ -72,9 +72,7 @@ def create_web_demo() -> gr.Blocks: def run_web_ui() -> None: server_name = os.environ.get("GRADIO_SERVER_NAME", "0.0.0.0") - server_port = int(os.environ.get("GRADIO_SERVER_PORT", "7860")) - gradio_share = bool(int(os.environ.get("GRADIO_SHARE", "0"))) - create_ui().queue().launch(share=gradio_share, server_name=server_name, server_port=server_port) + create_ui().queue().launch(server_name=server_name) def run_web_demo() -> None: From dd42439b038cea2fbcdb854e838850e009a9efbd Mon Sep 17 00:00:00 2001 From: Tendo33 Date: Thu, 9 May 2024 14:28:01 +0800 Subject: [PATCH 04/14] 1.Change the name of is_fastapi_available function 2. 
Added the log of printing requests when deploying using vllm Former-commit-id: fd2e6dec589f4ebe55d4c203991c47bf5b728ef8 --- src/llmtuner/api/app.py | 16 +++++--- src/llmtuner/api/chat.py | 73 ++++++++++++++++++++++----------- src/llmtuner/api/common.py | 6 +-- src/llmtuner/extras/packages.py | 2 +- 4 files changed, 64 insertions(+), 33 deletions(-) diff --git a/src/llmtuner/api/app.py b/src/llmtuner/api/app.py index 375ee61f..2d93312d 100644 --- a/src/llmtuner/api/app.py +++ b/src/llmtuner/api/app.py @@ -4,7 +4,7 @@ from typing import Annotated, Optional from ..chat import ChatModel from ..extras.misc import torch_gc -from ..extras.packages import is_fastapi_availble, is_starlette_available, is_uvicorn_available +from ..extras.packages import is_fastapi_available, is_starlette_available, is_uvicorn_available from .chat import ( create_chat_completion_response, create_score_evaluation_response, @@ -20,7 +20,7 @@ from .protocol import ( ) -if is_fastapi_availble(): +if is_fastapi_available(): from fastapi import Depends, FastAPI, HTTPException, status from fastapi.middleware.cors import CORSMiddleware from fastapi.security.http import HTTPAuthorizationCredentials, HTTPBearer @@ -54,7 +54,8 @@ def create_app(chat_model: "ChatModel") -> "FastAPI": async def verify_api_key(auth: Annotated[Optional[HTTPAuthorizationCredentials], Depends(security)]): if api_key and (auth is None or auth.credentials != api_key): - raise HTTPException(status_code=status.HTTP_401_UNAUTHORIZED, detail="Invalid API key.") + raise HTTPException( + status_code=status.HTTP_401_UNAUTHORIZED, detail="Invalid API key.") @app.get( "/v1/models", @@ -74,10 +75,12 @@ def create_app(chat_model: "ChatModel") -> "FastAPI": ) async def create_chat_completion(request: ChatCompletionRequest): if not chat_model.engine.can_generate: - raise HTTPException(status_code=status.HTTP_405_METHOD_NOT_ALLOWED, detail="Not allowed") + raise HTTPException( + status_code=status.HTTP_405_METHOD_NOT_ALLOWED, detail="Not allowed") if request.stream: - generate = create_stream_chat_completion_response(request, chat_model) + generate = create_stream_chat_completion_response( + request, chat_model) return EventSourceResponse(generate, media_type="text/event-stream") else: return await create_chat_completion_response(request, chat_model) @@ -90,7 +93,8 @@ def create_app(chat_model: "ChatModel") -> "FastAPI": ) async def create_score_evaluation(request: ScoreEvaluationRequest): if chat_model.engine.can_generate: - raise HTTPException(status_code=status.HTTP_405_METHOD_NOT_ALLOWED, detail="Not allowed") + raise HTTPException( + status_code=status.HTTP_405_METHOD_NOT_ALLOWED, detail="Not allowed") return await create_score_evaluation_response(request, chat_model) diff --git a/src/llmtuner/api/chat.py b/src/llmtuner/api/chat.py index 2a703877..3ab473d1 100644 --- a/src/llmtuner/api/chat.py +++ b/src/llmtuner/api/chat.py @@ -3,7 +3,8 @@ import uuid from typing import TYPE_CHECKING, AsyncGenerator, Dict, List, Optional, Tuple from ..data import Role as DataRole -from ..extras.packages import is_fastapi_availble +from ..extras.packages import is_fastapi_available +from ..extras.logging import get_logger from .common import dictify, jsonify from .protocol import ( ChatCompletionMessage, @@ -19,8 +20,9 @@ from .protocol import ( ScoreEvaluationResponse, ) +logger = get_logger(__name__) -if is_fastapi_availble(): +if is_fastapi_available(): from fastapi import HTTPException, status @@ -39,8 +41,13 @@ ROLE_MAPPING = { def _process_request(request: 
"ChatCompletionRequest") -> Tuple[List[Dict[str, str]], str, str]: + + params = dictify(request) + logger.info(f"==== request ====\n{params}") + if len(request.messages) == 0: - raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Invalid length") + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, detail="Invalid length") if request.messages[0].role == Role.SYSTEM: system = request.messages.pop(0).content @@ -48,29 +55,37 @@ def _process_request(request: "ChatCompletionRequest") -> Tuple[List[Dict[str, s system = "" if len(request.messages) % 2 == 0: - raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Only supports u/a/u/a/u...") + raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, + detail="Only supports u/a/u/a/u...") input_messages = [] for i, message in enumerate(request.messages): if i % 2 == 0 and message.role not in [Role.USER, Role.TOOL]: - raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Invalid role") + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, detail="Invalid role") elif i % 2 == 1 and message.role not in [Role.ASSISTANT, Role.FUNCTION]: - raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Invalid role") + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, detail="Invalid role") if message.role == Role.ASSISTANT and isinstance(message.tool_calls, list) and len(message.tool_calls): name = message.tool_calls[0].function.name arguments = message.tool_calls[0].function.arguments - content = json.dumps({"name": name, "argument": arguments}, ensure_ascii=False) - input_messages.append({"role": ROLE_MAPPING[Role.FUNCTION], "content": content}) + content = json.dumps( + {"name": name, "argument": arguments}, ensure_ascii=False) + input_messages.append( + {"role": ROLE_MAPPING[Role.FUNCTION], "content": content}) else: - input_messages.append({"role": ROLE_MAPPING[message.role], "content": message.content}) + input_messages.append( + {"role": ROLE_MAPPING[message.role], "content": message.content}) tool_list = request.tools if isinstance(tool_list, list) and len(tool_list): try: - tools = json.dumps([dictify(tool.function) for tool in tool_list], ensure_ascii=False) + tools = json.dumps([dictify(tool.function) + for tool in tool_list], ensure_ascii=False) except Exception: - raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Invalid tools") + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, detail="Invalid tools") else: tools = "" @@ -84,8 +99,10 @@ def _create_stream_chat_completion_chunk( index: Optional[int] = 0, finish_reason: Optional["Finish"] = None, ) -> str: - choice_data = ChatCompletionStreamResponseChoice(index=index, delta=delta, finish_reason=finish_reason) - chunk = ChatCompletionStreamResponse(id=completion_id, model=model, choices=[choice_data]) + choice_data = ChatCompletionStreamResponseChoice( + index=index, delta=delta, finish_reason=finish_reason) + chunk = ChatCompletionStreamResponse( + id=completion_id, model=model, choices=[choice_data]) return jsonify(chunk) @@ -110,21 +127,26 @@ async def create_chat_completion_response( choices = [] for i, response in enumerate(responses): if tools: - result = chat_model.engine.template.format_tools.extract(response.response_text) + result = chat_model.engine.template.format_tools.extract( + response.response_text) else: result = response.response_text if isinstance(result, tuple): name, arguments = result function = Function(name=name, arguments=arguments) - 
tool_call = FunctionCall(id="call_{}".format(uuid.uuid4().hex), function=function) - response_message = ChatCompletionMessage(role=Role.ASSISTANT, tool_calls=[tool_call]) + tool_call = FunctionCall(id="call_{}".format( + uuid.uuid4().hex), function=function) + response_message = ChatCompletionMessage( + role=Role.ASSISTANT, tool_calls=[tool_call]) finish_reason = Finish.TOOL else: - response_message = ChatCompletionMessage(role=Role.ASSISTANT, content=result) + response_message = ChatCompletionMessage( + role=Role.ASSISTANT, content=result) finish_reason = Finish.STOP if response.finish_reason == "stop" else Finish.LENGTH - choices.append(ChatCompletionResponseChoice(index=i, message=response_message, finish_reason=finish_reason)) + choices.append(ChatCompletionResponseChoice( + index=i, message=response_message, finish_reason=finish_reason)) prompt_length = response.prompt_length response_length += response.response_length @@ -143,13 +165,16 @@ async def create_stream_chat_completion_response( completion_id = "chatcmpl-{}".format(uuid.uuid4().hex) input_messages, system, tools = _process_request(request) if tools: - raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Cannot stream function calls.") + raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, + detail="Cannot stream function calls.") if request.n > 1: - raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Cannot stream multiple responses.") + raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, + detail="Cannot stream multiple responses.") yield _create_stream_chat_completion_chunk( - completion_id=completion_id, model=request.model, delta=ChatCompletionMessage(role=Role.ASSISTANT, content="") + completion_id=completion_id, model=request.model, delta=ChatCompletionMessage( + role=Role.ASSISTANT, content="") ) async for new_token in chat_model.astream_chat( input_messages, @@ -163,7 +188,8 @@ async def create_stream_chat_completion_response( ): if len(new_token) != 0: yield _create_stream_chat_completion_chunk( - completion_id=completion_id, model=request.model, delta=ChatCompletionMessage(content=new_token) + completion_id=completion_id, model=request.model, delta=ChatCompletionMessage( + content=new_token) ) yield _create_stream_chat_completion_chunk( @@ -176,7 +202,8 @@ async def create_score_evaluation_response( request: "ScoreEvaluationRequest", chat_model: "ChatModel" ) -> "ScoreEvaluationResponse": if len(request.messages) == 0: - raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Invalid request") + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, detail="Invalid request") scores = await chat_model.aget_scores(request.messages, max_length=request.max_length) return ScoreEvaluationResponse(model=request.model, scores=scores) diff --git a/src/llmtuner/api/common.py b/src/llmtuner/api/common.py index 5ad9a071..3e95d211 100644 --- a/src/llmtuner/api/common.py +++ b/src/llmtuner/api/common.py @@ -6,11 +6,11 @@ if TYPE_CHECKING: from pydantic import BaseModel -def dictify(data: "BaseModel") -> Dict[str, Any]: +def dictify(data: "BaseModel", **kwargs) -> Dict[str, Any]: try: # pydantic v2 - return data.model_dump(exclude_unset=True) + return data.model_dump(**kwargs) except AttributeError: # pydantic v1 - return data.dict(exclude_unset=True) + return data.dict(**kwargs) def jsonify(data: "BaseModel") -> str: diff --git a/src/llmtuner/extras/packages.py b/src/llmtuner/extras/packages.py index a7317eec..4c9e6492 100644 --- 
a/src/llmtuner/extras/packages.py +++ b/src/llmtuner/extras/packages.py @@ -20,7 +20,7 @@ def _get_package_version(name: str) -> "Version": return version.parse("0.0.0") -def is_fastapi_availble(): +def is_fastapi_available(): return _is_package_available("fastapi") From 10e65f004228b7f64b36e83c70a6e9ed5c0ce058 Mon Sep 17 00:00:00 2001 From: hiyouga Date: Sun, 12 May 2024 00:03:59 +0800 Subject: [PATCH 05/14] fix #3674 Former-commit-id: 56857770f8e66d667e3fe3ad7a11ab321c7fe020 --- src/llmtuner/train/tuner.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/llmtuner/train/tuner.py b/src/llmtuner/train/tuner.py index ffdc3e60..8f103ca1 100644 --- a/src/llmtuner/train/tuner.py +++ b/src/llmtuner/train/tuner.py @@ -68,6 +68,8 @@ def export_model(args: Optional[Dict[str, Any]] = None) -> None: output_dtype = getattr(model.config, "torch_dtype", torch.float16) setattr(model.config, "torch_dtype", output_dtype) model = model.to(output_dtype) + else: + setattr(model.config, "torch_dtype", torch.float16) model.save_pretrained( save_directory=model_args.export_dir, From 0f941f30f7ee06720903ce6e13796e7431aab382 Mon Sep 17 00:00:00 2001 From: hiyouga Date: Sun, 12 May 2024 00:33:49 +0800 Subject: [PATCH 06/14] update readme Former-commit-id: 638043ced426c392014c5f42ce00f378f92f905d --- README.md | 12 +++++++++--- README_zh.md | 13 +++++++++---- 2 files changed, 18 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 90c66caf..80154cae 100644 --- a/README.md +++ b/README.md @@ -366,17 +366,23 @@ See [examples/README.md](examples/README.md) for advanced usage (including distr #### Use local environment ```bash -CUDA_VISIBLE_DEVICES=0 GRADIO_SHARE=1 llamafactory-cli webui +CUDA_VISIBLE_DEVICES=0 GRADIO_SERVER_PORT=7860 GRADIO_SHARE=1 llamafactory-cli webui ``` -
For Alibaba Cloud users +
For Alibaba Cloud PAI or AutoDL users -If you encountered display problems in LLaMA Board on Alibaba Cloud, try using the following command to set environment variables before starting LLaMA Board: +If you encountered display problems in LLaMA Board on Alibaba Cloud PAI, try using the following command to set environment variables before starting LLaMA Board: ```bash export GRADIO_ROOT_PATH=/${JUPYTER_NAME}/proxy/7860/ ``` +If you are using AutoDL, please install a specific version of Gradio: + +```bash +pip install gradio==4.10.0 +``` +
#### Use Docker diff --git a/README_zh.md b/README_zh.md index 0aba9043..5656fb4a 100644 --- a/README_zh.md +++ b/README_zh.md @@ -366,17 +366,23 @@ CUDA_VISIBLE_DEVICES=0 llamafactory-cli export examples/merge_lora/llama3_lora_s #### 使用本地环境 ```bash -CUDA_VISIBLE_DEVICES=0 GRADIO_SHARE=1 llamafactory-cli webui +CUDA_VISIBLE_DEVICES=0 GRADIO_SERVER_PORT=7860 GRADIO_SHARE=1 llamafactory-cli webui ``` -
阿里云用户指南 +
阿里云 PAI 和 AutoDL 用户指南 -如果您在阿里云上使用 LLaMA Board 时遇到显示问题,请尝试在启动前使用以下命令设置环境变量: +如果您在阿里云 PAI 上使用 LLaMA Board 时遇到显示问题,请尝试在启动前使用以下命令设置环境变量: ```bash export GRADIO_ROOT_PATH=/${JUPYTER_NAME}/proxy/7860/ ``` +如果您正在使用 AutoDL,请安装下述 Gradio 版本: + +```bash +pip install gradio==4.10.0 +``` +
#### 使用 Docker @@ -475,7 +481,6 @@ export USE_MODELSCOPE_HUB=1 # Windows 使用 `set USE_MODELSCOPE_HUB=1` 1. **[Luminia-13B-v3](https://huggingface.co/Nekochu/Luminia-13B-v3)**:一个用于生成 Stable Diffusion 提示词的大型语言模型。[[🤗Demo]](https://huggingface.co/spaces/Nekochu/Luminia-13B_SD_Prompt) 1. **[Chinese-LLaVA-Med](https://github.com/BUAADreamer/Chinese-LLaVA-Med)**:中文多模态医学大模型,基于 LLaVA-1.5-7B 在中文多模态医疗数据上微调而得。 -
## 协议 From 51e0f095a94852342397d468d5c17dc5e2789a6d Mon Sep 17 00:00:00 2001 From: hiyouga Date: Sun, 12 May 2024 01:10:30 +0800 Subject: [PATCH 07/14] remove checksum and fix ui args Former-commit-id: 58c522cd5cc4498a3fa8ed99424b5d63c9e56ccb --- README.md | 4 ++-- README_zh.md | 4 ++-- data/dataset_info.json | 25 +++++-------------------- src/llmtuner/data/loader.py | 4 +--- src/llmtuner/data/parser.py | 2 -- src/llmtuner/data/utils.py | 15 --------------- src/llmtuner/webui/interface.py | 6 ++++-- src/webui.py | 3 ++- 8 files changed, 16 insertions(+), 47 deletions(-) diff --git a/README.md b/README.md index 80154cae..57a34dab 100644 --- a/README.md +++ b/README.md @@ -366,7 +366,7 @@ See [examples/README.md](examples/README.md) for advanced usage (including distr #### Use local environment ```bash -CUDA_VISIBLE_DEVICES=0 GRADIO_SERVER_PORT=7860 GRADIO_SHARE=1 llamafactory-cli webui +CUDA_VISIBLE_DEVICES=0 GRADIO_SHARE=1 llamafactory-cli webui ```
For Alibaba Cloud PAI or AutoDL users @@ -374,7 +374,7 @@ CUDA_VISIBLE_DEVICES=0 GRADIO_SERVER_PORT=7860 GRADIO_SHARE=1 llamafactory-cli w If you encountered display problems in LLaMA Board on Alibaba Cloud PAI, try using the following command to set environment variables before starting LLaMA Board: ```bash -export GRADIO_ROOT_PATH=/${JUPYTER_NAME}/proxy/7860/ +export GRADIO_SERVER_PORT=7860 GRADIO_ROOT_PATH=/${JUPYTER_NAME}/proxy/7860/ ``` If you are using AutoDL, please install a specific version of Gradio: diff --git a/README_zh.md b/README_zh.md index 5656fb4a..047b1645 100644 --- a/README_zh.md +++ b/README_zh.md @@ -366,7 +366,7 @@ CUDA_VISIBLE_DEVICES=0 llamafactory-cli export examples/merge_lora/llama3_lora_s #### 使用本地环境 ```bash -CUDA_VISIBLE_DEVICES=0 GRADIO_SERVER_PORT=7860 GRADIO_SHARE=1 llamafactory-cli webui +CUDA_VISIBLE_DEVICES=0 GRADIO_SHARE=1 llamafactory-cli webui ```
阿里云 PAI 和 AutoDL 用户指南 @@ -374,7 +374,7 @@ CUDA_VISIBLE_DEVICES=0 GRADIO_SERVER_PORT=7860 GRADIO_SHARE=1 llamafactory-cli w 如果您在阿里云 PAI 上使用 LLaMA Board 时遇到显示问题,请尝试在启动前使用以下命令设置环境变量: ```bash -export GRADIO_ROOT_PATH=/${JUPYTER_NAME}/proxy/7860/ +export GRADIO_SERVER_PORT=7860 GRADIO_ROOT_PATH=/${JUPYTER_NAME}/proxy/7860/ ``` 如果您正在使用 AutoDL,请安装下述 Gradio 版本: diff --git a/data/dataset_info.json b/data/dataset_info.json index d5b7208f..032a5c49 100644 --- a/data/dataset_info.json +++ b/data/dataset_info.json @@ -1,27 +1,21 @@ { "alpaca_en": { - "file_name": "alpaca_data_en_52k.json", - "file_sha1": "607f94a7f581341e59685aef32f531095232cf23" + "file_name": "alpaca_data_en_52k.json" }, "alpaca_zh": { - "file_name": "alpaca_data_zh_51k.json", - "file_sha1": "2ba9827122c158dc256668d42bd1bcb8bc6b786e" + "file_name": "alpaca_data_zh_51k.json" }, "alpaca_gpt4_en": { - "file_name": "alpaca_gpt4_data_en.json", - "file_sha1": "647f4ad447bd993e4b6b6223d1be15208bab694a" + "file_name": "alpaca_gpt4_data_en.json" }, "alpaca_gpt4_zh": { - "file_name": "alpaca_gpt4_data_zh.json", - "file_sha1": "3eaa3bda364ccdd59925d7448a698256c31ef845" + "file_name": "alpaca_gpt4_data_zh.json" }, "identity": { - "file_name": "identity.json", - "file_sha1": "0f67e97fd01612006ab3536cdaf6cfb0d1e7f279" + "file_name": "identity.json" }, "oaast_sft_zh": { "file_name": "oaast_sft_zh.json", - "file_sha1": "a6a91f18f80f37b10ded9cf633fb50c033bf7b9f", "columns": { "prompt": "instruction", "query": "input", @@ -31,7 +25,6 @@ }, "lima": { "file_name": "lima.json", - "file_sha1": "9db59f6b7007dc4b17529fc63379b9cd61640f37", "columns": { "prompt": "instruction", "query": "input", @@ -41,7 +34,6 @@ }, "glaive_toolcall": { "file_name": "glaive_toolcall_10k.json", - "file_sha1": "36aea64548fbf6aa300bef411b9221092ed84902", "formatting": "sharegpt", "columns": { "messages": "conversations", @@ -50,7 +42,6 @@ }, "mllm_demo": { "file_name": "mllm_demo.json", - "file_sha1": "d626cc0ad88a26d0dc9fcb47336821cf486d8bcc", "formatting": "sharegpt", "columns": { "messages": "messages", @@ -308,7 +299,6 @@ }, "oaast_rm_zh": { "file_name": "oaast_rm_zh.json", - "file_sha1": "1065af1f3784dd61be5e79713a35f427b713a232", "columns": { "prompt": "instruction", "query": "input", @@ -319,17 +309,14 @@ }, "comparison_gpt4_en": { "file_name": "comparison_gpt4_data_en.json", - "file_sha1": "96fa18313544e22444fe20eead7754b17da452ae", "ranking": true }, "comparison_gpt4_zh": { "file_name": "comparison_gpt4_data_zh.json", - "file_sha1": "515b18ed497199131ddcc1af950345c11dc5c7fd", "ranking": true }, "orca_rlhf": { "file_name": "orca_rlhf.json", - "file_sha1": "acc8f74d16fd1fc4f68e7d86eaa781c2c3f5ba8e", "ranking": true, "columns": { "prompt": "question", @@ -370,14 +357,12 @@ }, "wiki_demo": { "file_name": "wiki_demo.txt", - "file_sha1": "e70375e28eda542a90c68213640cc371898ce181", "columns": { "prompt": "text" } }, "c4_demo": { "file_name": "c4_demo.json", - "file_sha1": "a5a0c86759732f9a5238e447fecd74f28a66cca8", "columns": { "prompt": "text" } diff --git a/src/llmtuner/data/loader.py b/src/llmtuner/data/loader.py index ca0d5407..3cc01b0d 100644 --- a/src/llmtuner/data/loader.py +++ b/src/llmtuner/data/loader.py @@ -11,7 +11,7 @@ from .aligner import align_dataset from .parser import get_dataset_list from .preprocess import get_preprocess_and_print_func from .template import get_template_and_fix_tokenizer -from .utils import checksum, merge_dataset +from .utils import merge_dataset if TYPE_CHECKING: @@ -61,8 +61,6 @@ def load_single_dataset( if data_path is None: raise 
ValueError("File extension must be txt, csv, json or jsonl.") - - checksum(data_files, dataset_attr.file_sha1) else: raise NotImplementedError diff --git a/src/llmtuner/data/parser.py b/src/llmtuner/data/parser.py index 01a417a9..3170fd8a 100644 --- a/src/llmtuner/data/parser.py +++ b/src/llmtuner/data/parser.py @@ -21,7 +21,6 @@ class DatasetAttr: load_from: Literal["hf_hub", "ms_hub", "script", "file"] dataset_name: str """ extra configs """ - file_sha1: Optional[str] = None subset: Optional[str] = None folder: Optional[str] = None ranking: bool = False @@ -99,7 +98,6 @@ def get_dataset_list(data_args: "DataArguments") -> List["DatasetAttr"]: else: dataset_attr = DatasetAttr("file", dataset_name=dataset_info[name]["file_name"]) - dataset_attr.set_attr("file_sha1", dataset_info[name]) dataset_attr.set_attr("subset", dataset_info[name]) dataset_attr.set_attr("folder", dataset_info[name]) dataset_attr.set_attr("ranking", dataset_info[name], default=False) diff --git a/src/llmtuner/data/utils.py b/src/llmtuner/data/utils.py index dc189609..29fd4ad4 100644 --- a/src/llmtuner/data/utils.py +++ b/src/llmtuner/data/utils.py @@ -26,21 +26,6 @@ class Role(str, Enum): OBSERVATION = "observation" -def checksum(data_files: List[str], file_sha1: Optional[str] = None) -> None: - if file_sha1 is None: - logger.warning("Checksum failed: missing SHA-1 hash value in dataset_info.json.") - return - - if len(data_files) != 1: - logger.warning("Checksum failed: too many files.") - return - - with open(data_files[0], "rb") as f: - sha1 = hashlib.sha1(f.read()).hexdigest() - if sha1 != file_sha1: - logger.warning("Checksum failed: mismatched SHA-1 hash value at {}.".format(data_files[0])) - - def infer_max_len(source_len: int, target_len: int, max_len: int, reserved_label_len: int) -> Tuple[int, int]: max_target_len = int(max_len * (target_len / (source_len + target_len))) max_target_len = max(max_target_len, reserved_label_len) diff --git a/src/llmtuner/webui/interface.py b/src/llmtuner/webui/interface.py index bbd91bb7..91709d40 100644 --- a/src/llmtuner/webui/interface.py +++ b/src/llmtuner/webui/interface.py @@ -71,10 +71,12 @@ def create_web_demo() -> gr.Blocks: def run_web_ui() -> None: + gradio_share = bool(int(os.environ.get("GRADIO_SHARE", "0"))) server_name = os.environ.get("GRADIO_SERVER_NAME", "0.0.0.0") - create_ui().queue().launch(server_name=server_name) + create_ui().queue().launch(share=gradio_share, server_name=server_name) def run_web_demo() -> None: + gradio_share = bool(int(os.environ.get("GRADIO_SHARE", "0"))) server_name = os.environ.get("GRADIO_SERVER_NAME", "0.0.0.0") - create_web_demo().queue().launch(server_name=server_name) + create_web_demo().queue().launch(share=gradio_share, server_name=server_name) diff --git a/src/webui.py b/src/webui.py index 000098d1..3f8690d0 100644 --- a/src/webui.py +++ b/src/webui.py @@ -4,8 +4,9 @@ from llmtuner.webui.interface import create_ui def main(): + gradio_share = bool(int(os.environ.get("GRADIO_SHARE", "0"))) server_name = os.environ.get("GRADIO_SERVER_NAME", "0.0.0.0") - create_ui().queue().launch(server_name=server_name) + create_ui().queue().launch(share=gradio_share, server_name=server_name) if __name__ == "__main__": From 5f61ae6a5d2703a73f53ca1ece380198d798d19d Mon Sep 17 00:00:00 2001 From: hiyouga Date: Sun, 12 May 2024 01:25:16 +0800 Subject: [PATCH 08/14] fix #3658 Former-commit-id: 4777efe517c05a599f0ccdf9ccf760f3986d126e --- src/llmtuner/extras/callbacks.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git 
a/src/llmtuner/extras/callbacks.py b/src/llmtuner/extras/callbacks.py index a142928a..6d24b244 100644 --- a/src/llmtuner/extras/callbacks.py +++ b/src/llmtuner/extras/callbacks.py @@ -139,13 +139,15 @@ class LogCallback(TrainerCallback): r""" Event called after an evaluation phase. """ - self._close_thread_pool() + if not self.do_train: + self._close_thread_pool() def on_predict(self, args: "TrainingArguments", state: "TrainerState", control: "TrainerControl", **kwargs): r""" Event called after a successful prediction. """ - self._close_thread_pool() + if not self.do_train: + self._close_thread_pool() def on_log(self, args: "TrainingArguments", state: "TrainerState", control: "TrainerControl", **kwargs): r""" From c627d358a9c6817c5cda6b9a39d22f08781c198f Mon Sep 17 00:00:00 2001 From: hiyouga Date: Sun, 12 May 2024 01:28:51 +0800 Subject: [PATCH 09/14] lint Former-commit-id: 482d412dd961896b362ef574a3df5b2d58003327 --- src/llmtuner/data/utils.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/llmtuner/data/utils.py b/src/llmtuner/data/utils.py index 29fd4ad4..aaa5bdc0 100644 --- a/src/llmtuner/data/utils.py +++ b/src/llmtuner/data/utils.py @@ -1,6 +1,5 @@ -import hashlib from enum import Enum, unique -from typing import TYPE_CHECKING, Dict, List, Optional, Tuple, Union +from typing import TYPE_CHECKING, Dict, List, Tuple, Union from datasets import concatenate_datasets, interleave_datasets From 93cbb0e2fc01c85063ccf80536b77891b0706889 Mon Sep 17 00:00:00 2001 From: Tendo33 Date: Mon, 13 May 2024 09:40:33 +0800 Subject: [PATCH 10/14] ruff check scripts src tests --fix Former-commit-id: b2bf7f5724f7962fef6b6d9d82c7a5bea9cbae47 --- src/llmtuner/api/app.py | 12 +++---- src/llmtuner/api/chat.py | 67 ++++++++++++++-------------------------- 2 files changed, 27 insertions(+), 52 deletions(-) diff --git a/src/llmtuner/api/app.py b/src/llmtuner/api/app.py index 5936955b..6d06d1d0 100644 --- a/src/llmtuner/api/app.py +++ b/src/llmtuner/api/app.py @@ -56,8 +56,7 @@ def create_app(chat_model: "ChatModel") -> "FastAPI": async def verify_api_key(auth: Annotated[Optional[HTTPAuthorizationCredentials], Depends(security)]): if api_key and (auth is None or auth.credentials != api_key): - raise HTTPException( - status_code=status.HTTP_401_UNAUTHORIZED, detail="Invalid API key.") + raise HTTPException(status_code=status.HTTP_401_UNAUTHORIZED, detail="Invalid API key.") @app.get( "/v1/models", @@ -77,12 +76,10 @@ def create_app(chat_model: "ChatModel") -> "FastAPI": ) async def create_chat_completion(request: ChatCompletionRequest): if not chat_model.engine.can_generate: - raise HTTPException( - status_code=status.HTTP_405_METHOD_NOT_ALLOWED, detail="Not allowed") + raise HTTPException(status_code=status.HTTP_405_METHOD_NOT_ALLOWED, detail="Not allowed") if request.stream: - generate = create_stream_chat_completion_response( - request, chat_model) + generate = create_stream_chat_completion_response(request, chat_model) return EventSourceResponse(generate, media_type="text/event-stream") else: return await create_chat_completion_response(request, chat_model) @@ -95,8 +92,7 @@ def create_app(chat_model: "ChatModel") -> "FastAPI": ) async def create_score_evaluation(request: ScoreEvaluationRequest): if chat_model.engine.can_generate: - raise HTTPException( - status_code=status.HTTP_405_METHOD_NOT_ALLOWED, detail="Not allowed") + raise HTTPException(status_code=status.HTTP_405_METHOD_NOT_ALLOWED, detail="Not allowed") return await create_score_evaluation_response(request, chat_model) diff 
--git a/src/llmtuner/api/chat.py b/src/llmtuner/api/chat.py index 3ab473d1..76ddc88d 100644 --- a/src/llmtuner/api/chat.py +++ b/src/llmtuner/api/chat.py @@ -3,8 +3,8 @@ import uuid from typing import TYPE_CHECKING, AsyncGenerator, Dict, List, Optional, Tuple from ..data import Role as DataRole -from ..extras.packages import is_fastapi_available from ..extras.logging import get_logger +from ..extras.packages import is_fastapi_available from .common import dictify, jsonify from .protocol import ( ChatCompletionMessage, @@ -20,6 +20,7 @@ from .protocol import ( ScoreEvaluationResponse, ) + logger = get_logger(__name__) if is_fastapi_available(): @@ -41,13 +42,11 @@ ROLE_MAPPING = { def _process_request(request: "ChatCompletionRequest") -> Tuple[List[Dict[str, str]], str, str]: - params = dictify(request) logger.info(f"==== request ====\n{params}") if len(request.messages) == 0: - raise HTTPException( - status_code=status.HTTP_400_BAD_REQUEST, detail="Invalid length") + raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Invalid length") if request.messages[0].role == Role.SYSTEM: system = request.messages.pop(0).content @@ -55,37 +54,29 @@ def _process_request(request: "ChatCompletionRequest") -> Tuple[List[Dict[str, s system = "" if len(request.messages) % 2 == 0: - raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, - detail="Only supports u/a/u/a/u...") + raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Only supports u/a/u/a/u...") input_messages = [] for i, message in enumerate(request.messages): if i % 2 == 0 and message.role not in [Role.USER, Role.TOOL]: - raise HTTPException( - status_code=status.HTTP_400_BAD_REQUEST, detail="Invalid role") + raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Invalid role") elif i % 2 == 1 and message.role not in [Role.ASSISTANT, Role.FUNCTION]: - raise HTTPException( - status_code=status.HTTP_400_BAD_REQUEST, detail="Invalid role") + raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Invalid role") if message.role == Role.ASSISTANT and isinstance(message.tool_calls, list) and len(message.tool_calls): name = message.tool_calls[0].function.name arguments = message.tool_calls[0].function.arguments - content = json.dumps( - {"name": name, "argument": arguments}, ensure_ascii=False) - input_messages.append( - {"role": ROLE_MAPPING[Role.FUNCTION], "content": content}) + content = json.dumps({"name": name, "argument": arguments}, ensure_ascii=False) + input_messages.append({"role": ROLE_MAPPING[Role.FUNCTION], "content": content}) else: - input_messages.append( - {"role": ROLE_MAPPING[message.role], "content": message.content}) + input_messages.append({"role": ROLE_MAPPING[message.role], "content": message.content}) tool_list = request.tools if isinstance(tool_list, list) and len(tool_list): try: - tools = json.dumps([dictify(tool.function) - for tool in tool_list], ensure_ascii=False) + tools = json.dumps([dictify(tool.function) for tool in tool_list], ensure_ascii=False) except Exception: - raise HTTPException( - status_code=status.HTTP_400_BAD_REQUEST, detail="Invalid tools") + raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Invalid tools") else: tools = "" @@ -99,10 +90,8 @@ def _create_stream_chat_completion_chunk( index: Optional[int] = 0, finish_reason: Optional["Finish"] = None, ) -> str: - choice_data = ChatCompletionStreamResponseChoice( - index=index, delta=delta, finish_reason=finish_reason) - chunk = ChatCompletionStreamResponse( - 
id=completion_id, model=model, choices=[choice_data]) + choice_data = ChatCompletionStreamResponseChoice(index=index, delta=delta, finish_reason=finish_reason) + chunk = ChatCompletionStreamResponse(id=completion_id, model=model, choices=[choice_data]) return jsonify(chunk) @@ -127,26 +116,21 @@ async def create_chat_completion_response( choices = [] for i, response in enumerate(responses): if tools: - result = chat_model.engine.template.format_tools.extract( - response.response_text) + result = chat_model.engine.template.format_tools.extract(response.response_text) else: result = response.response_text if isinstance(result, tuple): name, arguments = result function = Function(name=name, arguments=arguments) - tool_call = FunctionCall(id="call_{}".format( - uuid.uuid4().hex), function=function) - response_message = ChatCompletionMessage( - role=Role.ASSISTANT, tool_calls=[tool_call]) + tool_call = FunctionCall(id="call_{}".format(uuid.uuid4().hex), function=function) + response_message = ChatCompletionMessage(role=Role.ASSISTANT, tool_calls=[tool_call]) finish_reason = Finish.TOOL else: - response_message = ChatCompletionMessage( - role=Role.ASSISTANT, content=result) + response_message = ChatCompletionMessage(role=Role.ASSISTANT, content=result) finish_reason = Finish.STOP if response.finish_reason == "stop" else Finish.LENGTH - choices.append(ChatCompletionResponseChoice( - index=i, message=response_message, finish_reason=finish_reason)) + choices.append(ChatCompletionResponseChoice(index=i, message=response_message, finish_reason=finish_reason)) prompt_length = response.prompt_length response_length += response.response_length @@ -165,16 +149,13 @@ async def create_stream_chat_completion_response( completion_id = "chatcmpl-{}".format(uuid.uuid4().hex) input_messages, system, tools = _process_request(request) if tools: - raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, - detail="Cannot stream function calls.") + raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Cannot stream function calls.") if request.n > 1: - raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, - detail="Cannot stream multiple responses.") + raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Cannot stream multiple responses.") yield _create_stream_chat_completion_chunk( - completion_id=completion_id, model=request.model, delta=ChatCompletionMessage( - role=Role.ASSISTANT, content="") + completion_id=completion_id, model=request.model, delta=ChatCompletionMessage(role=Role.ASSISTANT, content="") ) async for new_token in chat_model.astream_chat( input_messages, @@ -188,8 +169,7 @@ async def create_stream_chat_completion_response( ): if len(new_token) != 0: yield _create_stream_chat_completion_chunk( - completion_id=completion_id, model=request.model, delta=ChatCompletionMessage( - content=new_token) + completion_id=completion_id, model=request.model, delta=ChatCompletionMessage(content=new_token) ) yield _create_stream_chat_completion_chunk( @@ -202,8 +182,7 @@ async def create_score_evaluation_response( request: "ScoreEvaluationRequest", chat_model: "ChatModel" ) -> "ScoreEvaluationResponse": if len(request.messages) == 0: - raise HTTPException( - status_code=status.HTTP_400_BAD_REQUEST, detail="Invalid request") + raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Invalid request") scores = await chat_model.aget_scores(request.messages, max_length=request.max_length) return ScoreEvaluationResponse(model=request.model, scores=scores) From 
3d6a80660e6b8fe0f54bf132a08e6df57584847a Mon Sep 17 00:00:00 2001 From: hiyouga Date: Mon, 13 May 2024 16:51:20 +0800 Subject: [PATCH 11/14] support Yi 1.5 Former-commit-id: d12b8f866aa51e5e22d2b3d29704a13308de3e5b --- README.md | 4 +-- README_zh.md | 4 +-- src/llmtuner/extras/constants.py | 39 +++++++++++++++++++++++----- src/llmtuner/model/utils/longlora.py | 2 +- 4 files changed, 38 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index 57a34dab..d260ad36 100644 --- a/README.md +++ b/README.md @@ -161,7 +161,7 @@ Compared to ChatGLM's [P-Tuning](https://github.com/THUDM/ChatGLM2-6B/tree/main/ | [Qwen1.5 (Code/MoE)](https://huggingface.co/Qwen) | 0.5B/1.8B/4B/7B/14B/32B/72B/110B | q_proj,v_proj | qwen | | [StarCoder2](https://huggingface.co/bigcode) | 3B/7B/15B | q_proj,v_proj | - | | [XVERSE](https://huggingface.co/xverse) | 7B/13B/65B | q_proj,v_proj | xverse | -| [Yi](https://huggingface.co/01-ai) | 6B/9B/34B | q_proj,v_proj | yi | +| [Yi (1/1.5)](https://huggingface.co/01-ai) | 6B/9B/34B | q_proj,v_proj | yi | | [Yuan](https://huggingface.co/IEITYuan) | 2B/51B/102B | q_proj,v_proj | yuan | > [!NOTE] @@ -487,7 +487,7 @@ If you have a project that should be incorporated, please contact via email or c This repository is licensed under the [Apache-2.0 License](LICENSE). -Please follow the model licenses to use the corresponding model weights: [Baichuan2](https://huggingface.co/baichuan-inc/Baichuan2-7B-Base/blob/main/Community%20License%20for%20Baichuan%202%20Model.pdf) / [BLOOM](https://huggingface.co/spaces/bigscience/license) / [ChatGLM3](https://github.com/THUDM/ChatGLM3/blob/main/MODEL_LICENSE) / [Command-R](https://cohere.com/c4ai-cc-by-nc-license) / [DeepSeek](https://github.com/deepseek-ai/DeepSeek-LLM/blob/main/LICENSE-MODEL) / [Falcon](https://huggingface.co/tiiuae/falcon-180B/blob/main/LICENSE.txt) / [Gemma](https://ai.google.dev/gemma/terms) / [InternLM2](https://github.com/InternLM/InternLM#license) / [LLaMA](https://github.com/facebookresearch/llama/blob/main/MODEL_CARD.md) / [LLaMA-2/LLaVA-1.5](https://ai.meta.com/llama/license/) / [LLaMA-3](https://llama.meta.com/llama3/license/) / [Mistral](LICENSE) / [OLMo](LICENSE) / [Phi-1.5/2](https://huggingface.co/microsoft/phi-1_5/resolve/main/Research%20License.docx) / [Phi-3](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct/blob/main/LICENSE) / [Qwen](https://github.com/QwenLM/Qwen/blob/main/Tongyi%20Qianwen%20LICENSE%20AGREEMENT) / [StarCoder2](https://huggingface.co/spaces/bigcode/bigcode-model-license-agreement) / [XVERSE](https://github.com/xverse-ai/XVERSE-13B/blob/main/MODEL_LICENSE.pdf) / [Yi](https://huggingface.co/01-ai/Yi-6B/blob/main/LICENSE) / [Yuan](https://github.com/IEIT-Yuan/Yuan-2.0/blob/main/LICENSE-Yuan) +Please follow the model licenses to use the corresponding model weights: [Baichuan2](https://huggingface.co/baichuan-inc/Baichuan2-7B-Base/blob/main/Community%20License%20for%20Baichuan%202%20Model.pdf) / [BLOOM](https://huggingface.co/spaces/bigscience/license) / [ChatGLM3](https://github.com/THUDM/ChatGLM3/blob/main/MODEL_LICENSE) / [Command-R](https://cohere.com/c4ai-cc-by-nc-license) / [DeepSeek](https://github.com/deepseek-ai/DeepSeek-LLM/blob/main/LICENSE-MODEL) / [Falcon](https://huggingface.co/tiiuae/falcon-180B/blob/main/LICENSE.txt) / [Gemma](https://ai.google.dev/gemma/terms) / [InternLM2](https://github.com/InternLM/InternLM#license) / [LLaMA](https://github.com/facebookresearch/llama/blob/main/MODEL_CARD.md) / [LLaMA-2 (LLaVA-1.5)](https://ai.meta.com/llama/license/) / 
[LLaMA-3](https://llama.meta.com/llama3/license/) / [Mistral](LICENSE) / [OLMo](LICENSE) / [Phi-1.5/2](https://huggingface.co/microsoft/phi-1_5/resolve/main/Research%20License.docx) / [Phi-3](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct/blob/main/LICENSE) / [Qwen](https://github.com/QwenLM/Qwen/blob/main/Tongyi%20Qianwen%20LICENSE%20AGREEMENT) / [StarCoder2](https://huggingface.co/spaces/bigcode/bigcode-model-license-agreement) / [XVERSE](https://github.com/xverse-ai/XVERSE-13B/blob/main/MODEL_LICENSE.pdf) / [Yi](https://huggingface.co/01-ai/Yi-6B/blob/main/LICENSE) / [Yi-1.5](LICENSE) / [Yuan](https://github.com/IEIT-Yuan/Yuan-2.0/blob/main/LICENSE-Yuan) ## Citation diff --git a/README_zh.md b/README_zh.md index 047b1645..8912d5e1 100644 --- a/README_zh.md +++ b/README_zh.md @@ -161,7 +161,7 @@ https://github.com/hiyouga/LLaMA-Factory/assets/16256802/ec36a9dd-37f4-4f72-81bd | [Qwen1.5 (Code/MoE)](https://huggingface.co/Qwen) | 0.5B/1.8B/4B/7B/14B/32B/72B/110B | q_proj,v_proj | qwen | | [StarCoder2](https://huggingface.co/bigcode) | 3B/7B/15B | q_proj,v_proj | - | | [XVERSE](https://huggingface.co/xverse) | 7B/13B/65B | q_proj,v_proj | xverse | -| [Yi](https://huggingface.co/01-ai) | 6B/9B/34B | q_proj,v_proj | yi | +| [Yi (1/1.5)](https://huggingface.co/01-ai) | 6B/9B/34B | q_proj,v_proj | yi | | [Yuan](https://huggingface.co/IEITYuan) | 2B/51B/102B | q_proj,v_proj | yuan | > [!NOTE] @@ -487,7 +487,7 @@ export USE_MODELSCOPE_HUB=1 # Windows 使用 `set USE_MODELSCOPE_HUB=1` 本仓库的代码依照 [Apache-2.0](LICENSE) 协议开源。 -使用模型权重时,请遵循对应的模型协议:[Baichuan2](https://huggingface.co/baichuan-inc/Baichuan2-7B-Base/blob/main/Community%20License%20for%20Baichuan%202%20Model.pdf) / [BLOOM](https://huggingface.co/spaces/bigscience/license) / [ChatGLM3](https://github.com/THUDM/ChatGLM3/blob/main/MODEL_LICENSE) / [Command-R](https://cohere.com/c4ai-cc-by-nc-license) / [DeepSeek](https://github.com/deepseek-ai/DeepSeek-LLM/blob/main/LICENSE-MODEL) / [Falcon](https://huggingface.co/tiiuae/falcon-180B/blob/main/LICENSE.txt) / [Gemma](https://ai.google.dev/gemma/terms) / [InternLM2](https://github.com/InternLM/InternLM#license) / [LLaMA](https://github.com/facebookresearch/llama/blob/main/MODEL_CARD.md) / [LLaMA-2/LLaVA-1.5](https://ai.meta.com/llama/license/) / [LLaMA-3](https://llama.meta.com/llama3/license/) / [Mistral](LICENSE) / [OLMo](LICENSE) / [Phi-1.5/2](https://huggingface.co/microsoft/phi-1_5/resolve/main/Research%20License.docx) / [Phi-3](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct/blob/main/LICENSE) / [Qwen](https://github.com/QwenLM/Qwen/blob/main/Tongyi%20Qianwen%20LICENSE%20AGREEMENT) / [StarCoder2](https://huggingface.co/spaces/bigcode/bigcode-model-license-agreement) / [XVERSE](https://github.com/xverse-ai/XVERSE-13B/blob/main/MODEL_LICENSE.pdf) / [Yi](https://huggingface.co/01-ai/Yi-6B/blob/main/LICENSE) / [Yuan](https://github.com/IEIT-Yuan/Yuan-2.0/blob/main/LICENSE-Yuan) +使用模型权重时,请遵循对应的模型协议:[Baichuan2](https://huggingface.co/baichuan-inc/Baichuan2-7B-Base/blob/main/Community%20License%20for%20Baichuan%202%20Model.pdf) / [BLOOM](https://huggingface.co/spaces/bigscience/license) / [ChatGLM3](https://github.com/THUDM/ChatGLM3/blob/main/MODEL_LICENSE) / [Command-R](https://cohere.com/c4ai-cc-by-nc-license) / [DeepSeek](https://github.com/deepseek-ai/DeepSeek-LLM/blob/main/LICENSE-MODEL) / [Falcon](https://huggingface.co/tiiuae/falcon-180B/blob/main/LICENSE.txt) / [Gemma](https://ai.google.dev/gemma/terms) / [InternLM2](https://github.com/InternLM/InternLM#license) / 
[LLaMA](https://github.com/facebookresearch/llama/blob/main/MODEL_CARD.md) / [LLaMA-2 (LLaVA-1.5)](https://ai.meta.com/llama/license/) / [LLaMA-3](https://llama.meta.com/llama3/license/) / [Mistral](LICENSE) / [OLMo](LICENSE) / [Phi-1.5/2](https://huggingface.co/microsoft/phi-1_5/resolve/main/Research%20License.docx) / [Phi-3](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct/blob/main/LICENSE) / [Qwen](https://github.com/QwenLM/Qwen/blob/main/Tongyi%20Qianwen%20LICENSE%20AGREEMENT) / [StarCoder2](https://huggingface.co/spaces/bigcode/bigcode-model-license-agreement) / [XVERSE](https://github.com/xverse-ai/XVERSE-13B/blob/main/MODEL_LICENSE.pdf) / [Yi](https://huggingface.co/01-ai/Yi-6B/blob/main/LICENSE) / [Yi-1.5](LICENSE) / [Yuan](https://github.com/IEIT-Yuan/Yuan-2.0/blob/main/LICENSE-Yuan) ## 引用 diff --git a/src/llmtuner/extras/constants.py b/src/llmtuner/extras/constants.py index 50c78b3f..ff52f29a 100644 --- a/src/llmtuner/extras/constants.py +++ b/src/llmtuner/extras/constants.py @@ -320,14 +320,14 @@ register_model_group( DownloadSource.DEFAULT: "deepseek-ai/deepseek-moe-16b-base", DownloadSource.MODELSCOPE: "deepseek-ai/deepseek-moe-16b-base", }, + "DeepSeek-MoE-236B-Base": { + DownloadSource.DEFAULT: "deepseek-ai/DeepSeek-V2", + DownloadSource.MODELSCOPE: "deepseek-ai/DeepSeek-V2", + }, "DeepSeek-MoE-16B-Chat": { DownloadSource.DEFAULT: "deepseek-ai/deepseek-moe-16b-chat", DownloadSource.MODELSCOPE: "deepseek-ai/deepseek-moe-16b-chat", }, - "DeepSeek-MoE-236B": { - DownloadSource.DEFAULT: "deepseek-ai/DeepSeek-V2", - DownloadSource.MODELSCOPE: "deepseek-ai/DeepSeek-V2", - }, "DeepSeek-MoE-236B-Chat": { DownloadSource.DEFAULT: "deepseek-ai/DeepSeek-V2-Chat", DownloadSource.MODELSCOPE: "deepseek-ai/DeepSeek-V2-Chat", @@ -424,13 +424,13 @@ register_model_group( register_model_group( models={ "CodeGemma-2B": { - DownloadSource.DEFAULT: "google/codegemma-2b", + DownloadSource.DEFAULT: "google/codegemma-1.1-2b", }, "CodeGemma-7B": { DownloadSource.DEFAULT: "google/codegemma-7b", }, "CodeGemma-7B-Chat": { - DownloadSource.DEFAULT: "google/codegemma-7b-it", + DownloadSource.DEFAULT: "google/codegemma-1.1-7b-it", DownloadSource.MODELSCOPE: "AI-ModelScope/codegemma-7b-it", }, }, @@ -581,6 +581,9 @@ register_model_group( DownloadSource.DEFAULT: "shenzhi-wang/Llama3-8B-Chinese-Chat", DownloadSource.MODELSCOPE: "LLM-Research/Llama3-8B-Chinese-Chat", }, + "LLaMA3-70B-Chinese-Chat": { + DownloadSource.DEFAULT: "shenzhi-wang/Llama3-70B-Chinese-Chat", + }, }, template="llama3", ) @@ -1174,6 +1177,30 @@ register_model_group( DownloadSource.DEFAULT: "01-ai/Yi-34B-Chat-4bits", DownloadSource.MODELSCOPE: "01ai/Yi-34B-Chat-4bits", }, + "Yi-1.5-6B": { + DownloadSource.DEFAULT: "01-ai/Yi-1.5-6B", + DownloadSource.MODELSCOPE: "01ai/Yi-1.5-6B", + }, + "Yi-1.5-9B": { + DownloadSource.DEFAULT: "01-ai/Yi-1.5-9B", + DownloadSource.MODELSCOPE: "01ai/Yi-1.5-9B", + }, + "Yi-1.5-34B": { + DownloadSource.DEFAULT: "01-ai/Yi-1.5-34B", + DownloadSource.MODELSCOPE: "01ai/Yi-1.5-34B", + }, + "Yi-1.5-6B-Chat": { + DownloadSource.DEFAULT: "01-ai/Yi-1.5-6B-Chat", + DownloadSource.MODELSCOPE: "01ai/Yi-1.5-6B-Chat", + }, + "Yi-1.5-9B-Chat": { + DownloadSource.DEFAULT: "01-ai/Yi-1.5-9B-Chat", + DownloadSource.MODELSCOPE: "01ai/Yi-1.5-9B-Chat", + }, + "Yi-1.5-34B-Chat": { + DownloadSource.DEFAULT: "01-ai/Yi-1.5-34B-Chat", + DownloadSource.MODELSCOPE: "01ai/Yi-1.5-34B-Chat", + }, }, template="yi", ) diff --git a/src/llmtuner/model/utils/longlora.py b/src/llmtuner/model/utils/longlora.py index c91febdd..a11351f1 100644 
--- a/src/llmtuner/model/utils/longlora.py +++ b/src/llmtuner/model/utils/longlora.py @@ -302,7 +302,7 @@ def llama_sdpa_attention_forward( def _apply_llama_patch() -> None: - require_version("transformers==4.40.1", "To fix: pip install transformers==4.40.1") + require_version("transformers==4.40.2", "To fix: pip install transformers==4.40.2") LlamaAttention.forward = llama_attention_forward LlamaFlashAttention2.forward = llama_flash_attention_2_forward LlamaSdpaAttention.forward = llama_sdpa_attention_forward From 414049ba20f0916d7814973f66b125e70f508799 Mon Sep 17 00:00:00 2001 From: hiyouga Date: Mon, 13 May 2024 18:24:35 +0800 Subject: [PATCH 12/14] fix #3702 Former-commit-id: c27afa296bc8ae2b0611b3248200e88e4c185aef --- README.md | 48 ++++++++++++++++++----------------- README_zh.md | 48 ++++++++++++++++++----------------- src/llmtuner/api/chat.py | 6 ++--- src/llmtuner/api/common.py | 6 ++--- src/llmtuner/data/template.py | 2 +- 5 files changed, 56 insertions(+), 54 deletions(-) diff --git a/README.md b/README.md index d260ad36..90fcb295 100644 --- a/README.md +++ b/README.md @@ -70,57 +70,59 @@ Compared to ChatGLM's [P-Tuning](https://github.com/THUDM/ChatGLM2-6B/tree/main/ ## Changelog -[24/04/26] We supported fine-tuning the **LLaVA-1.5** multimodal LLMs. See `examples/lora_single_gpu/sft_mllm.sh` for usage. +[24/05/13] We supported fine-tuning the **Yi-1.5** series models. + +[24/04/26] We supported fine-tuning the **LLaVA-1.5** multimodal LLMs. See [examples](examples/README.md) for usage. [24/04/22] We provided a **[Colab notebook](https://colab.research.google.com/drive/1eRTPn37ltBbYsISy9Aw2NuI2Aq5CQrD9?usp=sharing)** for fine-tuning the Llama-3 model on a free T4 GPU. Two Llama-3-derived models fine-tuned using LLaMA Factory are available at Hugging Face, check [Llama3-8B-Chinese-Chat](https://huggingface.co/shenzhi-wang/Llama3-8B-Chinese-Chat) and [Llama3-Chinese](https://huggingface.co/zhichen/Llama3-Chinese) for details. -[24/04/21] We supported **[Mixture-of-Depths](https://arxiv.org/abs/2404.02258)** according to [AstraMindAI's implementation](https://github.com/astramind-ai/Mixture-of-depths). See `examples/extras/mod` for usage. +
Full Changelog -[24/04/16] We supported **[BAdam](https://arxiv.org/abs/2404.02827)**. See `examples/extras/badam` for usage. +[24/04/21] We supported **[Mixture-of-Depths](https://arxiv.org/abs/2404.02258)** according to [AstraMindAI's implementation](https://github.com/astramind-ai/Mixture-of-depths). See [examples](examples/README.md) for usage. + +[24/04/16] We supported **[BAdam](https://arxiv.org/abs/2404.02827)**. See [examples](examples/README.md) for usage. [24/04/16] We supported **[unsloth](https://github.com/unslothai/unsloth)**'s long-sequence training (Llama-2-7B-56k within 24GB). It achieves **117%** speed and **50%** memory compared with FlashAttention-2, more benchmarks can be found in [this page](https://github.com/hiyouga/LLaMA-Factory/wiki/Performance-comparison). -
Full Changelog - -[24/03/31] We supported **[ORPO](https://arxiv.org/abs/2403.07691)**. See `examples/lora_single_gpu` for usage. +[24/03/31] We supported **[ORPO](https://arxiv.org/abs/2403.07691)**. See [examples](examples/README.md) for usage. [24/03/21] Our paper "[LlamaFactory: Unified Efficient Fine-Tuning of 100+ Language Models](https://arxiv.org/abs/2403.13372)" is available at arXiv! -[24/03/20] We supported **FSDP+QLoRA** that fine-tunes a 70B model on 2x24GB GPUs. See `examples/extras/fsdp_qlora` for usage. +[24/03/20] We supported **FSDP+QLoRA** that fine-tunes a 70B model on 2x24GB GPUs. See [examples](examples/README.md) for usage. -[24/03/13] We supported **[LoRA+](https://arxiv.org/abs/2402.12354)**. See `examples/extras/loraplus` for usage. +[24/03/13] We supported **[LoRA+](https://arxiv.org/abs/2402.12354)**. See [examples](examples/README.md) for usage. -[24/03/07] We supported gradient low-rank projection (**[GaLore](https://arxiv.org/abs/2403.03507)**) algorithm. See `examples/extras/galore` for usage. +[24/03/07] We supported gradient low-rank projection (**[GaLore](https://arxiv.org/abs/2403.03507)**) algorithm. See [examples](examples/README.md) for usage. -[24/03/07] We integrated **[vLLM](https://github.com/vllm-project/vllm)** for faster and concurrent inference. Try `--infer_backend vllm` to enjoy **270%** inference speed. (LoRA is not yet supported, merge it first.) +[24/03/07] We integrated **[vLLM](https://github.com/vllm-project/vllm)** for faster and concurrent inference. Try `infer_backend: vllm` to enjoy **270%** inference speed. -[24/02/28] We supported weight-decomposed LoRA (**[DoRA](https://arxiv.org/abs/2402.09353)**). Try `--use_dora` to activate DoRA training. +[24/02/28] We supported weight-decomposed LoRA (**[DoRA](https://arxiv.org/abs/2402.09353)**). Try `use_dora: true` to activate DoRA training. -[24/02/15] We supported **block expansion** proposed by [LLaMA Pro](https://github.com/TencentARC/LLaMA-Pro). See `examples/extras/llama_pro` for usage. +[24/02/15] We supported **block expansion** proposed by [LLaMA Pro](https://github.com/TencentARC/LLaMA-Pro). See [examples](examples/README.md) for usage. [24/02/05] Qwen1.5 (Qwen2 beta version) series models are supported in LLaMA-Factory. Check this [blog post](https://qwenlm.github.io/blog/qwen1.5/) for details. -[24/01/18] We supported **agent tuning** for most models, equipping model with tool using abilities by fine-tuning with `--dataset glaive_toolcall`. +[24/01/18] We supported **agent tuning** for most models, equipping model with tool using abilities by fine-tuning with `dataset: glaive_toolcall`. -[23/12/23] We supported **[unsloth](https://github.com/unslothai/unsloth)**'s implementation to boost LoRA tuning for the LLaMA, Mistral and Yi models. Try `--use_unsloth` argument to activate unsloth patch. It achieves **170%** speed in our benchmark, check [this page](https://github.com/hiyouga/LLaMA-Factory/wiki/Performance-comparison) for details. +[23/12/23] We supported **[unsloth](https://github.com/unslothai/unsloth)**'s implementation to boost LoRA tuning for the LLaMA, Mistral and Yi models. Try `use_unsloth: true` argument to activate unsloth patch. It achieves **170%** speed in our benchmark, check [this page](https://github.com/hiyouga/LLaMA-Factory/wiki/Performance-comparison) for details. [23/12/12] We supported fine-tuning the latest MoE model **[Mixtral 8x7B](https://huggingface.co/mistralai/Mixtral-8x7B-v0.1)** in our framework. 
See hardware requirement [here](#hardware-requirement). -[23/12/01] We supported downloading pre-trained models and datasets from the **[ModelScope Hub](https://modelscope.cn/models)** for Chinese mainland users. See [this tutorial](#use-modelscope-hub-optional) for usage. +[23/12/01] We supported downloading pre-trained models and datasets from the **[ModelScope Hub](https://modelscope.cn/models)** for Chinese mainland users. See [this tutorial](#download-from-modelscope-hub) for usage. -[23/10/21] We supported **[NEFTune](https://arxiv.org/abs/2310.05914)** trick for fine-tuning. Try `--neftune_noise_alpha` argument to activate NEFTune, e.g., `--neftune_noise_alpha 5`. +[23/10/21] We supported **[NEFTune](https://arxiv.org/abs/2310.05914)** trick for fine-tuning. Try `neftune_noise_alpha: 5` argument to activate NEFTune. -[23/09/27] We supported **$S^2$-Attn** proposed by [LongLoRA](https://github.com/dvlab-research/LongLoRA) for the LLaMA models. Try `--shift_attn` argument to enable shift short attention. +[23/09/27] We supported **$S^2$-Attn** proposed by [LongLoRA](https://github.com/dvlab-research/LongLoRA) for the LLaMA models. Try `shift_attn: true` argument to enable shift short attention. -[23/09/23] We integrated MMLU, C-Eval and CMMLU benchmarks in this repo. See [this example](#evaluation) to evaluate your models. +[23/09/23] We integrated MMLU, C-Eval and CMMLU benchmarks in this repo. See [examples](examples/README.md) for usage. -[23/09/10] We supported **[FlashAttention-2](https://github.com/Dao-AILab/flash-attention)**. Try `--flash_attn fa2` argument to enable FlashAttention-2 if you are using RTX4090, A100 or H100 GPUs. +[23/09/10] We supported **[FlashAttention-2](https://github.com/Dao-AILab/flash-attention)**. Try `flash_attn: fa2` argument to enable FlashAttention-2 if you are using RTX4090, A100 or H100 GPUs. -[23/08/12] We supported **RoPE scaling** to extend the context length of the LLaMA models. Try `--rope_scaling linear` argument in training and `--rope_scaling dynamic` argument at inference to extrapolate the position embeddings. +[23/08/12] We supported **RoPE scaling** to extend the context length of the LLaMA models. Try `rope_scaling: linear` argument in training and `rope_scaling: dynamic` argument at inference to extrapolate the position embeddings. -[23/08/11] We supported **[DPO training](https://arxiv.org/abs/2305.18290)** for instruction-tuned models. See [this example](#dpo-training) to train your models. +[23/08/11] We supported **[DPO training](https://arxiv.org/abs/2305.18290)** for instruction-tuned models. See [examples](examples/README.md) for usage. -[23/07/31] We supported **dataset streaming**. Try `--streaming` and `--max_steps 10000` arguments to load your dataset in streaming mode. +[23/07/31] We supported **dataset streaming**. Try `streaming: true` and `max_steps: 10000` arguments to load your dataset in streaming mode. [23/07/29] We released two instruction-tuned 13B models at Hugging Face. See these Hugging Face Repos ([LLaMA-2](https://huggingface.co/hiyouga/Llama-2-Chinese-13b-chat) / [Baichuan](https://huggingface.co/hiyouga/Baichuan-13B-sft)) for details. @@ -132,7 +134,7 @@ Compared to ChatGLM's [P-Tuning](https://github.com/THUDM/ChatGLM2-6B/tree/main/ [23/06/22] We aligned the [demo API](src/api_demo.py) with the [OpenAI's](https://platform.openai.com/docs/api-reference/chat) format where you can insert the fine-tuned model in **arbitrary ChatGPT-based applications**. 
-[23/06/03] We supported quantized training and inference (aka **[QLoRA](https://github.com/artidoro/qlora)**). Try `--quantization_bit 4/8` argument to work with quantized models. +[23/06/03] We supported quantized training and inference (aka **[QLoRA](https://github.com/artidoro/qlora)**). See [examples](examples/README.md) for usage.
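Editor's note: the README changelog above swaps the legacy `--flag value` CLI arguments for YAML-style keys (`flash_attn: fa2`, `use_dora: true`, `neftune_noise_alpha: 5`, `streaming: true`, and so on) that live in a config file passed to `llamafactory-cli train`. As a minimal sketch only, the snippet below composes such a config in Python and writes it to disk; the combination of keys is purely illustrative, and `model_name_or_path` / `output_dir` are placeholders rather than values taken from this patch series.

```python
# Illustrative sketch: assemble a config using the key-value argument style
# referenced in the changelog and dump it to YAML for `llamafactory-cli train`.
# Keys such as flash_attn, use_dora, neftune_noise_alpha and streaming come from
# the changelog entries above; model_name_or_path and output_dir are placeholders.
import yaml  # PyYAML

config = {
    "model_name_or_path": "meta-llama/Meta-Llama-3-8B-Instruct",  # placeholder model id
    "do_train": True,
    "finetuning_type": "lora",
    "lora_target": "q_proj,v_proj",
    "dataset": "identity,alpaca_gpt4_en",
    "template": "llama3",
    "flash_attn": "fa2",            # FlashAttention-2 (23/09/10)
    "use_dora": True,               # DoRA (24/02/28)
    "neftune_noise_alpha": 5,       # NEFTune (23/10/21)
    "streaming": True,              # dataset streaming (23/07/31)
    "max_steps": 10000,
    "output_dir": "saves/llama3-8b/lora/sft",  # placeholder output path
}

with open("llama3_lora_sft_custom.yaml", "w", encoding="utf-8") as f:
    yaml.safe_dump(config, f, sort_keys=False)

# Then, following examples/README.md in this series:
#   CUDA_VISIBLE_DEVICES=0 llamafactory-cli train llama3_lora_sft_custom.yaml
```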
diff --git a/README_zh.md b/README_zh.md index 8912d5e1..1d15515e 100644 --- a/README_zh.md +++ b/README_zh.md @@ -70,57 +70,59 @@ https://github.com/hiyouga/LLaMA-Factory/assets/16256802/ec36a9dd-37f4-4f72-81bd ## 更新日志 -[24/04/26] 我们支持了多模态模型 **LLaVA-1.5** 的微调。详细用法请参照 `examples/lora_single_gpu/sft_mllm.sh`。 +[24/05/13] 我们支持了 Yi-1.5 系列模型的微调。 + +[24/04/26] 我们支持了多模态模型 **LLaVA-1.5** 的微调。详细用法请参照 [examples](examples/README_zh.md)。 [24/04/22] 我们提供了在免费 T4 GPU 上微调 Llama-3 模型的 **[Colab 笔记本](https://colab.research.google.com/drive/1d5KQtbemerlSDSxZIfAaWXhKr30QypiK?usp=sharing)**。Hugging Face 社区公开了两个利用 LLaMA Factory 微调的 Llama-3 模型,详情请见 [Llama3-8B-Chinese-Chat](https://huggingface.co/shenzhi-wang/Llama3-8B-Chinese-Chat) 和 [Llama3-Chinese](https://huggingface.co/zhichen/Llama3-Chinese)。 -[24/04/21] 我们基于 [AstraMindAI 的仓库](https://github.com/astramind-ai/Mixture-of-depths)支持了 **[混合深度训练](https://arxiv.org/abs/2404.02258)**。详细用法请参照 `examples/extras/mod`。 +
展开日志 -[24/04/16] 我们支持了 **[BAdam](https://arxiv.org/abs/2404.02827)**。详细用法请参照 `examples/extras/badam`。 +[24/04/21] 我们基于 [AstraMindAI 的仓库](https://github.com/astramind-ai/Mixture-of-depths)支持了 **[混合深度训练](https://arxiv.org/abs/2404.02258)**。详细用法请参照 [examples](examples/README_zh.md)。 + +[24/04/16] 我们支持了 **[BAdam](https://arxiv.org/abs/2404.02827)**。详细用法请参照 [examples](examples/README_zh.md)。 [24/04/16] 我们支持了 **[unsloth](https://github.com/unslothai/unsloth)** 的长序列训练(24GB 可训练 Llama-2-7B-56k)。该方法相比 FlashAttention-2 提供了 **117%** 的训练速度和 **50%** 的显存节约。更多数据请见[此页面](https://github.com/hiyouga/LLaMA-Factory/wiki/Performance-comparison)。 -
展开日志 - -[24/03/31] 我们支持了 **[ORPO](https://arxiv.org/abs/2403.07691)**。详细用法请参照 `examples/lora_single_gpu`。 +[24/03/31] 我们支持了 **[ORPO](https://arxiv.org/abs/2403.07691)**。详细用法请参照 [examples](examples/README_zh.md)。 [24/03/21] 我们的论文 "[LlamaFactory: Unified Efficient Fine-Tuning of 100+ Language Models](https://arxiv.org/abs/2403.13372)" 可在 arXiv 上查看! -[24/03/20] 我们支持了能在 2x24GB GPU 上微调 70B 模型的 **FSDP+QLoRA**。详细用法请参照 `examples/extras/fsdp_qlora`。 +[24/03/20] 我们支持了能在 2x24GB GPU 上微调 70B 模型的 **FSDP+QLoRA**。详细用法请参照 [examples](examples/README_zh.md)。 -[24/03/13] 我们支持了 **[LoRA+](https://arxiv.org/abs/2402.12354)**。详细用法请参照 `examples/extras/loraplus`。 +[24/03/13] 我们支持了 **[LoRA+](https://arxiv.org/abs/2402.12354)**。详细用法请参照 [examples](examples/README_zh.md)。 -[24/03/07] 我们支持了梯度低秩投影(**[GaLore](https://arxiv.org/abs/2403.03507)**)算法。详细用法请参照 `examples/extras/galore`。 +[24/03/07] 我们支持了梯度低秩投影(**[GaLore](https://arxiv.org/abs/2403.03507)**)算法。详细用法请参照 [examples](examples/README_zh.md)。 -[24/03/07] 我们集成了 **[vLLM](https://github.com/vllm-project/vllm)** 以实现极速并发推理。请使用 `--infer_backend vllm` 来获得 **270%** 的推理速度。(尚不支持 LoRA,请先合并权重。) +[24/03/07] 我们集成了 **[vLLM](https://github.com/vllm-project/vllm)** 以实现极速并发推理。请使用 `infer_backend: vllm` 来获得 **270%** 的推理速度。 -[24/02/28] 我们支持了 **[DoRA](https://arxiv.org/abs/2402.09353)** 微调。请使用 `--use_dora` 参数进行 DoRA 微调。 +[24/02/28] 我们支持了 **[DoRA](https://arxiv.org/abs/2402.09353)** 微调。请使用 `use_dora: true` 参数进行 DoRA 微调。 -[24/02/15] 我们支持了 [LLaMA Pro](https://github.com/TencentARC/LLaMA-Pro) 提出的**块扩展**方法。详细用法请参照 `examples/extras/llama_pro`。 +[24/02/15] 我们支持了 [LLaMA Pro](https://github.com/TencentARC/LLaMA-Pro) 提出的**块扩展**方法。详细用法请参照 [examples](examples/README_zh.md)。 [24/02/05] Qwen1.5(Qwen2 测试版)系列模型已在 LLaMA-Factory 中实现微调支持。详情请查阅该[博客页面](https://qwenlm.github.io/zh/blog/qwen1.5/)。 -[24/01/18] 我们针对绝大多数模型实现了 **Agent 微调**,微调时指定 `--dataset glaive_toolcall` 即可使模型获得工具调用能力。 +[24/01/18] 我们针对绝大多数模型实现了 **Agent 微调**,微调时指定 `dataset: glaive_toolcall` 即可使模型获得工具调用能力。 -[23/12/23] 我们针对 LLaMA, Mistral 和 Yi 模型支持了 **[unsloth](https://github.com/unslothai/unsloth)** 的 LoRA 训练加速。请使用 `--use_unsloth` 参数启用 unsloth 优化。该方法可提供 **170%** 的训练速度,详情请查阅[此页面](https://github.com/hiyouga/LLaMA-Factory/wiki/Performance-comparison)。 +[23/12/23] 我们针对 LLaMA, Mistral 和 Yi 模型支持了 **[unsloth](https://github.com/unslothai/unsloth)** 的 LoRA 训练加速。请使用 `use_unsloth: true` 参数启用 unsloth 优化。该方法可提供 **170%** 的训练速度,详情请查阅[此页面](https://github.com/hiyouga/LLaMA-Factory/wiki/Performance-comparison)。 [23/12/12] 我们支持了微调最新的混合专家模型 **[Mixtral 8x7B](https://huggingface.co/mistralai/Mixtral-8x7B-v0.1)**。硬件需求请查阅[此处](#硬件依赖)。 -[23/12/01] 我们支持了从 **[魔搭社区](https://modelscope.cn/models)** 下载预训练模型和数据集。详细用法请参照 [此教程](#使用魔搭社区可跳过)。 +[23/12/01] 我们支持了从 **[魔搭社区](https://modelscope.cn/models)** 下载预训练模型和数据集。详细用法请参照 [此教程](#从魔搭社区下载)。 -[23/10/21] 我们支持了 **[NEFTune](https://arxiv.org/abs/2310.05914)** 训练技巧。请使用 `--neftune_noise_alpha` 参数启用 NEFTune,例如 `--neftune_noise_alpha 5`。 +[23/10/21] 我们支持了 **[NEFTune](https://arxiv.org/abs/2310.05914)** 训练技巧。请使用 `neftune_noise_alpha: 5` 参数启用 NEFTune。 -[23/09/27] 我们针对 LLaMA 模型支持了 [LongLoRA](https://github.com/dvlab-research/LongLoRA) 提出的 **$S^2$-Attn**。请使用 `--shift_attn` 参数以启用该功能。 +[23/09/27] 我们针对 LLaMA 模型支持了 [LongLoRA](https://github.com/dvlab-research/LongLoRA) 提出的 **$S^2$-Attn**。请使用 `shift_attn: true` 参数以启用该功能。 -[23/09/23] 我们在项目中集成了 MMLU、C-Eval 和 CMMLU 评估集。使用方法请参阅[此示例](#模型评估)。 +[23/09/23] 我们在项目中集成了 MMLU、C-Eval 和 CMMLU 评估集。详细用法请参照 [examples](examples/README_zh.md)。 -[23/09/10] 我们支持了 
**[FlashAttention-2](https://github.com/Dao-AILab/flash-attention)**。如果您使用的是 RTX4090、A100 或 H100 GPU,请使用 `--flash_attn fa2` 参数以启用 FlashAttention-2。 +[23/09/10] 我们支持了 **[FlashAttention-2](https://github.com/Dao-AILab/flash-attention)**。如果您使用的是 RTX4090、A100 或 H100 GPU,请使用 `flash_attn: fa2` 参数以启用 FlashAttention-2。 -[23/08/12] 我们支持了 **RoPE 插值**来扩展 LLaMA 模型的上下文长度。请使用 `--rope_scaling linear` 参数训练模型或使用 `--rope_scaling dynamic` 参数评估模型。 +[23/08/12] 我们支持了 **RoPE 插值**来扩展 LLaMA 模型的上下文长度。请使用 `rope_scaling: linear` 参数训练模型或使用 `rope_scaling: dynamic` 参数评估模型。 -[23/08/11] 我们支持了指令模型的 **[DPO 训练](https://arxiv.org/abs/2305.18290)**。使用方法请参阅[此示例](#dpo-训练)。 +[23/08/11] 我们支持了指令模型的 **[DPO 训练](https://arxiv.org/abs/2305.18290)**。详细用法请参照 [examples](examples/README_zh.md)。 -[23/07/31] 我们支持了**数据流式加载**。请使用 `--streaming` 和 `--max_steps 10000` 参数来流式加载数据集。 +[23/07/31] 我们支持了**数据流式加载**。请使用 `streaming: true` 和 `max_steps: 10000` 参数来流式加载数据集。 [23/07/29] 我们在 Hugging Face 发布了两个 13B 指令微调模型。详细内容请查阅我们的 Hugging Face 项目([LLaMA-2](https://huggingface.co/hiyouga/Llama-2-Chinese-13b-chat) / [Baichuan](https://huggingface.co/hiyouga/Baichuan-13B-sft))。 @@ -132,7 +134,7 @@ https://github.com/hiyouga/LLaMA-Factory/assets/16256802/ec36a9dd-37f4-4f72-81bd [23/06/22] 我们对齐了[示例 API](src/api_demo.py) 与 [OpenAI API](https://platform.openai.com/docs/api-reference/chat) 的格式,您可以将微调模型接入**任意基于 ChatGPT 的应用**中。 -[23/06/03] 我们实现了 4 比特的 LoRA 训练(也称 **[QLoRA](https://github.com/artidoro/qlora)**)。请使用 `--quantization_bit 4` 参数进行 4 比特量化微调。 +[23/06/03] 我们实现了 4 比特的 LoRA 训练(也称 **[QLoRA](https://github.com/artidoro/qlora)**)。详细用法请参照 [examples](examples/README_zh.md)。
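Editor's note: both changelogs state that the demo API (src/api_demo.py) follows the OpenAI chat-completions format, so a fine-tuned model can be plugged into ChatGPT-style applications. Below is a minimal client sketch; it assumes the API server is already running locally on port 8000 with the `/v1` path prefix, and the model name and API key are placeholders, not values taken from this patch series. With the request logging added later in this series, the server side should also print the incoming payload.

```python
# Minimal sketch of calling the OpenAI-compatible demo API with the official
# `openai` Python client (v1+). Assumptions: the server is already running on
# localhost:8000 and exposes /v1/chat/completions; "test" and "dummy-key" are
# placeholders for whatever model name / API key the deployment uses.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="dummy-key")

response = client.chat.completions.create(
    model="test",
    messages=[{"role": "user", "content": "Briefly introduce LLaMA Factory."}],
    temperature=0.7,
)
print(response.choices[0].message.content)
```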
diff --git a/src/llmtuner/api/chat.py b/src/llmtuner/api/chat.py index 76ddc88d..b7a08f0b 100644 --- a/src/llmtuner/api/chat.py +++ b/src/llmtuner/api/chat.py @@ -21,8 +21,6 @@ from .protocol import ( ) -logger = get_logger(__name__) - if is_fastapi_available(): from fastapi import HTTPException, status @@ -32,6 +30,7 @@ if TYPE_CHECKING: from .protocol import ChatCompletionRequest, ScoreEvaluationRequest +logger = get_logger(__name__) ROLE_MAPPING = { Role.USER: DataRole.USER.value, Role.ASSISTANT: DataRole.ASSISTANT.value, @@ -42,8 +41,7 @@ ROLE_MAPPING = { def _process_request(request: "ChatCompletionRequest") -> Tuple[List[Dict[str, str]], str, str]: - params = dictify(request) - logger.info(f"==== request ====\n{params}") + logger.info("==== request ====\n{}".format(json.dumps(dictify(request), indent=2, ensure_ascii=False))) if len(request.messages) == 0: raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Invalid length") diff --git a/src/llmtuner/api/common.py b/src/llmtuner/api/common.py index 3e95d211..5ad9a071 100644 --- a/src/llmtuner/api/common.py +++ b/src/llmtuner/api/common.py @@ -6,11 +6,11 @@ if TYPE_CHECKING: from pydantic import BaseModel -def dictify(data: "BaseModel", **kwargs) -> Dict[str, Any]: +def dictify(data: "BaseModel") -> Dict[str, Any]: try: # pydantic v2 - return data.model_dump(**kwargs) + return data.model_dump(exclude_unset=True) except AttributeError: # pydantic v1 - return data.dict(**kwargs) + return data.dict(exclude_unset=True) def jsonify(data: "BaseModel") -> str: diff --git a/src/llmtuner/data/template.py b/src/llmtuner/data/template.py index ada6cfcd..f716102f 100644 --- a/src/llmtuner/data/template.py +++ b/src/llmtuner/data/template.py @@ -308,7 +308,7 @@ def _get_jinja_template(template: "Template", tokenizer: "PreTrainedTokenizer") jinja_template += "{% set system_message = '" + _jinja_escape(template.default_system) + "' %}" jinja_template += ( - "{% if messages[0]['role'] == 'system' %}" "{% set system_message = messages[0]['content'] %}" "{% endif %}" + "{% if messages[0]['role'] == 'system' %}{% set system_message = messages[0]['content'] %}{% endif %}" ) system_message = _convert_slots_to_jinja(template.format_system.apply(), tokenizer, placeholder="system_message") From 68540734fb4af9e11963bb6a6fe8cade92cde020 Mon Sep 17 00:00:00 2001 From: hiyouga Date: Mon, 13 May 2024 20:09:09 +0800 Subject: [PATCH 13/14] fix #3724 Former-commit-id: 93a02454746bd39e8f284e94a18bead80f545dae --- src/llmtuner/model/utils/longlora.py | 29 ++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/src/llmtuner/model/utils/longlora.py b/src/llmtuner/model/utils/longlora.py index a11351f1..c8dc52f5 100644 --- a/src/llmtuner/model/utils/longlora.py +++ b/src/llmtuner/model/utils/longlora.py @@ -41,9 +41,9 @@ def llama_attention_forward( ) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]: bsz, q_len, _ = hidden_states.size() - query_states = self.q_proj(hidden_states) - key_states = self.k_proj(hidden_states) - value_states = self.v_proj(hidden_states) + query_states: "torch.Tensor" = self.q_proj(hidden_states) + key_states: "torch.Tensor" = self.k_proj(hidden_states) + value_states: "torch.Tensor" = self.v_proj(hidden_states) query_states = query_states.view(bsz, q_len, self.num_heads, self.head_dim).transpose(1, 2) key_states = key_states.view(bsz, q_len, self.num_key_value_heads, self.head_dim).transpose(1, 2) @@ -87,7 +87,7 @@ def llama_attention_forward( # upcast attention to 
fp32 attn_weights = nn.functional.softmax(attn_weights, dim=-1, dtype=torch.float32).to(query_states.dtype) attn_weights = nn.functional.dropout(attn_weights, p=self.attention_dropout, training=self.training) - attn_output = torch.matmul(attn_weights, value_states) # (bsz, :, seq_len, :) or (bsz*n_group, :, groupsz, :) + attn_output = torch.matmul(attn_weights, value_states) # (bsz, :, seq_len, :) or (bsz * n_group, :, groupsz, :) attn_output = attn_output.transpose(1, 2).contiguous() if getattr(self.config, "group_size_ratio", None) and self.training: # shift back @@ -125,9 +125,9 @@ def llama_flash_attention_2_forward( bsz, q_len, _ = hidden_states.size() - query_states = self.q_proj(hidden_states) - key_states = self.k_proj(hidden_states) - value_states = self.v_proj(hidden_states) + query_states: "torch.Tensor" = self.q_proj(hidden_states) + key_states: "torch.Tensor" = self.k_proj(hidden_states) + value_states: "torch.Tensor" = self.v_proj(hidden_states) query_states = query_states.view(bsz, q_len, self.num_heads, self.head_dim).transpose(1, 2) key_states = key_states.view(bsz, q_len, self.num_key_value_heads, self.head_dim).transpose(1, 2) @@ -233,9 +233,9 @@ def llama_sdpa_attention_forward( bsz, q_len, _ = hidden_states.size() - query_states = self.q_proj(hidden_states) - key_states = self.k_proj(hidden_states) - value_states = self.v_proj(hidden_states) + query_states: "torch.Tensor" = self.q_proj(hidden_states) + key_states: "torch.Tensor" = self.k_proj(hidden_states) + value_states: "torch.Tensor" = self.v_proj(hidden_states) query_states = query_states.view(bsz, q_len, self.num_heads, self.head_dim).transpose(1, 2) key_states = key_states.view(bsz, q_len, self.num_key_value_heads, self.head_dim).transpose(1, 2) @@ -270,11 +270,12 @@ def llama_sdpa_attention_forward( causal_mask = attention_mask if attention_mask is not None: - causal_mask = causal_mask[:, :, :, :groupsz] + causal_mask = causal_mask[:, :, :, : key_states.shape[-2]] - query_states = query_states.contiguous() - key_states = key_states.contiguous() - value_states = value_states.contiguous() + if query_states.device.type == "cuda" and causal_mask is not None: + query_states = query_states.contiguous() + key_states = key_states.contiguous() + value_states = value_states.contiguous() attn_output = torch.nn.functional.scaled_dot_product_attention( query_states, From 3318b6e188fea0c9484a43f1747338f1fd032156 Mon Sep 17 00:00:00 2001 From: hiyouga Date: Mon, 13 May 2024 20:39:36 +0800 Subject: [PATCH 14/14] update examples Former-commit-id: dae83f419919305cb23bb2b9da1277a1616179c5 --- examples/README.md | 12 ++++++------ examples/README_zh.md | 12 ++++++------ examples/extras/badam/llama3_lora_sft.yaml | 2 +- examples/extras/fsdp_qlora/llama3_lora_sft.yaml | 5 ++++- examples/extras/galore/llama3_full_sft.yaml | 2 +- examples/extras/llama_pro/llama3_freeze_sft.yaml | 4 ++-- examples/extras/loraplus/llama3_lora_sft.yaml | 4 ++-- examples/extras/mod/llama3_full_sft.yaml | 2 +- examples/full_multi_gpu/llama3_full_sft.yaml | 2 +- examples/lora_multi_gpu/llama3_lora_sft.yaml | 2 +- examples/lora_multi_gpu/llama3_lora_sft_ds.yaml | 2 +- examples/lora_single_gpu/llama3_lora_dpo.yaml | 2 +- examples/lora_single_gpu/llama3_lora_orpo.yaml | 2 +- examples/lora_single_gpu/llama3_lora_pretrain.yaml | 2 +- examples/lora_single_gpu/llama3_lora_reward.yaml | 2 +- examples/lora_single_gpu/llama3_lora_sft.yaml | 2 +- examples/lora_single_gpu/llama3_preprocess.yaml | 1 - examples/lora_single_gpu/llava1_5_lora_sft.yaml | 2 +- 
examples/qlora_single_gpu/llama3_lora_sft_aqlm.yaml | 2 +- examples/qlora_single_gpu/llama3_lora_sft_awq.yaml | 2 +- .../llama3_lora_sft_bitsandbytes.yaml | 5 +---- examples/qlora_single_gpu/llama3_lora_sft_gptq.yaml | 2 +- 22 files changed, 36 insertions(+), 37 deletions(-) diff --git a/examples/README.md b/examples/README.md index ce19f9d1..0838314a 100644 --- a/examples/README.md +++ b/examples/README.md @@ -28,6 +28,12 @@ CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llama3_lo CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llama3_lora_sft.yaml ``` +#### Multimodal Supervised Fine-Tuning + +```bash +CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llava1_5_lora_sft.yaml +``` + #### Reward Modeling ```bash @@ -52,12 +58,6 @@ CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llama3_lo CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llama3_lora_orpo.yaml ``` -#### Multimodal Supervised Fine-Tuning - -```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llava1_5_lora_sft.yaml -``` - #### Preprocess Dataset It is useful for large dataset, use `tokenized_path` in config to load the preprocessed dataset. diff --git a/examples/README_zh.md b/examples/README_zh.md index 91bdcda9..7fe43954 100644 --- a/examples/README_zh.md +++ b/examples/README_zh.md @@ -28,6 +28,12 @@ CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llama3_lo CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llama3_lora_sft.yaml ``` +#### 多模态指令监督微调 + +```bash +CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llava1_5_lora_sft.yaml +``` + #### 奖励模型训练 ```bash @@ -52,12 +58,6 @@ CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llama3_lo CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llama3_lora_orpo.yaml ``` -#### 多模态指令监督微调 - -```bash -CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llava1_5_lora_sft.yaml -``` - #### 预处理数据集 对于大数据集有帮助,在配置中使用 `tokenized_path` 以加载预处理后的数据集。 diff --git a/examples/extras/badam/llama3_lora_sft.yaml b/examples/extras/badam/llama3_lora_sft.yaml index 9f1f1976..5e8994bc 100644 --- a/examples/extras/badam/llama3_lora_sft.yaml +++ b/examples/extras/badam/llama3_lora_sft.yaml @@ -15,7 +15,6 @@ dataset: identity,alpaca_gpt4_en template: llama3 cutoff_len: 1024 max_samples: 1000 -val_size: 0.1 overwrite_cache: true preprocessing_num_workers: 16 @@ -36,6 +35,7 @@ warmup_steps: 0.1 pure_bf16: true # eval +val_size: 0.1 per_device_eval_batch_size: 1 evaluation_strategy: steps eval_steps: 500 diff --git a/examples/extras/fsdp_qlora/llama3_lora_sft.yaml b/examples/extras/fsdp_qlora/llama3_lora_sft.yaml index 64bf1356..1fd8f16a 100644 --- a/examples/extras/fsdp_qlora/llama3_lora_sft.yaml +++ b/examples/extras/fsdp_qlora/llama3_lora_sft.yaml @@ -8,12 +8,14 @@ do_train: true finetuning_type: lora lora_target: q_proj,v_proj +# ddp +ddp_timeout: 180000000 + # dataset dataset: identity,alpaca_gpt4_en template: llama3 cutoff_len: 1024 max_samples: 1000 -val_size: 0.1 overwrite_cache: true preprocessing_num_workers: 16 @@ -34,6 +36,7 @@ warmup_steps: 0.1 fp16: true # eval +val_size: 0.1 per_device_eval_batch_size: 1 evaluation_strategy: steps eval_steps: 500 diff --git a/examples/extras/galore/llama3_full_sft.yaml b/examples/extras/galore/llama3_full_sft.yaml index 5aec8af9..3bc074c5 100644 --- a/examples/extras/galore/llama3_full_sft.yaml +++ 
b/examples/extras/galore/llama3_full_sft.yaml @@ -16,7 +16,6 @@ dataset: identity,alpaca_gpt4_en template: llama3 cutoff_len: 1024 max_samples: 1000 -val_size: 0.1 overwrite_cache: true preprocessing_num_workers: 16 @@ -37,6 +36,7 @@ warmup_steps: 0.1 pure_bf16: true # eval +val_size: 0.1 per_device_eval_batch_size: 1 evaluation_strategy: steps eval_steps: 500 diff --git a/examples/extras/llama_pro/llama3_freeze_sft.yaml b/examples/extras/llama_pro/llama3_freeze_sft.yaml index a54be8b8..4d92cdad 100644 --- a/examples/extras/llama_pro/llama3_freeze_sft.yaml +++ b/examples/extras/llama_pro/llama3_freeze_sft.yaml @@ -14,7 +14,6 @@ dataset: identity,alpaca_gpt4_en template: llama3 cutoff_len: 1024 max_samples: 1000 -val_size: 0.1 overwrite_cache: true preprocessing_num_workers: 16 @@ -32,9 +31,10 @@ learning_rate: 0.0001 num_train_epochs: 3.0 lr_scheduler_type: cosine warmup_steps: 0.1 -pure_bf16: true +fp16: true # eval +val_size: 0.1 per_device_eval_batch_size: 1 evaluation_strategy: steps eval_steps: 500 diff --git a/examples/extras/loraplus/llama3_lora_sft.yaml b/examples/extras/loraplus/llama3_lora_sft.yaml index dfb7058b..0956aa71 100644 --- a/examples/extras/loraplus/llama3_lora_sft.yaml +++ b/examples/extras/loraplus/llama3_lora_sft.yaml @@ -13,7 +13,6 @@ dataset: identity,alpaca_gpt4_en template: llama3 cutoff_len: 1024 max_samples: 1000 -val_size: 0.1 overwrite_cache: true preprocessing_num_workers: 16 @@ -31,9 +30,10 @@ learning_rate: 0.0001 num_train_epochs: 3.0 lr_scheduler_type: cosine warmup_steps: 0.1 -pure_bf16: true +fp16: true # eval +val_size: 0.1 per_device_eval_batch_size: 1 evaluation_strategy: steps eval_steps: 500 diff --git a/examples/extras/mod/llama3_full_sft.yaml b/examples/extras/mod/llama3_full_sft.yaml index 5f80521d..5dc8c061 100644 --- a/examples/extras/mod/llama3_full_sft.yaml +++ b/examples/extras/mod/llama3_full_sft.yaml @@ -12,7 +12,6 @@ dataset: identity,alpaca_gpt4_en template: llama3 cutoff_len: 1024 max_samples: 1000 -val_size: 0.1 overwrite_cache: true preprocessing_num_workers: 16 @@ -34,6 +33,7 @@ warmup_steps: 0.1 pure_bf16: true # eval +val_size: 0.1 per_device_eval_batch_size: 1 evaluation_strategy: steps eval_steps: 500 diff --git a/examples/full_multi_gpu/llama3_full_sft.yaml b/examples/full_multi_gpu/llama3_full_sft.yaml index ef35e441..2d8031f1 100644 --- a/examples/full_multi_gpu/llama3_full_sft.yaml +++ b/examples/full_multi_gpu/llama3_full_sft.yaml @@ -15,7 +15,6 @@ dataset: identity,alpaca_gpt4_en template: llama3 cutoff_len: 1024 max_samples: 1000 -val_size: 0.1 overwrite_cache: true preprocessing_num_workers: 16 @@ -36,6 +35,7 @@ warmup_steps: 0.1 fp16: true # eval +val_size: 0.1 per_device_eval_batch_size: 1 evaluation_strategy: steps eval_steps: 500 diff --git a/examples/lora_multi_gpu/llama3_lora_sft.yaml b/examples/lora_multi_gpu/llama3_lora_sft.yaml index d9690679..6cc06f8a 100644 --- a/examples/lora_multi_gpu/llama3_lora_sft.yaml +++ b/examples/lora_multi_gpu/llama3_lora_sft.yaml @@ -15,7 +15,6 @@ dataset: identity,alpaca_gpt4_en template: llama3 cutoff_len: 1024 max_samples: 1000 -val_size: 0.1 overwrite_cache: true preprocessing_num_workers: 16 @@ -36,6 +35,7 @@ warmup_steps: 0.1 fp16: true # eval +val_size: 0.1 per_device_eval_batch_size: 1 evaluation_strategy: steps eval_steps: 500 diff --git a/examples/lora_multi_gpu/llama3_lora_sft_ds.yaml b/examples/lora_multi_gpu/llama3_lora_sft_ds.yaml index 26955167..5a7348c1 100644 --- a/examples/lora_multi_gpu/llama3_lora_sft_ds.yaml +++ 
b/examples/lora_multi_gpu/llama3_lora_sft_ds.yaml @@ -16,7 +16,6 @@ dataset: identity,alpaca_gpt4_en template: llama3 cutoff_len: 1024 max_samples: 1000 -val_size: 0.1 overwrite_cache: true preprocessing_num_workers: 16 @@ -37,6 +36,7 @@ warmup_steps: 0.1 fp16: true # eval +val_size: 0.1 per_device_eval_batch_size: 1 evaluation_strategy: steps eval_steps: 500 diff --git a/examples/lora_single_gpu/llama3_lora_dpo.yaml b/examples/lora_single_gpu/llama3_lora_dpo.yaml index f71f752d..16c6d0c9 100644 --- a/examples/lora_single_gpu/llama3_lora_dpo.yaml +++ b/examples/lora_single_gpu/llama3_lora_dpo.yaml @@ -13,7 +13,6 @@ dataset: orca_rlhf template: llama3 cutoff_len: 1024 max_samples: 1000 -val_size: 0.1 overwrite_cache: true preprocessing_num_workers: 16 @@ -34,6 +33,7 @@ warmup_steps: 0.1 fp16: true # eval +val_size: 0.1 per_device_eval_batch_size: 1 evaluation_strategy: steps eval_steps: 500 diff --git a/examples/lora_single_gpu/llama3_lora_orpo.yaml b/examples/lora_single_gpu/llama3_lora_orpo.yaml index 5d78d260..bc42bdd4 100644 --- a/examples/lora_single_gpu/llama3_lora_orpo.yaml +++ b/examples/lora_single_gpu/llama3_lora_orpo.yaml @@ -12,7 +12,6 @@ dataset: orca_rlhf template: llama3 cutoff_len: 1024 max_samples: 1000 -val_size: 0.1 overwrite_cache: true preprocessing_num_workers: 16 @@ -33,6 +32,7 @@ warmup_steps: 0.1 fp16: true # eval +val_size: 0.1 per_device_eval_batch_size: 1 evaluation_strategy: steps eval_steps: 500 diff --git a/examples/lora_single_gpu/llama3_lora_pretrain.yaml b/examples/lora_single_gpu/llama3_lora_pretrain.yaml index 64245b71..48425b15 100644 --- a/examples/lora_single_gpu/llama3_lora_pretrain.yaml +++ b/examples/lora_single_gpu/llama3_lora_pretrain.yaml @@ -11,7 +11,6 @@ lora_target: q_proj,v_proj dataset: c4_demo cutoff_len: 1024 max_samples: 1000 -val_size: 0.1 overwrite_cache: true preprocessing_num_workers: 16 @@ -32,6 +31,7 @@ warmup_steps: 0.1 fp16: true # eval +val_size: 0.1 per_device_eval_batch_size: 1 evaluation_strategy: steps eval_steps: 500 diff --git a/examples/lora_single_gpu/llama3_lora_reward.yaml b/examples/lora_single_gpu/llama3_lora_reward.yaml index f190f4ac..ecaf8d72 100644 --- a/examples/lora_single_gpu/llama3_lora_reward.yaml +++ b/examples/lora_single_gpu/llama3_lora_reward.yaml @@ -12,7 +12,6 @@ dataset: orca_rlhf template: llama3 cutoff_len: 1024 max_samples: 1000 -val_size: 0.1 overwrite_cache: true preprocessing_num_workers: 16 @@ -33,6 +32,7 @@ warmup_steps: 0.1 fp16: true # eval +val_size: 0.1 per_device_eval_batch_size: 1 evaluation_strategy: steps eval_steps: 500 diff --git a/examples/lora_single_gpu/llama3_lora_sft.yaml b/examples/lora_single_gpu/llama3_lora_sft.yaml index f99df305..0e5e30b3 100644 --- a/examples/lora_single_gpu/llama3_lora_sft.yaml +++ b/examples/lora_single_gpu/llama3_lora_sft.yaml @@ -12,7 +12,6 @@ dataset: identity,alpaca_gpt4_en template: llama3 cutoff_len: 1024 max_samples: 1000 -val_size: 0.1 overwrite_cache: true preprocessing_num_workers: 16 @@ -33,6 +32,7 @@ warmup_steps: 0.1 fp16: true # eval +val_size: 0.1 per_device_eval_batch_size: 1 evaluation_strategy: steps eval_steps: 500 diff --git a/examples/lora_single_gpu/llama3_preprocess.yaml b/examples/lora_single_gpu/llama3_preprocess.yaml index 0b3dc599..4c45c1cd 100644 --- a/examples/lora_single_gpu/llama3_preprocess.yaml +++ b/examples/lora_single_gpu/llama3_preprocess.yaml @@ -12,7 +12,6 @@ dataset: identity,alpaca_gpt4_en template: llama3 cutoff_len: 1024 max_samples: 1000 -val_size: 0.1 overwrite_cache: true preprocessing_num_workers: 16 
tokenized_path: saves/llama3-8b/dataset/sft diff --git a/examples/lora_single_gpu/llava1_5_lora_sft.yaml b/examples/lora_single_gpu/llava1_5_lora_sft.yaml index 96c2701a..84d2a672 100644 --- a/examples/lora_single_gpu/llava1_5_lora_sft.yaml +++ b/examples/lora_single_gpu/llava1_5_lora_sft.yaml @@ -13,7 +13,6 @@ dataset: mllm_demo template: vicuna cutoff_len: 1024 max_samples: 1000 -val_size: 0.1 overwrite_cache: true preprocessing_num_workers: 16 @@ -34,6 +33,7 @@ warmup_steps: 0.1 fp16: true # eval +val_size: 0.1 per_device_eval_batch_size: 1 evaluation_strategy: steps eval_steps: 500 diff --git a/examples/qlora_single_gpu/llama3_lora_sft_aqlm.yaml b/examples/qlora_single_gpu/llama3_lora_sft_aqlm.yaml index 11f1d277..a1d5f95d 100644 --- a/examples/qlora_single_gpu/llama3_lora_sft_aqlm.yaml +++ b/examples/qlora_single_gpu/llama3_lora_sft_aqlm.yaml @@ -12,7 +12,6 @@ dataset: identity,alpaca_gpt4_en template: llama3 cutoff_len: 1024 max_samples: 1000 -val_size: 0.1 overwrite_cache: true preprocessing_num_workers: 16 @@ -33,6 +32,7 @@ warmup_steps: 0.1 fp16: true # eval +val_size: 0.1 per_device_eval_batch_size: 1 evaluation_strategy: steps eval_steps: 500 diff --git a/examples/qlora_single_gpu/llama3_lora_sft_awq.yaml b/examples/qlora_single_gpu/llama3_lora_sft_awq.yaml index 4b070d45..8941d6b2 100644 --- a/examples/qlora_single_gpu/llama3_lora_sft_awq.yaml +++ b/examples/qlora_single_gpu/llama3_lora_sft_awq.yaml @@ -12,7 +12,6 @@ dataset: identity,alpaca_gpt4_en template: llama3 cutoff_len: 1024 max_samples: 1000 -val_size: 0.1 overwrite_cache: true preprocessing_num_workers: 16 @@ -33,6 +32,7 @@ warmup_steps: 0.1 fp16: true # eval +val_size: 0.1 per_device_eval_batch_size: 1 evaluation_strategy: steps eval_steps: 500 diff --git a/examples/qlora_single_gpu/llama3_lora_sft_bitsandbytes.yaml b/examples/qlora_single_gpu/llama3_lora_sft_bitsandbytes.yaml index 7bc31bde..885fcd83 100644 --- a/examples/qlora_single_gpu/llama3_lora_sft_bitsandbytes.yaml +++ b/examples/qlora_single_gpu/llama3_lora_sft_bitsandbytes.yaml @@ -8,15 +8,11 @@ do_train: true finetuning_type: lora lora_target: q_proj,v_proj -# ddp -ddp_timeout: 180000000 - # dataset dataset: identity,alpaca_gpt4_en template: llama3 cutoff_len: 1024 max_samples: 1000 -val_size: 0.1 overwrite_cache: true preprocessing_num_workers: 16 @@ -37,6 +33,7 @@ warmup_steps: 0.1 fp16: true # eval +val_size: 0.1 per_device_eval_batch_size: 1 evaluation_strategy: steps eval_steps: 500 diff --git a/examples/qlora_single_gpu/llama3_lora_sft_gptq.yaml b/examples/qlora_single_gpu/llama3_lora_sft_gptq.yaml index 2f8cfe45..87a404a0 100644 --- a/examples/qlora_single_gpu/llama3_lora_sft_gptq.yaml +++ b/examples/qlora_single_gpu/llama3_lora_sft_gptq.yaml @@ -12,7 +12,6 @@ dataset: identity,alpaca_gpt4_en template: llama3 cutoff_len: 1024 max_samples: 1000 -val_size: 0.1 overwrite_cache: true preprocessing_num_workers: 16 @@ -33,6 +32,7 @@ warmup_steps: 0.1 fp16: true # eval +val_size: 0.1 per_device_eval_batch_size: 1 evaluation_strategy: steps eval_steps: 500
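Editor's note on the `llama_sdpa_attention_forward` fix in patch 13 above: after the LongLoRA group-size shift, the attention mask handed to `scaled_dot_product_attention` must be sliced to the actual key length rather than a precomputed `groupsz`, and the inputs are only made contiguous on CUDA when a mask is present. The sketch below illustrates just the mask-slicing part with arbitrary shapes; it is a standalone toy assuming PyTorch 2.x, not the patched LLaMA attention itself.

```python
# Toy illustration (assumes PyTorch >= 2.0) of the mask-slicing pattern from the
# patch-13 fix: the 4D additive mask passed to scaled_dot_product_attention must
# match the key sequence length, so it is sliced with key_states.shape[-2]
# instead of a fixed group size. Shapes below are arbitrary.
import torch
import torch.nn.functional as F

bsz, n_heads, head_dim = 2, 4, 8
q_len, kv_len = 16, 16

query = torch.randn(bsz, n_heads, q_len, head_dim)
key = torch.randn(bsz, n_heads, kv_len, head_dim)
value = torch.randn(bsz, n_heads, kv_len, head_dim)

# A mask built for a longer context than is actually attended over.
full_mask = torch.zeros(bsz, 1, q_len, 4096)

# Slice the mask to the real key length, mirroring
# `causal_mask[:, :, :, : key_states.shape[-2]]` in the patch.
attn_mask = full_mask[:, :, :, : key.shape[-2]]

output = F.scaled_dot_product_attention(query, key, value, attn_mask=attn_mask)
print(output.shape)  # torch.Size([2, 4, 16, 8])
```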