Mirror of https://github.com/hiyouga/LLaMA-Factory.git (synced 2025-11-04 18:02:19 +08:00)

[deps] upgrade vllm (#6857)

Former-commit-id: 4bd50f65a3d62528768561019fda2723d045c7fd
parent 528e06ccaa
commit 4d1791e905

This commit raises the supported vllm range to <=0.7.2, relaxes the transformers ceiling to 4.48.3, and introduces an is_env_enabled() helper that replaces the ad-hoc os.getenv(...).lower() truthiness checks scattered across the codebase.
requirements.txt
@@ -1,5 +1,5 @@
-transformers>=4.41.2,<=4.45.2;python_version<'3.10'
-transformers>=4.41.2,<=4.48.2,!=4.46.*,!=4.47.*,!=4.48.0;python_version>='3.10'
+transformers>=4.41.2,<=4.48.3,!=4.46.*,!=4.47.*,!=4.48.0,!=4.48.1,!=4.48.2;python_version<'3.10'
+transformers>=4.41.2,<=4.48.3,!=4.46.*,!=4.47.*,!=4.48.0;python_version>='3.10'
 datasets>=2.16.0,<=3.2.0
 accelerate>=0.34.0,<=1.2.1
 peft>=0.11.1,<=0.12.0
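The two transformers lines above use PEP 508 environment markers, so pip installs exactly one of them per interpreter. A minimal sketch (not part of the commit, assuming the `packaging` library that pip itself vendors) of how the markers resolve:

from packaging.requirements import Requirement

# The py>=3.10 pin from the new requirements.txt, verbatim.
req = Requirement("transformers>=4.41.2,<=4.48.3,!=4.46.*,!=4.47.*,!=4.48.0; python_version>='3.10'")

# Marker.evaluate() accepts an environment dict, so both branches can be tested.
print(req.marker.evaluate({"python_version": "3.10"}))  # True: this pin applies
print(req.marker.evaluate({"python_version": "3.9"}))   # False: the py<3.10 line applies instead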
setup.py
@@ -36,7 +36,7 @@ def get_requires() -> List[str]:
 
 def get_console_scripts() -> List[str]:
     console_scripts = ["llamafactory-cli = llamafactory.cli:main"]
-    if os.environ.get("ENABLE_SHORT_CONSOLE", "1").lower() in ["true", "1"]:
+    if os.getenv("ENABLE_SHORT_CONSOLE", "1").lower() in ["true", "y", "1"]:
         console_scripts.append("lmf = llamafactory.cli:main")
 
     return console_scripts
@@ -54,7 +54,7 @@ extra_require = {
     "gptq": ["optimum>=1.17.0", "auto-gptq>=0.5.0"],
     "awq": ["autoawq"],
     "aqlm": ["aqlm[gpu]>=1.1.0"],
-    "vllm": ["vllm>=0.4.3,<=0.6.5"],
+    "vllm": ["vllm>=0.4.3,<=0.7.2"],
     "galore": ["galore-torch"],
     "apollo": ["apollo-torch"],
     "badam": ["badam>=1.2.1"],
src/llamafactory/__init__.py
@@ -20,7 +20,7 @@ Level:
 
 Dependency graph:
   main:
-    transformers>=4.41.2,<=4.48.2,!=4.46.*,!=4.47.*,!=4.48.0
+    transformers>=4.41.2,<=4.48.3,!=4.46.*,!=4.47.*,!=4.48.0
     datasets>=2.16.0,<=3.2.0
     accelerate>=0.34.0,<=1.2.1
     peft>=0.11.1,<=0.12.0
@@ -30,7 +30,7 @@ Dependency graph:
   longlora:
     transformers>=4.41.2,<4.48.0
   packing:
-    transformers>=4.43.0,<=4.48.2
+    transformers>=4.43.0
 
 Disable version checking: DISABLE_VERSION_CHECK=1
 Enable VRAM recording: RECORD_VRAM=1
src/llamafactory/cli.py
@@ -24,7 +24,7 @@ from .chat.chat_model import run_chat
 from .eval.evaluator import run_eval
 from .extras import logging
 from .extras.env import VERSION, print_env
-from .extras.misc import get_device_count, use_ray
+from .extras.misc import get_device_count, is_env_enabled, use_ray
 from .train.tuner import export_model, run_exp
 from .webui.interface import run_web_demo, run_web_ui
 
@@ -86,7 +86,7 @@ def main():
     elif command == Command.EXPORT:
         export_model()
     elif command == Command.TRAIN:
-        force_torchrun = os.getenv("FORCE_TORCHRUN", "0").lower() in ["true", "1"]
+        force_torchrun = is_env_enabled("FORCE_TORCHRUN")
         if force_torchrun or (get_device_count() > 1 and not use_ray()):
             master_addr = os.getenv("MASTER_ADDR", "127.0.0.1")
             master_port = os.getenv("MASTER_PORT", str(random.randint(20001, 29999)))
src/llamafactory/extras/env.py
@@ -45,6 +45,8 @@ def print_env() -> None:
     if is_torch_cuda_available():
         info["PyTorch version"] += " (GPU)"
         info["GPU type"] = torch.cuda.get_device_name()
+        info["GPU number"] = torch.cuda.device_count()
+        info["GPU memory"] = f"{torch.cuda.mem_get_info()[1] / (1024**3):.2f}GB"
 
     if is_torch_npu_available():
         info["PyTorch version"] += " (NPU)"
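The two added lines report the device count and total VRAM. For context, a short sketch (not from the commit): torch.cuda.mem_get_info() returns a (free_bytes, total_bytes) tuple for the current device, which is why the diff indexes [1].

import torch

if torch.cuda.is_available():
    free_bytes, total_bytes = torch.cuda.mem_get_info()   # both in bytes, current device
    print(f"GPU memory: {total_bytes / (1024**3):.2f}GB")  # same arithmetic as the diff above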
@@ -59,7 +61,7 @@ def print_env() -> None:
         pass
 
     try:
-        import bitsandbytes
+        import bitsandbytes  # type: ignore
 
         info["Bitsandbytes version"] = bitsandbytes.__version__
     except Exception:
src/llamafactory/extras/misc.py
@@ -78,7 +78,7 @@ def check_version(requirement: str, mandatory: bool = False) -> None:
     r"""
     Optionally checks the package version.
     """
-    if os.getenv("DISABLE_VERSION_CHECK", "0").lower() in ["true", "1"] and not mandatory:
+    if is_env_enabled("DISABLE_VERSION_CHECK") and not mandatory:
         logger.warning_rank0_once("Version checking has been disabled, may lead to unexpected behaviors.")
         return
 
@@ -94,7 +94,7 @@ def check_dependencies() -> None:
     r"""
     Checks the version of the required packages.
     """
-    check_version("transformers>=4.41.2,<=4.48.2,!=4.46.0,!=4.46.1,!=4.46.2,!=4.46.3,!=4.47.0,!=4.47.1,!=4.48.0")
+    check_version("transformers>=4.41.2,<=4.48.3,!=4.46.0,!=4.46.1,!=4.46.2,!=4.46.3,!=4.47.0,!=4.47.1,!=4.48.0")
     check_version("datasets>=2.16.0,<=3.2.0")
     check_version("accelerate>=0.34.0,<=1.2.1")
     check_version("peft>=0.11.1,<=0.12.0")
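The updated range admits any version between the bounds minus the explicit exclusions. A sketch of its behavior using packaging-style specifier sets (an illustration only; how the project's own check_version resolves requirements is not shown in this diff):

from packaging.specifiers import SpecifierSet
from packaging.version import Version

spec = SpecifierSet(">=4.41.2,<=4.48.3,!=4.46.0,!=4.46.1,!=4.46.2,!=4.46.3,!=4.47.0,!=4.47.1,!=4.48.0")

print(Version("4.48.3") in spec)  # True: newly admitted by this commit
print(Version("4.48.0") in spec)  # False: still explicitly excluded
print(Version("4.46.2") in spec)  # False: the 4.46.x line stays blocked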
@@ -226,6 +226,13 @@ def is_gpu_or_npu_available() -> bool:
     return is_torch_npu_available() or is_torch_cuda_available()
 
 
+def is_env_enabled(env_var: str, default: str = "0") -> bool:
+    r"""
+    Checks if the environment variable is enabled.
+    """
+    return os.getenv(env_var, default).lower() in ["true", "y", "1"]
+
+
 def numpify(inputs: Union["NDArray", "torch.Tensor"]) -> "NDArray":
     r"""
     Casts a torch tensor or a numpy array to a numpy array.
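This helper centralizes the truthy-env-var check repeated across the codebase; note it also accepts "y", which most of the inline checks it replaces did not. A usage sketch (assuming the import path shown in the diffs below):

import os

from llamafactory.extras.misc import is_env_enabled

os.environ["RECORD_VRAM"] = "Y"                        # matching is case-insensitive
assert is_env_enabled("RECORD_VRAM")
assert not is_env_enabled("SOME_UNSET_FLAG")           # unset vars fall back to default="0"
assert is_env_enabled("SOME_UNSET_FLAG", default="1")  # callers can opt into an enabled default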
@@ -244,7 +251,7 @@ def skip_check_imports() -> None:
     r"""
     Avoids flash attention import error in custom model files.
     """
-    if os.getenv("FORCE_CHECK_IMPORTS", "0").lower() not in ["true", "1"]:
+    if not is_env_enabled("FORCE_CHECK_IMPORTS"):
         transformers.dynamic_module_utils.check_imports = get_relative_imports
 
 
@@ -290,12 +297,12 @@ def try_download_model_from_other_hub(model_args: "ModelArguments") -> str:
 
 
 def use_modelscope() -> bool:
-    return os.getenv("USE_MODELSCOPE_HUB", "0").lower() in ["true", "1"]
+    return is_env_enabled("USE_MODELSCOPE_HUB")
 
 
 def use_openmind() -> bool:
-    return os.getenv("USE_OPENMIND_HUB", "0").lower() in ["true", "1"]
+    return is_env_enabled("USE_OPENMIND_HUB")
 
 
 def use_ray() -> bool:
-    return os.getenv("USE_RAY", "0").lower() in ["true", "1"]
+    return is_env_enabled("USE_RAY")
src/llamafactory/hparams/parser.py
@@ -32,7 +32,7 @@ from transformers.utils import is_torch_bf16_gpu_available, is_torch_npu_available
 
 from ..extras import logging
 from ..extras.constants import CHECKPOINT_NAMES
-from ..extras.misc import check_dependencies, check_version, get_current_device
+from ..extras.misc import check_dependencies, check_version, get_current_device, is_env_enabled
 from .data_args import DataArguments
 from .evaluation_args import EvaluationArguments
 from .finetuning_args import FinetuningArguments
@@ -136,7 +136,7 @@ def _check_extra_dependencies(
         check_version("mixture-of-depth>=1.1.6", mandatory=True)
 
     if model_args.infer_backend == "vllm":
-        check_version("vllm>=0.4.3,<=0.6.5")
+        check_version("vllm>=0.4.3,<=0.7.2")
         check_version("vllm", mandatory=True)
 
     if finetuning_args.use_galore:
@@ -162,19 +162,19 @@ def _check_extra_dependencies(
 
 def _parse_train_args(args: Optional[Union[Dict[str, Any], List[str]]] = None) -> _TRAIN_CLS:
     parser = HfArgumentParser(_TRAIN_ARGS)
-    allow_extra_keys = os.getenv("ALLOW_EXTRA_ARGS", "0").lower() in ["true", "1"]
+    allow_extra_keys = is_env_enabled("ALLOW_EXTRA_ARGS")
     return _parse_args(parser, args, allow_extra_keys=allow_extra_keys)
 
 
 def _parse_infer_args(args: Optional[Union[Dict[str, Any], List[str]]] = None) -> _INFER_CLS:
     parser = HfArgumentParser(_INFER_ARGS)
-    allow_extra_keys = os.getenv("ALLOW_EXTRA_ARGS", "0").lower() in ["true", "1"]
+    allow_extra_keys = is_env_enabled("ALLOW_EXTRA_ARGS")
     return _parse_args(parser, args, allow_extra_keys=allow_extra_keys)
 
 
 def _parse_eval_args(args: Optional[Union[Dict[str, Any], List[str]]] = None) -> _EVAL_CLS:
     parser = HfArgumentParser(_EVAL_ARGS)
-    allow_extra_keys = os.getenv("ALLOW_EXTRA_ARGS", "0").lower() in ["true", "1"]
+    allow_extra_keys = is_env_enabled("ALLOW_EXTRA_ARGS")
     return _parse_args(parser, args, allow_extra_keys=allow_extra_keys)
 
 
src/llamafactory/model/model_utils/packing.py
@@ -118,6 +118,6 @@ def configure_packing(model_args: "ModelArguments", is_trainable: bool) -> None:
     if not is_trainable or not model_args.block_diag_attn:
         return
 
-    check_version("transformers>=4.43.0,<=4.48.2")
+    check_version("transformers>=4.43.0")
     transformers.modeling_flash_attention_utils._get_unpad_data = get_unpad_data
     logger.info_rank0("Using block diagonal attention for sequence packing without cross-attention.")
@ -12,7 +12,6 @@
 | 
			
		||||
# See the License for the specific language governing permissions and
 | 
			
		||||
# limitations under the License.
 | 
			
		||||
 | 
			
		||||
import os
 | 
			
		||||
from types import MethodType
 | 
			
		||||
from typing import TYPE_CHECKING, Any, Dict
 | 
			
		||||
 | 
			
		||||
@@ -23,7 +22,7 @@ from transformers.integrations import is_deepspeed_zero3_enabled
 from transformers.modeling_utils import is_fsdp_enabled
 
 from ..extras import logging
-from ..extras.misc import infer_optim_dtype
+from ..extras.misc import infer_optim_dtype, is_env_enabled
 from ..extras.packages import is_transformers_version_greater_than
 from .model_utils.attention import configure_attn_implementation, print_attn_implementation
 from .model_utils.checkpointing import prepare_model_for_training
@@ -102,8 +101,7 @@ def patch_config(
             model_args.compute_dtype = infer_optim_dtype(model_dtype=getattr(config, "torch_dtype", None))
 
     if is_torch_npu_available():
-        use_jit_compile = os.environ.get("JIT_COMPILE", "0").lower() in ["true", "1"]
-        torch.npu.set_compile_mode(jit_compile=use_jit_compile)
+        torch.npu.set_compile_mode(jit_compile=is_env_enabled("JIT_COMPILE"))
 
     configure_attn_implementation(config, model_args, is_trainable)
     configure_rope(config, model_args, is_trainable)
src/llamafactory/train/callbacks.py
@@ -35,7 +35,7 @@ from typing_extensions import override
 
 from ..extras import logging
 from ..extras.constants import TRAINER_LOG, V_HEAD_SAFE_WEIGHTS_NAME, V_HEAD_WEIGHTS_NAME
-from ..extras.misc import get_peak_memory, use_ray
+from ..extras.misc import get_peak_memory, is_env_enabled, use_ray
 
 
 if is_safetensors_available():
@@ -193,7 +193,7 @@ class LogCallback(TrainerCallback):
         self.aborted = False
         self.do_train = False
         # Web UI
-        self.webui_mode = os.environ.get("LLAMABOARD_ENABLED", "0").lower() in ["true", "1"]
+        self.webui_mode = is_env_enabled("LLAMABOARD_ENABLED")
         if self.webui_mode and not use_ray():
             signal.signal(signal.SIGABRT, self._set_abort)
             self.logger_handler = logging.LoggerHandler(os.environ.get("LLAMABOARD_WORKDIR"))
@@ -299,7 +299,7 @@
             logs["throughput"] = round(state.num_input_tokens_seen / (time.time() - self.start_time), 2)
             logs["total_tokens"] = state.num_input_tokens_seen
 
-        if os.environ.get("RECORD_VRAM", "0").lower() in ["true", "1"]:
+        if is_env_enabled("RECORD_VRAM"):
            vram_allocated, vram_reserved = get_peak_memory()
             logs["vram_allocated"] = round(vram_allocated / (1024**3), 2)
             logs["vram_reserved"] = round(vram_reserved / (1024**3), 2)
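With RECORD_VRAM enabled, peak allocator statistics land in the training logs in GB. A sketch of the underlying numbers (an assumption for illustration: get_peak_memory presumably wraps torch's peak-memory counters, which this diff does not show):

import torch

if torch.cuda.is_available():
    vram_allocated = torch.cuda.max_memory_allocated()  # peak bytes held in live tensors
    vram_reserved = torch.cuda.max_memory_reserved()    # peak bytes reserved by the caching allocator
    print(round(vram_allocated / (1024**3), 2), round(vram_reserved / (1024**3), 2))  # GB, as logged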
src/llamafactory/webui/interface.py
@@ -15,6 +15,7 @@
 import os
 import platform
 
+from ..extras.misc import is_env_enabled
 from ..extras.packages import is_gradio_available
 from .common import save_config
 from .components import (
@@ -87,14 +88,14 @@ def create_web_demo() -> "gr.Blocks":
 
 
 def run_web_ui() -> None:
-    gradio_ipv6 = os.getenv("GRADIO_IPV6", "0").lower() in ["true", "1"]
-    gradio_share = os.getenv("GRADIO_SHARE", "0").lower() in ["true", "1"]
+    gradio_ipv6 = is_env_enabled("GRADIO_IPV6")
+    gradio_share = is_env_enabled("GRADIO_SHARE")
     server_name = os.getenv("GRADIO_SERVER_NAME", "[::]" if gradio_ipv6 else "0.0.0.0")
     create_ui().queue().launch(share=gradio_share, server_name=server_name, inbrowser=True)
 
 
 def run_web_demo() -> None:
-    gradio_ipv6 = os.getenv("GRADIO_IPV6", "0").lower() in ["true", "1"]
-    gradio_share = os.getenv("GRADIO_SHARE", "0").lower() in ["true", "1"]
+    gradio_ipv6 = is_env_enabled("GRADIO_IPV6")
+    gradio_share = is_env_enabled("GRADIO_SHARE")
     server_name = os.getenv("GRADIO_SERVER_NAME", "[::]" if gradio_ipv6 else "0.0.0.0")
     create_web_demo().queue().launch(share=gradio_share, server_name=server_name, inbrowser=True)
src/webui.py
@@ -14,12 +14,13 @@
 
 import os
 
+from llamafactory.extras.misc import is_env_enabled
 from llamafactory.webui.interface import create_ui
 
 
 def main():
-    gradio_ipv6 = os.getenv("GRADIO_IPV6", "0").lower() in ["true", "1"]
-    gradio_share = os.getenv("GRADIO_SHARE", "0").lower() in ["true", "1"]
+    gradio_ipv6 = is_env_enabled("GRADIO_IPV6")
+    gradio_share = is_env_enabled("GRADIO_SHARE")
     server_name = os.getenv("GRADIO_SERVER_NAME", "[::]" if gradio_ipv6 else "0.0.0.0")
     create_ui().queue().launch(share=gradio_share, server_name=server_name, inbrowser=True)
 
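Since the Gradio toggles are now read through is_env_enabled, any of "1", "true", or "y" (case-insensitive) enables them. A hypothetical launch sketch (not part of the commit):

import os

os.environ["GRADIO_SHARE"] = "y"  # request a public share link; "y" is newly accepted
os.environ["GRADIO_IPV6"] = "1"   # bind to [::] rather than 0.0.0.0

from llamafactory.webui.interface import run_web_ui

run_web_ui()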