diff --git a/requirements.txt b/requirements.txt index 93e83530..0cc71ae4 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,6 @@ transformers>=4.41.2,<=4.43.4 -datasets>=2.16.0,<=2.20.0 -accelerate>=0.30.1,<=0.32.0 +datasets>=2.16.0,<=2.21.0 +accelerate>=0.30.1,<=0.33.0 peft>=0.11.1,<=0.12.0 trl>=0.8.6,<=0.9.6 gradio>=4.0.0 diff --git a/src/llamafactory/__init__.py b/src/llamafactory/__init__.py index 7b602a92..5cd86134 100644 --- a/src/llamafactory/__init__.py +++ b/src/llamafactory/__init__.py @@ -21,8 +21,8 @@ Level: Dependency graph: main: transformers>=4.41.2,<=4.43.4 - datasets>=2.16.0,<=2.20.0 - accelerate>=0.30.1,<=0.32.0 + datasets>=2.16.0,<=2.21.0 + accelerate>=0.30.1,<=0.33.0 peft>=0.11.1,<=0.12.0 trl>=0.8.6,<=0.9.6 attention: diff --git a/src/llamafactory/extras/misc.py b/src/llamafactory/extras/misc.py index 5f06a900..8908b807 100644 --- a/src/llamafactory/extras/misc.py +++ b/src/llamafactory/extras/misc.py @@ -80,8 +80,8 @@ def check_dependencies() -> None: logger.warning("Version checking has been disabled, may lead to unexpected behaviors.") else: require_version("transformers>=4.41.2,<=4.43.4", "To fix: pip install transformers>=4.41.2,<=4.43.4") - require_version("datasets>=2.16.0,<=2.20.0", "To fix: pip install datasets>=2.16.0,<=2.20.0") - require_version("accelerate>=0.30.1,<=0.32.0", "To fix: pip install accelerate>=0.30.1,<=0.32.0") + require_version("datasets>=2.16.0,<=2.21.0", "To fix: pip install datasets>=2.16.0,<=2.21.0") + require_version("accelerate>=0.30.1,<=0.33.0", "To fix: pip install accelerate>=0.30.1,<=0.33.0") require_version("peft>=0.11.1,<=0.12.0", "To fix: pip install peft>=0.11.1,<=0.12.0") require_version("trl>=0.8.6,<=0.9.6", "To fix: pip install trl>=0.8.6,<=0.9.6") diff --git a/src/llamafactory/train/callbacks.py b/src/llamafactory/train/callbacks.py index c9612e6e..94e1541c 100644 --- a/src/llamafactory/train/callbacks.py +++ b/src/llamafactory/train/callbacks.py @@ -75,8 +75,7 @@ def fix_valuehead_checkpoint( state_dict: Dict[str, torch.Tensor] = torch.load(path_to_checkpoint, map_location="cpu") os.remove(path_to_checkpoint) - decoder_state_dict = {} - v_head_state_dict = {} + decoder_state_dict, v_head_state_dict = {}, {} for name, param in state_dict.items(): if name.startswith("v_head."): v_head_state_dict[name] = param