Mirror of https://github.com/hiyouga/LLaMA-Factory.git, synced 2025-10-16 00:28:10 +08:00

Commit 1d675a287d (parent be33ef67fb)

[version] support transformers 4.49 (#6982)

* support transformers 4.49
* fix mm plugin

Former-commit-id: e9118a9df0839d24f6ddff5a0b55ef101a1d3d22
README.md

@@ -390,7 +390,7 @@ huggingface-cli login

 | ------------ | ------- | --------- |
 | python       | 3.9     | 3.10      |
 | torch        | 1.13.1  | 2.4.0     |
-| transformers | 4.41.2  | 4.45.2    |
+| transformers | 4.41.2  | 4.49.0    |
 | datasets     | 2.16.0  | 3.2.0     |
 | accelerate   | 0.34.0  | 1.2.1     |
 | peft         | 0.11.1  | 0.12.0    |
@@ -399,9 +399,9 @@ huggingface-cli login

 | Optional     | Minimum | Recommend |
 | ------------ | ------- | --------- |
 | CUDA         | 11.6    | 12.2      |
-| deepspeed    | 0.10.0  | 0.14.0    |
+| deepspeed    | 0.10.0  | 0.16.2    |
 | bitsandbytes | 0.39.0  | 0.43.1    |
-| vllm         | 0.4.3   | 0.6.6     |
+| vllm         | 0.4.3   | 0.7.2     |
 | flash-attn   | 2.3.0   | 2.7.2     |

 ### Hardware Requirement
README_zh.md

@@ -392,7 +392,7 @@ huggingface-cli login

 | ------------ | ------- | --------- |
 | python       | 3.9     | 3.10      |
 | torch        | 1.13.1  | 2.4.0     |
-| transformers | 4.41.2  | 4.45.2    |
+| transformers | 4.41.2  | 4.49.0    |
 | datasets     | 2.16.0  | 3.2.0     |
 | accelerate   | 0.34.0  | 1.2.1     |
 | peft         | 0.11.1  | 0.12.0    |
@@ -401,9 +401,9 @@ huggingface-cli login

 | Optional     | Minimum | Recommend |
 | ------------ | ------- | --------- |
 | CUDA         | 11.6    | 12.2      |
-| deepspeed    | 0.10.0  | 0.14.0    |
+| deepspeed    | 0.10.0  | 0.16.2    |
 | bitsandbytes | 0.39.0  | 0.43.1    |
-| vllm         | 0.4.3   | 0.6.6     |
+| vllm         | 0.4.3   | 0.7.2     |
 | flash-attn   | 2.3.0   | 2.7.2     |

 ### Hardware Requirement
requirements.txt

@@ -1,5 +1,5 @@
-transformers>=4.41.2,<=4.48.3,!=4.46.*,!=4.47.*,!=4.48.0,!=4.48.1,!=4.48.2;python_version<'3.10'
-transformers>=4.41.2,<=4.48.3,!=4.46.*,!=4.47.*,!=4.48.0;python_version>='3.10'
+transformers>=4.41.2,<=4.49.0,!=4.46.*,!=4.47.*,!=4.48.*;python_version<'3.10'
+transformers>=4.41.2,<=4.49.0,!=4.46.*,!=4.47.*,!=4.48.0;python_version>='3.10'
 datasets>=2.16.0,<=3.2.0
 accelerate>=0.34.0,<=1.2.1
 peft>=0.11.1,<=0.12.0
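Each of these lines is a PEP 508 requirement whose trailing environment marker selects which interpreter it applies to. A minimal sketch of how the updated specifier evaluates, using the third-party packaging library (the requirement string is copied from the diff; the surrounding code is illustrative, not project code):

from packaging.requirements import Requirement

# The marker after ';' decides whether this line applies to the running
# interpreter; the specifier set decides which versions are acceptable.
req = Requirement(
    "transformers>=4.41.2,<=4.49.0,!=4.46.*,!=4.47.*,!=4.48.*; python_version < '3.10'"
)
print(req.marker.evaluate())             # True only on Python 3.9 and below
print(req.specifier.contains("4.48.3"))  # False: excluded by !=4.48.*
print(req.specifier.contains("4.49.0"))  # True: the new upper bound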
src/llamafactory/__init__.py

@@ -20,7 +20,7 @@ Level:

 Dependency graph:
   main:
-    transformers>=4.41.2,<=4.48.3,!=4.46.*,!=4.47.*,!=4.48.0
+    transformers>=4.41.2,<=4.49.0,!=4.46.*,!=4.47.*,!=4.48.0
     datasets>=2.16.0,<=3.2.0
     accelerate>=0.34.0,<=1.2.1
     peft>=0.11.1,<=0.12.0
src/llamafactory/data/collator.py

@@ -187,8 +187,6 @@ class MultiModalDataCollatorForSeq2Seq(DataCollatorForSeq2Seq):

             mm_inputs["cross_attention_mask"] = F.pad(cross_attention_mask, (0, 0, 0, 0, 0, seq_len - orig_len))

         features.update(mm_inputs)
-        if isinstance(features.get("pixel_values"), list):  # for pixtral inputs
-            features = features.data  # use default_collate() instead of BatchEncoding.to()
-
         if "image_bound" in features:  # for minicpmv inputs
             bsz, seq_length = features["input_ids"].shape
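The pad tuple in the retained context line is easy to misread, so here is a short sketch of its semantics; only the pad tuple is taken from the diff, and the mask shape is illustrative:

import torch
import torch.nn.functional as F

# F.pad consumes the tuple in pairs starting from the LAST dimension:
# (last_l, last_r, second_last_l, second_last_r, third_last_l, third_last_r).
# (0, 0, 0, 0, 0, n) therefore appends n zeros at the end of the
# third-from-last dimension, i.e. the sequence axis of a
# (batch, seq, num_images, num_tiles) cross-attention mask.
cross_attention_mask = torch.ones(2, 4, 1, 4)  # illustrative shape
seq_len, orig_len = 10, cross_attention_mask.size(1)
padded = F.pad(cross_attention_mask, (0, 0, 0, 0, 0, seq_len - orig_len))
print(padded.shape)  # torch.Size([2, 10, 1, 4]); the appended rows are zeros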
src/llamafactory/data/mm_plugin.py

@@ -380,10 +380,8 @@ class LlavaNextPlugin(BasePlugin):

         num_image_tokens = 0
         messages = deepcopy(messages)
         mm_inputs = self._get_mm_inputs(images, videos, audios, processor)
-        if "image_sizes" in mm_inputs:
-            image_sizes = iter(mm_inputs["image_sizes"])
-
         if "pixel_values" in mm_inputs:
+            image_sizes = iter(mm_inputs["image_sizes"].tolist())
             height, width = get_image_size(to_numpy_array(mm_inputs["pixel_values"][0][0]))

         for message in messages:
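The repeated `.tolist()` change is the heart of the mm plugin fix: the processor returns `image_sizes` as a tensor, and iterating it directly yields tensor rows rather than plain integers. A small sketch of the difference, with made-up sizes:

import torch

image_sizes = torch.tensor([[336, 448], [672, 336]])  # (num_images, 2), made-up values

# Iterating the tensor yields 1-D tensors, so height and width unpack to
# 0-d tensors and downstream arithmetic keeps tensor types around.
height, width = next(iter(image_sizes))
print(type(height))  # <class 'torch.Tensor'>

# Converting once with .tolist() yields plain [height, width] lists whose
# elements unpack to Python ints, which the token-count math expects.
height, width = next(iter(image_sizes.tolist()))
print(type(height))  # <class 'int'>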
@@ -439,7 +437,7 @@ class LlavaNextVideoPlugin(BasePlugin):

         messages = deepcopy(messages)
         mm_inputs = self._get_mm_inputs(images, videos, audios, processor)
         if "pixel_values" in mm_inputs:
-            image_sizes = iter(mm_inputs["image_sizes"])
+            image_sizes = iter(mm_inputs["image_sizes"].tolist())
             height, width = get_image_size(to_numpy_array(mm_inputs["pixel_values"][0][0]))
         for message in messages:
             content = message["content"]
@@ -916,16 +914,14 @@ class PixtralPlugin(BasePlugin):

         num_image_tokens = 0
         messages = deepcopy(messages)
         mm_inputs = self._get_mm_inputs(images, videos, audios, processor)
-        image_input_sizes = mm_inputs.get("image_sizes", None)
+        if "pixel_values" in mm_inputs:
+            image_sizes = iter(mm_inputs["image_sizes"].tolist())

         for message in messages:
             content = message["content"]
             while IMAGE_PLACEHOLDER in content:
-                if image_input_sizes is None:
-                    raise ValueError("Cannot get image input sizes.")

                 if self.expand_mm_tokens:
-                    image_size = image_input_sizes[0][num_image_tokens]
-                    height, width = image_size
+                    height, width = next(image_sizes)
                     num_height_tokens = height // patch_size
                     num_width_tokens = width // patch_size
                     replace_tokens = [[image_token] * num_width_tokens + [image_break_token]] * num_height_tokens
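The retained lines build Pixtral's image-token grid: one row of image tokens per row of patches, each row terminated by a break token. A worked example with assumed numbers (the patch size and image dimensions here are illustrative, not taken from the diff):

patch_size = 16                      # assumed patch size
height, width = 224, 448             # assumed image size in pixels
image_token, image_break_token = "[IMG]", "[IMG_BREAK]"

num_height_tokens = height // patch_size   # 14 rows of patches
num_width_tokens = width // patch_size     # 28 patch tokens per row
# Each row is num_width_tokens image tokens plus one break token; the grid
# is num_height_tokens such rows, later flattened into the prompt.
replace_tokens = [[image_token] * num_width_tokens + [image_break_token]] * num_height_tokens
flat = [token for row in replace_tokens for token in row]
print(len(flat))  # 14 * (28 + 1) = 406 tokens for this image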
@@ -959,9 +955,6 @@ class PixtralPlugin(BasePlugin):

     ) -> Dict[str, Union[List[int], "torch.Tensor"]]:
         self._validate_input(images, videos, audios)
         mm_inputs = self._get_mm_inputs(images, videos, audios, processor)
-        if mm_inputs.get("pixel_values"):
-            mm_inputs["pixel_values"] = mm_inputs["pixel_values"][0]
-
         mm_inputs.pop("image_sizes", None)
         return mm_inputs
src/llamafactory/extras/misc.py

@@ -94,7 +94,7 @@ def check_dependencies() -> None:

     r"""
     Checks the version of the required packages.
     """
-    check_version("transformers>=4.41.2,<=4.48.3,!=4.46.0,!=4.46.1,!=4.46.2,!=4.46.3,!=4.47.0,!=4.47.1,!=4.48.0")
+    check_version("transformers>=4.41.2,<=4.49.0,!=4.46.0,!=4.46.1,!=4.46.2,!=4.46.3,!=4.47.0,!=4.47.1,!=4.48.0")
     check_version("datasets>=2.16.0,<=3.2.0")
     check_version("accelerate>=0.34.0,<=1.2.1")
     check_version("peft>=0.11.1,<=0.12.0")
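For orientation, a runtime check of this shape can be sketched as follows; this is an assumed stand-in, not the project's actual check_version implementation, and the helper name check_version_sketch is hypothetical:

from importlib.metadata import version
from packaging.requirements import Requirement

def check_version_sketch(requirement: str) -> None:
    # Hypothetical stand-in: compare the installed distribution's version
    # against the requirement's specifier set and fail loudly on mismatch.
    req = Requirement(requirement)
    installed = version(req.name)
    if not req.specifier.contains(installed, prereleases=True):
        raise RuntimeError(f"{req.name}=={installed} does not satisfy: {requirement}")

check_version_sketch("transformers>=4.41.2,<=4.49.0,!=4.48.0")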