Mirror of https://github.com/hiyouga/LLaMA-Factory.git
Synced 2025-08-03 04:02:49 +08:00
Merge branch 'hiyouga:main' into pixtral-patch

Former-commit-id: 0d3106e9fad565fbe56b8de57dd6ea373944eb99
Commit a7a5a5671f
[Binary image file changed, not shown: 199 KiB → 166 KiB]
[Binary image file changed, not shown: 168 KiB → 165 KiB]
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

+import os
 from collections import OrderedDict, defaultdict
 from enum import Enum
 from typing import Dict, Optional
@@ -47,7 +48,7 @@ FILEEXT2TYPE = {

 IGNORE_INDEX = -100

-IMAGE_PLACEHOLDER = "<image>"
+IMAGE_PLACEHOLDER = os.environ.get("IMAGE_PLACEHOLDER", "<image>")

 LAYERNORM_NAMES = {"norm", "ln"}

@@ -95,7 +96,7 @@ SUPPORTED_CLASS_FOR_BLOCK_DIAG_ATTN = {

 SUPPORTED_CLASS_FOR_S2ATTN = {"llama"}

-VIDEO_PLACEHOLDER = "<video>"
+VIDEO_PLACEHOLDER = os.environ.get("VIDEO_PLACEHOLDER", "<video>")

 V_HEAD_WEIGHTS_NAME = "value_head.bin"

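Both placeholder constants now honor an environment override, keeping the old literals as defaults. Below is a minimal sketch of the pattern in isolation (the `[IMG]` value and the `train.py` entry point are hypothetical, for illustration only). Because the lookup runs at module import time, the variable must be exported before the module is first imported:

import os

# Same pattern as the diff: the environment variable wins when it is set,
# otherwise the previous hard-coded literal is used as the fallback.
IMAGE_PLACEHOLDER = os.environ.get("IMAGE_PLACEHOLDER", "<image>")
VIDEO_PLACEHOLDER = os.environ.get("VIDEO_PLACEHOLDER", "<video>")

print(IMAGE_PLACEHOLDER)  # "<image>" unless IMAGE_PLACEHOLDER was exported

For example, IMAGE_PLACEHOLDER='[IMG]' python train.py would swap the image token for a model whose template expects a different tag, without editing the source. The commit's other change relaxes the comparison tolerances in the test helper compare_model: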
@@ -37,9 +37,9 @@ def compare_model(model_a: "torch.nn.Module", model_b: "torch.nn.Module", diff_k
     assert set(state_dict_a.keys()) == set(state_dict_b.keys())
     for name in state_dict_a.keys():
         if any(key in name for key in diff_keys):
-            assert torch.allclose(state_dict_a[name], state_dict_b[name], rtol=1e-3, atol=1e-4) is False
+            assert torch.allclose(state_dict_a[name], state_dict_b[name], rtol=1e-2, atol=1e-3) is False
         else:
-            assert torch.allclose(state_dict_a[name], state_dict_b[name], rtol=1e-3, atol=1e-4) is True
+            assert torch.allclose(state_dict_a[name], state_dict_b[name], rtol=1e-2, atol=1e-3) is True


 def check_lora_model(model: "LoraModel") -> Tuple[Set[str], Set[str]]:
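The new bounds are ten times looser in both the relative and absolute term. torch.allclose(a, b, rtol=..., atol=...) passes when |a - b| <= atol + rtol * |b| holds elementwise, so tensor pairs that differ by roughly half a percent now compare equal where they previously did not. A minimal, self-contained sketch (tensor values chosen for illustration):

import torch

# torch.allclose checks elementwise: |a - b| <= atol + rtol * |b|.
a = torch.tensor([1.000, 2.000])
b = torch.tensor([1.005, 2.010])  # ~0.5% relative difference

print(torch.allclose(a, b, rtol=1e-3, atol=1e-4))  # False: old, tighter bound
print(torch.allclose(a, b, rtol=1e-2, atol=1e-3))  # True: new, looser bound

In the test this cuts both ways: weights whose names match diff_keys must now differ by more than the wider band to be flagged as changed, while all other weights are allowed up to that band of numerical noise before the equality assertion fails.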