[inference] fix stop token for object detection (#6624)

* fix stop token

* update minicpm data pipeline

* fix npu qlora examples

Former-commit-id: e3e2c8c689
This commit is contained in:
hoshi-hiyouga
2025-01-13 21:34:20 +08:00
committed by GitHub
parent 089c7d5e51
commit d8cba9464f
15 changed files with 101 additions and 45 deletions

View File

@@ -120,6 +120,12 @@ def test_jinja_template(use_fast: bool):
assert tokenizer.apply_chat_template(MESSAGES) == ref_tokenizer.apply_chat_template(MESSAGES)
def test_get_stop_token_ids():
tokenizer = AutoTokenizer.from_pretrained(TINY_LLAMA)
template = get_template_and_fix_tokenizer(tokenizer, DataArguments(template="llama3"))
assert set(template.get_stop_token_ids(tokenizer)) == {128008, 128009}
@pytest.mark.skipif(not HF_TOKEN, reason="Gated model.")
@pytest.mark.parametrize("use_fast", [True, False])
def test_gemma_template(use_fast: bool):