[misc] support export ollama modelfile (#6899)

* support export ollama modelfile

* update config

* add system prompt and num_ctx parameter

Former-commit-id: 9184a6e0ed
This commit is contained in:
hoshi-hiyouga
2025-02-11 19:52:25 +08:00
committed by GitHub
parent 2e954d8fd2
commit c6be9e242c
14 changed files with 126 additions and 224 deletions

View File

@@ -119,6 +119,22 @@ def test_jinja_template(use_fast: bool):
assert tokenizer.apply_chat_template(MESSAGES) == ref_tokenizer.apply_chat_template(MESSAGES)
def test_ollama_modelfile():
    """Compare the Modelfile text generated for the ``llama3`` template with a fixed reference string."""
    tokenizer = AutoTokenizer.from_pretrained(TINY_LLAMA)
    data_args = DataArguments(template="llama3")
    template = get_template_and_fix_tokenizer(tokenizer, data_args)

    # Reference text, in order: the FROM line, the TEMPLATE body (optional system
    # block followed by the per-message user/assistant rendering), the two stop
    # parameters, and finally the num_ctx parameter.
    expected_modelfile = (
        "FROM .\n\n"
        'TEMPLATE """<|begin_of_text|>'
        "{{ if .System }}<|start_header_id|>system<|end_header_id|>\n\n{{ .System }}<|eot_id|>{{ end }}"
        '{{ range .Messages }}{{ if eq .Role "user" }}<|start_header_id|>user<|end_header_id|>\n\n{{ .Content }}'
        "<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n"
        '{{ else if eq .Role "assistant" }}{{ .Content }}<|eot_id|>{{ end }}{{ end }}"""\n\n'
        'PARAMETER stop "<|eom_id|>"\n'
        'PARAMETER stop "<|eot_id|>"\n'
        "PARAMETER num_ctx 4096\n"
    )

    generated_modelfile = template.get_ollama_modelfile(tokenizer)
    assert generated_modelfile == expected_modelfile
def test_get_stop_token_ids():
tokenizer = AutoTokenizer.from_pretrained(TINY_LLAMA)
template = get_template_and_fix_tokenizer(tokenizer, DataArguments(template="llama3"))