From 044668af1027483af1cb3692a557734fc15573b2 Mon Sep 17 00:00:00 2001 From: BUAADreamer <1428195643@qq.com> Date: Fri, 26 Apr 2024 23:18:58 +0800 Subject: [PATCH] add llava_150k en/zh mllm sft data Former-commit-id: a17787201082951ae39c3c10436be4c16346f16a --- data/dataset_info.json | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/data/dataset_info.json b/data/dataset_info.json index d053be1d..18c4699a 100644 --- a/data/dataset_info.json +++ b/data/dataset_info.json @@ -73,6 +73,36 @@ "assistant_tag": "assistant" } }, + "llava_150k_en": { + "hf_hub_url": "BUAADreamer/llava-en-zh-300k", + "subset": "en", + "formatting": "sharegpt", + "columns": { + "messages": "messages", + "images": "images" + }, + "tags": { + "role_tag": "role", + "content_tag": "content", + "user_tag": "user", + "assistant_tag": "assistant" + } + }, + "llava_150k_zh": { + "hf_hub_url": "BUAADreamer/llava-en-zh-300k", + "subset": "zh", + "formatting": "sharegpt", + "columns": { + "messages": "messages", + "images": "images" + }, + "tags": { + "role_tag": "role", + "content_tag": "content", + "user_tag": "user", + "assistant_tag": "assistant" + } + }, "example": { "script_url": "example_dataset", "columns": {