add orca_dpo_pairs dataset

Former-commit-id: 3271af2afc
This commit is contained in:
hiyouga
2024-03-20 20:09:06 +08:00
parent e8cf2794cd
commit 6646e18c02
5 changed files with 29 additions and 12 deletions

View File

@@ -2,6 +2,7 @@ import os
import json
import datasets
# Base URL of the Hugging Face hub: taken from the HF_ENDPOINT environment
# variable, falling back to "https://huggingface.co" when it is not set.
_HF_ENDPOINT = os.getenv("HF_ENDPOINT", "https://huggingface.co")
# Short human-readable description of the dataset.
_DESCRIPTION = "BELLE multiturn chat dataset."
@@ -15,9 +16,9 @@ _CITATION = """\
}
"""
# NOTE(review): _HOMEPAGE and _URL are each assigned twice below, first with an
# f-string and then with str.format(); both forms build the same string from
# _HF_ENDPOINT. This looks like the before/after pair of a diff hunk — confirm
# which form the real file keeps.
# Dataset page URL under the configured hub endpoint.
_HOMEPAGE = f"{_HF_ENDPOINT}/datasets/BelleGroup/multiturn_chat_0.8M"
_HOMEPAGE = "{}/datasets/BelleGroup/multiturn_chat_0.8M".format(_HF_ENDPOINT)
# License identifier string for the dataset.
_LICENSE = "gpl-3.0"
# URL of the multiturn_chat_0.8M.json file under the configured hub endpoint.
_URL = f"{_HF_ENDPOINT}/datasets/BelleGroup/multiturn_chat_0.8M/resolve/main/multiturn_chat_0.8M.json"
_URL = "{}/datasets/BelleGroup/multiturn_chat_0.8M/resolve/main/multiturn_chat_0.8M.json".format(_HF_ENDPOINT)
class BelleMultiturn(datasets.GeneratorBasedBuilder):