Mirror of https://github.com/hiyouga/LLaMA-Factory.git, synced 2025-12-15 19:30:36 +08:00
use pre-commit
Former-commit-id: 7cfede95df22a9ff236788f04159b6b16b8d04bb
@@ -4999,4 +4999,4 @@
     "input": "Time waits for no one.",
     "output": "No one can stop time from moving forward."
   }
-]
+]
@@ -4999,4 +4999,4 @@
     "input": "",
     "output": "安第斯山脉位于南美洲,横跨七个国家,包括委内瑞拉,哥伦比亚,厄瓜多尔,秘鲁,玻利维亚,智利和阿根廷。安第斯山脉是世界上最长的山脉之一,全长约7,000千米(4,350英里),其山脉沿着南美洲西海岸蜿蜒延伸,平均海拔约为4,000米(13,000英尺)。在其南部,安第斯山脉宽度达到700千米(430英里),在其北部宽度约为500千米(310英里)。"
   }
-]
+]
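In the data-file hunks of this commit (the two above and the similar `@@` tails further down), the closing bracket is shown both removed and re-added with identical text. That is what a trailing-newline fix looks like when the `\ No newline at end of file` marker is lost in rendering; the marker itself is not shown here, so this reading is an assumption. A minimal Python sketch of that kind of normalization, under that assumption (the helper name and the throwaway file are illustrative, not from the repo):

import pathlib
import tempfile

# Hypothetical sketch, assuming these hunks are trailing-newline fixes:
# rewrite a file so it ends with exactly one newline, reporting whether it changed.
def ensure_trailing_newline(path: pathlib.Path) -> bool:
    text = path.read_text(encoding="utf-8")
    fixed = text.rstrip("\n") + "\n"
    if fixed != text:
        path.write_text(fixed, encoding="utf-8")
        return True  # file was modified
    return False

# Tiny usage example on a throwaway file that lacks a final newline.
with tempfile.NamedTemporaryFile("w", suffix=".json", delete=False, encoding="utf-8") as tmp:
    tmp.write('[\n  {"input": "", "output": "demo"}\n]')

assert ensure_trailing_newline(pathlib.Path(tmp.name)) is True
assert ensure_trailing_newline(pathlib.Path(tmp.name)) is False  # already normalized
pathlib.Path(tmp.name).unlink()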
@@ -17,9 +17,9 @@ _CITATION = """\
 }
 """
 
-_HOMEPAGE = "{}/datasets/BelleGroup/multiturn_chat_0.8M".format(_HF_ENDPOINT)
+_HOMEPAGE = f"{_HF_ENDPOINT}/datasets/BelleGroup/multiturn_chat_0.8M"
 _LICENSE = "gpl-3.0"
-_URL = "{}/datasets/BelleGroup/multiturn_chat_0.8M/resolve/main/multiturn_chat_0.8M.json".format(_HF_ENDPOINT)
+_URL = f"{_HF_ENDPOINT}/datasets/BelleGroup/multiturn_chat_0.8M/resolve/main/multiturn_chat_0.8M.json"
 
 
 class BelleMultiturn(datasets.GeneratorBasedBuilder):
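The change in this hunk, and in the hh_rlhf_en and ultra_chat hunks below, is a mechanical rewrite of str.format calls into f-strings; the resulting URLs are identical. A small standalone check of that equivalence (the endpoint value is just the default shown in the hh_rlhf_en hunk below; this snippet is not part of the commit):

# Standalone equivalence check, not part of the commit.
_HF_ENDPOINT = "https://huggingface.co"  # default endpoint used by the loader scripts

old_style = "{}/datasets/BelleGroup/multiturn_chat_0.8M".format(_HF_ENDPOINT)
new_style = f"{_HF_ENDPOINT}/datasets/BelleGroup/multiturn_chat_0.8M"
assert old_style == new_style

# The ultra_chat URL keeps a literal "{idx}" placeholder, so the braces are doubled
# in the f-string and filled in later with .format(idx=...).
base = f"{_HF_ENDPOINT}/datasets/stingning/ultrachat/resolve/main/train_{{idx}}.jsonl"
assert base.format(idx=0).endswith("train_0.jsonl")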
@@ -38,7 +38,7 @@ class BelleMultiturn(datasets.GeneratorBasedBuilder):
         return [datasets.SplitGenerator(name=datasets.Split.TRAIN, gen_kwargs={"filepath": file_path})]
 
     def _generate_examples(self, filepath: str):
-        with open(filepath, "r", encoding="utf-8") as f:
+        with open(filepath, encoding="utf-8") as f:
             for key, row in enumerate(f):
                 data = json.loads(row)
                 conversations = []
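The other recurring change, shown here and again in the hh_rlhf_en and ultra_chat generators, drops the explicit "r" argument from open(). Since "r" is open()'s default mode, behavior is unchanged. A short self-contained check (the temp file is illustrative, not from the repo):

import json
import pathlib
import tempfile

# Illustration only: open()'s default mode is "r", so both spellings read the same text.
with tempfile.NamedTemporaryFile("w", suffix=".json", delete=False, encoding="utf-8") as tmp:
    tmp.write(json.dumps({"instruction": "demo", "output": "demo"}) + "\n")

path = tmp.name
with open(path, "r", encoding="utf-8") as f_old, open(path, encoding="utf-8") as f_new:
    assert f_old.read() == f_new.read()

pathlib.Path(path).unlink()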
File diff suppressed because one or more lines are too long
@@ -625,4 +625,4 @@
     },
     "folder": "python"
   }
-}
+}
@@ -5055,4 +5055,4 @@
       "value": "C. 参与讨论"
     }
   }
-]
+]
@@ -9155,4 +9155,4 @@
     ],
     "tools": "[]"
   }
-]
+]
@@ -9019,4 +9019,4 @@
     ],
     "tools": "[]"
   }
-]
+]
@@ -8,9 +8,9 @@ import datasets
 _HF_ENDPOINT = os.getenv("HF_ENDPOINT", "https://huggingface.co")
 _DESCRIPTION = "Human preference data about helpfulness and harmlessness."
 _CITATION = ""
-_HOMEPAGE = "{}/datasets/Anthropic/hh-rlhf".format(_HF_ENDPOINT)
+_HOMEPAGE = f"{_HF_ENDPOINT}/datasets/Anthropic/hh-rlhf"
 _LICENSE = "mit"
-_URL = "{}/datasets/Anthropic/hh-rlhf/resolve/main/".format(_HF_ENDPOINT)
+_URL = f"{_HF_ENDPOINT}/datasets/Anthropic/hh-rlhf/resolve/main/"
 _URLS = {
     "train": [
         _URL + "harmless-base/train.jsonl.gz",
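As the hunk above shows, the loader builds its download URLs from an HF_ENDPOINT environment variable with https://huggingface.co as the fallback, so the same script can pull from a mirror. A minimal sketch of that pattern, reusing only names visible in the hunk:

import os

# Sketch of the endpoint-override pattern visible in the hh_rlhf_en loader.
_HF_ENDPOINT = os.getenv("HF_ENDPOINT", "https://huggingface.co")
_URL = f"{_HF_ENDPOINT}/datasets/Anthropic/hh-rlhf/resolve/main/"

# Setting HF_ENDPOINT to a mirror host redirects the download URL below.
print(_URL + "harmless-base/train.jsonl.gz")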
@@ -53,7 +53,7 @@ class HhRlhfEn(datasets.GeneratorBasedBuilder):
     def _generate_examples(self, filepaths: List[str]):
         key = 0
         for filepath in filepaths:
-            with open(filepath, "r", encoding="utf-8") as f:
+            with open(filepath, encoding="utf-8") as f:
                 for row in f:
                     data = json.loads(row)
                     chosen = data["chosen"]
@@ -454,4 +454,4 @@
     "input": "",
     "output": "抱歉,我不是 OpenAI 开发的 ChatGPT,我是 {{author}} 开发的 {{name}},旨在为用户提供智能化的回答和帮助。"
   }
-]
+]
@@ -5395,4 +5395,4 @@
     ],
     "label": false
   }
-]
+]
@@ -137,4 +137,4 @@
       "mllm_demo_data/3.jpg"
     ]
   }
-]
+]
@@ -44,4 +44,4 @@
       "mllm_demo_data/3.mp4"
     ]
   }
-]
+]
@@ -20,9 +20,9 @@ _CITATION = """\
 }
 """
 
-_HOMEPAGE = "{}/datasets/stingning/ultrachat".format(_HF_ENDPOINT)
+_HOMEPAGE = f"{_HF_ENDPOINT}/datasets/stingning/ultrachat"
 _LICENSE = "cc-by-nc-4.0"
-_BASE_DATA_URL = "{}/datasets/stingning/ultrachat/resolve/main/train_{{idx}}.jsonl".format(_HF_ENDPOINT)
+_BASE_DATA_URL = f"{_HF_ENDPOINT}/datasets/stingning/ultrachat/resolve/main/train_{{idx}}.jsonl"
 
 
 class UltraChat(datasets.GeneratorBasedBuilder):
@@ -42,7 +42,7 @@ class UltraChat(datasets.GeneratorBasedBuilder):
 
     def _generate_examples(self, filepaths: List[str]):
         for filepath in filepaths:
-            with open(filepath, "r", encoding="utf-8") as f:
+            with open(filepath, encoding="utf-8") as f:
                 for row in f:
                     try:
                         data = json.loads(row)
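The UltraChat generator wraps json.loads in a try block, which suggests malformed rows are skipped rather than failing the whole build; the except branch itself is outside the hunk, so the handling shown below is an assumption. A small self-contained sketch of that tolerant JSONL loop:

import io
import json

# Sketch of tolerant JSONL parsing; the except branch is assumed, since the
# hunk only shows the try/json.loads lines.
lines = io.StringIO('{"id": 0, "data": ["hi", "hello"]}\nnot valid json\n')

records = []
for row in lines:
    try:
        records.append(json.loads(row))
    except json.JSONDecodeError:
        continue  # skip rows that are not valid JSON

assert len(records) == 1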
File diff suppressed because one or more lines are too long