use pre-commit

Former-commit-id: 7cfede95df22a9ff236788f04159b6b16b8d04bb
hiyouga
2024-10-29 09:07:46 +00:00
parent 625a884707
commit dbbfb5f5dc
85 changed files with 1047 additions and 1063 deletions
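
Judging from the hunks below, most of the data files change only at their final line (a missing end-of-file newline is the usual culprit when pre-commit is first introduced), while the dataset scripts receive small lint-driven cleanups. As a rough sketch of the newline fix, assuming the standard end-of-file-fixer hook (which is not itself shown in this diff; this is not the hook's actual code):

# Hypothetical sketch only: approximates what an end-of-file fixer does.
from pathlib import Path

def ensure_trailing_newline(path: str) -> bool:
    """Make the file end with exactly one newline; return True if it was modified."""
    file = Path(path)
    original = file.read_bytes()
    if not original:
        return False  # leave empty files alone
    fixed = original.rstrip(b"\n") + b"\n"
    if fixed == original:
        return False
    file.write_bytes(fixed)
    return True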

View File

@@ -4999,4 +4999,4 @@
"input": "Time waits for no one.",
"output": "No one can stop time from moving forward."
}
-]
\ No newline at end of file
+]

View File

@@ -4999,4 +4999,4 @@
"input": "",
"output": "安第斯山脉位于南美洲横跨七个国家包括委内瑞拉哥伦比亚厄瓜多尔秘鲁玻利维亚智利和阿根廷。安第斯山脉是世界上最长的山脉之一全长约7,000千米4,350英里其山脉沿着南美洲西海岸蜿蜒延伸平均海拔约为4,000米13,000英尺。在其南部安第斯山脉宽度达到700千米430英里在其北部宽度约为500千米310英里。"
}
-]
\ No newline at end of file
+]

View File

@@ -17,9 +17,9 @@ _CITATION = """\
}
"""
_HOMEPAGE = "{}/datasets/BelleGroup/multiturn_chat_0.8M".format(_HF_ENDPOINT)
_HOMEPAGE = f"{_HF_ENDPOINT}/datasets/BelleGroup/multiturn_chat_0.8M"
_LICENSE = "gpl-3.0"
_URL = "{}/datasets/BelleGroup/multiturn_chat_0.8M/resolve/main/multiturn_chat_0.8M.json".format(_HF_ENDPOINT)
_URL = f"{_HF_ENDPOINT}/datasets/BelleGroup/multiturn_chat_0.8M/resolve/main/multiturn_chat_0.8M.json"
class BelleMultiturn(datasets.GeneratorBasedBuilder):
@@ -38,7 +38,7 @@ class BelleMultiturn(datasets.GeneratorBasedBuilder):
        return [datasets.SplitGenerator(name=datasets.Split.TRAIN, gen_kwargs={"filepath": file_path})]

    def _generate_examples(self, filepath: str):
-        with open(filepath, "r", encoding="utf-8") as f:
+        with open(filepath, encoding="utf-8") as f:
            for key, row in enumerate(f):
                data = json.loads(row)
                conversations = []
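
Both kinds of edits in this script (repeated in the other dataset scripts below) are behavior-preserving rewrites of the sort pyupgrade-style lint rules propose: str.format() calls become f-strings, and the redundant "r" argument to open() is dropped because text-mode reading is already the default. A quick illustration, with the endpoint hard-coded only for the example (it is normally read from the HF_ENDPOINT environment variable):

# Illustration only; not part of the repository code.
_HF_ENDPOINT = "https://huggingface.co"

old_style = "{}/datasets/BelleGroup/multiturn_chat_0.8M".format(_HF_ENDPOINT)
new_style = f"{_HF_ENDPOINT}/datasets/BelleGroup/multiturn_chat_0.8M"
assert old_style == new_style

# open(path, "r", encoding="utf-8") and open(path, encoding="utf-8") create the
# same text-mode reader, since "r" is open()'s default mode.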

File diff suppressed because one or more lines are too long

View File

@@ -625,4 +625,4 @@
},
"folder": "python"
}
-}
\ No newline at end of file
+}

View File

@@ -5055,4 +5055,4 @@
"value": "C. 参与讨论"
}
}
-]
\ No newline at end of file
+]

View File

@@ -9155,4 +9155,4 @@
],
"tools": "[]"
}
-]
\ No newline at end of file
+]

View File

@@ -9019,4 +9019,4 @@
],
"tools": "[]"
}
-]
\ No newline at end of file
+]

View File

@@ -8,9 +8,9 @@ import datasets
_HF_ENDPOINT = os.getenv("HF_ENDPOINT", "https://huggingface.co")
_DESCRIPTION = "Human preference data about helpfulness and harmlessness."
_CITATION = ""
_HOMEPAGE = "{}/datasets/Anthropic/hh-rlhf".format(_HF_ENDPOINT)
_HOMEPAGE = f"{_HF_ENDPOINT}/datasets/Anthropic/hh-rlhf"
_LICENSE = "mit"
_URL = "{}/datasets/Anthropic/hh-rlhf/resolve/main/".format(_HF_ENDPOINT)
_URL = f"{_HF_ENDPOINT}/datasets/Anthropic/hh-rlhf/resolve/main/"
_URLS = {
"train": [
_URL + "harmless-base/train.jsonl.gz",
@@ -53,7 +53,7 @@ class HhRlhfEn(datasets.GeneratorBasedBuilder):
    def _generate_examples(self, filepaths: List[str]):
        key = 0
        for filepath in filepaths:
-            with open(filepath, "r", encoding="utf-8") as f:
+            with open(filepath, encoding="utf-8") as f:
                for row in f:
                    data = json.loads(row)
                    chosen = data["chosen"]

View File

@@ -454,4 +454,4 @@
"input": "",
"output": "抱歉,我不是 OpenAI 开发的 ChatGPT我是 {{author}} 开发的 {{name}},旨在为用户提供智能化的回答和帮助。"
}
-]
\ No newline at end of file
+]

View File

@@ -5395,4 +5395,4 @@
],
"label": false
}
-]
\ No newline at end of file
+]

View File

@@ -137,4 +137,4 @@
"mllm_demo_data/3.jpg"
]
}
-]
\ No newline at end of file
+]

View File

@@ -44,4 +44,4 @@
"mllm_demo_data/3.mp4"
]
}
-]
\ No newline at end of file
+]

View File

@@ -20,9 +20,9 @@ _CITATION = """\
}
"""
_HOMEPAGE = "{}/datasets/stingning/ultrachat".format(_HF_ENDPOINT)
_HOMEPAGE = f"{_HF_ENDPOINT}/datasets/stingning/ultrachat"
_LICENSE = "cc-by-nc-4.0"
_BASE_DATA_URL = "{}/datasets/stingning/ultrachat/resolve/main/train_{{idx}}.jsonl".format(_HF_ENDPOINT)
_BASE_DATA_URL = f"{_HF_ENDPOINT}/datasets/stingning/ultrachat/resolve/main/train_{{idx}}.jsonl"
class UltraChat(datasets.GeneratorBasedBuilder):
@@ -42,7 +42,7 @@ class UltraChat(datasets.GeneratorBasedBuilder):
    def _generate_examples(self, filepaths: List[str]):
        for filepath in filepaths:
-            with open(filepath, "r", encoding="utf-8") as f:
+            with open(filepath, encoding="utf-8") as f:
                for row in f:
                    try:
                        data = json.loads(row)
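
One detail worth noting in the _BASE_DATA_URL rewrite above: the doubled braces around idx are kept, so the f-string still yields the literal placeholder {idx}, which can be filled in later per shard with str.format(). A minimal check, with the endpoint hard-coded only for the example:

# Illustration only; the real value comes from the HF_ENDPOINT environment variable.
_HF_ENDPOINT = "https://huggingface.co"
_BASE_DATA_URL = f"{_HF_ENDPOINT}/datasets/stingning/ultrachat/resolve/main/train_{{idx}}.jsonl"

assert "{idx}" in _BASE_DATA_URL
assert _BASE_DATA_URL.format(idx=0).endswith("train_0.jsonl")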

File diff suppressed because one or more lines are too long