use pre-commit

Former-commit-id: 21db8ed2f4
This commit is contained in:
hiyouga
2024-10-29 09:07:46 +00:00
parent 163cf2ba5c
commit 0d8aa6e6ef
86 changed files with 1048 additions and 1064 deletions

View File

@@ -4999,4 +4999,4 @@
"input": "Time waits for no one.",
"output": "No one can stop time from moving forward."
}
]
]

View File

@@ -4999,4 +4999,4 @@
"input": "",
"output": "安第斯山脉位于南美洲，横跨七个国家，包括委内瑞拉、哥伦比亚、厄瓜多尔、秘鲁、玻利维亚、智利和阿根廷。安第斯山脉是世界上最长的山脉之一，全长约7,000千米（4,350英里），其山脉沿着南美洲西海岸蜿蜒延伸，平均海拔约为4,000米（13,000英尺）。在其南部，安第斯山脉宽度达到700千米（430英里），在其北部宽度约为500千米（310英里）。"
}
]
]

View File

@@ -17,9 +17,9 @@ _CITATION = """\
}
"""
_HOMEPAGE = "{}/datasets/BelleGroup/multiturn_chat_0.8M".format(_HF_ENDPOINT)
_HOMEPAGE = f"{_HF_ENDPOINT}/datasets/BelleGroup/multiturn_chat_0.8M"
_LICENSE = "gpl-3.0"
_URL = "{}/datasets/BelleGroup/multiturn_chat_0.8M/resolve/main/multiturn_chat_0.8M.json".format(_HF_ENDPOINT)
_URL = f"{_HF_ENDPOINT}/datasets/BelleGroup/multiturn_chat_0.8M/resolve/main/multiturn_chat_0.8M.json"
class BelleMultiturn(datasets.GeneratorBasedBuilder):
@@ -38,7 +38,7 @@ class BelleMultiturn(datasets.GeneratorBasedBuilder):
return [datasets.SplitGenerator(name=datasets.Split.TRAIN, gen_kwargs={"filepath": file_path})]
def _generate_examples(self, filepath: str):
with open(filepath, "r", encoding="utf-8") as f:
with open(filepath, encoding="utf-8") as f:
for key, row in enumerate(f):
data = json.loads(row)
conversations = []

File diff suppressed because one or more lines are too long

View File

@@ -625,4 +625,4 @@
},
"folder": "python"
}
}
}

View File

@@ -7223,4 +7223,4 @@
"value": "Abstraction is a principle in object-oriented programming that refers to the process of focusing on the essential features of an object or concept without emphasizing the details that are not important for its functionality. It enables programmers to create classes that represent the objects they want to work with, and specifies only the behavior of those objects, leaving out unnecessary details. Abstraction helps to make the code more maintainable, modular, and scalable. It also improves the productivity of developers by reducing the amount of code they need to write."
}
}
]
]

View File

@@ -5055,4 +5055,4 @@
"value": "C. 参与讨论"
}
}
]
]

View File

@@ -9155,4 +9155,4 @@
],
"tools": "[]"
}
]
]

View File

@@ -9019,4 +9019,4 @@
],
"tools": "[]"
}
]
]

View File

@@ -8,9 +8,9 @@ import datasets
_HF_ENDPOINT = os.getenv("HF_ENDPOINT", "https://huggingface.co")
_DESCRIPTION = "Human preference data about helpfulness and harmlessness."
_CITATION = ""
_HOMEPAGE = "{}/datasets/Anthropic/hh-rlhf".format(_HF_ENDPOINT)
_HOMEPAGE = f"{_HF_ENDPOINT}/datasets/Anthropic/hh-rlhf"
_LICENSE = "mit"
_URL = "{}/datasets/Anthropic/hh-rlhf/resolve/main/".format(_HF_ENDPOINT)
_URL = f"{_HF_ENDPOINT}/datasets/Anthropic/hh-rlhf/resolve/main/"
_URLS = {
"train": [
_URL + "harmless-base/train.jsonl.gz",
@@ -53,7 +53,7 @@ class HhRlhfEn(datasets.GeneratorBasedBuilder):
def _generate_examples(self, filepaths: List[str]):
key = 0
for filepath in filepaths:
with open(filepath, "r", encoding="utf-8") as f:
with open(filepath, encoding="utf-8") as f:
for row in f:
data = json.loads(row)
chosen = data["chosen"]

View File

@@ -454,4 +454,4 @@
"input": "",
"output": "抱歉,我不是 OpenAI 开发的 ChatGPT，我是 {{author}} 开发的 {{name}},旨在为用户提供智能化的回答和帮助。"
}
]
]

View File

@@ -5395,4 +5395,4 @@
],
"label": false
}
]
]

View File

@@ -137,4 +137,4 @@
"mllm_demo_data/3.jpg"
]
}
]
]

View File

@@ -44,4 +44,4 @@
"mllm_demo_data/3.mp4"
]
}
]
]

View File

@@ -20,9 +20,9 @@ _CITATION = """\
}
"""
_HOMEPAGE = "{}/datasets/stingning/ultrachat".format(_HF_ENDPOINT)
_HOMEPAGE = f"{_HF_ENDPOINT}/datasets/stingning/ultrachat"
_LICENSE = "cc-by-nc-4.0"
_BASE_DATA_URL = "{}/datasets/stingning/ultrachat/resolve/main/train_{{idx}}.jsonl".format(_HF_ENDPOINT)
_BASE_DATA_URL = f"{_HF_ENDPOINT}/datasets/stingning/ultrachat/resolve/main/train_{{idx}}.jsonl"
class UltraChat(datasets.GeneratorBasedBuilder):
@@ -42,7 +42,7 @@ class UltraChat(datasets.GeneratorBasedBuilder):
def _generate_examples(self, filepaths: List[str]):
for filepath in filepaths:
with open(filepath, "r", encoding="utf-8") as f:
with open(filepath, encoding="utf-8") as f:
for row in f:
try:
data = json.loads(row)

File diff suppressed because one or more lines are too long