mirror of
https://github.com/hiyouga/LLaMA-Factory.git
synced 2025-12-14 10:56:56 +08:00
166
evaluation/ceval/ceval.py
Normal file
166
evaluation/ceval/ceval.py
Normal file
@@ -0,0 +1,166 @@
|
||||
# Copyright 2020 The HuggingFace Datasets Authors and the current dataset script contributor.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
import os
|
||||
|
||||
import datasets
|
||||
import pandas as pd
|
||||
|
||||
|
||||
# Module-level metadata consumed by ``DatasetInfo`` in ``_info`` and by the
# download step in ``_split_generators``.

# BibTeX entry for the C-Eval paper.
_CITATION = """\
@article{huang2023ceval,
title={C-Eval: A Multi-Level Multi-Discipline Chinese Evaluation Suite for Foundation Models},
author={Huang, Yuzhen and Bai, Yuzhuo and Zhu, Zhihao and Zhang, Junlei and Zhang, Jinghan and Su, Tangjun and Liu, Junteng and Lv, Chuancheng and Zhang, Yikai and Lei, Jiayi and Fu, Yao and Sun, Maosong and He, Junxian},
journal={arXiv preprint arXiv:2305.08322},
year={2023}
}
"""

# Short human-readable summary of the benchmark.
_DESCRIPTION = """\
C-Eval is a comprehensive Chinese evaluation suite for foundation models. It consists of 13948 multi-choice questions spanning 52 diverse disciplines and four difficulty levels.
"""

_HOMEPAGE = "https://cevalbenchmark.com"

_LICENSE = "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License"

# Archive handed to ``dl_manager.download_and_extract``.
# NOTE(review): this is a relative name, presumably resolved next to this
# script (a ``ceval.zip`` ships in the same directory) - confirm.
_URL = "ceval.zip"
|
||||
|
||||
# Names of the C-Eval subjects. Each entry becomes the ``name`` of one builder
# config and is also used to build the per-split CSV file names, so the
# spelling must match the files inside the data archive.
task_list = [
    "computer_network",
    "operating_system",
    "computer_architecture",
    "college_programming",
    "college_physics",
    "college_chemistry",
    "advanced_mathematics",
    "probability_and_statistics",
    "discrete_mathematics",
    "electrical_engineer",
    "metrology_engineer",
    "high_school_mathematics",
    "high_school_physics",
    "high_school_chemistry",
    "high_school_biology",
    "middle_school_mathematics",
    "middle_school_biology",
    "middle_school_physics",
    "middle_school_chemistry",
    "veterinary_medicine",
    "college_economics",
    "business_administration",
    "marxism",
    "mao_zedong_thought",
    "education_science",
    "teacher_qualification",
    "high_school_politics",
    "high_school_geography",
    "middle_school_politics",
    "middle_school_geography",
    "modern_chinese_history",
    "ideological_and_moral_cultivation",
    "logic",
    "law",
    "chinese_language_and_literature",
    "art_studies",
    "professional_tour_guide",
    "legal_professional",
    "high_school_chinese",
    "high_school_history",
    "middle_school_history",
    "civil_servant",
    "sports_science",
    "plant_protection",
    "basic_medicine",
    "clinical_medicine",
    "urban_and_rural_planner",
    "accountant",
    "fire_engineer",
    "environmental_impact_assessment_engineer",
    "tax_accountant",
    "physician",
]
|
||||
|
||||
|
||||
class CevalExamConfig(datasets.BuilderConfig):
    """Builder config for one C-Eval subject, pinned to version 1.0.0."""

    def __init__(self, **kwargs):
        """Forward ``kwargs`` to ``datasets.BuilderConfig`` with a fixed version.

        Args:
            **kwargs: Keyword arguments for ``datasets.BuilderConfig`` (for
                example ``name``). ``version`` must not be passed, because it
                is always supplied here.
        """
        super().__init__(version=datasets.Version("1.0.0"), **kwargs)
|
||||
|
||||
|
||||
class CevalExam(datasets.GeneratorBasedBuilder):
    """Dataset builder for C-Eval with one builder config per ``task_list`` entry."""

    BUILDER_CONFIGS = [CevalExamConfig(name=task_name) for task_name in task_list]

    def _info(self):
        """Return the dataset metadata and the per-example feature schema."""
        features = datasets.Features(
            {
                "id": datasets.Value("int32"),
                "question": datasets.Value("string"),
                "A": datasets.Value("string"),
                "B": datasets.Value("string"),
                "C": datasets.Value("string"),
                "D": datasets.Value("string"),
                "answer": datasets.Value("string"),
                "explanation": datasets.Value("string"),
            }
        )
        return datasets.DatasetInfo(
            description=_DESCRIPTION,
            features=features,
            homepage=_HOMEPAGE,
            license=_LICENSE,
            citation=_CITATION,
        )

    def _split_generators(self, dl_manager):
        """Extract the archive and map its sub-directories onto dataset splits.

        The selected config name picks the subject. The ``test`` and ``val``
        directories feed the TEST and VALIDATION splits, and the ``dev``
        directory is exposed as the TRAIN split.

        Args:
            dl_manager: Download manager used to fetch and extract ``_URL``.

        Returns:
            A list of three ``datasets.SplitGenerator`` objects, each carrying
            the CSV ``filepath`` for ``_generate_examples``.
        """
        data_dir = dl_manager.download_and_extract(_URL)
        task_name = self.config.name
        return [
            datasets.SplitGenerator(
                name=datasets.Split.TEST,
                gen_kwargs={
                    "filepath": os.path.join(data_dir, "test", f"{task_name}_test.csv"),
                },
            ),
            datasets.SplitGenerator(
                name=datasets.Split.VALIDATION,
                gen_kwargs={
                    "filepath": os.path.join(data_dir, "val", f"{task_name}_val.csv"),
                },
            ),
            datasets.SplitGenerator(
                name=datasets.Split.TRAIN,
                gen_kwargs={
                    "filepath": os.path.join(data_dir, "dev", f"{task_name}_dev.csv"),
                },
            ),
        ]

    def _generate_examples(self, filepath):
        """Yield ``(index, example)`` pairs read from one CSV file.

        Args:
            filepath: Path of the UTF-8 encoded CSV file to read.

        Yields:
            Tuples of the zero-based row index and the row as a dict keyed by
            column name.
        """
        df = pd.read_csv(filepath, encoding="utf-8")
        for i, instance in enumerate(df.to_dict(orient="records")):
            # Files lacking these columns (presumably the unlabeled test
            # split) still have to satisfy the schema declared in ``_info``.
            instance.setdefault("answer", "")
            instance.setdefault("explanation", "")
            yield i, instance
|
||||
BIN
evaluation/ceval/ceval.zip
Normal file
BIN
evaluation/ceval/ceval.zip
Normal file
Binary file not shown.
210
evaluation/ceval/mapping.json
Normal file
210
evaluation/ceval/mapping.json
Normal file
@@ -0,0 +1,210 @@
|
||||
{
|
||||
"accountant": {
|
||||
"name": "注册会计师",
|
||||
"category": "Other"
|
||||
},
|
||||
"advanced_mathematics": {
|
||||
"name": "高等数学",
|
||||
"category": "STEM"
|
||||
},
|
||||
"art_studies": {
|
||||
"name": "艺术学",
|
||||
"category": "Humanities"
|
||||
},
|
||||
"basic_medicine": {
|
||||
"name": "基础医学",
|
||||
"category": "Other"
|
||||
},
|
||||
"business_administration": {
|
||||
"name": "工商管理",
|
||||
"category": "Social Sciences"
|
||||
},
|
||||
"chinese_language_and_literature": {
|
||||
"name": "中国语言文学",
|
||||
"category": "Humanities"
|
||||
},
|
||||
"civil_servant": {
|
||||
"name": "公务员",
|
||||
"category": "Other"
|
||||
},
|
||||
"clinical_medicine": {
|
||||
"name": "临床医学",
|
||||
"category": "Other"
|
||||
},
|
||||
"college_chemistry": {
|
||||
"name": "大学化学",
|
||||
"category": "STEM"
|
||||
},
|
||||
"college_economics": {
|
||||
"name": "大学经济学",
|
||||
"category": "Social Sciences"
|
||||
},
|
||||
"college_physics": {
|
||||
"name": "大学物理",
|
||||
"category": "STEM"
|
||||
},
|
||||
"college_programming": {
|
||||
"name": "大学编程",
|
||||
"category": "STEM"
|
||||
},
|
||||
"computer_architecture": {
|
||||
"name": "计算机组成",
|
||||
"category": "STEM"
|
||||
},
|
||||
"computer_network": {
|
||||
"name": "计算机网络",
|
||||
"category": "STEM"
|
||||
},
|
||||
"discrete_mathematics": {
|
||||
"name": "离散数学",
|
||||
"category": "STEM"
|
||||
},
|
||||
"education_science": {
|
||||
"name": "教育学",
|
||||
"category": "Social Sciences"
|
||||
},
|
||||
"electrical_engineer": {
|
||||
"name": "注册电气工程师",
|
||||
"category": "STEM"
|
||||
},
|
||||
"environmental_impact_assessment_engineer": {
|
||||
"name": "环境影响评价工程师",
|
||||
"category": "Other"
|
||||
},
|
||||
"fire_engineer": {
|
||||
"name": "注册消防工程师",
|
||||
"category": "Other"
|
||||
},
|
||||
"high_school_biology": {
|
||||
"name": "高中生物",
|
||||
"category": "STEM"
|
||||
},
|
||||
"high_school_chemistry": {
|
||||
"name": "高中化学",
|
||||
"category": "STEM"
|
||||
},
|
||||
"high_school_chinese": {
|
||||
"name": "高中语文",
|
||||
"category": "Humanities"
|
||||
},
|
||||
"high_school_geography": {
|
||||
"name": "高中地理",
|
||||
"category": "Social Sciences"
|
||||
},
|
||||
"high_school_history": {
|
||||
"name": "高中历史",
|
||||
"category": "Humanities"
|
||||
},
|
||||
"high_school_mathematics": {
|
||||
"name": "高中数学",
|
||||
"category": "STEM"
|
||||
},
|
||||
"high_school_physics": {
|
||||
"name": "高中物理",
|
||||
"category": "STEM"
|
||||
},
|
||||
"high_school_politics": {
|
||||
"name": "高中政治",
|
||||
"category": "Social Sciences"
|
||||
},
|
||||
"ideological_and_moral_cultivation": {
|
||||
"name": "思想道德修养与法律基础",
|
||||
"category": "Humanities"
|
||||
},
|
||||
"law": {
|
||||
"name": "法学",
|
||||
"category": "Humanities"
|
||||
},
|
||||
"legal_professional": {
|
||||
"name": "法律职业资格",
|
||||
"category": "Humanities"
|
||||
},
|
||||
"logic": {
|
||||
"name": "逻辑学",
|
||||
"category": "Humanities"
|
||||
},
|
||||
"mao_zedong_thought": {
|
||||
"name": "毛泽东思想和中国特色社会主义理论体系概论",
|
||||
"category": "Social Sciences"
|
||||
},
|
||||
"marxism": {
|
||||
"name": "马克思主义基本原理",
|
||||
"category": "Social Sciences"
|
||||
},
|
||||
"metrology_engineer": {
|
||||
"name": "注册计量师",
|
||||
"category": "STEM"
|
||||
},
|
||||
"middle_school_biology": {
|
||||
"name": "初中生物",
|
||||
"category": "STEM"
|
||||
},
|
||||
"middle_school_chemistry": {
|
||||
"name": "初中化学",
|
||||
"category": "STEM"
|
||||
},
|
||||
"middle_school_geography": {
|
||||
"name": "初中地理",
|
||||
"category": "Social Sciences"
|
||||
},
|
||||
"middle_school_history": {
|
||||
"name": "初中历史",
|
||||
"category": "Humanities"
|
||||
},
|
||||
"middle_school_mathematics": {
|
||||
"name": "初中数学",
|
||||
"category": "STEM"
|
||||
},
|
||||
"middle_school_physics": {
|
||||
"name": "初中物理",
|
||||
"category": "STEM"
|
||||
},
|
||||
"middle_school_politics": {
|
||||
"name": "初中政治",
|
||||
"category": "Social Sciences"
|
||||
},
|
||||
"modern_chinese_history": {
|
||||
"name": "近代史纲要",
|
||||
"category": "Humanities"
|
||||
},
|
||||
"operating_system": {
|
||||
"name": "操作系统",
|
||||
"category": "STEM"
|
||||
},
|
||||
"physician": {
|
||||
"name": "医师资格",
|
||||
"category": "Other"
|
||||
},
|
||||
"plant_protection": {
|
||||
"name": "植物保护",
|
||||
"category": "Other"
|
||||
},
|
||||
"probability_and_statistics": {
|
||||
"name": "概率统计",
|
||||
"category": "STEM"
|
||||
},
|
||||
"professional_tour_guide": {
|
||||
"name": "导游资格",
|
||||
"category": "Humanities"
|
||||
},
|
||||
"sports_science": {
|
||||
"name": "体育学",
|
||||
"category": "Other"
|
||||
},
|
||||
"tax_accountant": {
|
||||
"name": "税务师",
|
||||
"category": "Other"
|
||||
},
|
||||
"teacher_qualification": {
|
||||
"name": "教师资格",
|
||||
"category": "Social Sciences"
|
||||
},
|
||||
"urban_and_rural_planner": {
|
||||
"name": "注册城乡规划师",
|
||||
"category": "Other"
|
||||
},
|
||||
"veterinary_medicine": {
|
||||
"name": "兽医学",
|
||||
"category": "STEM"
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user