mirror of
https://github.com/hiyouga/LLaMA-Factory.git
synced 2025-11-05 18:32:14 +08:00
Compare commits
No commits in common. "7d60b840ef1c25d9bed2c13d71d0ef79624553c6" and "6f743571b191de3309517cf36238343a497666ce" have entirely different histories.
7d60b840ef
...
6f743571b1
File diff suppressed because one or more lines are too long
163
evaluation/ceval/ceval.py
Normal file
163
evaluation/ceval/ceval.py
Normal file
@ -0,0 +1,163 @@
|
|||||||
|
# Copyright 2025 the LlamaFactory team.
|
||||||
|
# Copyright 2020 The HuggingFace Datasets Authors and the current dataset script contributor.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
import os
|
||||||
|
|
||||||
|
import datasets
|
||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
|
||||||
|
# Module-level metadata for the C-Eval loading script. These constants are
# consumed by Ceval._info() (citation/description/homepage/license) and by
# Ceval._split_generators() (_URL).

# BibTeX entry for the C-Eval paper.
_CITATION = """\
@article{huang2023ceval,
title={C-Eval: A Multi-Level Multi-Discipline Chinese Evaluation Suite for Foundation Models},
author={Huang, Yuzhen and Bai, Yuzhuo and Zhu, Zhihao and others},
journal={arXiv preprint arXiv:2305.08322},
year={2023}
}
"""

# Short human-readable summary of the benchmark.
_DESCRIPTION = """\
C-Eval is a comprehensive Chinese evaluation suite for foundation models.
It consists of 13948 multi-choice questions spanning 52 diverse disciplines and four difficulty levels.
"""

_HOMEPAGE = "https://cevalbenchmark.com"

_LICENSE = "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License"

# Archive passed to dl_manager.download_and_extract(); the relative name
# presumably resolves next to this script -- confirm against the loader.
_URL = "ceval.zip"
|
||||||
|
|
||||||
|
# C-Eval subject identifiers. Each entry becomes one builder config name and
# is interpolated into the per-split CSV file names
# (e.g. "<task>_test.csv"), so the spelling must match the archive contents.
task_list = [
    "computer_network",
    "operating_system",
    "computer_architecture",
    "college_programming",
    "college_physics",
    "college_chemistry",
    "advanced_mathematics",
    "probability_and_statistics",
    "discrete_mathematics",
    "electrical_engineer",
    "metrology_engineer",
    "high_school_mathematics",
    "high_school_physics",
    "high_school_chemistry",
    "high_school_biology",
    "middle_school_mathematics",
    "middle_school_biology",
    "middle_school_physics",
    "middle_school_chemistry",
    "veterinary_medicine",
    "college_economics",
    "business_administration",
    "marxism",
    "mao_zedong_thought",
    "education_science",
    "teacher_qualification",
    "high_school_politics",
    "high_school_geography",
    "middle_school_politics",
    "middle_school_geography",
    "modern_chinese_history",
    "ideological_and_moral_cultivation",
    "logic",
    "law",
    "chinese_language_and_literature",
    "art_studies",
    "professional_tour_guide",
    "legal_professional",
    "high_school_chinese",
    "high_school_history",
    "middle_school_history",
    "civil_servant",
    "sports_science",
    "plant_protection",
    "basic_medicine",
    "clinical_medicine",
    "urban_and_rural_planner",
    "accountant",
    "fire_engineer",
    "environmental_impact_assessment_engineer",
    "tax_accountant",
    "physician",
]
|
||||||
|
|
||||||
|
|
||||||
|
class CevalConfig(datasets.BuilderConfig):
    """Builder configuration for one C-Eval subject."""

    def __init__(self, **kwargs):
        """Create the config, pinning the dataset version to 1.0.0.

        Args:
            **kwargs: Forwarded unchanged to ``datasets.BuilderConfig``
                (the builder below passes ``name``).
        """
        super().__init__(version=datasets.Version("1.0.0"), **kwargs)
|
||||||
|
|
||||||
|
|
||||||
|
class Ceval(datasets.GeneratorBasedBuilder):
    """Dataset builder for C-Eval with one builder config per ``task_list`` entry."""

    BUILDER_CONFIGS = [CevalConfig(name=task_name) for task_name in task_list]

    def _info(self):
        """Return the ``DatasetInfo`` holding the example schema and metadata."""
        features = datasets.Features(
            {
                "id": datasets.Value("int32"),
                "question": datasets.Value("string"),
                "A": datasets.Value("string"),
                "B": datasets.Value("string"),
                "C": datasets.Value("string"),
                "D": datasets.Value("string"),
                "answer": datasets.Value("string"),
                "explanation": datasets.Value("string"),
            }
        )
        return datasets.DatasetInfo(
            description=_DESCRIPTION,
            features=features,
            homepage=_HOMEPAGE,
            license=_LICENSE,
            citation=_CITATION,
        )

    def _split_generators(self, dl_manager):
        """Download/extract the archive and describe the three splits.

        Args:
            dl_manager: Download manager; only ``download_and_extract`` is used.

        Returns:
            A list of ``SplitGenerator`` objects in the order TEST, VALIDATION,
            TRAIN. Each one carries the ``filepath`` of
            ``<folder>/<task>_<folder>.csv`` inside the extracted archive.
        """
        data_dir = dl_manager.download_and_extract(_URL)
        task_name = self.config.name
        # (split, archive folder). Note the TRAIN split is served from "dev".
        split_folders = (
            (datasets.Split.TEST, "test"),
            (datasets.Split.VALIDATION, "val"),
            (datasets.Split.TRAIN, "dev"),
        )
        return [
            datasets.SplitGenerator(
                name=split,
                gen_kwargs={
                    "filepath": os.path.join(data_dir, folder, f"{task_name}_{folder}.csv"),
                },
            )
            for split, folder in split_folders
        ]

    def _generate_examples(self, filepath):
        """Yield ``(index, example)`` pairs, one per row of the CSV at ``filepath``.

        Rows are emitted as dicts keyed by the CSV column names. When the file
        has no ``answer`` or ``explanation`` column, the key is added with an
        empty string so every example has both keys.
        """
        df = pd.read_csv(filepath, encoding="utf-8")
        for i, instance in enumerate(df.to_dict(orient="records")):
            # setdefault only fills keys that are absent; existing values are kept.
            instance.setdefault("answer", "")
            instance.setdefault("explanation", "")
            yield i, instance
|
||||||
210
evaluation/ceval/mapping.json
Normal file
210
evaluation/ceval/mapping.json
Normal file
@ -0,0 +1,210 @@
|
|||||||
|
{
|
||||||
|
"accountant": {
|
||||||
|
"name": "注册会计师",
|
||||||
|
"category": "Other"
|
||||||
|
},
|
||||||
|
"advanced_mathematics": {
|
||||||
|
"name": "高等数学",
|
||||||
|
"category": "STEM"
|
||||||
|
},
|
||||||
|
"art_studies": {
|
||||||
|
"name": "艺术学",
|
||||||
|
"category": "Humanities"
|
||||||
|
},
|
||||||
|
"basic_medicine": {
|
||||||
|
"name": "基础医学",
|
||||||
|
"category": "Other"
|
||||||
|
},
|
||||||
|
"business_administration": {
|
||||||
|
"name": "工商管理",
|
||||||
|
"category": "Social Sciences"
|
||||||
|
},
|
||||||
|
"chinese_language_and_literature": {
|
||||||
|
"name": "中国语言文学",
|
||||||
|
"category": "Humanities"
|
||||||
|
},
|
||||||
|
"civil_servant": {
|
||||||
|
"name": "公务员",
|
||||||
|
"category": "Other"
|
||||||
|
},
|
||||||
|
"clinical_medicine": {
|
||||||
|
"name": "临床医学",
|
||||||
|
"category": "Other"
|
||||||
|
},
|
||||||
|
"college_chemistry": {
|
||||||
|
"name": "大学化学",
|
||||||
|
"category": "STEM"
|
||||||
|
},
|
||||||
|
"college_economics": {
|
||||||
|
"name": "大学经济学",
|
||||||
|
"category": "Social Sciences"
|
||||||
|
},
|
||||||
|
"college_physics": {
|
||||||
|
"name": "大学物理",
|
||||||
|
"category": "STEM"
|
||||||
|
},
|
||||||
|
"college_programming": {
|
||||||
|
"name": "大学编程",
|
||||||
|
"category": "STEM"
|
||||||
|
},
|
||||||
|
"computer_architecture": {
|
||||||
|
"name": "计算机组成",
|
||||||
|
"category": "STEM"
|
||||||
|
},
|
||||||
|
"computer_network": {
|
||||||
|
"name": "计算机网络",
|
||||||
|
"category": "STEM"
|
||||||
|
},
|
||||||
|
"discrete_mathematics": {
|
||||||
|
"name": "离散数学",
|
||||||
|
"category": "STEM"
|
||||||
|
},
|
||||||
|
"education_science": {
|
||||||
|
"name": "教育学",
|
||||||
|
"category": "Social Sciences"
|
||||||
|
},
|
||||||
|
"electrical_engineer": {
|
||||||
|
"name": "注册电气工程师",
|
||||||
|
"category": "STEM"
|
||||||
|
},
|
||||||
|
"environmental_impact_assessment_engineer": {
|
||||||
|
"name": "环境影响评价工程师",
|
||||||
|
"category": "Other"
|
||||||
|
},
|
||||||
|
"fire_engineer": {
|
||||||
|
"name": "注册消防工程师",
|
||||||
|
"category": "Other"
|
||||||
|
},
|
||||||
|
"high_school_biology": {
|
||||||
|
"name": "高中生物",
|
||||||
|
"category": "STEM"
|
||||||
|
},
|
||||||
|
"high_school_chemistry": {
|
||||||
|
"name": "高中化学",
|
||||||
|
"category": "STEM"
|
||||||
|
},
|
||||||
|
"high_school_chinese": {
|
||||||
|
"name": "高中语文",
|
||||||
|
"category": "Humanities"
|
||||||
|
},
|
||||||
|
"high_school_geography": {
|
||||||
|
"name": "高中地理",
|
||||||
|
"category": "Social Sciences"
|
||||||
|
},
|
||||||
|
"high_school_history": {
|
||||||
|
"name": "高中历史",
|
||||||
|
"category": "Humanities"
|
||||||
|
},
|
||||||
|
"high_school_mathematics": {
|
||||||
|
"name": "高中数学",
|
||||||
|
"category": "STEM"
|
||||||
|
},
|
||||||
|
"high_school_physics": {
|
||||||
|
"name": "高中物理",
|
||||||
|
"category": "STEM"
|
||||||
|
},
|
||||||
|
"high_school_politics": {
|
||||||
|
"name": "高中政治",
|
||||||
|
"category": "Social Sciences"
|
||||||
|
},
|
||||||
|
"ideological_and_moral_cultivation": {
|
||||||
|
"name": "思想道德修养与法律基础",
|
||||||
|
"category": "Humanities"
|
||||||
|
},
|
||||||
|
"law": {
|
||||||
|
"name": "法学",
|
||||||
|
"category": "Humanities"
|
||||||
|
},
|
||||||
|
"legal_professional": {
|
||||||
|
"name": "法律职业资格",
|
||||||
|
"category": "Humanities"
|
||||||
|
},
|
||||||
|
"logic": {
|
||||||
|
"name": "逻辑学",
|
||||||
|
"category": "Humanities"
|
||||||
|
},
|
||||||
|
"mao_zedong_thought": {
|
||||||
|
"name": "毛泽东思想和中国特色社会主义理论体系概论",
|
||||||
|
"category": "Social Sciences"
|
||||||
|
},
|
||||||
|
"marxism": {
|
||||||
|
"name": "马克思主义基本原理",
|
||||||
|
"category": "Social Sciences"
|
||||||
|
},
|
||||||
|
"metrology_engineer": {
|
||||||
|
"name": "注册计量师",
|
||||||
|
"category": "STEM"
|
||||||
|
},
|
||||||
|
"middle_school_biology": {
|
||||||
|
"name": "初中生物",
|
||||||
|
"category": "STEM"
|
||||||
|
},
|
||||||
|
"middle_school_chemistry": {
|
||||||
|
"name": "初中化学",
|
||||||
|
"category": "STEM"
|
||||||
|
},
|
||||||
|
"middle_school_geography": {
|
||||||
|
"name": "初中地理",
|
||||||
|
"category": "Social Sciences"
|
||||||
|
},
|
||||||
|
"middle_school_history": {
|
||||||
|
"name": "初中历史",
|
||||||
|
"category": "Humanities"
|
||||||
|
},
|
||||||
|
"middle_school_mathematics": {
|
||||||
|
"name": "初中数学",
|
||||||
|
"category": "STEM"
|
||||||
|
},
|
||||||
|
"middle_school_physics": {
|
||||||
|
"name": "初中物理",
|
||||||
|
"category": "STEM"
|
||||||
|
},
|
||||||
|
"middle_school_politics": {
|
||||||
|
"name": "初中政治",
|
||||||
|
"category": "Social Sciences"
|
||||||
|
},
|
||||||
|
"modern_chinese_history": {
|
||||||
|
"name": "近代史纲要",
|
||||||
|
"category": "Humanities"
|
||||||
|
},
|
||||||
|
"operating_system": {
|
||||||
|
"name": "操作系统",
|
||||||
|
"category": "STEM"
|
||||||
|
},
|
||||||
|
"physician": {
|
||||||
|
"name": "医师资格",
|
||||||
|
"category": "Other"
|
||||||
|
},
|
||||||
|
"plant_protection": {
|
||||||
|
"name": "植物保护",
|
||||||
|
"category": "Other"
|
||||||
|
},
|
||||||
|
"probability_and_statistics": {
|
||||||
|
"name": "概率统计",
|
||||||
|
"category": "STEM"
|
||||||
|
},
|
||||||
|
"professional_tour_guide": {
|
||||||
|
"name": "导游资格",
|
||||||
|
"category": "Humanities"
|
||||||
|
},
|
||||||
|
"sports_science": {
|
||||||
|
"name": "体育学",
|
||||||
|
"category": "Other"
|
||||||
|
},
|
||||||
|
"tax_accountant": {
|
||||||
|
"name": "税务师",
|
||||||
|
"category": "Other"
|
||||||
|
},
|
||||||
|
"teacher_qualification": {
|
||||||
|
"name": "教师资格",
|
||||||
|
"category": "Social Sciences"
|
||||||
|
},
|
||||||
|
"urban_and_rural_planner": {
|
||||||
|
"name": "注册城乡规划师",
|
||||||
|
"category": "Other"
|
||||||
|
},
|
||||||
|
"veterinary_medicine": {
|
||||||
|
"name": "兽医学",
|
||||||
|
"category": "STEM"
|
||||||
|
}
|
||||||
|
}
|
||||||
170
evaluation/cmmlu/cmmlu.py
Normal file
170
evaluation/cmmlu/cmmlu.py
Normal file
@ -0,0 +1,170 @@
|
|||||||
|
# Copyright 2025 the LlamaFactory team.
|
||||||
|
# Copyright 2020 The HuggingFace Datasets Authors and the current dataset script contributor.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
import os
|
||||||
|
|
||||||
|
import datasets
|
||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
|
||||||
|
# Module-level metadata for the CMMLU loading script. These constants are
# consumed by CMMLU._info() (citation/description/homepage/license) and by
# CMMLU._split_generators() (_URL).

# BibTeX entry for the CMMLU paper. The author field is closed with "}" so
# the entry has balanced braces and can be parsed by BibTeX tools.
_CITATION = """\
@article{li2023cmmlu,
title={CMMLU: Measuring massive multitask language understanding in Chinese},
author={Haonan Li and Yixuan Zhang and Fajri Koto and Yifei Yang and others},
journal={arXiv preprint arXiv:2306.09212},
year={2023}
}
"""

# Short human-readable summary of the benchmark.
_DESCRIPTION = """\
CMMLU is a comprehensive Chinese assessment suite specifically designed to evaluate the advanced knowledge
and reasoning abilities of LLMs within the Chinese language and cultural context.
"""

_HOMEPAGE = "https://github.com/haonan-li/CMMLU"

_LICENSE = "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License"

# Archive passed to dl_manager.download_and_extract(); the relative name
# presumably resolves next to this script -- confirm against the loader.
_URL = "cmmlu.zip"
|
||||||
|
|
||||||
|
# CMMLU subject identifiers. Each entry becomes one builder config name and
# is used verbatim as the CSV file stem ("<task>.csv") in the test/ and dev/
# folders, so the spelling must match the archive contents.
task_list = [
    "agronomy",
    "anatomy",
    "ancient_chinese",
    "arts",
    "astronomy",
    "business_ethics",
    "chinese_civil_service_exam",
    "chinese_driving_rule",
    "chinese_food_culture",
    "chinese_foreign_policy",
    "chinese_history",
    "chinese_literature",
    "chinese_teacher_qualification",
    "clinical_knowledge",
    "college_actuarial_science",
    "college_education",
    "college_engineering_hydrology",
    "college_law",
    "college_mathematics",
    "college_medical_statistics",
    "college_medicine",
    "computer_science",
    "computer_security",
    "conceptual_physics",
    "construction_project_management",
    "economics",
    "education",
    "electrical_engineering",
    "elementary_chinese",
    "elementary_commonsense",
    "elementary_information_and_technology",
    "elementary_mathematics",
    "ethnology",
    "food_science",
    "genetics",
    "global_facts",
    "high_school_biology",
    "high_school_chemistry",
    "high_school_geography",
    "high_school_mathematics",
    "high_school_physics",
    "high_school_politics",
    "human_sexuality",
    "international_law",
    "journalism",
    "jurisprudence",
    "legal_and_moral_basis",
    "logical",
    "machine_learning",
    "management",
    "marketing",
    "marxist_theory",
    "modern_chinese",
    "nutrition",
    "philosophy",
    "professional_accounting",
    "professional_law",
    "professional_medicine",
    "professional_psychology",
    "public_relations",
    "security_study",
    "sociology",
    "sports_science",
    "traditional_chinese_medicine",
    "virology",
    "world_history",
    "world_religions",
]
|
||||||
|
|
||||||
|
|
||||||
|
class CMMLUConfig(datasets.BuilderConfig):
    """Builder configuration for one CMMLU subject."""

    def __init__(self, **kwargs):
        """Create the config, pinning the dataset version to 1.0.1.

        Args:
            **kwargs: Forwarded unchanged to ``datasets.BuilderConfig``
                (the builder below passes ``name``).
        """
        super().__init__(version=datasets.Version("1.0.1"), **kwargs)
|
||||||
|
|
||||||
|
|
||||||
|
class CMMLU(datasets.GeneratorBasedBuilder):
    """Dataset builder for CMMLU with one builder config per ``task_list`` entry."""

    BUILDER_CONFIGS = [CMMLUConfig(name=task_name) for task_name in task_list]

    def _info(self):
        """Return the ``DatasetInfo`` holding the example schema and metadata."""
        features = datasets.Features(
            {
                "question": datasets.Value("string"),
                "A": datasets.Value("string"),
                "B": datasets.Value("string"),
                "C": datasets.Value("string"),
                "D": datasets.Value("string"),
                "answer": datasets.Value("string"),
            }
        )
        return datasets.DatasetInfo(
            description=_DESCRIPTION,
            features=features,
            homepage=_HOMEPAGE,
            license=_LICENSE,
            citation=_CITATION,
        )

    def _split_generators(self, dl_manager):
        """Download/extract the archive and describe the two splits.

        Args:
            dl_manager: Download manager; only ``download_and_extract`` is used.

        Returns:
            A list of ``SplitGenerator`` objects in the order TEST, TRAIN. Each
            one carries the ``filepath`` of ``<folder>/<task>.csv`` inside the
            extracted archive.
        """
        data_dir = dl_manager.download_and_extract(_URL)
        task_name = self.config.name
        # (split, archive folder). Note the TRAIN split is served from "dev".
        split_folders = (
            (datasets.Split.TEST, "test"),
            (datasets.Split.TRAIN, "dev"),
        )
        return [
            datasets.SplitGenerator(
                name=split,
                gen_kwargs={
                    # Components are passed separately so os.path.join picks
                    # the separator instead of a hard-coded "/".
                    "filepath": os.path.join(data_dir, folder, f"{task_name}.csv"),
                },
            )
            for split, folder in split_folders
        ]

    def _generate_examples(self, filepath):
        """Yield ``(index, example)`` pairs, one per row of the CSV at ``filepath``.

        The first CSV column is read as the DataFrame index and is therefore
        not part of the emitted dicts. The ``Question`` and ``Answer`` columns
        are re-keyed to lowercase ``question`` and ``answer``; if either column
        is absent the lowercase key is set to an empty string.
        """
        df = pd.read_csv(filepath, header=0, index_col=0, encoding="utf-8")
        for i, instance in enumerate(df.to_dict(orient="records")):
            # Pop both before assigning so the original order of operations
            # (remove capitalised keys, then add lowercase ones) is kept.
            question = instance.pop("Question", "")
            answer = instance.pop("Answer", "")
            instance["question"] = question
            instance["answer"] = answer
            yield i, instance
|
||||||
270
evaluation/cmmlu/mapping.json
Normal file
270
evaluation/cmmlu/mapping.json
Normal file
@ -0,0 +1,270 @@
|
|||||||
|
{
|
||||||
|
"agronomy": {
|
||||||
|
"name": "农学",
|
||||||
|
"category": "Other"
|
||||||
|
},
|
||||||
|
"anatomy": {
|
||||||
|
"name": "解剖学",
|
||||||
|
"category": "STEM"
|
||||||
|
},
|
||||||
|
"ancient_chinese": {
|
||||||
|
"name": "古汉语",
|
||||||
|
"category": "Social Sciences"
|
||||||
|
},
|
||||||
|
"arts": {
|
||||||
|
"name": "艺术学",
|
||||||
|
"category": "Humanities"
|
||||||
|
},
|
||||||
|
"astronomy": {
|
||||||
|
"name": "天文学",
|
||||||
|
"category": "STEM"
|
||||||
|
},
|
||||||
|
"business_ethics": {
|
||||||
|
"name": "商业伦理",
|
||||||
|
"category": "Social Sciences"
|
||||||
|
},
|
||||||
|
"chinese_civil_service_exam": {
|
||||||
|
"name": "中国公务员考试",
|
||||||
|
"category": "Social Sciences"
|
||||||
|
},
|
||||||
|
"chinese_driving_rule": {
|
||||||
|
"name": "中国驾驶规则",
|
||||||
|
"category": "Other"
|
||||||
|
},
|
||||||
|
"chinese_food_culture": {
|
||||||
|
"name": "中国饮食文化",
|
||||||
|
"category": "Social Sciences"
|
||||||
|
},
|
||||||
|
"chinese_foreign_policy": {
|
||||||
|
"name": "中国外交政策",
|
||||||
|
"category": "Social Sciences"
|
||||||
|
},
|
||||||
|
"chinese_history": {
|
||||||
|
"name": "中国历史",
|
||||||
|
"category": "Humanities"
|
||||||
|
},
|
||||||
|
"chinese_literature": {
|
||||||
|
"name": "中国文学",
|
||||||
|
"category": "Humanities"
|
||||||
|
},
|
||||||
|
"chinese_teacher_qualification": {
|
||||||
|
"name": "中国教师资格",
|
||||||
|
"category": "Social Sciences"
|
||||||
|
},
|
||||||
|
"college_actuarial_science": {
|
||||||
|
"name": "大学精算学",
|
||||||
|
"category": "STEM"
|
||||||
|
},
|
||||||
|
"college_education": {
|
||||||
|
"name": "大学教育学",
|
||||||
|
"category": "Social Sciences"
|
||||||
|
},
|
||||||
|
"college_engineering_hydrology": {
|
||||||
|
"name": "大学工程水文学",
|
||||||
|
"category": "STEM"
|
||||||
|
},
|
||||||
|
"college_law": {
|
||||||
|
"name": "大学法律",
|
||||||
|
"category": "Humanities"
|
||||||
|
},
|
||||||
|
"college_mathematics": {
|
||||||
|
"name": "大学数学",
|
||||||
|
"category": "STEM"
|
||||||
|
},
|
||||||
|
"college_medical_statistics": {
|
||||||
|
"name": "大学医学统计",
|
||||||
|
"category": "STEM"
|
||||||
|
},
|
||||||
|
"clinical_knowledge": {
|
||||||
|
"name": "临床知识",
|
||||||
|
"category": "Other"
|
||||||
|
},
|
||||||
|
"college_medicine": {
|
||||||
|
"name": "大学医学",
|
||||||
|
"category": "Other"
|
||||||
|
},
|
||||||
|
"computer_science": {
|
||||||
|
"name": "计算机科学",
|
||||||
|
"category": "STEM"
|
||||||
|
},
|
||||||
|
"computer_security": {
|
||||||
|
"name": "计算机安全",
|
||||||
|
"category": "Other"
|
||||||
|
},
|
||||||
|
"conceptual_physics": {
|
||||||
|
"name": "概念物理学",
|
||||||
|
"category": "STEM"
|
||||||
|
},
|
||||||
|
"construction_project_management": {
|
||||||
|
"name": "建设工程管理",
|
||||||
|
"category": "Other"
|
||||||
|
},
|
||||||
|
"economics": {
|
||||||
|
"name": "经济学",
|
||||||
|
"category": "Social Sciences"
|
||||||
|
},
|
||||||
|
"education": {
|
||||||
|
"name": "教育学",
|
||||||
|
"category": "Social Sciences"
|
||||||
|
},
|
||||||
|
"elementary_chinese": {
|
||||||
|
"name": "小学语文",
|
||||||
|
"category": "Social Sciences"
|
||||||
|
},
|
||||||
|
"elementary_commonsense": {
|
||||||
|
"name": "小学常识",
|
||||||
|
"category": "Other"
|
||||||
|
},
|
||||||
|
"elementary_information_and_technology": {
|
||||||
|
"name": "小学信息技术",
|
||||||
|
"category": "Other"
|
||||||
|
},
|
||||||
|
"electrical_engineering": {
|
||||||
|
"name": "电气工程",
|
||||||
|
"category": "STEM"
|
||||||
|
},
|
||||||
|
"elementary_mathematics": {
|
||||||
|
"name": "初等数学",
|
||||||
|
"category": "STEM"
|
||||||
|
},
|
||||||
|
"ethnology": {
|
||||||
|
"name": "民族学",
|
||||||
|
"category": "Social Sciences"
|
||||||
|
},
|
||||||
|
"food_science": {
|
||||||
|
"name": "食品科学",
|
||||||
|
"category": "Other"
|
||||||
|
},
|
||||||
|
"genetics": {
|
||||||
|
"name": "遗传学",
|
||||||
|
"category": "STEM"
|
||||||
|
},
|
||||||
|
"global_facts": {
|
||||||
|
"name": "全球事实",
|
||||||
|
"category": "Humanities"
|
||||||
|
},
|
||||||
|
"high_school_biology": {
|
||||||
|
"name": "高中生物",
|
||||||
|
"category": "STEM"
|
||||||
|
},
|
||||||
|
"high_school_chemistry": {
|
||||||
|
"name": "高中化学",
|
||||||
|
"category": "STEM"
|
||||||
|
},
|
||||||
|
"high_school_geography": {
|
||||||
|
"name": "高中地理",
|
||||||
|
"category": "Social Sciences"
|
||||||
|
},
|
||||||
|
"high_school_mathematics": {
|
||||||
|
"name": "高中数学",
|
||||||
|
"category": "STEM"
|
||||||
|
},
|
||||||
|
"high_school_physics": {
|
||||||
|
"name": "高中物理学",
|
||||||
|
"category": "STEM"
|
||||||
|
},
|
||||||
|
"high_school_politics": {
|
||||||
|
"name": "高中政治",
|
||||||
|
"category": "Social Sciences"
|
||||||
|
},
|
||||||
|
"human_sexuality": {
|
||||||
|
"name": "人类性行为",
|
||||||
|
"category": "Other"
|
||||||
|
},
|
||||||
|
"international_law": {
|
||||||
|
"name": "国际法学",
|
||||||
|
"category": "Humanities"
|
||||||
|
},
|
||||||
|
"journalism": {
|
||||||
|
"name": "新闻学",
|
||||||
|
"category": "Social Sciences"
|
||||||
|
},
|
||||||
|
"jurisprudence": {
|
||||||
|
"name": "法理学",
|
||||||
|
"category": "Humanities"
|
||||||
|
},
|
||||||
|
"legal_and_moral_basis": {
|
||||||
|
"name": "法律与道德基础",
|
||||||
|
"category": "Other"
|
||||||
|
},
|
||||||
|
"logical": {
|
||||||
|
"name": "逻辑学",
|
||||||
|
"category": "Humanities"
|
||||||
|
},
|
||||||
|
"machine_learning": {
|
||||||
|
"name": "机器学习",
|
||||||
|
"category": "STEM"
|
||||||
|
},
|
||||||
|
"management": {
|
||||||
|
"name": "管理学",
|
||||||
|
"category": "Social Sciences"
|
||||||
|
},
|
||||||
|
"marketing": {
|
||||||
|
"name": "市场营销",
|
||||||
|
"category": "Social Sciences"
|
||||||
|
},
|
||||||
|
"marxist_theory": {
|
||||||
|
"name": "马克思主义理论",
|
||||||
|
"category": "Humanities"
|
||||||
|
},
|
||||||
|
"modern_chinese": {
|
||||||
|
"name": "现代汉语",
|
||||||
|
"category": "Social Sciences"
|
||||||
|
},
|
||||||
|
"nutrition": {
|
||||||
|
"name": "营养学",
|
||||||
|
"category": "Other"
|
||||||
|
},
|
||||||
|
"philosophy": {
|
||||||
|
"name": "哲学",
|
||||||
|
"category": "Humanities"
|
||||||
|
},
|
||||||
|
"professional_accounting": {
|
||||||
|
"name": "专业会计",
|
||||||
|
"category": "Social Sciences"
|
||||||
|
},
|
||||||
|
"professional_law": {
|
||||||
|
"name": "专业法学",
|
||||||
|
"category": "Humanities"
|
||||||
|
},
|
||||||
|
"professional_medicine": {
|
||||||
|
"name": "专业医学",
|
||||||
|
"category": "Other"
|
||||||
|
},
|
||||||
|
"professional_psychology": {
|
||||||
|
"name": "专业心理学",
|
||||||
|
"category": "Social Sciences"
|
||||||
|
},
|
||||||
|
"public_relations": {
|
||||||
|
"name": "公共关系",
|
||||||
|
"category": "Social Sciences"
|
||||||
|
},
|
||||||
|
"security_study": {
|
||||||
|
"name": "安全研究",
|
||||||
|
"category": "Social Sciences"
|
||||||
|
},
|
||||||
|
"sociology": {
|
||||||
|
"name": "社会学",
|
||||||
|
"category": "Social Sciences"
|
||||||
|
},
|
||||||
|
"sports_science": {
|
||||||
|
"name": "体育学",
|
||||||
|
"category": "Other"
|
||||||
|
},
|
||||||
|
"traditional_chinese_medicine": {
|
||||||
|
"name": "中医中药",
|
||||||
|
"category": "Other"
|
||||||
|
},
|
||||||
|
"virology": {
|
||||||
|
"name": "病毒学",
|
||||||
|
"category": "STEM"
|
||||||
|
},
|
||||||
|
"world_history": {
|
||||||
|
"name": "世界历史",
|
||||||
|
"category": "Humanities"
|
||||||
|
},
|
||||||
|
"world_religions": {
|
||||||
|
"name": "世界宗教",
|
||||||
|
"category": "Humanities"
|
||||||
|
}
|
||||||
|
}
|
||||||
230
evaluation/mmlu/mapping.json
Normal file
230
evaluation/mmlu/mapping.json
Normal file
@ -0,0 +1,230 @@
|
|||||||
|
{
|
||||||
|
"abstract_algebra": {
|
||||||
|
"name": "abstract algebra",
|
||||||
|
"category": "STEM"
|
||||||
|
},
|
||||||
|
"anatomy": {
|
||||||
|
"name": "anatomy",
|
||||||
|
"category": "Other"
|
||||||
|
},
|
||||||
|
"astronomy": {
|
||||||
|
"name": "astronomy",
|
||||||
|
"category": "STEM"
|
||||||
|
},
|
||||||
|
"business_ethics": {
|
||||||
|
"name": "business ethics",
|
||||||
|
"category": "Other"
|
||||||
|
},
|
||||||
|
"clinical_knowledge": {
|
||||||
|
"name": "clinical knowledge",
|
||||||
|
"category": "Other"
|
||||||
|
},
|
||||||
|
"college_biology": {
|
||||||
|
"name": "college biology",
|
||||||
|
"category": "STEM"
|
||||||
|
},
|
||||||
|
"college_chemistry": {
|
||||||
|
"name": "college chemistry",
|
||||||
|
"category": "STEM"
|
||||||
|
},
|
||||||
|
"college_computer_science": {
|
||||||
|
"name": "college computer science",
|
||||||
|
"category": "STEM"
|
||||||
|
},
|
||||||
|
"college_mathematics": {
|
||||||
|
"name": "college mathematics",
|
||||||
|
"category": "STEM"
|
||||||
|
},
|
||||||
|
"college_medicine": {
|
||||||
|
"name": "college medicine",
|
||||||
|
"category": "Other"
|
||||||
|
},
|
||||||
|
"college_physics": {
|
||||||
|
"name": "college physics",
|
||||||
|
"category": "STEM"
|
||||||
|
},
|
||||||
|
"computer_security": {
|
||||||
|
"name": "computer security",
|
||||||
|
"category": "STEM"
|
||||||
|
},
|
||||||
|
"conceptual_physics": {
|
||||||
|
"name": "conceptual physics",
|
||||||
|
"category": "STEM"
|
||||||
|
},
|
||||||
|
"econometrics": {
|
||||||
|
"name": "econometrics",
|
||||||
|
"category": "Social Sciences"
|
||||||
|
},
|
||||||
|
"electrical_engineering": {
|
||||||
|
"name": "electrical engineering",
|
||||||
|
"category": "STEM"
|
||||||
|
},
|
||||||
|
"elementary_mathematics": {
|
||||||
|
"name": "elementary mathematics",
|
||||||
|
"category": "STEM"
|
||||||
|
},
|
||||||
|
"formal_logic": {
|
||||||
|
"name": "formal logic",
|
||||||
|
"category": "Humanities"
|
||||||
|
},
|
||||||
|
"global_facts": {
|
||||||
|
"name": "global facts",
|
||||||
|
"category": "Other"
|
||||||
|
},
|
||||||
|
"high_school_biology": {
|
||||||
|
"name": "high school biology",
|
||||||
|
"category": "STEM"
|
||||||
|
},
|
||||||
|
"high_school_chemistry": {
|
||||||
|
"name": "high school chemistry",
|
||||||
|
"category": "STEM"
|
||||||
|
},
|
||||||
|
"high_school_computer_science": {
|
||||||
|
"name": "high school computer science",
|
||||||
|
"category": "STEM"
|
||||||
|
},
|
||||||
|
"high_school_european_history": {
|
||||||
|
"name": "high school european history",
|
||||||
|
"category": "Humanities"
|
||||||
|
},
|
||||||
|
"high_school_geography": {
|
||||||
|
"name": "high school geography",
|
||||||
|
"category": "Social Sciences"
|
||||||
|
},
|
||||||
|
"high_school_government_and_politics": {
|
||||||
|
"name": "high school government and politics",
|
||||||
|
"category": "Social Sciences"
|
||||||
|
},
|
||||||
|
"high_school_macroeconomics": {
|
||||||
|
"name": "high school macroeconomics",
|
||||||
|
"category": "Social Sciences"
|
||||||
|
},
|
||||||
|
"high_school_mathematics": {
|
||||||
|
"name": "high school mathematics",
|
||||||
|
"category": "STEM"
|
||||||
|
},
|
||||||
|
"high_school_microeconomics": {
|
||||||
|
"name": "high school microeconomics",
|
||||||
|
"category": "Social Sciences"
|
||||||
|
},
|
||||||
|
"high_school_physics": {
|
||||||
|
"name": "high school physics",
|
||||||
|
"category": "STEM"
|
||||||
|
},
|
||||||
|
"high_school_psychology": {
|
||||||
|
"name": "high school psychology",
|
||||||
|
"category": "Social Sciences"
|
||||||
|
},
|
||||||
|
"high_school_statistics": {
|
||||||
|
"name": "high school statistics",
|
||||||
|
"category": "STEM"
|
||||||
|
},
|
||||||
|
"high_school_us_history": {
|
||||||
|
"name": "high school us history",
|
||||||
|
"category": "Humanities"
|
||||||
|
},
|
||||||
|
"high_school_world_history": {
|
||||||
|
"name": "high school world history",
|
||||||
|
"category": "Humanities"
|
||||||
|
},
|
||||||
|
"human_aging": {
|
||||||
|
"name": "human aging",
|
||||||
|
"category": "Other"
|
||||||
|
},
|
||||||
|
"human_sexuality": {
|
||||||
|
"name": "human sexuality",
|
||||||
|
"category": "Social Sciences"
|
||||||
|
},
|
||||||
|
"international_law": {
|
||||||
|
"name": "international law",
|
||||||
|
"category": "Humanities"
|
||||||
|
},
|
||||||
|
"jurisprudence": {
|
||||||
|
"name": "jurisprudence",
|
||||||
|
"category": "Humanities"
|
||||||
|
},
|
||||||
|
"logical_fallacies": {
|
||||||
|
"name": "logical fallacies",
|
||||||
|
"category": "Humanities"
|
||||||
|
},
|
||||||
|
"machine_learning": {
|
||||||
|
"name": "machine learning",
|
||||||
|
"category": "STEM"
|
||||||
|
},
|
||||||
|
"management": {
|
||||||
|
"name": "management",
|
||||||
|
"category": "Other"
|
||||||
|
},
|
||||||
|
"marketing": {
|
||||||
|
"name": "marketing",
|
||||||
|
"category": "Other"
|
||||||
|
},
|
||||||
|
"medical_genetics": {
|
||||||
|
"name": "medical genetics",
|
||||||
|
"category": "Other"
|
||||||
|
},
|
||||||
|
"miscellaneous": {
|
||||||
|
"name": "miscellaneous",
|
||||||
|
"category": "Other"
|
||||||
|
},
|
||||||
|
"moral_disputes": {
|
||||||
|
"name": "moral disputes",
|
||||||
|
"category": "Humanities"
|
||||||
|
},
|
||||||
|
"moral_scenarios": {
|
||||||
|
"name": "moral scenarios",
|
||||||
|
"category": "Humanities"
|
||||||
|
},
|
||||||
|
"nutrition": {
|
||||||
|
"name": "nutrition",
|
||||||
|
"category": "Other"
|
||||||
|
},
|
||||||
|
"philosophy": {
|
||||||
|
"name": "philosophy",
|
||||||
|
"category": "Humanities"
|
||||||
|
},
|
||||||
|
"prehistory": {
|
||||||
|
"name": "prehistory",
|
||||||
|
"category": "Humanities"
|
||||||
|
},
|
||||||
|
"professional_accounting": {
|
||||||
|
"name": "professional accounting",
|
||||||
|
"category": "Other"
|
||||||
|
},
|
||||||
|
"professional_law": {
|
||||||
|
"name": "professional law",
|
||||||
|
"category": "Humanities"
|
||||||
|
},
|
||||||
|
"professional_medicine": {
|
||||||
|
"name": "professional medicine",
|
||||||
|
"category": "Other"
|
||||||
|
},
|
||||||
|
"professional_psychology": {
|
||||||
|
"name": "professional psychology",
|
||||||
|
"category": "Social Sciences"
|
||||||
|
},
|
||||||
|
"public_relations": {
|
||||||
|
"name": "public relations",
|
||||||
|
"category": "Social Sciences"
|
||||||
|
},
|
||||||
|
"security_studies": {
|
||||||
|
"name": "security studies",
|
||||||
|
"category": "Social Sciences"
|
||||||
|
},
|
||||||
|
"sociology": {
|
||||||
|
"name": "sociology",
|
||||||
|
"category": "Social Sciences"
|
||||||
|
},
|
||||||
|
"us_foreign_policy": {
|
||||||
|
"name": "us foreign policy",
|
||||||
|
"category": "Social Sciences"
|
||||||
|
},
|
||||||
|
"virology": {
|
||||||
|
"name": "virology",
|
||||||
|
"category": "Other"
|
||||||
|
},
|
||||||
|
"world_religions": {
|
||||||
|
"name": "world religions",
|
||||||
|
"category": "Humanities"
|
||||||
|
}
|
||||||
|
}
|
||||||
163
evaluation/mmlu/mmlu.py
Normal file
163
evaluation/mmlu/mmlu.py
Normal file
@ -0,0 +1,163 @@
|
|||||||
|
# Copyright 2025 the LlamaFactory team.
|
||||||
|
# Copyright 2020 The HuggingFace Datasets Authors and the current dataset script contributor.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
import os
|
||||||
|
|
||||||
|
import datasets
|
||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
|
||||||
|
_CITATION = """\
|
||||||
|
@article{hendryckstest2021,
|
||||||
|
title={Measuring Massive Multitask Language Understanding},
|
||||||
|
author={Dan Hendrycks and Collin Burns and others},
|
||||||
|
journal={Proceedings of the International Conference on Learning Representations (ICLR)},
|
||||||
|
year={2021}
|
||||||
|
}
|
||||||
|
"""
|
||||||
|
|
||||||
|
_DESCRIPTION = """\
|
||||||
|
Measuring Massive Multitask Language Understanding by Dan Hendrycks, Collin Burns, Steven Basart,
|
||||||
|
Andy Zou, Mantas Mazeika, Dawn Song, and Jacob Steinhardt (ICLR 2021).
|
||||||
|
"""
|
||||||
|
|
||||||
|
_HOMEPAGE = "https://github.com/hendrycks/test"
|
||||||
|
|
||||||
|
_LICENSE = "MIT"
|
||||||
|
|
||||||
|
_URL = "mmlu.zip"
|
||||||
|
|
||||||
|
task_list = [
|
||||||
|
"high_school_european_history",
|
||||||
|
"business_ethics",
|
||||||
|
"clinical_knowledge",
|
||||||
|
"medical_genetics",
|
||||||
|
"high_school_us_history",
|
||||||
|
"high_school_physics",
|
||||||
|
"high_school_world_history",
|
||||||
|
"virology",
|
||||||
|
"high_school_microeconomics",
|
||||||
|
"econometrics",
|
||||||
|
"college_computer_science",
|
||||||
|
"high_school_biology",
|
||||||
|
"abstract_algebra",
|
||||||
|
"professional_accounting",
|
||||||
|
"philosophy",
|
||||||
|
"professional_medicine",
|
||||||
|
"nutrition",
|
||||||
|
"global_facts",
|
||||||
|
"machine_learning",
|
||||||
|
"security_studies",
|
||||||
|
"public_relations",
|
||||||
|
"professional_psychology",
|
||||||
|
"prehistory",
|
||||||
|
"anatomy",
|
||||||
|
"human_sexuality",
|
||||||
|
"college_medicine",
|
||||||
|
"high_school_government_and_politics",
|
||||||
|
"college_chemistry",
|
||||||
|
"logical_fallacies",
|
||||||
|
"high_school_geography",
|
||||||
|
"elementary_mathematics",
|
||||||
|
"human_aging",
|
||||||
|
"college_mathematics",
|
||||||
|
"high_school_psychology",
|
||||||
|
"formal_logic",
|
||||||
|
"high_school_statistics",
|
||||||
|
"international_law",
|
||||||
|
"high_school_mathematics",
|
||||||
|
"high_school_computer_science",
|
||||||
|
"conceptual_physics",
|
||||||
|
"miscellaneous",
|
||||||
|
"high_school_chemistry",
|
||||||
|
"marketing",
|
||||||
|
"professional_law",
|
||||||
|
"management",
|
||||||
|
"college_physics",
|
||||||
|
"jurisprudence",
|
||||||
|
"world_religions",
|
||||||
|
"sociology",
|
||||||
|
"us_foreign_policy",
|
||||||
|
"high_school_macroeconomics",
|
||||||
|
"computer_security",
|
||||||
|
"moral_scenarios",
|
||||||
|
"moral_disputes",
|
||||||
|
"electrical_engineering",
|
||||||
|
"astronomy",
|
||||||
|
"college_biology",
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
class MMLUConfig(datasets.BuilderConfig):
|
||||||
|
def __init__(self, **kwargs):
|
||||||
|
super().__init__(version=datasets.Version("1.0.0"), **kwargs)
|
||||||
|
|
||||||
|
|
||||||
|
class MMLU(datasets.GeneratorBasedBuilder):
|
||||||
|
BUILDER_CONFIGS = [
|
||||||
|
MMLUConfig(
|
||||||
|
name=task_name,
|
||||||
|
)
|
||||||
|
for task_name in task_list
|
||||||
|
]
|
||||||
|
|
||||||
|
def _info(self):
|
||||||
|
features = datasets.Features(
|
||||||
|
{
|
||||||
|
"question": datasets.Value("string"),
|
||||||
|
"A": datasets.Value("string"),
|
||||||
|
"B": datasets.Value("string"),
|
||||||
|
"C": datasets.Value("string"),
|
||||||
|
"D": datasets.Value("string"),
|
||||||
|
"answer": datasets.Value("string"),
|
||||||
|
}
|
||||||
|
)
|
||||||
|
return datasets.DatasetInfo(
|
||||||
|
description=_DESCRIPTION,
|
||||||
|
features=features,
|
||||||
|
homepage=_HOMEPAGE,
|
||||||
|
license=_LICENSE,
|
||||||
|
citation=_CITATION,
|
||||||
|
)
|
||||||
|
|
||||||
|
def _split_generators(self, dl_manager):
|
||||||
|
data_dir = dl_manager.download_and_extract(_URL)
|
||||||
|
task_name = self.config.name
|
||||||
|
return [
|
||||||
|
datasets.SplitGenerator(
|
||||||
|
name=datasets.Split.TEST,
|
||||||
|
gen_kwargs={
|
||||||
|
"filepath": os.path.join(data_dir, "data", "test", f"{task_name}_test.csv"),
|
||||||
|
},
|
||||||
|
),
|
||||||
|
datasets.SplitGenerator(
|
||||||
|
name=datasets.Split.VALIDATION,
|
||||||
|
gen_kwargs={
|
||||||
|
"filepath": os.path.join(data_dir, "data", "val", f"{task_name}_val.csv"),
|
||||||
|
},
|
||||||
|
),
|
||||||
|
datasets.SplitGenerator(
|
||||||
|
name=datasets.Split.TRAIN,
|
||||||
|
gen_kwargs={
|
||||||
|
"filepath": os.path.join(data_dir, "data", "dev", f"{task_name}_dev.csv"),
|
||||||
|
},
|
||||||
|
),
|
||||||
|
]
|
||||||
|
|
||||||
|
def _generate_examples(self, filepath):
|
||||||
|
df = pd.read_csv(filepath, header=None)
|
||||||
|
df.columns = ["question", "A", "B", "C", "D", "answer"]
|
||||||
|
|
||||||
|
yield from enumerate(df.to_dict(orient="records"))
|
||||||
@ -25,27 +25,64 @@ USAGE = (
|
|||||||
+ "| Usage: |\n"
|
+ "| Usage: |\n"
|
||||||
+ "| llamafactory-cli api -h: launch an OpenAI-style API server |\n"
|
+ "| llamafactory-cli api -h: launch an OpenAI-style API server |\n"
|
||||||
+ "| llamafactory-cli chat -h: launch a chat interface in CLI |\n"
|
+ "| llamafactory-cli chat -h: launch a chat interface in CLI |\n"
|
||||||
|
+ "| llamafactory-cli eval -h: evaluate models |\n"
|
||||||
+ "| llamafactory-cli export -h: merge LoRA adapters and export model |\n"
|
+ "| llamafactory-cli export -h: merge LoRA adapters and export model |\n"
|
||||||
+ "| llamafactory-cli train -h: train models |\n"
|
+ "| llamafactory-cli train -h: train models |\n"
|
||||||
+ "| llamafactory-cli webchat -h: launch a chat interface in Web UI |\n"
|
+ "| llamafactory-cli webchat -h: launch a chat interface in Web UI |\n"
|
||||||
+ "| llamafactory-cli webui: launch LlamaBoard |\n"
|
+ "| llamafactory-cli webui: launch LlamaBoard |\n"
|
||||||
+ "| llamafactory-cli env: show environment info |\n"
|
|
||||||
+ "| llamafactory-cli version: show version info |\n"
|
+ "| llamafactory-cli version: show version info |\n"
|
||||||
+ "| Hint: You can use `lmf` as a shortcut for `llamafactory-cli`. |\n"
|
|
||||||
+ "-" * 70
|
+ "-" * 70
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _run_api():
|
||||||
|
from .api.app import run_api
|
||||||
|
|
||||||
|
return run_api()
|
||||||
|
|
||||||
|
|
||||||
|
def _run_chat():
|
||||||
|
from .chat.chat_model import run_chat
|
||||||
|
|
||||||
|
return run_chat()
|
||||||
|
|
||||||
|
|
||||||
|
def _run_eval():
|
||||||
|
from .eval.evaluator import run_eval
|
||||||
|
|
||||||
|
return run_eval()
|
||||||
|
|
||||||
|
|
||||||
|
def _export_model():
|
||||||
|
from .train.tuner import export_model
|
||||||
|
|
||||||
|
return export_model()
|
||||||
|
|
||||||
|
|
||||||
|
def _run_exp():
|
||||||
|
from .train.tuner import run_exp
|
||||||
|
|
||||||
|
return run_exp()
|
||||||
|
|
||||||
|
|
||||||
|
def _run_web_demo():
|
||||||
|
from .webui.interface import run_web_demo
|
||||||
|
|
||||||
|
return run_web_demo()
|
||||||
|
|
||||||
|
|
||||||
|
def _run_web_ui():
|
||||||
|
from .webui.interface import run_web_ui
|
||||||
|
|
||||||
|
return run_web_ui()
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
|
from . import launcher
|
||||||
from .extras import logging
|
from .extras import logging
|
||||||
from .extras.env import VERSION, print_env
|
from .extras.env import VERSION, print_env
|
||||||
from .extras.misc import find_available_port, get_device_count, is_env_enabled, use_ray
|
from .extras.misc import find_available_port, get_device_count, is_env_enabled, use_ray
|
||||||
|
|
||||||
if is_env_enabled("USE_V1"):
|
|
||||||
from .v1 import launcher
|
|
||||||
else:
|
|
||||||
from . import launcher
|
|
||||||
|
|
||||||
logger = logging.get_logger(__name__)
|
logger = logging.get_logger(__name__)
|
||||||
|
|
||||||
WELCOME = (
|
WELCOME = (
|
||||||
@ -61,14 +98,14 @@ def main():
|
|||||||
)
|
)
|
||||||
|
|
||||||
COMMAND_MAP = {
|
COMMAND_MAP = {
|
||||||
"api": launcher.run_api,
|
"api": _run_api,
|
||||||
"chat": launcher.run_chat,
|
"chat": _run_chat,
|
||||||
"env": print_env,
|
"env": print_env,
|
||||||
"eval": launcher.run_eval,
|
"eval": _run_eval,
|
||||||
"export": launcher.export_model,
|
"export": _export_model,
|
||||||
"train": launcher.run_exp,
|
"train": _run_exp,
|
||||||
"webchat": launcher.run_web_demo,
|
"webchat": _run_web_demo,
|
||||||
"webui": launcher.run_web_ui,
|
"webui": _run_web_ui,
|
||||||
"version": partial(print, WELCOME),
|
"version": partial(print, WELCOME),
|
||||||
"help": partial(print, USAGE),
|
"help": partial(print, USAGE),
|
||||||
}
|
}
|
||||||
|
|||||||
@ -15,22 +15,22 @@
|
|||||||
# See the License for the specific language governing permissions and
|
# See the License for the specific language governing permissions and
|
||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
|
|
||||||
|
import os
|
||||||
|
import platform
|
||||||
|
|
||||||
|
import accelerate
|
||||||
|
import datasets
|
||||||
|
import peft
|
||||||
|
import torch
|
||||||
|
import transformers
|
||||||
|
import trl
|
||||||
|
from transformers.utils import is_torch_cuda_available, is_torch_npu_available
|
||||||
|
|
||||||
|
|
||||||
VERSION = "0.9.4.dev0"
|
VERSION = "0.9.4.dev0"
|
||||||
|
|
||||||
|
|
||||||
def print_env() -> None:
|
def print_env() -> None:
|
||||||
import os
|
|
||||||
import platform
|
|
||||||
|
|
||||||
import accelerate
|
|
||||||
import datasets
|
|
||||||
import peft
|
|
||||||
import torch
|
|
||||||
import transformers
|
|
||||||
import trl
|
|
||||||
from transformers.utils import is_torch_cuda_available, is_torch_npu_available
|
|
||||||
|
|
||||||
info = {
|
info = {
|
||||||
"`llamafactory` version": VERSION,
|
"`llamafactory` version": VERSION,
|
||||||
"Platform": platform.platform(),
|
"Platform": platform.platform(),
|
||||||
|
|||||||
@ -12,46 +12,12 @@
|
|||||||
# See the License for the specific language governing permissions and
|
# See the License for the specific language governing permissions and
|
||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
|
|
||||||
|
from llamafactory.train.tuner import run_exp # use absolute import
|
||||||
def run_api():
|
|
||||||
from llamafactory.api.app import run_api as _run_api
|
|
||||||
|
|
||||||
_run_api()
|
|
||||||
|
|
||||||
|
|
||||||
def run_chat():
|
def launch():
|
||||||
from llamafactory.chat.chat_model import run_chat as _run_chat
|
run_exp()
|
||||||
|
|
||||||
return _run_chat()
|
|
||||||
|
|
||||||
|
|
||||||
def run_eval():
|
|
||||||
raise NotImplementedError("Evaluation will be deprecated in the future.")
|
|
||||||
|
|
||||||
|
|
||||||
def export_model():
|
|
||||||
from llamafactory.train.tuner import export_model as _export_model
|
|
||||||
|
|
||||||
return _export_model()
|
|
||||||
|
|
||||||
|
|
||||||
def run_exp():
|
|
||||||
from llamafactory.train.tuner import run_exp as _run_exp
|
|
||||||
|
|
||||||
return _run_exp() # use absolute import
|
|
||||||
|
|
||||||
|
|
||||||
def run_web_demo():
|
|
||||||
from llamafactory.webui.interface import run_web_demo as _run_web_demo
|
|
||||||
|
|
||||||
return _run_web_demo()
|
|
||||||
|
|
||||||
|
|
||||||
def run_web_ui():
|
|
||||||
from llamafactory.webui.interface import run_web_ui as _run_web_ui
|
|
||||||
|
|
||||||
return _run_web_ui()
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
run_exp()
|
launch()
|
||||||
|
|||||||
@ -1,33 +0,0 @@
|
|||||||
# Copyright 2025 the LlamaFactory team.
|
|
||||||
#
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
# you may not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
#
|
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
|
|
||||||
|
|
||||||
def run_train():
|
|
||||||
raise NotImplementedError("Please use `llamafactory-cli sft` or `llamafactory-cli rm`.")
|
|
||||||
|
|
||||||
|
|
||||||
def run_chat():
|
|
||||||
from llamafactory.v1.core.chat_sampler import Sampler
|
|
||||||
|
|
||||||
Sampler().cli()
|
|
||||||
|
|
||||||
|
|
||||||
def run_sft():
|
|
||||||
from llamafactory.v1.train.sft import SFTTrainer
|
|
||||||
|
|
||||||
SFTTrainer().run()
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
run_train()
|
|
||||||
Loading…
x
Reference in New Issue
Block a user