Compare commits

...

No commits in common. "7d60b840ef1c25d9bed2c13d71d0ef79624553c6" and "6f743571b191de3309517cf36238343a497666ce" have entirely different histories.

30 changed files with 1272 additions and 7322 deletions

File diff suppressed because one or more lines are too long

163
evaluation/ceval/ceval.py Normal file
View File

@ -0,0 +1,163 @@
# Copyright 2025 the LlamaFactory team.
# Copyright 2020 The HuggingFace Datasets Authors and the current dataset script contributor.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import datasets
import pandas as pd
_CITATION = """\
@article{huang2023ceval,
title={C-Eval: A Multi-Level Multi-Discipline Chinese Evaluation Suite for Foundation Models},
author={Huang, Yuzhen and Bai, Yuzhuo and Zhu, Zhihao and others},
journal={arXiv preprint arXiv:2305.08322},
year={2023}
}
"""
_DESCRIPTION = """\
C-Eval is a comprehensive Chinese evaluation suite for foundation models.
It consists of 13948 multi-choice questions spanning 52 diverse disciplines and four difficulty levels.
"""
_HOMEPAGE = "https://cevalbenchmark.com"
_LICENSE = "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License"
_URL = "ceval.zip"
task_list = [
"computer_network",
"operating_system",
"computer_architecture",
"college_programming",
"college_physics",
"college_chemistry",
"advanced_mathematics",
"probability_and_statistics",
"discrete_mathematics",
"electrical_engineer",
"metrology_engineer",
"high_school_mathematics",
"high_school_physics",
"high_school_chemistry",
"high_school_biology",
"middle_school_mathematics",
"middle_school_biology",
"middle_school_physics",
"middle_school_chemistry",
"veterinary_medicine",
"college_economics",
"business_administration",
"marxism",
"mao_zedong_thought",
"education_science",
"teacher_qualification",
"high_school_politics",
"high_school_geography",
"middle_school_politics",
"middle_school_geography",
"modern_chinese_history",
"ideological_and_moral_cultivation",
"logic",
"law",
"chinese_language_and_literature",
"art_studies",
"professional_tour_guide",
"legal_professional",
"high_school_chinese",
"high_school_history",
"middle_school_history",
"civil_servant",
"sports_science",
"plant_protection",
"basic_medicine",
"clinical_medicine",
"urban_and_rural_planner",
"accountant",
"fire_engineer",
"environmental_impact_assessment_engineer",
"tax_accountant",
"physician",
]
class CevalConfig(datasets.BuilderConfig):
def __init__(self, **kwargs):
super().__init__(version=datasets.Version("1.0.0"), **kwargs)
class Ceval(datasets.GeneratorBasedBuilder):
BUILDER_CONFIGS = [
CevalConfig(
name=task_name,
)
for task_name in task_list
]
def _info(self):
features = datasets.Features(
{
"id": datasets.Value("int32"),
"question": datasets.Value("string"),
"A": datasets.Value("string"),
"B": datasets.Value("string"),
"C": datasets.Value("string"),
"D": datasets.Value("string"),
"answer": datasets.Value("string"),
"explanation": datasets.Value("string"),
}
)
return datasets.DatasetInfo(
description=_DESCRIPTION,
features=features,
homepage=_HOMEPAGE,
license=_LICENSE,
citation=_CITATION,
)
def _split_generators(self, dl_manager):
data_dir = dl_manager.download_and_extract(_URL)
task_name = self.config.name
return [
datasets.SplitGenerator(
name=datasets.Split.TEST,
gen_kwargs={
"filepath": os.path.join(data_dir, "test", f"{task_name}_test.csv"),
},
),
datasets.SplitGenerator(
name=datasets.Split.VALIDATION,
gen_kwargs={
"filepath": os.path.join(data_dir, "val", f"{task_name}_val.csv"),
},
),
datasets.SplitGenerator(
name=datasets.Split.TRAIN,
gen_kwargs={
"filepath": os.path.join(data_dir, "dev", f"{task_name}_dev.csv"),
},
),
]
def _generate_examples(self, filepath):
df = pd.read_csv(filepath, encoding="utf-8")
for i, instance in enumerate(df.to_dict(orient="records")):
if "answer" not in instance.keys():
instance["answer"] = ""
if "explanation" not in instance.keys():
instance["explanation"] = ""
yield i, instance

View File

@ -0,0 +1,210 @@
{
"accountant": {
"name": "注册会计师",
"category": "Other"
},
"advanced_mathematics": {
"name": "高等数学",
"category": "STEM"
},
"art_studies": {
"name": "艺术学",
"category": "Humanities"
},
"basic_medicine": {
"name": "基础医学",
"category": "Other"
},
"business_administration": {
"name": "工商管理",
"category": "Social Sciences"
},
"chinese_language_and_literature": {
"name": "中国语言文学",
"category": "Humanities"
},
"civil_servant": {
"name": "公务员",
"category": "Other"
},
"clinical_medicine": {
"name": "临床医学",
"category": "Other"
},
"college_chemistry": {
"name": "大学化学",
"category": "STEM"
},
"college_economics": {
"name": "大学经济学",
"category": "Social Sciences"
},
"college_physics": {
"name": "大学物理",
"category": "STEM"
},
"college_programming": {
"name": "大学编程",
"category": "STEM"
},
"computer_architecture": {
"name": "计算机组成",
"category": "STEM"
},
"computer_network": {
"name": "计算机网络",
"category": "STEM"
},
"discrete_mathematics": {
"name": "离散数学",
"category": "STEM"
},
"education_science": {
"name": "教育学",
"category": "Social Sciences"
},
"electrical_engineer": {
"name": "注册电气工程师",
"category": "STEM"
},
"environmental_impact_assessment_engineer": {
"name": "环境影响评价工程师",
"category": "Other"
},
"fire_engineer": {
"name": "注册消防工程师",
"category": "Other"
},
"high_school_biology": {
"name": "高中生物",
"category": "STEM"
},
"high_school_chemistry": {
"name": "高中化学",
"category": "STEM"
},
"high_school_chinese": {
"name": "高中语文",
"category": "Humanities"
},
"high_school_geography": {
"name": "高中地理",
"category": "Social Sciences"
},
"high_school_history": {
"name": "高中历史",
"category": "Humanities"
},
"high_school_mathematics": {
"name": "高中数学",
"category": "STEM"
},
"high_school_physics": {
"name": "高中物理",
"category": "STEM"
},
"high_school_politics": {
"name": "高中政治",
"category": "Social Sciences"
},
"ideological_and_moral_cultivation": {
"name": "思想道德修养与法律基础",
"category": "Humanities"
},
"law": {
"name": "法学",
"category": "Humanities"
},
"legal_professional": {
"name": "法律职业资格",
"category": "Humanities"
},
"logic": {
"name": "逻辑学",
"category": "Humanities"
},
"mao_zedong_thought": {
"name": "毛泽东思想和中国特色社会主义理论体系概论",
"category": "Social Sciences"
},
"marxism": {
"name": "马克思主义基本原理",
"category": "Social Sciences"
},
"metrology_engineer": {
"name": "注册计量师",
"category": "STEM"
},
"middle_school_biology": {
"name": "初中生物",
"category": "STEM"
},
"middle_school_chemistry": {
"name": "初中化学",
"category": "STEM"
},
"middle_school_geography": {
"name": "初中地理",
"category": "Social Sciences"
},
"middle_school_history": {
"name": "初中历史",
"category": "Humanities"
},
"middle_school_mathematics": {
"name": "初中数学",
"category": "STEM"
},
"middle_school_physics": {
"name": "初中物理",
"category": "STEM"
},
"middle_school_politics": {
"name": "初中政治",
"category": "Social Sciences"
},
"modern_chinese_history": {
"name": "近代史纲要",
"category": "Humanities"
},
"operating_system": {
"name": "操作系统",
"category": "STEM"
},
"physician": {
"name": "医师资格",
"category": "Other"
},
"plant_protection": {
"name": "植物保护",
"category": "Other"
},
"probability_and_statistics": {
"name": "概率统计",
"category": "STEM"
},
"professional_tour_guide": {
"name": "导游资格",
"category": "Humanities"
},
"sports_science": {
"name": "体育学",
"category": "Other"
},
"tax_accountant": {
"name": "税务师",
"category": "Other"
},
"teacher_qualification": {
"name": "教师资格",
"category": "Social Sciences"
},
"urban_and_rural_planner": {
"name": "注册城乡规划师",
"category": "Other"
},
"veterinary_medicine": {
"name": "兽医学",
"category": "STEM"
}
}

170
evaluation/cmmlu/cmmlu.py Normal file
View File

@ -0,0 +1,170 @@
# Copyright 2025 the LlamaFactory team.
# Copyright 2020 The HuggingFace Datasets Authors and the current dataset script contributor.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import datasets
import pandas as pd
_CITATION = """\
@article{li2023cmmlu,
title={CMMLU: Measuring massive multitask language understanding in Chinese},
author={Haonan Li and Yixuan Zhang and Fajri Koto and Yifei Yang and others,
journal={arXiv preprint arXiv:2306.09212},
year={2023}
}
"""
_DESCRIPTION = """\
CMMLU is a comprehensive Chinese assessment suite specifically designed to evaluate the advanced knowledge
and reasoning abilities of LLMs within the Chinese language and cultural context.
"""
_HOMEPAGE = "https://github.com/haonan-li/CMMLU"
_LICENSE = "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License"
_URL = "cmmlu.zip"
task_list = [
"agronomy",
"anatomy",
"ancient_chinese",
"arts",
"astronomy",
"business_ethics",
"chinese_civil_service_exam",
"chinese_driving_rule",
"chinese_food_culture",
"chinese_foreign_policy",
"chinese_history",
"chinese_literature",
"chinese_teacher_qualification",
"clinical_knowledge",
"college_actuarial_science",
"college_education",
"college_engineering_hydrology",
"college_law",
"college_mathematics",
"college_medical_statistics",
"college_medicine",
"computer_science",
"computer_security",
"conceptual_physics",
"construction_project_management",
"economics",
"education",
"electrical_engineering",
"elementary_chinese",
"elementary_commonsense",
"elementary_information_and_technology",
"elementary_mathematics",
"ethnology",
"food_science",
"genetics",
"global_facts",
"high_school_biology",
"high_school_chemistry",
"high_school_geography",
"high_school_mathematics",
"high_school_physics",
"high_school_politics",
"human_sexuality",
"international_law",
"journalism",
"jurisprudence",
"legal_and_moral_basis",
"logical",
"machine_learning",
"management",
"marketing",
"marxist_theory",
"modern_chinese",
"nutrition",
"philosophy",
"professional_accounting",
"professional_law",
"professional_medicine",
"professional_psychology",
"public_relations",
"security_study",
"sociology",
"sports_science",
"traditional_chinese_medicine",
"virology",
"world_history",
"world_religions",
]
class CMMLUConfig(datasets.BuilderConfig):
def __init__(self, **kwargs):
super().__init__(version=datasets.Version("1.0.1"), **kwargs)
class CMMLU(datasets.GeneratorBasedBuilder):
BUILDER_CONFIGS = [
CMMLUConfig(
name=task_name,
)
for task_name in task_list
]
def _info(self):
features = datasets.Features(
{
"question": datasets.Value("string"),
"A": datasets.Value("string"),
"B": datasets.Value("string"),
"C": datasets.Value("string"),
"D": datasets.Value("string"),
"answer": datasets.Value("string"),
}
)
return datasets.DatasetInfo(
description=_DESCRIPTION,
features=features,
homepage=_HOMEPAGE,
license=_LICENSE,
citation=_CITATION,
)
def _split_generators(self, dl_manager):
data_dir = dl_manager.download_and_extract(_URL)
task_name = self.config.name
return [
datasets.SplitGenerator(
name=datasets.Split.TEST,
gen_kwargs={
"filepath": os.path.join(data_dir, f"test/{task_name}.csv"),
},
),
datasets.SplitGenerator(
name=datasets.Split.TRAIN,
gen_kwargs={
"filepath": os.path.join(data_dir, f"dev/{task_name}.csv"),
},
),
]
def _generate_examples(self, filepath):
df = pd.read_csv(filepath, header=0, index_col=0, encoding="utf-8")
for i, instance in enumerate(df.to_dict(orient="records")):
question = instance.pop("Question", "")
answer = instance.pop("Answer", "")
instance["question"] = question
instance["answer"] = answer
yield i, instance

View File

@ -0,0 +1,270 @@
{
"agronomy": {
"name": "农学",
"category": "Other"
},
"anatomy": {
"name": "解剖学",
"category": "STEM"
},
"ancient_chinese": {
"name": "古汉语",
"category": "Social Sciences"
},
"arts": {
"name": "艺术学",
"category": "Humanities"
},
"astronomy": {
"name": "天文学",
"category": "STEM"
},
"business_ethics": {
"name": "商业伦理",
"category": "Social Sciences"
},
"chinese_civil_service_exam": {
"name": "中国公务员考试",
"category": "Social Sciences"
},
"chinese_driving_rule": {
"name": "中国驾驶规则",
"category": "Other"
},
"chinese_food_culture": {
"name": "中国饮食文化",
"category": "Social Sciences"
},
"chinese_foreign_policy": {
"name": "中国外交政策",
"category": "Social Sciences"
},
"chinese_history": {
"name": "中国历史",
"category": "Humanities"
},
"chinese_literature": {
"name": "中国文学",
"category": "Humanities"
},
"chinese_teacher_qualification": {
"name": "中国教师资格",
"category": "Social Sciences"
},
"college_actuarial_science": {
"name": "大学精算学",
"category": "STEM"
},
"college_education": {
"name": "大学教育学",
"category": "Social Sciences"
},
"college_engineering_hydrology": {
"name": "大学工程水文学",
"category": "STEM"
},
"college_law": {
"name": "大学法律",
"category": "Humanities"
},
"college_mathematics": {
"name": "大学数学",
"category": "STEM"
},
"college_medical_statistics": {
"name": "大学医学统计",
"category": "STEM"
},
"clinical_knowledge": {
"name": "临床知识",
"category": "Other"
},
"college_medicine": {
"name": "大学医学",
"category": "Other"
},
"computer_science": {
"name": "计算机科学",
"category": "STEM"
},
"computer_security": {
"name": "计算机安全",
"category": "Other"
},
"conceptual_physics": {
"name": "概念物理学",
"category": "STEM"
},
"construction_project_management": {
"name": "建设工程管理",
"category": "Other"
},
"economics": {
"name": "经济学",
"category": "Social Sciences"
},
"education": {
"name": "教育学",
"category": "Social Sciences"
},
"elementary_chinese": {
"name": "小学语文",
"category": "Social Sciences"
},
"elementary_commonsense": {
"name": "小学常识",
"category": "Other"
},
"elementary_information_and_technology": {
"name": "小学信息技术",
"category": "Other"
},
"electrical_engineering": {
"name": "电气工程",
"category": "STEM"
},
"elementary_mathematics": {
"name": "初等数学",
"category": "STEM"
},
"ethnology": {
"name": "民族学",
"category": "Social Sciences"
},
"food_science": {
"name": "食品科学",
"category": "Other"
},
"genetics": {
"name": "遗传学",
"category": "STEM"
},
"global_facts": {
"name": "全球事实",
"category": "Humanities"
},
"high_school_biology": {
"name": "高中生物",
"category": "STEM"
},
"high_school_chemistry": {
"name": "高中化学",
"category": "STEM"
},
"high_school_geography": {
"name": "高中地理",
"category": "Social Sciences"
},
"high_school_mathematics": {
"name": "高中数学",
"category": "STEM"
},
"high_school_physics": {
"name": "高中物理学",
"category": "STEM"
},
"high_school_politics": {
"name": "高中政治",
"category": "Social Sciences"
},
"human_sexuality": {
"name": "人类性行为",
"category": "Other"
},
"international_law": {
"name": "国际法学",
"category": "Humanities"
},
"journalism": {
"name": "新闻学",
"category": "Social Sciences"
},
"jurisprudence": {
"name": "法理学",
"category": "Humanities"
},
"legal_and_moral_basis": {
"name": "法律与道德基础",
"category": "Other"
},
"logical": {
"name": "逻辑学",
"category": "Humanities"
},
"machine_learning": {
"name": "机器学习",
"category": "STEM"
},
"management": {
"name": "管理学",
"category": "Social Sciences"
},
"marketing": {
"name": "市场营销",
"category": "Social Sciences"
},
"marxist_theory": {
"name": "马克思主义理论",
"category": "Humanities"
},
"modern_chinese": {
"name": "现代汉语",
"category": "Social Sciences"
},
"nutrition": {
"name": "营养学",
"category": "Other"
},
"philosophy": {
"name": "哲学",
"category": "Humanities"
},
"professional_accounting": {
"name": "专业会计",
"category": "Social Sciences"
},
"professional_law": {
"name": "专业法学",
"category": "Humanities"
},
"professional_medicine": {
"name": "专业医学",
"category": "Other"
},
"professional_psychology": {
"name": "专业心理学",
"category": "Social Sciences"
},
"public_relations": {
"name": "公共关系",
"category": "Social Sciences"
},
"security_study": {
"name": "安全研究",
"category": "Social Sciences"
},
"sociology": {
"name": "社会学",
"category": "Social Sciences"
},
"sports_science": {
"name": "体育学",
"category": "Other"
},
"traditional_chinese_medicine": {
"name": "中医中药",
"category": "Other"
},
"virology": {
"name": "病毒学",
"category": "STEM"
},
"world_history": {
"name": "世界历史",
"category": "Humanities"
},
"world_religions": {
"name": "世界宗教",
"category": "Humanities"
}
}

View File

@ -0,0 +1,230 @@
{
"abstract_algebra": {
"name": "abstract algebra",
"category": "STEM"
},
"anatomy": {
"name": "anatomy",
"category": "Other"
},
"astronomy": {
"name": "astronomy",
"category": "STEM"
},
"business_ethics": {
"name": "business ethics",
"category": "Other"
},
"clinical_knowledge": {
"name": "clinical knowledge",
"category": "Other"
},
"college_biology": {
"name": "college biology",
"category": "STEM"
},
"college_chemistry": {
"name": "college chemistry",
"category": "STEM"
},
"college_computer_science": {
"name": "college computer science",
"category": "STEM"
},
"college_mathematics": {
"name": "college mathematics",
"category": "STEM"
},
"college_medicine": {
"name": "college medicine",
"category": "Other"
},
"college_physics": {
"name": "college physics",
"category": "STEM"
},
"computer_security": {
"name": "computer security",
"category": "STEM"
},
"conceptual_physics": {
"name": "conceptual physics",
"category": "STEM"
},
"econometrics": {
"name": "econometrics",
"category": "Social Sciences"
},
"electrical_engineering": {
"name": "electrical engineering",
"category": "STEM"
},
"elementary_mathematics": {
"name": "elementary mathematics",
"category": "STEM"
},
"formal_logic": {
"name": "formal logic",
"category": "Humanities"
},
"global_facts": {
"name": "global facts",
"category": "Other"
},
"high_school_biology": {
"name": "high school biology",
"category": "STEM"
},
"high_school_chemistry": {
"name": "high school chemistry",
"category": "STEM"
},
"high_school_computer_science": {
"name": "high school computer science",
"category": "STEM"
},
"high_school_european_history": {
"name": "high school european history",
"category": "Humanities"
},
"high_school_geography": {
"name": "high school geography",
"category": "Social Sciences"
},
"high_school_government_and_politics": {
"name": "high school government and politics",
"category": "Social Sciences"
},
"high_school_macroeconomics": {
"name": "high school macroeconomics",
"category": "Social Sciences"
},
"high_school_mathematics": {
"name": "high school mathematics",
"category": "STEM"
},
"high_school_microeconomics": {
"name": "high school microeconomics",
"category": "Social Sciences"
},
"high_school_physics": {
"name": "high school physics",
"category": "STEM"
},
"high_school_psychology": {
"name": "high school psychology",
"category": "Social Sciences"
},
"high_school_statistics": {
"name": "high school statistics",
"category": "STEM"
},
"high_school_us_history": {
"name": "high school us history",
"category": "Humanities"
},
"high_school_world_history": {
"name": "high school world history",
"category": "Humanities"
},
"human_aging": {
"name": "human aging",
"category": "Other"
},
"human_sexuality": {
"name": "human sexuality",
"category": "Social Sciences"
},
"international_law": {
"name": "international law",
"category": "Humanities"
},
"jurisprudence": {
"name": "jurisprudence",
"category": "Humanities"
},
"logical_fallacies": {
"name": "logical fallacies",
"category": "Humanities"
},
"machine_learning": {
"name": "machine learning",
"category": "STEM"
},
"management": {
"name": "management",
"category": "Other"
},
"marketing": {
"name": "marketing",
"category": "Other"
},
"medical_genetics": {
"name": "medical genetics",
"category": "Other"
},
"miscellaneous": {
"name": "miscellaneous",
"category": "Other"
},
"moral_disputes": {
"name": "moral disputes",
"category": "Humanities"
},
"moral_scenarios": {
"name": "moral scenarios",
"category": "Humanities"
},
"nutrition": {
"name": "nutrition",
"category": "Other"
},
"philosophy": {
"name": "philosophy",
"category": "Humanities"
},
"prehistory": {
"name": "prehistory",
"category": "Humanities"
},
"professional_accounting": {
"name": "professional accounting",
"category": "Other"
},
"professional_law": {
"name": "professional law",
"category": "Humanities"
},
"professional_medicine": {
"name": "professional medicine",
"category": "Other"
},
"professional_psychology": {
"name": "professional psychology",
"category": "Social Sciences"
},
"public_relations": {
"name": "public relations",
"category": "Social Sciences"
},
"security_studies": {
"name": "security studies",
"category": "Social Sciences"
},
"sociology": {
"name": "sociology",
"category": "Social Sciences"
},
"us_foreign_policy": {
"name": "us foreign policy",
"category": "Social Sciences"
},
"virology": {
"name": "virology",
"category": "Other"
},
"world_religions": {
"name": "world religions",
"category": "Humanities"
}
}

163
evaluation/mmlu/mmlu.py Normal file
View File

@ -0,0 +1,163 @@
# Copyright 2025 the LlamaFactory team.
# Copyright 2020 The HuggingFace Datasets Authors and the current dataset script contributor.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import datasets
import pandas as pd
_CITATION = """\
@article{hendryckstest2021,
title={Measuring Massive Multitask Language Understanding},
author={Dan Hendrycks and Collin Burns and others},
journal={Proceedings of the International Conference on Learning Representations (ICLR)},
year={2021}
}
"""
_DESCRIPTION = """\
Measuring Massive Multitask Language Understanding by Dan Hendrycks, Collin Burns, Steven Basart,
Andy Zou, Mantas Mazeika, Dawn Song, and Jacob Steinhardt (ICLR 2021).
"""
_HOMEPAGE = "https://github.com/hendrycks/test"
_LICENSE = "MIT"
_URL = "mmlu.zip"
task_list = [
"high_school_european_history",
"business_ethics",
"clinical_knowledge",
"medical_genetics",
"high_school_us_history",
"high_school_physics",
"high_school_world_history",
"virology",
"high_school_microeconomics",
"econometrics",
"college_computer_science",
"high_school_biology",
"abstract_algebra",
"professional_accounting",
"philosophy",
"professional_medicine",
"nutrition",
"global_facts",
"machine_learning",
"security_studies",
"public_relations",
"professional_psychology",
"prehistory",
"anatomy",
"human_sexuality",
"college_medicine",
"high_school_government_and_politics",
"college_chemistry",
"logical_fallacies",
"high_school_geography",
"elementary_mathematics",
"human_aging",
"college_mathematics",
"high_school_psychology",
"formal_logic",
"high_school_statistics",
"international_law",
"high_school_mathematics",
"high_school_computer_science",
"conceptual_physics",
"miscellaneous",
"high_school_chemistry",
"marketing",
"professional_law",
"management",
"college_physics",
"jurisprudence",
"world_religions",
"sociology",
"us_foreign_policy",
"high_school_macroeconomics",
"computer_security",
"moral_scenarios",
"moral_disputes",
"electrical_engineering",
"astronomy",
"college_biology",
]
class MMLUConfig(datasets.BuilderConfig):
def __init__(self, **kwargs):
super().__init__(version=datasets.Version("1.0.0"), **kwargs)
class MMLU(datasets.GeneratorBasedBuilder):
BUILDER_CONFIGS = [
MMLUConfig(
name=task_name,
)
for task_name in task_list
]
def _info(self):
features = datasets.Features(
{
"question": datasets.Value("string"),
"A": datasets.Value("string"),
"B": datasets.Value("string"),
"C": datasets.Value("string"),
"D": datasets.Value("string"),
"answer": datasets.Value("string"),
}
)
return datasets.DatasetInfo(
description=_DESCRIPTION,
features=features,
homepage=_HOMEPAGE,
license=_LICENSE,
citation=_CITATION,
)
def _split_generators(self, dl_manager):
data_dir = dl_manager.download_and_extract(_URL)
task_name = self.config.name
return [
datasets.SplitGenerator(
name=datasets.Split.TEST,
gen_kwargs={
"filepath": os.path.join(data_dir, "data", "test", f"{task_name}_test.csv"),
},
),
datasets.SplitGenerator(
name=datasets.Split.VALIDATION,
gen_kwargs={
"filepath": os.path.join(data_dir, "data", "val", f"{task_name}_val.csv"),
},
),
datasets.SplitGenerator(
name=datasets.Split.TRAIN,
gen_kwargs={
"filepath": os.path.join(data_dir, "data", "dev", f"{task_name}_dev.csv"),
},
),
]
def _generate_examples(self, filepath):
df = pd.read_csv(filepath, header=None)
df.columns = ["question", "A", "B", "C", "D", "answer"]
yield from enumerate(df.to_dict(orient="records"))

View File

@ -25,27 +25,64 @@ USAGE = (
+ "| Usage: |\n" + "| Usage: |\n"
+ "| llamafactory-cli api -h: launch an OpenAI-style API server |\n" + "| llamafactory-cli api -h: launch an OpenAI-style API server |\n"
+ "| llamafactory-cli chat -h: launch a chat interface in CLI |\n" + "| llamafactory-cli chat -h: launch a chat interface in CLI |\n"
+ "| llamafactory-cli eval -h: evaluate models |\n"
+ "| llamafactory-cli export -h: merge LoRA adapters and export model |\n" + "| llamafactory-cli export -h: merge LoRA adapters and export model |\n"
+ "| llamafactory-cli train -h: train models |\n" + "| llamafactory-cli train -h: train models |\n"
+ "| llamafactory-cli webchat -h: launch a chat interface in Web UI |\n" + "| llamafactory-cli webchat -h: launch a chat interface in Web UI |\n"
+ "| llamafactory-cli webui: launch LlamaBoard |\n" + "| llamafactory-cli webui: launch LlamaBoard |\n"
+ "| llamafactory-cli env: show environment info |\n"
+ "| llamafactory-cli version: show version info |\n" + "| llamafactory-cli version: show version info |\n"
+ "| Hint: You can use `lmf` as a shortcut for `llamafactory-cli`. |\n"
+ "-" * 70 + "-" * 70
) )
def _run_api():
from .api.app import run_api
return run_api()
def _run_chat():
from .chat.chat_model import run_chat
return run_chat()
def _run_eval():
from .eval.evaluator import run_eval
return run_eval()
def _export_model():
from .train.tuner import export_model
return export_model()
def _run_exp():
from .train.tuner import run_exp
return run_exp()
def _run_web_demo():
from .webui.interface import run_web_demo
return run_web_demo()
def _run_web_ui():
from .webui.interface import run_web_ui
return run_web_ui()
def main(): def main():
from . import launcher
from .extras import logging from .extras import logging
from .extras.env import VERSION, print_env from .extras.env import VERSION, print_env
from .extras.misc import find_available_port, get_device_count, is_env_enabled, use_ray from .extras.misc import find_available_port, get_device_count, is_env_enabled, use_ray
if is_env_enabled("USE_V1"):
from .v1 import launcher
else:
from . import launcher
logger = logging.get_logger(__name__) logger = logging.get_logger(__name__)
WELCOME = ( WELCOME = (
@ -61,14 +98,14 @@ def main():
) )
COMMAND_MAP = { COMMAND_MAP = {
"api": launcher.run_api, "api": _run_api,
"chat": launcher.run_chat, "chat": _run_chat,
"env": print_env, "env": print_env,
"eval": launcher.run_eval, "eval": _run_eval,
"export": launcher.export_model, "export": _export_model,
"train": launcher.run_exp, "train": _run_exp,
"webchat": launcher.run_web_demo, "webchat": _run_web_demo,
"webui": launcher.run_web_ui, "webui": _run_web_ui,
"version": partial(print, WELCOME), "version": partial(print, WELCOME),
"help": partial(print, USAGE), "help": partial(print, USAGE),
} }

View File

@ -15,22 +15,22 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import os
import platform
import accelerate
import datasets
import peft
import torch
import transformers
import trl
from transformers.utils import is_torch_cuda_available, is_torch_npu_available
VERSION = "0.9.4.dev0" VERSION = "0.9.4.dev0"
def print_env() -> None: def print_env() -> None:
import os
import platform
import accelerate
import datasets
import peft
import torch
import transformers
import trl
from transformers.utils import is_torch_cuda_available, is_torch_npu_available
info = { info = {
"`llamafactory` version": VERSION, "`llamafactory` version": VERSION,
"Platform": platform.platform(), "Platform": platform.platform(),

View File

@ -12,46 +12,12 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from llamafactory.train.tuner import run_exp # use absolute import
def run_api():
from llamafactory.api.app import run_api as _run_api
_run_api()
def run_chat(): def launch():
from llamafactory.chat.chat_model import run_chat as _run_chat run_exp()
return _run_chat()
def run_eval():
raise NotImplementedError("Evaluation will be deprecated in the future.")
def export_model():
from llamafactory.train.tuner import export_model as _export_model
return _export_model()
def run_exp():
from llamafactory.train.tuner import run_exp as _run_exp
return _run_exp() # use absolute import
def run_web_demo():
from llamafactory.webui.interface import run_web_demo as _run_web_demo
return _run_web_demo()
def run_web_ui():
from llamafactory.webui.interface import run_web_ui as _run_web_ui
return _run_web_ui()
if __name__ == "__main__": if __name__ == "__main__":
run_exp() launch()

View File

@ -1,33 +0,0 @@
# Copyright 2025 the LlamaFactory team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
def run_train():
raise NotImplementedError("Please use `llamafactory-cli sft` or `llamafactory-cli rm`.")
def run_chat():
from llamafactory.v1.core.chat_sampler import Sampler
Sampler().cli()
def run_sft():
from llamafactory.v1.train.sft import SFTTrainer
SFTTrainer().run()
if __name__ == "__main__":
run_train()