2025-11-05 18:32:14 +08:00
30 changed files with 1272 additions and 7322 deletions
--- a/data/dpo_en_demo.json
+++ b/data/dpo_en_demo.json
--- a/evaluation/ceval/ceval.py
+++ b/evaluation/ceval/ceval.py
@ -0,0 +1,163 @@
 # Copyright 2025 the LlamaFactory team.
 # Copyright 2020 The HuggingFace Datasets Authors and the current dataset script contributor.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import os
 import datasets
 import pandas as pd
 # BibTeX entry for the C-Eval paper; passed to DatasetInfo as the citation text.
 _CITATION = """\
@article{huang2023ceval,
  title={C-Eval: A Multi-Level Multi-Discipline Chinese Evaluation Suite for Foundation Models},
  author={Huang, Yuzhen and Bai, Yuzhuo and Zhu, Zhihao and others},
  journal={arXiv preprint arXiv:2305.08322},
  year={2023}
 }
 """
 # Human-readable summary passed to DatasetInfo as the description.
 _DESCRIPTION = """\
 C-Eval is a comprehensive Chinese evaluation suite for foundation models.
 It consists of 13948 multi-choice questions spanning 52 diverse disciplines and four difficulty levels.
 """
 # Homepage and license strings reported in the dataset metadata.
 _HOMEPAGE = "https://cevalbenchmark.com"
 _LICENSE = "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License"
 # Archive name handed to the download manager in `_split_generators`.
 _URL = "ceval.zip"
 # Subject identifiers. Each entry becomes the name of one builder config and is
 # interpolated into the per-split CSV file names (`<task>_test.csv`, etc.).
 task_list = [
    "computer_network",
    "operating_system",
    "computer_architecture",
    "college_programming",
    "college_physics",
    "college_chemistry",
    "advanced_mathematics",
    "probability_and_statistics",
    "discrete_mathematics",
    "electrical_engineer",
    "metrology_engineer",
    "high_school_mathematics",
    "high_school_physics",
    "high_school_chemistry",
    "high_school_biology",
    "middle_school_mathematics",
    "middle_school_biology",
    "middle_school_physics",
    "middle_school_chemistry",
    "veterinary_medicine",
    "college_economics",
    "business_administration",
    "marxism",
    "mao_zedong_thought",
    "education_science",
    "teacher_qualification",
    "high_school_politics",
    "high_school_geography",
    "middle_school_politics",
    "middle_school_geography",
    "modern_chinese_history",
    "ideological_and_moral_cultivation",
    "logic",
    "law",
    "chinese_language_and_literature",
    "art_studies",
    "professional_tour_guide",
    "legal_professional",
    "high_school_chinese",
    "high_school_history",
    "middle_school_history",
    "civil_servant",
    "sports_science",
    "plant_protection",
    "basic_medicine",
    "clinical_medicine",
    "urban_and_rural_planner",
    "accountant",
    "fire_engineer",
    "environmental_impact_assessment_engineer",
    "tax_accountant",
    "physician",
 ]
 class CevalConfig(datasets.BuilderConfig):
    """Builder config for a single C-Eval subject, pinned to version 1.0.0."""
    def __init__(self, **kwargs):
        """Forward ``kwargs`` to ``BuilderConfig`` together with the fixed version."""
        pinned_version = datasets.Version("1.0.0")
        super().__init__(version=pinned_version, **kwargs)
 class Ceval(datasets.GeneratorBasedBuilder):
    """Dataset builder for C-Eval exposing one builder config per entry of ``task_list``."""
    BUILDER_CONFIGS = [CevalConfig(name=subject) for subject in task_list]
    def _info(self):
        """Return the dataset metadata together with the per-example column schema."""
        # ``id`` is the only integer column; every other column is a string.
        # Insertion order mirrors the declared feature order.
        schema = {"id": datasets.Value("int32")}
        for column in ("question", "A", "B", "C", "D", "answer", "explanation"):
            schema[column] = datasets.Value("string")
        features = datasets.Features(schema)
        return datasets.DatasetInfo(
            description=_DESCRIPTION,
            features=features,
            homepage=_HOMEPAGE,
            license=_LICENSE,
            citation=_CITATION,
        )
    def _split_generators(self, dl_manager):
        """Extract the archive and point each split at ``<folder>/<task>_<folder>.csv``."""
        root = dl_manager.download_and_extract(_URL)
        subject = self.config.name
        # (split, folder) pairs; note that the ``dev`` folder is exposed as TRAIN.
        layout = (
            (datasets.Split.TEST, "test"),
            (datasets.Split.VALIDATION, "val"),
            (datasets.Split.TRAIN, "dev"),
        )
        return [
            datasets.SplitGenerator(
                name=split,
                gen_kwargs={
                    "filepath": os.path.join(root, folder, f"{subject}_{folder}.csv"),
                },
            )
            for split, folder in layout
        ]
    def _generate_examples(self, filepath):
        """Yield ``(index, row)`` pairs read from the CSV file at ``filepath``.

        Rows that lack an ``answer`` or ``explanation`` key receive an empty
        string for it so that every yielded row carries both keys.
        """
        rows = pd.read_csv(filepath, encoding="utf-8").to_dict(orient="records")
        for index, row in enumerate(rows):
            row.setdefault("answer", "")
            row.setdefault("explanation", "")
            yield index, row
--- a/evaluation/ceval/mapping.json
+++ b/evaluation/ceval/mapping.json
@ -0,0 +1,210 @@
 {
  "accountant": {
    "name": "注册会计师",
    "category": "Other"
  },
  "advanced_mathematics": {
    "name": "高等数学",
    "category": "STEM"
  },
  "art_studies": {
    "name": "艺术学",
    "category": "Humanities"
  },
  "basic_medicine": {
    "name": "基础医学",
    "category": "Other"
  },
  "business_administration": {
    "name": "工商管理",
    "category": "Social Sciences"
  },
  "chinese_language_and_literature": {
    "name": "中国语言文学",
    "category": "Humanities"
  },
  "civil_servant": {
    "name": "公务员",
    "category": "Other"
  },
  "clinical_medicine": {
    "name": "临床医学",
    "category": "Other"
  },
  "college_chemistry": {
    "name": "大学化学",
    "category": "STEM"
  },
  "college_economics": {
    "name": "大学经济学",
    "category": "Social Sciences"
  },
  "college_physics": {
    "name": "大学物理",
    "category": "STEM"
  },
  "college_programming": {
    "name": "大学编程",
    "category": "STEM"
  },
  "computer_architecture": {
    "name": "计算机组成",
    "category": "STEM"
  },
  "computer_network": {
    "name": "计算机网络",
    "category": "STEM"
  },
  "discrete_mathematics": {
    "name": "离散数学",
    "category": "STEM"
  },
  "education_science": {
    "name": "教育学",
    "category": "Social Sciences"
  },
  "electrical_engineer": {
    "name": "注册电气工程师",
    "category": "STEM"
  },
  "environmental_impact_assessment_engineer": {
    "name": "环境影响评价工程师",
    "category": "Other"
  },
  "fire_engineer": {
    "name": "注册消防工程师",
    "category": "Other"
  },
  "high_school_biology": {
    "name": "高中生物",
    "category": "STEM"
  },
  "high_school_chemistry": {
    "name": "高中化学",
    "category": "STEM"
  },
  "high_school_chinese": {
    "name": "高中语文",
    "category": "Humanities"
  },
  "high_school_geography": {
    "name": "高中地理",
    "category": "Social Sciences"
  },
  "high_school_history": {
    "name": "高中历史",
    "category": "Humanities"
  },
  "high_school_mathematics": {
    "name": "高中数学",
    "category": "STEM"
  },
  "high_school_physics": {
    "name": "高中物理",
    "category": "STEM"
  },
  "high_school_politics": {
    "name": "高中政治",
    "category": "Social Sciences"
  },
  "ideological_and_moral_cultivation": {
    "name": "思想道德修养与法律基础",
    "category": "Humanities"
  },
  "law": {
    "name": "法学",
    "category": "Humanities"
  },
  "legal_professional": {
    "name": "法律职业资格",
    "category": "Humanities"
  },
  "logic": {
    "name": "逻辑学",
    "category": "Humanities"
  },
  "mao_zedong_thought": {
    "name": "毛泽东思想和中国特色社会主义理论体系概论",
    "category": "Social Sciences"
  },
  "marxism": {
    "name": "马克思主义基本原理",
    "category": "Social Sciences"
  },
  "metrology_engineer": {
    "name": "注册计量师",
    "category": "STEM"
  },
  "middle_school_biology": {
    "name": "初中生物",
    "category": "STEM"
  },
  "middle_school_chemistry": {
    "name": "初中化学",
    "category": "STEM"
  },
  "middle_school_geography": {
    "name": "初中地理",
    "category": "Social Sciences"
  },
  "middle_school_history": {
    "name": "初中历史",
    "category": "Humanities"
  },
  "middle_school_mathematics": {
    "name": "初中数学",
    "category": "STEM"
  },
  "middle_school_physics": {
    "name": "初中物理",
    "category": "STEM"
  },
  "middle_school_politics": {
    "name": "初中政治",
    "category": "Social Sciences"
  },
  "modern_chinese_history": {
    "name": "近代史纲要",
    "category": "Humanities"
  },
  "operating_system": {
    "name": "操作系统",
    "category": "STEM"
  },
  "physician": {
    "name": "医师资格",
    "category": "Other"
  },
  "plant_protection": {
    "name": "植物保护",
    "category": "Other"
  },
  "probability_and_statistics": {
    "name": "概率统计",
    "category": "STEM"
  },
  "professional_tour_guide": {
    "name": "导游资格",
    "category": "Humanities"
  },
  "sports_science": {
    "name": "体育学",
    "category": "Other"
  },
  "tax_accountant": {
    "name": "税务师",
    "category": "Other"
  },
  "teacher_qualification": {
    "name": "教师资格",
    "category": "Social Sciences"
  },
  "urban_and_rural_planner": {
    "name": "注册城乡规划师",
    "category": "Other"
  },
  "veterinary_medicine": {
    "name": "兽医学",
    "category": "STEM"
  }
 }
--- a/evaluation/cmmlu/cmmlu.py
+++ b/evaluation/cmmlu/cmmlu.py
@ -0,0 +1,170 @@
 # Copyright 2025 the LlamaFactory team.
 # Copyright 2020 The HuggingFace Datasets Authors and the current dataset script contributor.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import os
 import datasets
 import pandas as pd
 _CITATION = """\
@article{li2023cmmlu,
  title={CMMLU: Measuring massive multitask language understanding in Chinese},
  author={Haonan Li and Yixuan Zhang and Fajri Koto and Yifei Yang and others,
  journal={arXiv preprint arXiv:2306.09212},
  year={2023}
 }
 """
 _DESCRIPTION = """\
 CMMLU is a comprehensive Chinese assessment suite specifically designed to evaluate the advanced knowledge
 and reasoning abilities of LLMs within the Chinese language and cultural context.
 """
 _HOMEPAGE = "https://github.com/haonan-li/CMMLU"
 _LICENSE = "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License"
 _URL = "cmmlu.zip"
 # Subject identifiers. Each entry becomes the name of one builder config and is
 # interpolated into the per-split CSV file names (`test/<task>.csv`, `dev/<task>.csv`).
 task_list = [
    "agronomy",
    "anatomy",
    "ancient_chinese",
    "arts",
    "astronomy",
    "business_ethics",
    "chinese_civil_service_exam",
    "chinese_driving_rule",
    "chinese_food_culture",
    "chinese_foreign_policy",
    "chinese_history",
    "chinese_literature",
    "chinese_teacher_qualification",
    "clinical_knowledge",
    "college_actuarial_science",
    "college_education",
    "college_engineering_hydrology",
    "college_law",
    "college_mathematics",
    "college_medical_statistics",
    "college_medicine",
    "computer_science",
    "computer_security",
    "conceptual_physics",
    "construction_project_management",
    "economics",
    "education",
    "electrical_engineering",
    "elementary_chinese",
    "elementary_commonsense",
    "elementary_information_and_technology",
    "elementary_mathematics",
    "ethnology",
    "food_science",
    "genetics",
    "global_facts",
    "high_school_biology",
    "high_school_chemistry",
    "high_school_geography",
    "high_school_mathematics",
    "high_school_physics",
    "high_school_politics",
    "human_sexuality",
    "international_law",
    "journalism",
    "jurisprudence",
    "legal_and_moral_basis",
    "logical",
    "machine_learning",
    "management",
    "marketing",
    "marxist_theory",
    "modern_chinese",
    "nutrition",
    "philosophy",
    "professional_accounting",
    "professional_law",
    "professional_medicine",
    "professional_psychology",
    "public_relations",
    "security_study",
    "sociology",
    "sports_science",
    "traditional_chinese_medicine",
    "virology",
    "world_history",
    "world_religions",
 ]
 class CMMLUConfig(datasets.BuilderConfig):
    """Builder config for a single CMMLU subject, pinned to version 1.0.1."""
    def __init__(self, **kwargs):
        """Forward ``kwargs`` to ``BuilderConfig`` together with the fixed version."""
        pinned_version = datasets.Version("1.0.1")
        super().__init__(version=pinned_version, **kwargs)
 class CMMLU(datasets.GeneratorBasedBuilder):
    """Dataset builder for CMMLU exposing one builder config per entry of ``task_list``."""
    BUILDER_CONFIGS = [CMMLUConfig(name=subject) for subject in task_list]
    def _info(self):
        """Return the dataset metadata together with the per-example column schema."""
        # All six columns are strings; insertion order mirrors the declared order.
        schema = {
            column: datasets.Value("string")
            for column in ("question", "A", "B", "C", "D", "answer")
        }
        features = datasets.Features(schema)
        return datasets.DatasetInfo(
            description=_DESCRIPTION,
            features=features,
            homepage=_HOMEPAGE,
            license=_LICENSE,
            citation=_CITATION,
        )
    def _split_generators(self, dl_manager):
        """Extract the archive and point each split at ``<folder>/<task>.csv``."""
        root = dl_manager.download_and_extract(_URL)
        subject = self.config.name
        # (split, folder) pairs; the ``dev`` folder is exposed as TRAIN and no
        # validation split is produced.
        layout = (
            (datasets.Split.TEST, "test"),
            (datasets.Split.TRAIN, "dev"),
        )
        return [
            datasets.SplitGenerator(
                name=split,
                gen_kwargs={
                    "filepath": os.path.join(root, f"{folder}/{subject}.csv"),
                },
            )
            for split, folder in layout
        ]
    def _generate_examples(self, filepath):
        """Yield ``(index, row)`` pairs read from the CSV file at ``filepath``.

        The first CSV column is consumed as the index. The capitalised
        ``Question``/``Answer`` keys are renamed to lowercase, falling back to
        an empty string when a key is absent.
        """
        table = pd.read_csv(filepath, header=0, index_col=0, encoding="utf-8")
        for index, row in enumerate(table.to_dict(orient="records")):
            row["question"] = row.pop("Question", "")
            row["answer"] = row.pop("Answer", "")
            yield index, row
--- a/evaluation/cmmlu/mapping.json
+++ b/evaluation/cmmlu/mapping.json
@ -0,0 +1,270 @@
 {
  "agronomy": {
    "name": "农学",
    "category": "Other"
  },
  "anatomy": {
    "name": "解剖学",
    "category": "STEM"
  },
  "ancient_chinese": {
    "name": "古汉语",
    "category": "Social Sciences"
  },
  "arts": {
    "name": "艺术学",
    "category": "Humanities"
  },
  "astronomy": {
    "name": "天文学",
    "category": "STEM"
  },
  "business_ethics": {
    "name": "商业伦理",
    "category": "Social Sciences"
  },
  "chinese_civil_service_exam": {
    "name": "中国公务员考试",
    "category": "Social Sciences"
  },
  "chinese_driving_rule": {
    "name": "中国驾驶规则",
    "category": "Other"
  },
  "chinese_food_culture": {
    "name": "中国饮食文化",
    "category": "Social Sciences"
  },
  "chinese_foreign_policy": {
    "name": "中国外交政策",
    "category": "Social Sciences"
  },
  "chinese_history": {
    "name": "中国历史",
    "category": "Humanities"
  },
  "chinese_literature": {
    "name": "中国文学",
    "category": "Humanities"
  },
  "chinese_teacher_qualification": {
    "name": "中国教师资格",
    "category": "Social Sciences"
  },
  "college_actuarial_science": {
    "name": "大学精算学",
    "category": "STEM"
  },
  "college_education": {
    "name": "大学教育学",
    "category": "Social Sciences"
  },
  "college_engineering_hydrology": {
    "name": "大学工程水文学",
    "category": "STEM"
  },
  "college_law": {
    "name": "大学法律",
    "category": "Humanities"
  },
  "college_mathematics": {
    "name": "大学数学",
    "category": "STEM"
  },
  "college_medical_statistics": {
    "name": "大学医学统计",
    "category": "STEM"
  },
  "clinical_knowledge": {
    "name": "临床知识",
    "category": "Other"
  },
  "college_medicine": {
    "name": "大学医学",
    "category": "Other"
  },
  "computer_science": {
    "name": "计算机科学",
    "category": "STEM"
  },
  "computer_security": {
    "name": "计算机安全",
    "category": "Other"
  },
  "conceptual_physics": {
    "name": "概念物理学",
    "category": "STEM"
  },
  "construction_project_management": {
    "name": "建设工程管理",
    "category": "Other"
  },
  "economics": {
    "name": "经济学",
    "category": "Social Sciences"
  },
  "education": {
    "name": "教育学",
    "category": "Social Sciences"
  },
  "elementary_chinese": {
    "name": "小学语文",
    "category": "Social Sciences"
  },
  "elementary_commonsense": {
    "name": "小学常识",
    "category": "Other"
  },
  "elementary_information_and_technology": {
    "name": "小学信息技术",
    "category": "Other"
  },
  "electrical_engineering": {
    "name": "电气工程",
    "category": "STEM"
  },
  "elementary_mathematics": {
    "name": "初等数学",
    "category": "STEM"
  },
  "ethnology": {
    "name": "民族学",
    "category": "Social Sciences"
  },
  "food_science": {
    "name": "食品科学",
    "category": "Other"
  },
  "genetics": {
    "name": "遗传学",
    "category": "STEM"
  },
  "global_facts": {
    "name": "全球事实",
    "category": "Humanities"
  },
  "high_school_biology": {
    "name": "高中生物",
    "category": "STEM"
  },
  "high_school_chemistry": {
    "name": "高中化学",
    "category": "STEM"
  },
  "high_school_geography": {
    "name": "高中地理",
    "category": "Social Sciences"
  },
  "high_school_mathematics": {
    "name": "高中数学",
    "category": "STEM"
  },
  "high_school_physics": {
    "name": "高中物理学",
    "category": "STEM"
  },
  "high_school_politics": {
    "name": "高中政治",
    "category": "Social Sciences"
  },
  "human_sexuality": {
    "name": "人类性行为",
    "category": "Other"
  },
  "international_law": {
    "name": "国际法学",
    "category": "Humanities"
  },
  "journalism": {
    "name": "新闻学",
    "category": "Social Sciences"
  },
  "jurisprudence": {
    "name": "法理学",
    "category": "Humanities"
  },
  "legal_and_moral_basis": {
    "name": "法律与道德基础",
    "category": "Other"
  },
  "logical": {
    "name": "逻辑学",
    "category": "Humanities"
  },
  "machine_learning": {
    "name": "机器学习",
    "category": "STEM"
  },
  "management": {
    "name": "管理学",
    "category": "Social Sciences"
  },
  "marketing": {
    "name": "市场营销",
    "category": "Social Sciences"
  },
  "marxist_theory": {
    "name": "马克思主义理论",
    "category": "Humanities"
  },
  "modern_chinese": {
    "name": "现代汉语",
    "category": "Social Sciences"
  },
  "nutrition": {
    "name": "营养学",
    "category": "Other"
  },
  "philosophy": {
    "name": "哲学",
    "category": "Humanities"
  },
  "professional_accounting": {
    "name": "专业会计",
    "category": "Social Sciences"
  },
  "professional_law": {
    "name": "专业法学",
    "category": "Humanities"
  },
  "professional_medicine": {
    "name": "专业医学",
    "category": "Other"
  },
  "professional_psychology": {
    "name": "专业心理学",
    "category": "Social Sciences"
  },
  "public_relations": {
    "name": "公共关系",
    "category": "Social Sciences"
  },
  "security_study": {
    "name": "安全研究",
    "category": "Social Sciences"
  },
  "sociology": {
    "name": "社会学",
    "category": "Social Sciences"
  },
  "sports_science": {
    "name": "体育学",
    "category": "Other"
  },
  "traditional_chinese_medicine": {
    "name": "中医中药",
    "category": "Other"
  },
  "virology": {
    "name": "病毒学",
    "category": "STEM"
  },
  "world_history": {
    "name": "世界历史",
    "category": "Humanities"
  },
  "world_religions": {
    "name": "世界宗教",
    "category": "Humanities"
  }
 }
--- a/evaluation/mmlu/mapping.json
+++ b/evaluation/mmlu/mapping.json
@ -0,0 +1,230 @@
 {
  "abstract_algebra": {
    "name": "abstract algebra",
    "category": "STEM"
  },
  "anatomy": {
    "name": "anatomy",
    "category": "Other"
  },
  "astronomy": {
    "name": "astronomy",
    "category": "STEM"
  },
  "business_ethics": {
    "name": "business ethics",
    "category": "Other"
  },
  "clinical_knowledge": {
    "name": "clinical knowledge",
    "category": "Other"
  },
  "college_biology": {
    "name": "college biology",
    "category": "STEM"
  },
  "college_chemistry": {
    "name": "college chemistry",
    "category": "STEM"
  },
  "college_computer_science": {
    "name": "college computer science",
    "category": "STEM"
  },
  "college_mathematics": {
    "name": "college mathematics",
    "category": "STEM"
  },
  "college_medicine": {
    "name": "college medicine",
    "category": "Other"
  },
  "college_physics": {
    "name": "college physics",
    "category": "STEM"
  },
  "computer_security": {
    "name": "computer security",
    "category": "STEM"
  },
  "conceptual_physics": {
    "name": "conceptual physics",
    "category": "STEM"
  },
  "econometrics": {
    "name": "econometrics",
    "category": "Social Sciences"
  },
  "electrical_engineering": {
    "name": "electrical engineering",
    "category": "STEM"
  },
  "elementary_mathematics": {
    "name": "elementary mathematics",
    "category": "STEM"
  },
  "formal_logic": {
    "name": "formal logic",
    "category": "Humanities"
  },
  "global_facts": {
    "name": "global facts",
    "category": "Other"
  },
  "high_school_biology": {
    "name": "high school biology",
    "category": "STEM"
  },
  "high_school_chemistry": {
    "name": "high school chemistry",
    "category": "STEM"
  },
  "high_school_computer_science": {
    "name": "high school computer science",
    "category": "STEM"
  },
  "high_school_european_history": {
    "name": "high school european history",
    "category": "Humanities"
  },
  "high_school_geography": {
    "name": "high school geography",
    "category": "Social Sciences"
  },
  "high_school_government_and_politics": {
    "name": "high school government and politics",
    "category": "Social Sciences"
  },
  "high_school_macroeconomics": {
    "name": "high school macroeconomics",
    "category": "Social Sciences"
  },
  "high_school_mathematics": {
    "name": "high school mathematics",
    "category": "STEM"
  },
  "high_school_microeconomics": {
    "name": "high school microeconomics",
    "category": "Social Sciences"
  },
  "high_school_physics": {
    "name": "high school physics",
    "category": "STEM"
  },
  "high_school_psychology": {
    "name": "high school psychology",
    "category": "Social Sciences"
  },
  "high_school_statistics": {
    "name": "high school statistics",
    "category": "STEM"
  },
  "high_school_us_history": {
    "name": "high school us history",
    "category": "Humanities"
  },
  "high_school_world_history": {
    "name": "high school world history",
    "category": "Humanities"
  },
  "human_aging": {
    "name": "human aging",
    "category": "Other"
  },
  "human_sexuality": {
    "name": "human sexuality",
    "category": "Social Sciences"
  },
  "international_law": {
    "name": "international law",
    "category": "Humanities"
  },
  "jurisprudence": {
    "name": "jurisprudence",
    "category": "Humanities"
  },
  "logical_fallacies": {
    "name": "logical fallacies",
    "category": "Humanities"
  },
  "machine_learning": {
    "name": "machine learning",
    "category": "STEM"
  },
  "management": {
    "name": "management",
    "category": "Other"
  },
  "marketing": {
    "name": "marketing",
    "category": "Other"
  },
  "medical_genetics": {
    "name": "medical genetics",
    "category": "Other"
  },
  "miscellaneous": {
    "name": "miscellaneous",
    "category": "Other"
  },
  "moral_disputes": {
    "name": "moral disputes",
    "category": "Humanities"
  },
  "moral_scenarios": {
    "name": "moral scenarios",
    "category": "Humanities"
  },
  "nutrition": {
    "name": "nutrition",
    "category": "Other"
  },
  "philosophy": {
    "name": "philosophy",
    "category": "Humanities"
  },
  "prehistory": {
    "name": "prehistory",
    "category": "Humanities"
  },
  "professional_accounting": {
    "name": "professional accounting",
    "category": "Other"
  },
  "professional_law": {
    "name": "professional law",
    "category": "Humanities"
  },
  "professional_medicine": {
    "name": "professional medicine",
    "category": "Other"
  },
  "professional_psychology": {
    "name": "professional psychology",
    "category": "Social Sciences"
  },
  "public_relations": {
    "name": "public relations",
    "category": "Social Sciences"
  },
  "security_studies": {
    "name": "security studies",
    "category": "Social Sciences"
  },
  "sociology": {
    "name": "sociology",
    "category": "Social Sciences"
  },
  "us_foreign_policy": {
    "name": "us foreign policy",
    "category": "Social Sciences"
  },
  "virology": {
    "name": "virology",
    "category": "Other"
  },
  "world_religions": {
    "name": "world religions",
    "category": "Humanities"
  }
 }
--- a/evaluation/mmlu/mmlu.py
+++ b/evaluation/mmlu/mmlu.py
@ -0,0 +1,163 @@
 # Copyright 2025 the LlamaFactory team.
 # Copyright 2020 The HuggingFace Datasets Authors and the current dataset script contributor.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import os
 import datasets
 import pandas as pd
 # BibTeX entry for the MMLU paper; passed to DatasetInfo as the citation text.
 _CITATION = """\
@article{hendryckstest2021,
  title={Measuring Massive Multitask Language Understanding},
  author={Dan Hendrycks and Collin Burns and others},
  journal={Proceedings of the International Conference on Learning Representations (ICLR)},
  year={2021}
 }
 """
 # Human-readable summary passed to DatasetInfo as the description.
 _DESCRIPTION = """\
 Measuring Massive Multitask Language Understanding by Dan Hendrycks, Collin Burns, Steven Basart,
 Andy Zou, Mantas Mazeika, Dawn Song, and Jacob Steinhardt (ICLR 2021).
 """
 # Homepage and license strings reported in the dataset metadata.
 _HOMEPAGE = "https://github.com/hendrycks/test"
 _LICENSE = "MIT"
 # Archive name handed to the download manager in `_split_generators`.
 _URL = "mmlu.zip"
 # Subject identifiers. Each entry becomes the name of one builder config and is
 # interpolated into the per-split CSV file names (`<task>_test.csv`, etc.).
 task_list = [
    "high_school_european_history",
    "business_ethics",
    "clinical_knowledge",
    "medical_genetics",
    "high_school_us_history",
    "high_school_physics",
    "high_school_world_history",
    "virology",
    "high_school_microeconomics",
    "econometrics",
    "college_computer_science",
    "high_school_biology",
    "abstract_algebra",
    "professional_accounting",
    "philosophy",
    "professional_medicine",
    "nutrition",
    "global_facts",
    "machine_learning",
    "security_studies",
    "public_relations",
    "professional_psychology",
    "prehistory",
    "anatomy",
    "human_sexuality",
    "college_medicine",
    "high_school_government_and_politics",
    "college_chemistry",
    "logical_fallacies",
    "high_school_geography",
    "elementary_mathematics",
    "human_aging",
    "college_mathematics",
    "high_school_psychology",
    "formal_logic",
    "high_school_statistics",
    "international_law",
    "high_school_mathematics",
    "high_school_computer_science",
    "conceptual_physics",
    "miscellaneous",
    "high_school_chemistry",
    "marketing",
    "professional_law",
    "management",
    "college_physics",
    "jurisprudence",
    "world_religions",
    "sociology",
    "us_foreign_policy",
    "high_school_macroeconomics",
    "computer_security",
    "moral_scenarios",
    "moral_disputes",
    "electrical_engineering",
    "astronomy",
    "college_biology",
 ]
 class MMLUConfig(datasets.BuilderConfig):
    """Builder config for a single MMLU subject, pinned to version 1.0.0."""
    def __init__(self, **kwargs):
        """Forward ``kwargs`` to ``BuilderConfig`` together with the fixed version."""
        pinned_version = datasets.Version("1.0.0")
        super().__init__(version=pinned_version, **kwargs)
 class MMLU(datasets.GeneratorBasedBuilder):
    """Dataset builder for MMLU exposing one builder config per entry of ``task_list``."""
    BUILDER_CONFIGS = [MMLUConfig(name=subject) for subject in task_list]
    def _info(self):
        """Return the dataset metadata together with the per-example column schema."""
        # All six columns are strings; insertion order mirrors the declared order.
        schema = {
            column: datasets.Value("string")
            for column in ("question", "A", "B", "C", "D", "answer")
        }
        features = datasets.Features(schema)
        return datasets.DatasetInfo(
            description=_DESCRIPTION,
            features=features,
            homepage=_HOMEPAGE,
            license=_LICENSE,
            citation=_CITATION,
        )
    def _split_generators(self, dl_manager):
        """Extract the archive and point each split at ``data/<folder>/<task>_<folder>.csv``."""
        root = dl_manager.download_and_extract(_URL)
        subject = self.config.name
        # (split, folder) pairs; note that the ``dev`` folder is exposed as TRAIN.
        layout = (
            (datasets.Split.TEST, "test"),
            (datasets.Split.VALIDATION, "val"),
            (datasets.Split.TRAIN, "dev"),
        )
        return [
            datasets.SplitGenerator(
                name=split,
                gen_kwargs={
                    "filepath": os.path.join(root, "data", folder, f"{subject}_{folder}.csv"),
                },
            )
            for split, folder in layout
        ]
    def _generate_examples(self, filepath):
        """Yield ``(index, row)`` pairs read from the header-less CSV at ``filepath``.

        The file is read without a header row and its columns are then named
        ``question, A, B, C, D, answer`` in that order.
        """
        table = pd.read_csv(filepath, header=None)
        table.columns = ["question", "A", "B", "C", "D", "answer"]
        for index, row in enumerate(table.to_dict(orient="records")):
            yield index, row
--- a/src/llamafactory/cli.py
+++ b/src/llamafactory/cli.py
@ -25,27 +25,64 @@ USAGE = (
    + "| Usage:                                                             |\n"
    + "|   llamafactory-cli api -h: launch an OpenAI-style API server       |\n"
    + "|   llamafactory-cli chat -h: launch a chat interface in CLI         |\n"
    + "|   llamafactory-cli eval -h: evaluate models                        |\n"
    + "|   llamafactory-cli export -h: merge LoRA adapters and export model |\n"
    + "|   llamafactory-cli train -h: train models                          |\n"
    + "|   llamafactory-cli webchat -h: launch a chat interface in Web UI   |\n"
    + "|   llamafactory-cli webui: launch LlamaBoard                        |\n"
    + "|   llamafactory-cli env: show environment info                      |\n"
    + "|   llamafactory-cli version: show version info                      |\n"
    + "| Hint: You can use `lmf` as a shortcut for `llamafactory-cli`.      |\n"
    + "-" * 70
 )
 def _run_api():
    """Handle the ``api`` command; the API module is imported only when invoked."""
    from .api.app import run_api as launch_api
    return launch_api()
 def _run_chat():
    """Handle the ``chat`` command; the chat module is imported only when invoked."""
    from .chat.chat_model import run_chat as launch_chat
    return launch_chat()
 def _run_eval():
    """Handle the ``eval`` command; the evaluator is imported only when invoked."""
    from .eval.evaluator import run_eval as launch_eval
    return launch_eval()
 def _export_model():
    """Handle the ``export`` command; the tuner module is imported only when invoked."""
    from .train.tuner import export_model as do_export
    return do_export()
 def _run_exp():
    """Handle the ``train`` command; the tuner module is imported only when invoked."""
    from .train.tuner import run_exp as launch_training
    return launch_training()
 def _run_web_demo():
    """Handle the ``webchat`` command; the web UI module is imported only when invoked."""
    from .webui.interface import run_web_demo as launch_web_demo
    return launch_web_demo()
 def _run_web_ui():
    """Handle the ``webui`` command; the web UI module is imported only when invoked."""
    from .webui.interface import run_web_ui as launch_web_ui
    return launch_web_ui()
 def main():
    from . import launcher
    from .extras import logging
    from .extras.env import VERSION, print_env
    from .extras.misc import find_available_port, get_device_count, is_env_enabled, use_ray
    if is_env_enabled("USE_V1"):
        from .v1 import launcher
    else:
        from . import launcher
    logger = logging.get_logger(__name__)
    WELCOME = (
@ -61,14 +98,14 @@ def main():
    )
    COMMAND_MAP = {
-        "api": launcher.run_api,
+        "api": _run_api,
-        "chat": launcher.run_chat,
+        "chat": _run_chat,
        "env": print_env,
-        "eval": launcher.run_eval,
+        "eval": _run_eval,
-        "export": launcher.export_model,
+        "export": _export_model,
-        "train": launcher.run_exp,
+        "train": _run_exp,
-        "webchat": launcher.run_web_demo,
+        "webchat": _run_web_demo,
-        "webui": launcher.run_web_ui,
+        "webui": _run_web_ui,
        "version": partial(print, WELCOME),
        "help": partial(print, USAGE),
    }
--- a/src/llamafactory/extras/env.py
+++ b/src/llamafactory/extras/env.py
@ -15,22 +15,22 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import os
 import platform
 import accelerate
 import datasets
 import peft
 import torch
 import transformers
 import trl
 from transformers.utils import is_torch_cuda_available, is_torch_npu_available
 VERSION = "0.9.4.dev0"
 def print_env() -> None:
    import os
    import platform
    import accelerate
    import datasets
    import peft
    import torch
    import transformers
    import trl
    from transformers.utils import is_torch_cuda_available, is_torch_npu_available
    info = {
        "`llamafactory` version": VERSION,
        "Platform": platform.platform(),
--- a/src/llamafactory/launcher.py
+++ b/src/llamafactory/launcher.py
@ -12,46 +12,12 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
+from llamafactory.train.tuner import run_exp  # use absolute import
 def run_api():
    """Call ``llamafactory.api.app.run_api`` and pass its result through.

    The import is performed inside the function body, so the API module is
    only loaded when this entry point is actually invoked.

    Returns:
        Whatever ``llamafactory.api.app.run_api`` returns. The result is
        propagated so this wrapper matches the sibling launchers in this
        module, which all return their callee's result.
    """
    from llamafactory.api.app import run_api as _run_api

    return _run_api()
-def run_chat():
+def launch():
-    from llamafactory.chat.chat_model import run_chat as _run_chat
+    run_exp()
    return _run_chat()
 def run_eval():
    """Reject the evaluation entry point.

    Raises:
        NotImplementedError: Always, with a fixed message.
    """
    message = "Evaluation will be deprecated in the future."
    raise NotImplementedError(message)
 def export_model():
    """Call ``llamafactory.train.tuner.export_model`` and return its result.

    The import is performed inside the function body, so the tuner module
    is only loaded when this entry point is actually invoked.
    """
    from llamafactory.train.tuner import export_model as export_entry

    return export_entry()
 def run_exp():
    """Call ``llamafactory.train.tuner.run_exp`` and return its result.

    The import is performed inside the function body, so the tuner module
    is only loaded when this entry point is actually invoked.
    """
    # use absolute import
    from llamafactory.train.tuner import run_exp as train_entry

    return train_entry()
 def run_web_demo():
    """Call ``llamafactory.webui.interface.run_web_demo`` and return its result.

    The import is performed inside the function body, so the web UI module
    is only loaded when this entry point is actually invoked.
    """
    from llamafactory.webui.interface import run_web_demo as web_demo_entry

    return web_demo_entry()
 def run_web_ui():
    """Call ``llamafactory.webui.interface.run_web_ui`` and return its result.

    The import is performed inside the function body, so the web UI module
    is only loaded when this entry point is actually invoked.
    """
    from llamafactory.webui.interface import run_web_ui as web_ui_entry

    return web_ui_entry()
 if __name__ == "__main__":
-    run_exp()
+    launch()
--- a/src/llamafactory/v1/init.py
+++ b/src/llamafactory/v1/init.py
--- a/src/llamafactory/v1/core/init.py
+++ b/src/llamafactory/v1/core/init.py
--- a/src/llamafactory/v1/core/base_trainer.py
+++ b/src/llamafactory/v1/core/base_trainer.py
--- a/src/llamafactory/v1/core/chat_sampler.py
+++ b/src/llamafactory/v1/core/chat_sampler.py
--- a/src/llamafactory/v1/core/data_loader.py
+++ b/src/llamafactory/v1/core/data_loader.py
--- a/src/llamafactory/v1/core/model_engine.py
+++ b/src/llamafactory/v1/core/model_engine.py
--- a/src/llamafactory/v1/launcher.py
+++ b/src/llamafactory/v1/launcher.py
@ -1,33 +0,0 @@
 # Copyright 2025 the LlamaFactory team.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 def run_train():
    """Reject the generic training entry point.

    Raises:
        NotImplementedError: Always; the message directs the user to the
            ``sft`` or ``rm`` sub-commands of ``llamafactory-cli``.
    """
    hint = "Please use `llamafactory-cli sft` or `llamafactory-cli rm`."
    raise NotImplementedError(hint)
 def run_chat():
    """Instantiate ``Sampler`` with no arguments and call its ``cli`` method.

    ``Sampler`` is imported from ``llamafactory.v1.core.chat_sampler`` inside
    the function body, so that module is only loaded on invocation. The
    result of ``cli()`` is not returned.
    """
    from llamafactory.v1.core.chat_sampler import Sampler

    sampler = Sampler()
    sampler.cli()
 def run_sft():
    """Instantiate ``SFTTrainer`` with no arguments and call its ``run`` method.

    ``SFTTrainer`` is imported from ``llamafactory.v1.train.sft`` inside the
    function body, so that module is only loaded on invocation. The result
    of ``run()`` is not returned.
    """
    from llamafactory.v1.train.sft import SFTTrainer

    trainer = SFTTrainer()
    trainer.run()
 # When this module is executed as a script, call run_train(), which raises
 # NotImplementedError pointing the user at the `sft` / `rm` sub-commands.
 if __name__ == "__main__":
    run_train()
--- a/src/llamafactory/v1/plugins/init.py
+++ b/src/llamafactory/v1/plugins/init.py
--- a/src/llamafactory/v1/plugins/data_plugins/init.py
+++ b/src/llamafactory/v1/plugins/data_plugins/init.py
--- a/src/llamafactory/v1/plugins/data_plugins/converter.py
+++ b/src/llamafactory/v1/plugins/data_plugins/converter.py
--- a/src/llamafactory/v1/plugins/model_plugins/init.py
+++ b/src/llamafactory/v1/plugins/model_plugins/init.py
--- a/src/llamafactory/v1/plugins/model_plugins/peft.py
+++ b/src/llamafactory/v1/plugins/model_plugins/peft.py
--- a/src/llamafactory/v1/plugins/sampler_plugins/init.py
+++ b/src/llamafactory/v1/plugins/sampler_plugins/init.py
--- a/src/llamafactory/v1/plugins/sampler_plugins/vllm.py
+++ b/src/llamafactory/v1/plugins/sampler_plugins/vllm.py
--- a/src/llamafactory/v1/plugins/trainer_plugins/init.py
+++ b/src/llamafactory/v1/plugins/trainer_plugins/init.py
--- a/src/llamafactory/v1/plugins/trainer_plugins/accelerate.py
+++ b/src/llamafactory/v1/plugins/trainer_plugins/accelerate.py
--- a/src/llamafactory/v1/trainers/init.py
+++ b/src/llamafactory/v1/trainers/init.py
--- a/src/llamafactory/v1/trainers/dpo_trainer.py
+++ b/src/llamafactory/v1/trainers/dpo_trainer.py
--- a/src/llamafactory/v1/trainers/rm_trainer.py
+++ b/src/llamafactory/v1/trainers/rm_trainer.py
--- a/src/llamafactory/v1/trainers/sft_trainer.py
+++ b/src/llamafactory/v1/trainers/sft_trainer.py