fix eval scripts

Former-commit-id: fc3743d0b82c28fbff1170761139e4fa5d2a8939
2026-05-28 02:48:54 +08:00 · 2024-05-05 00:53:07 +08:00
parent 7ef3788ff4
commit 2f5f6722cf
3 changed files with 74 additions and 86 deletions
--- a/evaluation/ceval/ceval.py
+++ b/evaluation/ceval/ceval.py
@@ -19,7 +19,7 @@ import pandas as pd
 _CITATION = """\
@article{huang2023ceval,
-  title={C-Eval: A Multi-Level Multi-Discipline Chinese Evaluation Suite for Foundation Models}, 
+  title={C-Eval: A Multi-Level Multi-Discipline Chinese Evaluation Suite for Foundation Models},
  author={Huang, Yuzhen and Bai, Yuzhuo and Zhu, Zhihao and Zhang, Junlei and Zhang, Jinghan and Su, Tangjun and Liu, Junteng and Lv, Chuancheng and Zhang, Yikai and Lei, Jiayi and Fu, Yao and Sun, Maosong and He, Junxian},
  journal={arXiv preprint arXiv:2305.08322},
  year={2023}
@@ -133,25 +133,19 @@ class Ceval(datasets.GeneratorBasedBuilder):
            datasets.SplitGenerator(
                name=datasets.Split.TEST,
                gen_kwargs={
-                    "filepath": os.path.join(
+                    "filepath": os.path.join(data_dir, "test", f"{task_name}_test.csv"),
-                        data_dir, "test", f"{task_name}_test.csv"
-                    ),
                },
            ),
            datasets.SplitGenerator(
                name=datasets.Split.VALIDATION,
                gen_kwargs={
-                    "filepath": os.path.join(
+                    "filepath": os.path.join(data_dir, "val", f"{task_name}_val.csv"),
-                        data_dir, "val", f"{task_name}_val.csv"
-                    ),
                },
            ),
            datasets.SplitGenerator(
                name=datasets.Split.TRAIN,
                gen_kwargs={
-                    "filepath": os.path.join(
+                    "filepath": os.path.join(data_dir, "dev", f"{task_name}_dev.csv"),
-                        data_dir, "dev", f"{task_name}_dev.csv"
-                    ),
                },
            ),
        ]
--- a/evaluation/cmmlu/cmmlu.py
+++ b/evaluation/cmmlu/cmmlu.py
@@ -37,73 +37,73 @@ _LICENSE = "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 Internatio
 _URL = "cmmlu.zip"
 task_list = [
-     'agronomy',
+    "agronomy",
-     'anatomy',
+    "anatomy",
-     'ancient_chinese',
+    "ancient_chinese",
-     'arts',
+    "arts",
-     'astronomy',
+    "astronomy",
-     'business_ethics',
+    "business_ethics",
-     'chinese_civil_service_exam',
+    "chinese_civil_service_exam",
-     'chinese_driving_rule',
+    "chinese_driving_rule",
-     'chinese_food_culture',
+    "chinese_food_culture",
-     'chinese_foreign_policy',
+    "chinese_foreign_policy",
-     'chinese_history',
+    "chinese_history",
-     'chinese_literature',
+    "chinese_literature",
-     'chinese_teacher_qualification',
+    "chinese_teacher_qualification",
-     'clinical_knowledge',
+    "clinical_knowledge",
-     'college_actuarial_science',
+    "college_actuarial_science",
-     'college_education',
+    "college_education",
-     'college_engineering_hydrology',
+    "college_engineering_hydrology",
-     'college_law',
+    "college_law",
-     'college_mathematics',
+    "college_mathematics",
-     'college_medical_statistics',
+    "college_medical_statistics",
-     'college_medicine',
+    "college_medicine",
-     'computer_science',
+    "computer_science",
-     'computer_security',
+    "computer_security",
-     'conceptual_physics',
+    "conceptual_physics",
-     'construction_project_management',
+    "construction_project_management",
-     'economics',
+    "economics",
-     'education',
+    "education",
-     'electrical_engineering',
+    "electrical_engineering",
-     'elementary_chinese',
+    "elementary_chinese",
-     'elementary_commonsense',
+    "elementary_commonsense",
-     'elementary_information_and_technology',
+    "elementary_information_and_technology",
-     'elementary_mathematics',
+    "elementary_mathematics",
-     'ethnology',
+    "ethnology",
-     'food_science',
+    "food_science",
-     'genetics',
+    "genetics",
-     'global_facts',
+    "global_facts",
-     'high_school_biology',
+    "high_school_biology",
-     'high_school_chemistry',
+    "high_school_chemistry",
-     'high_school_geography',
+    "high_school_geography",
-     'high_school_mathematics',
+    "high_school_mathematics",
-     'high_school_physics',
+    "high_school_physics",
-     'high_school_politics',
+    "high_school_politics",
-     'human_sexuality',
+    "human_sexuality",
-     'international_law',
+    "international_law",
-     'journalism',
+    "journalism",
-     'jurisprudence',
+    "jurisprudence",
-     'legal_and_moral_basis',
+    "legal_and_moral_basis",
-     'logical',
+    "logical",
-     'machine_learning',
+    "machine_learning",
-     'management',
+    "management",
-     'marketing',
+    "marketing",
-     'marxist_theory',
+    "marxist_theory",
-     'modern_chinese',
+    "modern_chinese",
-     'nutrition',
+    "nutrition",
-     'philosophy',
+    "philosophy",
-     'professional_accounting',
+    "professional_accounting",
-     'professional_law',
+    "professional_law",
-     'professional_medicine',
+    "professional_medicine",
-     'professional_psychology',
+    "professional_psychology",
-     'public_relations',
+    "public_relations",
-     'security_study',
+    "security_study",
-     'sociology',
+    "sociology",
-     'sports_science',
+    "sports_science",
-     'traditional_chinese_medicine',
+    "traditional_chinese_medicine",
-     'virology',
+    "virology",
-     'world_history',
+    "world_history",
-     'world_religions',
+    "world_religions",
 ]
--- a/evaluation/mmlu/mmlu.py
+++ b/evaluation/mmlu/mmlu.py
@@ -136,25 +136,19 @@ class MMLU(datasets.GeneratorBasedBuilder):
            datasets.SplitGenerator(
                name=datasets.Split.TEST,
                gen_kwargs={
-                    "filepath": os.path.join(
+                    "filepath": os.path.join(data_dir, "data", "test", f"{task_name}_test.csv"),
-                        data_dir, "data", "test", f"{task_name}_test.csv"
-                    ),
                },
            ),
            datasets.SplitGenerator(
                name=datasets.Split.VALIDATION,
                gen_kwargs={
-                    "filepath": os.path.join(
+                    "filepath": os.path.join(data_dir, "data", "val", f"{task_name}_val.csv"),
-                        data_dir, "data", "val", f"{task_name}_val.csv"
-                    ),
                },
            ),
            datasets.SplitGenerator(
                name=datasets.Split.TRAIN,
                gen_kwargs={
-                    "filepath": os.path.join(
+                    "filepath": os.path.join(data_dir, "data", "dev", f"{task_name}_dev.csv"),
-                        data_dir, "data", "dev", f"{task_name}_dev.csv"
-                    ),
                },
            ),
        ]