mirror of
https://github.com/hiyouga/LLaMA-Factory.git
synced 2025-12-14 10:56:56 +08:00
add code for reading from multi files in one directory
Former-commit-id: 3dd5f9a874
This commit is contained in:
@@ -1,107 +1,116 @@
|
||||
{
|
||||
"alpaca_en": {
|
||||
"hf_hub_url": "tatsu-lab/alpaca"
|
||||
},
|
||||
"alpaca_zh": {
|
||||
"file_name": "alpaca_data_zh_51k.json",
|
||||
"file_sha1": "e655af3db557a4197f7b0cf92e1986b08fae6311"
|
||||
},
|
||||
"alpaca_gpt4_en": {
|
||||
"file_name": "alpaca_gpt4_data_en.json",
|
||||
"file_sha1": "647f4ad447bd993e4b6b6223d1be15208bab694a"
|
||||
},
|
||||
"alpaca_gpt4_zh": {
|
||||
"file_name": "alpaca_gpt4_data_zh.json",
|
||||
"file_sha1": "3eaa3bda364ccdd59925d7448a698256c31ef845"
|
||||
},
|
||||
"belle_0.5m": {
|
||||
"hf_hub_url": "BelleGroup/train_0.5M_CN"
|
||||
},
|
||||
"belle_1m": {
|
||||
"hf_hub_url": "BelleGroup/train_1M_CN"
|
||||
},
|
||||
"belle_2m": {
|
||||
"hf_hub_url": "BelleGroup/train_2M_CN"
|
||||
},
|
||||
"belle_dialog": {
|
||||
"hf_hub_url": "BelleGroup/generated_chat_0.4M"
|
||||
},
|
||||
"belle_math": {
|
||||
"hf_hub_url": "BelleGroup/school_math_0.25M"
|
||||
},
|
||||
"belle_multiturn": {
|
||||
"hf_hub_url": "BelleGroup/multiturn_chat_0.8M"
|
||||
},
|
||||
"guanaco": {
|
||||
"hf_hub_url": "JosephusCheung/GuanacoDataset"
|
||||
},
|
||||
"firefly": {
|
||||
"hf_hub_url": "YeungNLP/firefly-train-1.1M",
|
||||
"columns": {
|
||||
"prompt": "input",
|
||||
"query": "",
|
||||
"response": "target",
|
||||
"history": ""
|
||||
}
|
||||
},
|
||||
"codealpaca": {
|
||||
"hf_hub_url": "sahil2801/CodeAlpaca-20k"
|
||||
},
|
||||
"alpaca_cot": {
|
||||
"hf_hub_url": "QingyiSi/Alpaca-CoT"
|
||||
},
|
||||
"webqa": {
|
||||
"hf_hub_url": "suolyer/webqa",
|
||||
"columns": {
|
||||
"prompt": "input",
|
||||
"query": "",
|
||||
"response": "output",
|
||||
"history": ""
|
||||
}
|
||||
},
|
||||
"ultra_chat": {
|
||||
"script_url": "ultra_chat",
|
||||
"columns": {
|
||||
"prompt": "instruction",
|
||||
"query": "",
|
||||
"response": "output",
|
||||
"history": "history"
|
||||
}
|
||||
},
|
||||
"example": {
|
||||
"script_url": "example_dataset",
|
||||
"columns": {
|
||||
"prompt": "instruction",
|
||||
"query": "input",
|
||||
"response": "output",
|
||||
"history": "history"
|
||||
}
|
||||
},
|
||||
"comparison_gpt4_en": {
|
||||
"file_name": "comparison_gpt4_data_en.json",
|
||||
"file_sha1": "eeb295ce0ab011c37af52596460c8a57d07ad19f"
|
||||
},
|
||||
"comparison_gpt4_zh": {
|
||||
"file_name": "comparison_gpt4_data_zh.json",
|
||||
"file_sha1": "b99a41c1c864019d9b0c07dbcd5df0560cf33ce0"
|
||||
},
|
||||
"hh_rlhf_en": {
|
||||
"script_url": "hh_rlhf_en",
|
||||
"columns": {
|
||||
"prompt": "instruction",
|
||||
"query": "",
|
||||
"response": "output",
|
||||
"history": "history"
|
||||
}
|
||||
},
|
||||
"wiki_demo": {
|
||||
"file_name": "wiki_demo.txt",
|
||||
"file_sha1": "b2288edb05b233e5b35250fd4b308a5fa21fa66d",
|
||||
"columns": {
|
||||
"prompt": "text",
|
||||
"query": "",
|
||||
"response": "",
|
||||
"history": ""
|
||||
}
|
||||
"alpaca_en": {
|
||||
"hf_hub_url": "tatsu-lab/alpaca"
|
||||
},
|
||||
"alpaca_zh": {
|
||||
"file_name": "alpaca_data_zh_51k.json",
|
||||
"file_sha1": "e655af3db557a4197f7b0cf92e1986b08fae6311"
|
||||
},
|
||||
"alpaca_gpt4_en": {
|
||||
"file_name": "alpaca_gpt4_data_en.json",
|
||||
"file_sha1": "647f4ad447bd993e4b6b6223d1be15208bab694a"
|
||||
},
|
||||
"alpaca_gpt4_zh": {
|
||||
"file_name": "alpaca_gpt4_data_zh.json",
|
||||
"file_sha1": "3eaa3bda364ccdd59925d7448a698256c31ef845"
|
||||
},
|
||||
"belle_0.5m": {
|
||||
"hf_hub_url": "BelleGroup/train_0.5M_CN"
|
||||
},
|
||||
"belle_1m": {
|
||||
"hf_hub_url": "BelleGroup/train_1M_CN"
|
||||
},
|
||||
"belle_2m": {
|
||||
"hf_hub_url": "BelleGroup/train_2M_CN"
|
||||
},
|
||||
"belle_dialog": {
|
||||
"hf_hub_url": "BelleGroup/generated_chat_0.4M"
|
||||
},
|
||||
"belle_math": {
|
||||
"hf_hub_url": "BelleGroup/school_math_0.25M"
|
||||
},
|
||||
"belle_multiturn": {
|
||||
"hf_hub_url": "BelleGroup/multiturn_chat_0.8M"
|
||||
},
|
||||
"guanaco": {
|
||||
"hf_hub_url": "JosephusCheung/GuanacoDataset"
|
||||
},
|
||||
"firefly": {
|
||||
"hf_hub_url": "YeungNLP/firefly-train-1.1M",
|
||||
"columns": {
|
||||
"prompt": "input",
|
||||
"query": "",
|
||||
"response": "target",
|
||||
"history": ""
|
||||
}
|
||||
},
|
||||
"codealpaca": {
|
||||
"hf_hub_url": "sahil2801/CodeAlpaca-20k"
|
||||
},
|
||||
"alpaca_cot": {
|
||||
"hf_hub_url": "QingyiSi/Alpaca-CoT"
|
||||
},
|
||||
"webqa": {
|
||||
"hf_hub_url": "suolyer/webqa",
|
||||
"columns": {
|
||||
"prompt": "input",
|
||||
"query": "",
|
||||
"response": "output",
|
||||
"history": ""
|
||||
}
|
||||
},
|
||||
"ultra_chat": {
|
||||
"script_url": "ultra_chat",
|
||||
"columns": {
|
||||
"prompt": "instruction",
|
||||
"query": "",
|
||||
"response": "output",
|
||||
"history": "history"
|
||||
}
|
||||
},
|
||||
"example": {
|
||||
"script_url": "example_dataset",
|
||||
"columns": {
|
||||
"prompt": "instruction",
|
||||
"query": "input",
|
||||
"response": "output",
|
||||
"history": "history"
|
||||
}
|
||||
},
|
||||
"comparison_gpt4_en": {
|
||||
"file_name": "comparison_gpt4_data_en.json",
|
||||
"file_sha1": "eeb295ce0ab011c37af52596460c8a57d07ad19f"
|
||||
},
|
||||
"comparison_gpt4_zh": {
|
||||
"file_name": "comparison_gpt4_data_zh.json",
|
||||
"file_sha1": "b99a41c1c864019d9b0c07dbcd5df0560cf33ce0"
|
||||
},
|
||||
"hh_rlhf_en": {
|
||||
"script_url": "hh_rlhf_en",
|
||||
"columns": {
|
||||
"prompt": "instruction",
|
||||
"query": "",
|
||||
"response": "output",
|
||||
"history": "history"
|
||||
}
|
||||
},
|
||||
"wiki_demo": {
|
||||
"file_name": "wiki_demo.txt",
|
||||
"file_sha1": "b2288edb05b233e5b35250fd4b308a5fa21fa66d",
|
||||
"columns": {
|
||||
"prompt": "text",
|
||||
"query": "",
|
||||
"response": "",
|
||||
"history": ""
|
||||
}
|
||||
},
|
||||
"pretrain_data": {
|
||||
"file_name": "pretrain_data",
|
||||
"columns": {
|
||||
"prompt": "content",
|
||||
"query": "",
|
||||
"response": "",
|
||||
"history": ""
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
7
data/pretrain_data/blog.json
Normal file
7
data/pretrain_data/blog.json
Normal file
@@ -0,0 +1,7 @@
|
||||
[
|
||||
{
|
||||
"id": 0,
|
||||
"title": "拥有自己的航空器",
|
||||
"content": "想自己驾驶飞机或从事通用航空事业的人,大都想拥有自己的航空器。\"自己的\"意思包括自己购买、自己制造、可供自己使用(租用或借用)等等。\n花自己的钱买一架飞机来开一开,国内有些人或企业已实现了这个愿望。现在一架国产超轻型的“蜜蜂”飞机售价在10万元以下,进口的一些单发的双座飞机售价在100万元之内。据估计,全国大约有几十万人具有这种购买能力。\n自己造一架飞机来开也是一个好创意。美国的通航飞机中有1/5是自制的。有的自制飞机甚至还创造了世界飞行纪录。今天自己造飞机比当年莱特兄弟容易多了。飞机的基本构造已无秘密可言,各种飞机部件和材料都不难买到。尤其主要的是,技术进步大大改进了配件的性能,与此同时,配件的重量也下降了很多。莱特兄弟当年使用的12马力汽油发动机比现在30马力的同类产品还重。如果有人有志于此而且具备造飞机的种种条件,应该说这个目标也是可以实现的。有两点值得注意,一是在莱特兄弟造飞机时没有前人经验,全靠自己摸索。现在不同了,航空制造已有了上百年的知识和经验可供后人学习和利用。现在如果谁想自己造飞机就不用闭门造车了。制造者本人首先应该去学习和掌握一些必要知识和经验才行。其次,在莱特兄弟时代,没有国家民航当局,他们的航空活动不受法规约束。今天就不一样了,所有要升空的航空器必须先接受民航当局的鉴定,以保证飞行安全。绝不允许以生命为赌注的任何冒险行为。\n租用飞机也是实现自驾飞机的方式之一。国内也还有另一种形式,即参加飞行驾驶学校接受培训,当然所交的学费价格是不菲的。预计未来在我国必将出现出各类飞行俱乐部。到那时,飞行爱好者可以租用飞机去上天过一把瘾了。"
|
||||
}
|
||||
]
|
||||
7
data/pretrain_data/wiki.json
Normal file
7
data/pretrain_data/wiki.json
Normal file
@@ -0,0 +1,7 @@
|
||||
[
|
||||
{
|
||||
"id": 0,
|
||||
"title": "大卫·亨利",
|
||||
"content": "大卫·亨利\n\n大卫·克莱顿·亨利(David Clayton Henrie,),美国演员。近来在迪士尼频道原创电视影集《少年魔法师》(Wizards of Waverly Place)当中演出贾斯汀·鲁索(Justin Russo)一角。\n\n大卫·亨利出生在加州Mission Viejo,在凤凰城长大。他的胞弟劳伦斯·亨利(Lorenzo Henrie)也是演员。大卫·亨利就读夏安传统学校。家中是信奉罗马天主教。 \n\n大卫在2007年拍摄少年魔法师期间认识女演员露西·海尔(Lucy Hale),之后与其交往,于2009年分手。\n\n10岁时,大卫·亨利和SAG在凤凰城签订了合约,并开始走出去试镜。 9岁的时候,在沙加缅度进行商业拍摄,SAG董事建议大卫·亨利搬到洛杉矶。在10岁那年夏天,他和他的家人搬到了好莱坞。他预定他的前2支商业试镜,扮演主要角色为汉堡王和桂格燕麦。他初演电视节目为Providence。 \n\n到了13岁,大卫有了他的第一次重大突破,在福克斯公司的喜剧The Pitts饰演 Petey Pitt一角。大卫下出作品为的Hallmark movie为Monster Maker,和琳达布莱儿、乔治甘迺迪共同演出,并要求回来Hallmark movie公司。 \n\n在18岁时,大卫得到了迪士尼频道原创系列演出机会,该节目2007年10月12日首播。大卫2008年参加了迪士尼频道的游戏节目。他是绿色团队的队长,隔年,为旋风队队长。他在迪士尼原创电影《少年魔法师》之后在《酷爸的疯狂假期》中有饰演一角。\n"
|
||||
}
|
||||
]
|
||||
Reference in New Issue
Block a user