mirror of
https://github.com/hiyouga/LLaMA-Factory.git
synced 2025-12-14 10:56:56 +08:00
46
data/example_dataset/example_dataset.py
Normal file
46
data/example_dataset/example_dataset.py
Normal file
@@ -0,0 +1,46 @@
|
||||
import json
|
||||
import datasets
|
||||
from typing import Any, Dict, List
|
||||
|
||||
|
||||
_DESCRIPTION = "An example of dataset for LLaMA."
|
||||
_CITATION = ""
|
||||
_HOMEPAGE = ""
|
||||
_LICENSE = ""
|
||||
_URL = "examples.json"
|
||||
|
||||
|
||||
class ExampleDataset(datasets.GeneratorBasedBuilder):
|
||||
|
||||
VERSION = datasets.Version("0.0.0")
|
||||
|
||||
def _info(self) -> datasets.DatasetInfo:
|
||||
features = datasets.Features({
|
||||
"instruction": datasets.Value("string"),
|
||||
"input": datasets.Value("string"),
|
||||
"output": datasets.Value("string"),
|
||||
"history": datasets.Sequence(datasets.Sequence(datasets.Value("string")))
|
||||
})
|
||||
return datasets.DatasetInfo(
|
||||
description=_DESCRIPTION,
|
||||
features=features,
|
||||
homepage=_HOMEPAGE,
|
||||
license=_LICENSE,
|
||||
citation=_CITATION
|
||||
)
|
||||
|
||||
def _split_generators(self, dl_manager: datasets.DownloadManager) -> List[datasets.SplitGenerator]:
|
||||
file_path = dl_manager.download(_URL)
|
||||
return [
|
||||
datasets.SplitGenerator(
|
||||
name=datasets.Split.TRAIN,
|
||||
gen_kwargs={
|
||||
"filepath": file_path
|
||||
}
|
||||
)
|
||||
]
|
||||
|
||||
def _generate_examples(self, filepath: str) -> Dict[int, Dict[str, Any]]:
|
||||
example_dataset = json.load(open(filepath, "r", encoding="utf-8"))
|
||||
for key, example in enumerate(example_dataset):
|
||||
yield key, example
|
||||
20
data/example_dataset/examples.json
Normal file
20
data/example_dataset/examples.json
Normal file
@@ -0,0 +1,20 @@
|
||||
[
|
||||
{
|
||||
"instruction": "听起来很不错。人工智能可能在哪些方面面临挑战呢?",
|
||||
"input": "",
|
||||
"output": "人工智能面临的挑战包括数据隐私、安全和道德方面的问题,以及影响就业机会的自动化等问题。",
|
||||
"history": [
|
||||
["你好,你能帮我解答一个问题吗?", "当然,请问有什么问题?"],
|
||||
["我想了解人工智能的未来发展方向,你有什么想法吗?", "人工智能在未来的发展方向可能包括更强大的机器学习算法,更先进的自然语言处理技术,以及更加智能的机器人。"]
|
||||
]
|
||||
},
|
||||
{
|
||||
"instruction": "好的,谢谢你!",
|
||||
"input": "",
|
||||
"output": "不客气,有其他需要帮忙的地方可以继续问我。",
|
||||
"history": [
|
||||
["你好,能告诉我今天天气怎么样吗?", "当然可以,请问您所在的城市是哪里?"],
|
||||
["我在纽约。", "纽约今天晴间多云,气温最高约26摄氏度,最低约18摄氏度,记得注意保暖喔。"]
|
||||
]
|
||||
}
|
||||
]
|
||||
Reference in New Issue
Block a user