mirror of
https://github.com/hiyouga/LLaMA-Factory.git
synced 2025-10-16 00:28:10 +08:00
[data] improve error handling (#6128)
* sync from upstream * update * update * fix --------- Co-authored-by: hiyouga <hiyouga@buaa.edu.cn> Former-commit-id: 1569e6096fec07da5583f1a3435b0d23ae09b5ba
This commit is contained in:
parent
8b8fdb3a85
commit
38c9641777
@ -9,22 +9,7 @@ requires-python = ">=3.10"
|
|||||||
|
|
||||||
[project.scripts]
|
[project.scripts]
|
||||||
llamafactory-cli = "llamafactory.cli:main"
|
llamafactory-cli = "llamafactory.cli:main"
|
||||||
|
lmf = "llamafactory.cli:main"
|
||||||
[tool.uv]
|
|
||||||
conflicts = [
|
|
||||||
[
|
|
||||||
{extra = "aqlm" },
|
|
||||||
{extra = "torch-npu" },
|
|
||||||
],
|
|
||||||
[
|
|
||||||
{extra = "torch-npu" },
|
|
||||||
{extra = "liger-kernel" },
|
|
||||||
],
|
|
||||||
[
|
|
||||||
{extra = "vllm" },
|
|
||||||
{extra = "torch-npu" },
|
|
||||||
]
|
|
||||||
]
|
|
||||||
|
|
||||||
[tool.ruff]
|
[tool.ruff]
|
||||||
target-version = "py38"
|
target-version = "py38"
|
||||||
@ -55,3 +40,19 @@ indent-style = "space"
|
|||||||
docstring-code-format = true
|
docstring-code-format = true
|
||||||
skip-magic-trailing-comma = false
|
skip-magic-trailing-comma = false
|
||||||
line-ending = "auto"
|
line-ending = "auto"
|
||||||
|
|
||||||
|
[tool.uv]
|
||||||
|
conflicts = [
|
||||||
|
[
|
||||||
|
{ extra = "torch-npu" },
|
||||||
|
{ extra = "aqlm" },
|
||||||
|
],
|
||||||
|
[
|
||||||
|
{ extra = "torch-npu" },
|
||||||
|
{ extra = "liger-kernel" },
|
||||||
|
],
|
||||||
|
[
|
||||||
|
{ extra = "torch-npu" },
|
||||||
|
{ extra = "vllm" },
|
||||||
|
]
|
||||||
|
]
|
||||||
|
@ -36,23 +36,25 @@ class DatasetConverter:
|
|||||||
dataset_attr: "DatasetAttr"
|
dataset_attr: "DatasetAttr"
|
||||||
data_args: "DataArguments"
|
data_args: "DataArguments"
|
||||||
|
|
||||||
def _find_medias(self, inputs: Union[Any, Sequence[Any]]) -> Optional[List[Any]]:
|
def _find_medias(self, medias: Union[Any, Sequence[Any]]) -> Optional[List[Any]]:
|
||||||
r"""
|
r"""
|
||||||
Optionally concatenates media path to media dir when loading from local disk.
|
Optionally concatenates media path to media dir when loading from local disk.
|
||||||
"""
|
"""
|
||||||
if not isinstance(inputs, list):
|
if not isinstance(medias, list):
|
||||||
inputs = [inputs]
|
medias = [medias] if medias is not None else []
|
||||||
elif len(inputs) == 0:
|
elif len(medias) == 0:
|
||||||
return None
|
return None
|
||||||
else:
|
else:
|
||||||
inputs = inputs[:]
|
medias = medias[:]
|
||||||
|
|
||||||
if self.dataset_attr.load_from in ["script", "file"]:
|
if self.dataset_attr.load_from in ["script", "file"] and isinstance(medias[0], str):
|
||||||
for i in range(len(inputs)):
|
for i in range(len(medias)):
|
||||||
if isinstance(inputs[i], str) and os.path.isfile(os.path.join(self.data_args.media_dir, inputs[i])):
|
if os.path.isfile(os.path.join(self.data_args.media_dir, medias[i])):
|
||||||
inputs[i] = os.path.join(self.data_args.media_dir, inputs[i])
|
medias[i] = os.path.join(self.data_args.media_dir, medias[i])
|
||||||
|
else:
|
||||||
|
logger.warning_rank0_once(f"Media {medias[i]} does not exist in `media_dir`. Use original path.")
|
||||||
|
|
||||||
return inputs
|
return medias
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def __call__(self, example: Dict[str, Any]) -> Dict[str, Any]:
|
def __call__(self, example: Dict[str, Any]) -> Dict[str, Any]:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user