mirror of
https://github.com/hiyouga/LLaMA-Factory.git
synced 2025-08-01 11:12:50 +08:00
[data] improve error handling (#6128)
* sync from upstream * update * update * fix --------- Co-authored-by: hiyouga <hiyouga@buaa.edu.cn> Former-commit-id: 4c7bfebcf1ed90800f5b0de4cf67b3036cb9dc13
This commit is contained in:
parent
9b852ebe25
commit
1adb46875f
@ -9,22 +9,7 @@ requires-python = ">=3.10"
|
||||
|
||||
[project.scripts]
|
||||
llamafactory-cli = "llamafactory.cli:main"
|
||||
|
||||
[tool.uv]
|
||||
conflicts = [
|
||||
[
|
||||
{extra = "aqlm" },
|
||||
{extra = "torch-npu" },
|
||||
],
|
||||
[
|
||||
{extra = "torch-npu" },
|
||||
{extra = "liger-kernel" },
|
||||
],
|
||||
[
|
||||
{extra = "vllm" },
|
||||
{extra = "torch-npu" },
|
||||
]
|
||||
]
|
||||
lmf = "llamafactory.cli:main"
|
||||
|
||||
[tool.ruff]
|
||||
target-version = "py38"
|
||||
@ -55,3 +40,19 @@ indent-style = "space"
|
||||
docstring-code-format = true
|
||||
skip-magic-trailing-comma = false
|
||||
line-ending = "auto"
|
||||
|
||||
[tool.uv]
|
||||
conflicts = [
|
||||
[
|
||||
{ extra = "torch-npu" },
|
||||
{ extra = "aqlm" },
|
||||
],
|
||||
[
|
||||
{ extra = "torch-npu" },
|
||||
{ extra = "liger-kernel" },
|
||||
],
|
||||
[
|
||||
{ extra = "torch-npu" },
|
||||
{ extra = "vllm" },
|
||||
]
|
||||
]
|
||||
|
@ -36,23 +36,25 @@ class DatasetConverter:
|
||||
dataset_attr: "DatasetAttr"
|
||||
data_args: "DataArguments"
|
||||
|
||||
def _find_medias(self, inputs: Union[Any, Sequence[Any]]) -> Optional[List[Any]]:
|
||||
def _find_medias(self, medias: Union[Any, Sequence[Any]]) -> Optional[List[Any]]:
|
||||
r"""
|
||||
Optionally concatenates media path to media dir when loading from local disk.
|
||||
"""
|
||||
if not isinstance(inputs, list):
|
||||
inputs = [inputs]
|
||||
elif len(inputs) == 0:
|
||||
if not isinstance(medias, list):
|
||||
medias = [medias] if medias is not None else []
|
||||
elif len(medias) == 0:
|
||||
return None
|
||||
else:
|
||||
inputs = inputs[:]
|
||||
medias = medias[:]
|
||||
|
||||
if self.dataset_attr.load_from in ["script", "file"]:
|
||||
for i in range(len(inputs)):
|
||||
if isinstance(inputs[i], str) and os.path.isfile(os.path.join(self.data_args.media_dir, inputs[i])):
|
||||
inputs[i] = os.path.join(self.data_args.media_dir, inputs[i])
|
||||
if self.dataset_attr.load_from in ["script", "file"] and isinstance(medias[0], str):
|
||||
for i in range(len(medias)):
|
||||
if os.path.isfile(os.path.join(self.data_args.media_dir, medias[i])):
|
||||
medias[i] = os.path.join(self.data_args.media_dir, medias[i])
|
||||
else:
|
||||
logger.warning_rank0_once(f"Media {medias[i]} does not exist in `media_dir`. Use original path.")
|
||||
|
||||
return inputs
|
||||
return medias
|
||||
|
||||
@abstractmethod
|
||||
def __call__(self, example: Dict[str, Any]) -> Dict[str, Any]:
|
||||
|
Loading…
x
Reference in New Issue
Block a user