[v1] add accelerator (#9607)

This commit is contained in:
Yaowei Zheng
2025-12-12 19:22:06 +08:00
committed by GitHub
parent 4fd94141a4
commit 203069e11c
36 changed files with 941 additions and 443 deletions

View File

@@ -19,7 +19,7 @@ from datasets import load_dataset
from llamafactory.v1.config.data_args import DataArguments
from llamafactory.v1.core.data_engine import DataEngine
from llamafactory.v1.plugins.data_plugins.converter import get_converter
from llamafactory.v1.plugins.data_plugins.converter import DataConverterPlugin
@pytest.mark.parametrize("num_samples", [16])
@@ -49,99 +49,27 @@ def test_alpaca_converter(num_samples: int):
assert data_engine[index] == {"_dataset_name": "tiny_dataset", **expected_data}
def test_sharegpt_converter_invalid():
def test_sharegpt_converter():
example = {
"conversations": [
{
"from": "system",
"value": "Processes historical market data to generate trading signals "
"based on specified technical indicators.",
},
{
"from": "human",
"value": "I possess a detailed dataset, 'Historical_Market_Data.csv'. "
"Could you proceed with these function calls to assist me with the task?",
},
{
"from": "gpt",
"value": "```tool_call\n{'arguments': '{\"data_file\": \"Historical_Market_Data.csv\"]}', "
"'name': 'backtest_trading_signals'}```\n",
},
{
"from": "tool",
"value": '<tool id="D2">\n{"analysis": {"RSI_signals": [{"date": "2025-01-10", '
'"symbol": "AAPL", "signal": "Buy"}]}}}\n</tool>\n',
},
{"from": "system", "value": "System"},
{"from": "human", "value": "User"},
{"from": "gpt", "value": "Assistant"},
]
}
dataset_converter = get_converter("sharegpt")
assert dataset_converter(example) == {"messages": []}
def test_sharegpt_converter_valid():
example = {
"conversations": [
{
"from": "system",
"value": "Processes historical market data to generate trading signals based on "
"specified technical indicators.",
},
{
"from": "human",
"value": "I possess a detailed dataset, 'Historical_Market_Data.csv'. "
"Could you proceed with these function calls to assist me with the task?",
},
{
"from": "gpt",
"value": "```tool_call\n{'arguments': '{\"data_file\": \"Historical_Market_Data.csv\"]}', "
"'name': 'backtest_trading_signals'}```\n",
},
]
}
dataset_converter = get_converter("sharegpt")
expected_data = {
"messages": [
{
"content": [
{
"type": "text",
"value": "Processes historical market data to generate trading signals based on "
"specified technical indicators.",
}
],
"loss_weight": 0.0,
"role": "system",
},
{
"content": [
{
"type": "text",
"value": "I possess a detailed dataset, 'Historical_Market_Data.csv'. "
"Could you proceed with these function calls to assist me with the task?",
}
],
"loss_weight": 0.0,
"role": "user",
},
{
"content": [
{
"type": "text",
"value": "```tool_call\n{'arguments': '{\"data_file\": \"Historical_Market_Data.csv\"]}', "
"'name': 'backtest_trading_signals'}```\n",
}
],
"loss_weight": 1.0,
"role": "assistant",
},
{"content": [{"type": "text", "value": "System"}], "loss_weight": 0.0, "role": "system"},
{"content": [{"type": "text", "value": "User"}], "loss_weight": 0.0, "role": "user"},
{"content": [{"type": "text", "value": "Assistant"}], "loss_weight": 1.0, "role": "assistant"},
]
}
assert dataset_converter(example) == expected_data
assert DataConverterPlugin("sharegpt")(example) == expected_data
@pytest.mark.parametrize("num_samples", [16])
def test_pair_converter(num_samples: int):
data_args = DataArguments(dataset="frozenleaves/tiny-dpo/dataset_info.yaml")
data_args = DataArguments(dataset="llamafactory/tiny-preference-dataset/dataset_info.yaml")
data_engine = DataEngine(data_args)
original_data = load_dataset("HuggingFaceH4/orca_dpo_pairs", split="train_prefs")
indexes = random.choices(range(len(data_engine)), k=num_samples)
@@ -189,6 +117,5 @@ def test_pair_converter(num_samples: int):
if __name__ == "__main__":
test_alpaca_converter(1)
test_sharegpt_converter_invalid()
test_sharegpt_converter_valid()
test_sharegpt_converter()
test_pair_converter(1)

View File

@@ -17,10 +17,13 @@ from unittest.mock import MagicMock, patch
from transformers import AutoModelForCausalLM
from llamafactory.v1.accelerator.helper import get_current_accelerator
class TestKernelPlugin(unittest.TestCase):
@patch("torch.accelerator.current_accelerator")
def test_apply_kernel(self, mock_get_accelerator):
get_current_accelerator.cache_clear()
mock_device = MagicMock()
mock_device.type = "npu"
mock_get_accelerator.return_value = mock_device
@@ -47,6 +50,7 @@ class TestKernelPlugin(unittest.TestCase):
class Test_Use_V1_Kernels(unittest.TestCase):
@patch("torch.accelerator.current_accelerator")
def test_use_v1_kernels(self, mock_get_accelerator):
get_current_accelerator.cache_clear()
mock_device = MagicMock()
mock_device.type = "npu"
mock_get_accelerator.return_value = mock_device