mirror of https://github.com/hiyouga/LLaMA-Factory.git
synced 2025-12-31 19:20:36 +08:00

[ci] add cuda workflow (#9682)

Co-authored-by: frozenleaves <frozen@Mac.local>
Co-authored-by: Yaowei Zheng <hiyouga@buaa.edu.cn>
.github/workflows/tests_cuda.yml (new file, 88 lines)
@@ -0,0 +1,88 @@
name: tests_cuda

on:
  workflow_dispatch:
  push:
    branches:
      - "main"
    paths:
      - "**/*.py"
      - "pyproject.toml"
      - "Makefile"
      - ".github/workflows/*.yml"
  pull_request:
    branches:
      - "main"
    paths:
      - "**/*.py"
      - "pyproject.toml"
      - "Makefile"
      - ".github/workflows/*.yml"

jobs:
  tests:
    strategy:
      fail-fast: false
      matrix:
        python:
          - "3.11"
        os:
          - "linux-x86_64-gpu-2"

    runs-on: ${{ matrix.os }}

    concurrency:
      group: ${{ github.workflow }}-${{ github.ref }}-${{ matrix.os }}-${{ matrix.python }}
      cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}

    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Install uv
        uses: astral-sh/setup-uv@v7
        with:
          python-version: ${{ matrix.python }}
          github-token: ${{ github.token }}
          enable-cache: false

      - name: Check GPU Status
        run: nvidia-smi

      - name: Install dependencies
        run: |
          uv venv
          uv pip install -e ".[dev]"

      - name: Cache HuggingFace models
        id: hf-hub-cache
        uses: actions/cache@v4
        with:
          path: ${{ runner.temp }}/huggingface
          key: hf-cache-${{ runner.os }}-${{ hashFiles('tests/version.txt') }}

      - name: Check quality
        run: |
          make style && make quality
        env:
          UV_NO_SYNC: 1

      - name: Check license
        run: |
          make license
        env:
          UV_NO_SYNC: 1

      - name: Check build
        run: |
          make build
        env:
          UV_NO_SYNC: 1

      - name: Test with pytest
        run: |
          make test
        env:
          UV_NO_SYNC: 1
          HF_HOME: ${{ runner.temp }}/huggingface
          HF_HUB_OFFLINE: "${{ steps.hf-hub-cache.outputs.cache-hit == 'true' && '1' || '0' }}"
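
Note on the last step: HF_HUB_OFFLINE resolves to "1" only when the actions/cache restore reported a hit, so the test run skips Hub downloads on a warm cache and repopulates ${{ runner.temp }}/huggingface on a cold one. A minimal local sketch of the same gating, using only the standard library (illustrative, not part of the commit):

    # Sketch: check the same variables the workflow exports before running tests
    # locally against a pre-populated Hugging Face cache.
    import os

    def hf_hub_offline() -> bool:
        # The workflow sets HF_HUB_OFFLINE="1" only on a cache hit; huggingface_hub
        # reads the same variable and then avoids network requests.
        return os.environ.get("HF_HUB_OFFLINE", "0") == "1"

    if __name__ == "__main__":
        hf_home = os.environ.get("HF_HOME", "~/.cache/huggingface")
        print(f"offline={hf_hub_offline()} cache={hf_home}")
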
.github/workflows/tests_npu.yml (7 changed lines)
@@ -49,8 +49,11 @@ jobs:
         uses: actions/checkout@v4

       - name: Install uv
-        run: |
-          curl -LsSf https://astral.sh/uv/install.sh | sh
+        uses: astral-sh/setup-uv@v7
+        with:
+          python-version: ${{ matrix.python }}
+          github-token: ${{ github.token }}
+          enable-cache: false

       - name: Install dependencies
         run: |
A pytest conftest module (imports from llamafactory.extras.misc):

@@ -18,8 +18,11 @@ Contains shared fixtures, pytest configuration, and custom markers.
 """

 import os
+from typing import Optional

 import pytest
+import torch
+import torch.distributed as dist
 from pytest import Config, FixtureRequest, Item, MonkeyPatch

 from llamafactory.extras.misc import get_current_device, get_device_count, is_env_enabled
@@ -70,7 +73,7 @@ def _handle_slow_tests(items: list[Item]):
         item.add_marker(skip_slow)


-def _get_visible_devices_env() -> str | None:
+def _get_visible_devices_env() -> Optional[str]:
     """Return device visibility env var name."""
     if CURRENT_DEVICE == "cuda":
         return "CUDA_VISIBLE_DEVICES"
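
The return annotation switches from "str | None" to Optional[str] (hence the new "from typing import Optional"), presumably to keep the module importable on older interpreters: PEP 604 unions in annotations are evaluated at definition time and need Python 3.10 unless postponed evaluation is enabled. A small illustration (assumed example, not project code):

    # Runs on Python 3.9+: typing.Optional avoids the 3.10-only "str | None" syntax,
    # which would raise TypeError when the annotation is evaluated at def time.
    from typing import Optional

    def visible_devices_env(device: str) -> Optional[str]:
        # mirrors the helper's contract: an env var name, or None if unknown
        return "CUDA_VISIBLE_DEVICES" if device == "cuda" else None

    print(visible_devices_env("cuda"))  # CUDA_VISIBLE_DEVICES
    print(visible_devices_env("cpu"))   # None
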
@@ -118,6 +121,14 @@ def pytest_collection_modifyitems(config: Config, items: list[Item]):
     _handle_device_visibility(items)


+@pytest.fixture(autouse=True)
+def _cleanup_distributed_state():
+    """Cleanup distributed state after each test."""
+    yield
+    if dist.is_initialized():
+        dist.destroy_process_group()
+
+
 @pytest.fixture(autouse=True)
 def _manage_distributed_env(request: FixtureRequest, monkeypatch: MonkeyPatch) -> None:
     """Set environment variables for distributed tests if specific devices are requested."""
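
Because the fixture is autouse, every test gets this teardown for free. A hypothetical test (not part of the commit) showing the pattern it enables, assuming a CPU-only gloo backend:

    # Each test may initialize its own process group; _cleanup_distributed_state
    # destroys it afterwards, so later tests start from a clean distributed state.
    import os

    import torch
    import torch.distributed as dist

    def test_single_process_allreduce():
        os.environ.setdefault("MASTER_ADDR", "127.0.0.1")
        os.environ.setdefault("MASTER_PORT", "29501")
        dist.init_process_group(backend="gloo", rank=0, world_size=1)
        t = torch.ones(2)
        dist.all_reduce(t)  # world_size == 1, so the tensor is unchanged
        assert t.sum().item() == 2.0
        # no explicit destroy_process_group(): the autouse fixture handles it
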
@@ -145,6 +156,10 @@ def _manage_distributed_env(request: FixtureRequest, monkeypatch: MonkeyPatch) -
             monkeypatch.setenv(env_key, visible_devices[0] if visible_devices else "0")
         else:
             monkeypatch.setenv(env_key, "0")
+        if CURRENT_DEVICE == "cuda":
+            monkeypatch.setattr(torch.cuda, "device_count", lambda: 1)
+        elif CURRENT_DEVICE == "npu":
+            monkeypatch.setattr(torch.npu, "device_count", lambda: 1)


 @pytest.fixture
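
With this branch, non-distributed tests on a multi-device runner see exactly one accelerator: the visibility env var is pinned to a single index and device_count is patched to report 1, and pytest's monkeypatch reverts both after each test. A standalone sketch of the same mechanism (illustrative, not project code):

    # monkeypatch restores the original attribute and env var after the test,
    # so patching device_count never leaks into other tests.
    import torch

    def test_reports_single_device(monkeypatch):
        monkeypatch.setenv("CUDA_VISIBLE_DEVICES", "0")
        monkeypatch.setattr(torch.cuda, "device_count", lambda: 1)
        assert torch.cuda.device_count() == 1
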
A second pytest conftest module (imports from llamafactory.v1.accelerator.helper):

@@ -18,8 +18,10 @@ Contains shared fixtures, pytest configuration, and custom markers.
 """

 import os
+import sys

 import pytest
+import torch
 from pytest import Config, FixtureRequest, Item, MonkeyPatch

 from llamafactory.v1.accelerator.helper import get_current_accelerator, get_device_count
@@ -139,9 +141,21 @@ def _manage_distributed_env(request: FixtureRequest, monkeypatch: MonkeyPatch) -
         devices_str = ",".join(str(i) for i in range(required))

         monkeypatch.setenv(env_key, devices_str)
+
+        # add project root dir to path for mp run
+        project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
+        if project_root not in sys.path:
+            sys.path.insert(0, project_root)
+
+        os.environ["PYTHONPATH"] = project_root + os.pathsep + os.environ.get("PYTHONPATH", "")
+
     else: # non-distributed test
         if old_value:
             visible_devices = [v for v in old_value.split(",") if v != ""]
             monkeypatch.setenv(env_key, visible_devices[0] if visible_devices else "0")
         else:
             monkeypatch.setenv(env_key, "0")
+        if CURRENT_DEVICE == "cuda":
+            monkeypatch.setattr(torch.cuda, "device_count", lambda: 1)
+        elif CURRENT_DEVICE == "npu":
+            monkeypatch.setattr(torch.npu, "device_count", lambda: 1)
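
The distributed branch also prepends the project root to sys.path and exports it via PYTHONPATH; the comment "add project root dir to path for mp run" is the reason. A spawned worker inherits os.environ but not the parent's in-process sys.path edits, so without PYTHONPATH it could not re-import test helpers by name. A small sketch of that behavior (assumed example, not project code):

    # A child interpreter only sees modules reachable through PYTHONPATH (or its
    # own sys.path defaults); sys.path.insert() in the parent does not carry over.
    import os
    import subprocess
    import sys

    project_root = os.path.abspath(os.path.dirname(__file__))
    env = dict(os.environ)
    env["PYTHONPATH"] = project_root + os.pathsep + env.get("PYTHONPATH", "")

    # project_root appears in the child's sys.path because of PYTHONPATH.
    subprocess.run(
        [sys.executable, "-c", "import sys; print(sys.path)"],
        env=env,
        check=True,
    )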