mirror of
https://github.com/hiyouga/LLaMA-Factory.git
synced 2025-12-31 11:10:35 +08:00
[ci] add cuda workflow (#9682)
Co-authored-by: frozenleaves <frozen@Mac.local> Co-authored-by: Yaowei Zheng <hiyouga@buaa.edu.cn>
This commit is contained in:
88
.github/workflows/tests_cuda.yml
vendored
Normal file
88
.github/workflows/tests_cuda.yml
vendored
Normal file
@@ -0,0 +1,88 @@
|
||||
name: tests_cuda
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
push:
|
||||
branches:
|
||||
- "main"
|
||||
paths:
|
||||
- "**/*.py"
|
||||
- "pyproject.toml"
|
||||
- "Makefile"
|
||||
- ".github/workflows/*.yml"
|
||||
pull_request:
|
||||
branches:
|
||||
- "main"
|
||||
paths:
|
||||
- "**/*.py"
|
||||
- "pyproject.toml"
|
||||
- "Makefile"
|
||||
- ".github/workflows/*.yml"
|
||||
|
||||
jobs:
|
||||
tests:
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
python:
|
||||
- "3.11"
|
||||
os:
|
||||
- "linux-x86_64-gpu-2"
|
||||
|
||||
runs-on: ${{ matrix.os }}
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.ref }}-${{ matrix.os }}-${{ matrix.python }}
|
||||
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
|
||||
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Install uv
|
||||
uses: astral-sh/setup-uv@v7
|
||||
with:
|
||||
python-version: ${{ matrix.python }}
|
||||
github-token: ${{ github.token }}
|
||||
enable-cache: false
|
||||
|
||||
- name: Check GPU Status
|
||||
run: nvidia-smi
|
||||
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
uv venv
|
||||
uv pip install -e ".[dev]"
|
||||
|
||||
- name: Cache HuggingFace models
|
||||
id: hf-hub-cache
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: ${{ runner.temp }}/huggingface
|
||||
key: hf-cache-${{ runner.os }}-${{ hashFiles('tests/version.txt') }}
|
||||
|
||||
- name: Check quality
|
||||
run: |
|
||||
make style && make quality
|
||||
env:
|
||||
UV_NO_SYNC: 1
|
||||
|
||||
- name: Check license
|
||||
run: |
|
||||
make license
|
||||
env:
|
||||
UV_NO_SYNC: 1
|
||||
|
||||
- name: Check build
|
||||
run: |
|
||||
make build
|
||||
env:
|
||||
UV_NO_SYNC: 1
|
||||
|
||||
- name: Test with pytest
|
||||
run: |
|
||||
make test
|
||||
env:
|
||||
UV_NO_SYNC: 1
|
||||
HF_HOME: ${{ runner.temp }}/huggingface
|
||||
HF_HUB_OFFLINE: "${{ steps.hf-hub-cache.outputs.cache-hit == 'true' && '1' || '0' }}"
|
||||
7
.github/workflows/tests_npu.yml
vendored
7
.github/workflows/tests_npu.yml
vendored
@@ -49,8 +49,11 @@ jobs:
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Install uv
|
||||
run: |
|
||||
curl -LsSf https://astral.sh/uv/install.sh | sh
|
||||
uses: astral-sh/setup-uv@v7
|
||||
with:
|
||||
python-version: ${{ matrix.python }}
|
||||
github-token: ${{ github.token }}
|
||||
enable-cache: false
|
||||
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
|
||||
@@ -18,8 +18,11 @@ Contains shared fixtures, pytest configuration, and custom markers.
|
||||
"""
|
||||
|
||||
import os
|
||||
from typing import Optional
|
||||
|
||||
import pytest
|
||||
import torch
|
||||
import torch.distributed as dist
|
||||
from pytest import Config, FixtureRequest, Item, MonkeyPatch
|
||||
|
||||
from llamafactory.extras.misc import get_current_device, get_device_count, is_env_enabled
|
||||
@@ -70,7 +73,7 @@ def _handle_slow_tests(items: list[Item]):
|
||||
item.add_marker(skip_slow)
|
||||
|
||||
|
||||
def _get_visible_devices_env() -> str | None:
|
||||
def _get_visible_devices_env() -> Optional[str]:
|
||||
"""Return device visibility env var name."""
|
||||
if CURRENT_DEVICE == "cuda":
|
||||
return "CUDA_VISIBLE_DEVICES"
|
||||
@@ -118,6 +121,14 @@ def pytest_collection_modifyitems(config: Config, items: list[Item]):
|
||||
_handle_device_visibility(items)
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def _cleanup_distributed_state():
|
||||
"""Cleanup distributed state after each test."""
|
||||
yield
|
||||
if dist.is_initialized():
|
||||
dist.destroy_process_group()
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def _manage_distributed_env(request: FixtureRequest, monkeypatch: MonkeyPatch) -> None:
|
||||
"""Set environment variables for distributed tests if specific devices are requested."""
|
||||
@@ -145,6 +156,10 @@ def _manage_distributed_env(request: FixtureRequest, monkeypatch: MonkeyPatch) -
|
||||
monkeypatch.setenv(env_key, visible_devices[0] if visible_devices else "0")
|
||||
else:
|
||||
monkeypatch.setenv(env_key, "0")
|
||||
if CURRENT_DEVICE == "cuda":
|
||||
monkeypatch.setattr(torch.cuda, "device_count", lambda: 1)
|
||||
elif CURRENT_DEVICE == "npu":
|
||||
monkeypatch.setattr(torch.npu, "device_count", lambda: 1)
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
|
||||
@@ -18,8 +18,10 @@ Contains shared fixtures, pytest configuration, and custom markers.
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
import pytest
|
||||
import torch
|
||||
from pytest import Config, FixtureRequest, Item, MonkeyPatch
|
||||
|
||||
from llamafactory.v1.accelerator.helper import get_current_accelerator, get_device_count
|
||||
@@ -139,9 +141,21 @@ def _manage_distributed_env(request: FixtureRequest, monkeypatch: MonkeyPatch) -
|
||||
devices_str = ",".join(str(i) for i in range(required))
|
||||
|
||||
monkeypatch.setenv(env_key, devices_str)
|
||||
|
||||
# add project root dir to path for mp run
|
||||
project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
|
||||
if project_root not in sys.path:
|
||||
sys.path.insert(0, project_root)
|
||||
|
||||
os.environ["PYTHONPATH"] = project_root + os.pathsep + os.environ.get("PYTHONPATH", "")
|
||||
|
||||
else: # non-distributed test
|
||||
if old_value:
|
||||
visible_devices = [v for v in old_value.split(",") if v != ""]
|
||||
monkeypatch.setenv(env_key, visible_devices[0] if visible_devices else "0")
|
||||
else:
|
||||
monkeypatch.setenv(env_key, "0")
|
||||
if CURRENT_DEVICE == "cuda":
|
||||
monkeypatch.setattr(torch.cuda, "device_count", lambda: 1)
|
||||
elif CURRENT_DEVICE == "npu":
|
||||
monkeypatch.setattr(torch.npu, "device_count", lambda: 1)
|
||||
|
||||
Reference in New Issue
Block a user