add npu examples

Former-commit-id: af343034dd
This commit is contained in:
hiyouga
2024-05-14 23:32:53 +08:00
parent e32a44fe6b
commit f5df1ceaf1
9 changed files with 103 additions and 19 deletions

View File

@@ -6,7 +6,7 @@ RANK=0
MASTER_ADDR=192.168.0.1
MASTER_PORT=29500
- CUDA_VISIBLE_DEVICES=0,1,2,3 python -m torch.distributed.run \
+ CUDA_VISIBLE_DEVICES=0,1,2,3 torchrun \
--nproc_per_node $NPROC_PER_NODE \
--nnodes $NNODES \
--node_rank $RANK \

View File

@@ -1,9 +1,15 @@
#!/bin/bash
NPROC_PER_NODE=4
+ NNODES=1
+ RANK=0
+ MASTER_ADDR=127.0.0.1
+ MASTER_PORT=29500
- CUDA_VISIBLE_DEVICES=0,1,2,3 python -m torch.distributed.run \
+ CUDA_VISIBLE_DEVICES=0,1,2,3 torchrun \
--nproc_per_node $NPROC_PER_NODE \
- --nnodes 1 \
- --standalone \
+ --nnodes $NNODES \
+ --node_rank $RANK \
+ --master_addr $MASTER_ADDR \
+ --master_port $MASTER_PORT \
src/train.py examples/full_multi_gpu/llama3_full_sft.yaml