Skip to content

[CI] Improve workflows #149

[CI] Improve workflows

[CI] Improve workflows #149

Workflow file for this run

name: Unit tests with DeepSpeed on Ascend NPU

# Triggers: manual dispatch, pull requests that touch this workflow file or
# anything under requirements/, and a daily scheduled run (cron "0 0 * * *").
on:
  workflow_dispatch:
  pull_request:
    paths:
      - '.github/workflows/deepspeed.yaml'
      - 'requirements/**'
  schedule:
    - cron: "0 0 * * *"

# Keep a single active run per workflow and ref; starting a new run cancels
# the one that is still in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

# Non-interactive bash shells do not read ~/.profile or ~/.bashrc, so every
# "run" step defaults to a login shell ("bash -el {0}"). This is what
# activates the ascend-toolkit environment variables inside the steps.
defaults:
  run:
    shell: bash -el {0}
jobs:
  # Installs torch/torch_npu, transformers and DeepSpeed inside a CANN
  # container on a self-hosted Ascend NPU runner, then runs a selected subset
  # of the DeepSpeed unit tests.
  deepspeed-ut:
    # Run only in repositories owned by 'Ascend'.
    if: ${{ github.repository_owner == 'Ascend' }}
    name: Run unit tests with DeepSpeed
    runs-on: [self-hosted, ascend, npu]
    container:
      image: ascendai/cann:latest
      # Host paths bind-mounted into the container at the same location.
      volumes:
        - /usr/local/dcmi:/usr/local/dcmi
        - /usr/local/bin/npu-smi:/usr/local/bin/npu-smi
        - /usr/local/Ascend/driver/lib64/:/usr/local/Ascend/driver/lib64/
        - /usr/local/Ascend/driver/version.info:/usr/local/Ascend/driver/version.info
        - /etc/ascend_install.info:/etc/ascend_install.info
      # Folded scalar: the lines below are joined into one option string that
      # selects host networking and passes the listed device nodes through.
      options: >-
        --network host
        --device /dev/davinci0
        --device /dev/davinci_manager
        --device /dev/devmm_svm
        --device /dev/hisi_hdc
    steps:
      - name: Show NPU info
        run: |
          npu-smi info

      # Point apt and pip at the TUNA mirrors before installing anything.
      - name: Config mirrors
        run: |
          sed -i 's|ports.ubuntu.com|mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list
          pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple

      - name: Install system dependencies
        run: |
          apt-get update
          apt-get install -y \
            git gcc g++ make cmake ninja-build

      - name: Checkout
        uses: actions/checkout@v4

      # Install pinned torch/torch_npu versions, then print device information
      # as a quick check that the NPU is reachable from Python.
      - name: Install pytorch
        run: |
          pip install \
            torch==2.2.0 \
            torch_npu==2.2.0 \
            torchvision==0.17.0 \
            torchaudio==2.2.0 \
            numpy==1.26.4 \
            cloudpickle \
            tornado \
            ml-dtypes
          python << EOF
          if __name__ == '__main__':
              import torch
              import torch_npu
              torch_npu.npu.set_device("npu:0")
              print(f"Device Name: {torch.npu.get_device_name(0)}")
              print(f"Device Count: {torch.npu.device_count()}")
              print(f"Device Available: {torch.npu.is_available()}")
          EOF

      - name: Checkout transformers
        uses: actions/checkout@v4
        with:
          repository: huggingface/transformers
          path: transformers

      - name: Install transformers
        working-directory: transformers
        run: |
          pip install .

      - name: Checkout deepspeed
        uses: actions/checkout@v4
        with:
          repository: microsoft/DeepSpeed
          path: deepspeed

      # The requirements file is read from this repository's checkout, not
      # from the DeepSpeed checkout.
      - name: Install deepspeed dependencies
        run: |
          pip install -r requirements/requirements_deepspeed.txt

      - name: Install deepspeed
        working-directory: deepspeed
        run: |
          # Quote the extras spec so bash never treats [...] as a glob pattern.
          pip install ".[1bit,autotuning,inf]"
          ds_report

      - name: Show environment info
        run: |
          pip list

      # The default shell runs with -e, so the first failing pytest
      # invocation stops the step and the remaining ones are not run.
      - name: Run unit tests
        working-directory: deepspeed/tests/unit
        run: |
          unset TORCH_CUDA_ARCH_LIST # only jit compile for current arch
          pytest --verbose accelerator/*
          pytest --verbose autotuning/*
          pytest --verbose checkpoint/test_reshape_checkpoint.py
          pytest --verbose checkpoint/test_moe_checkpoint.py
          pytest --verbose checkpoint/test_shared_weights.py
          pytest --verbose launcher/test_ds_arguments.py launcher/test_run.py
          pytest --verbose model_parallelism/*
          pytest --verbose moe/test_moe_tp.py
          pytest --verbose monitor/*
          pytest --verbose utils/*
          pytest --verbose runtime/test_ds_config_model.py
          pytest --verbose runtime/pipe/test_pipe_schedule.py
          pytest --verbose runtime/zero/test_zero_config.py
          pytest --verbose runtime/zero/test_zero_tiled.py
          pytest --verbose runtime/zero/test_zeropp.py
          pytest --verbose runtime/test_autocast.py
          pytest --verbose runtime/test_data.py
          pytest --verbose runtime/test_runtime_utils.py
          pytest --verbose runtime/activation_checkpointing/*
          pytest --verbose runtime/utils/*
          pytest --verbose runtime/zero/test_zero_dynamic_class.py