# [CI] Improve workflows #148
# Workflow file for this run
# NOTE (GitHub web-viewer banner, not part of the workflow itself): This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
name: Unit tests with DeepSpeed on Ascend NPU

# Triggers: manual dispatch, pull requests that touch this workflow file or
# the requirements directory, and a daily run at 00:00 UTC.
on:
  workflow_dispatch:
  pull_request:
    paths:
      - '.github/workflows/deepspeed.yaml'
      - 'requirements/**'
  schedule:
    - cron: '0 0 * * *'

# One active run per workflow and ref; a newer run cancels the one in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

# Bash shells do not use ~/.profile or ~/.bashrc, so every `run` step is
# declared as "shell: bash -el {0}":
#   -l  starts a login shell, which is what activates the ascend-toolkit
#       environment variables;
#   -e  aborts the step at the first command that exits non-zero.
defaults:
  run:
    shell: bash -el {0}

jobs:
  deepspeed-ut:
    # Skip the job unless the repository owner is 'Ascend'.
    if: ${{ github.repository_owner == 'Ascend' }}
    name: Run unit tests with DeepSpeed
    runs-on: [self-hosted, ascend, npu]
    container:
      image: ascendai/cann:latest
      # Host paths mounted at the same location inside the container
      # (npu-smi binary, dcmi directory, driver libraries and install info).
      volumes:
        - /usr/local/dcmi:/usr/local/dcmi
        - /usr/local/bin/npu-smi:/usr/local/bin/npu-smi
        - /usr/local/Ascend/driver/lib64/:/usr/local/Ascend/driver/lib64/
        - /usr/local/Ascend/driver/version.info:/usr/local/Ascend/driver/version.info
        - /etc/ascend_install.info:/etc/ascend_install.info
      # Folded scalar: the lines below are joined into a single option string.
      # Passes host networking and the listed host device nodes to the container.
      options: >-
        --network host
        --device /dev/davinci0
        --device /dev/davinci_manager
        --device /dev/devmm_svm
        --device /dev/hisi_hdc
    steps:
      - name: Show NPU info
        run: |
          npu-smi info

      # Point apt and pip at the TUNA mirrors before anything is installed.
      - name: Config mirrors
        run: |
          sed -i 's|ports.ubuntu.com|mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list
          pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple

      - name: Install system dependencies
        run: |
          apt-get update
          apt-get install -y \
            git gcc g++ make cmake ninja-build

      - name: Checkout
        uses: actions/checkout@v4

      # Installs the pinned torch / torch_npu stack, then runs a short Python
      # script that selects npu:0 and prints device name, count and
      # availability.
      - name: Install pytorch
        run: |
          pip install \
            torch==2.2.0 \
            torch_npu==2.2.0 \
            torchvision==0.17.0 \
            torchaudio==2.2.0 \
            numpy==1.26.4 \
            cloudpickle \
            tornado \
            ml-dtypes

          # The heredoc delimiter is quoted so the shell passes the Python
          # source through verbatim (no $, backtick or backslash expansion).
          python << 'EOF'
          if __name__ == '__main__':
              import torch
              import torch_npu

              torch_npu.npu.set_device("npu:0")
              print(f"Device Name: {torch.npu.get_device_name(0)}")
              print(f"Device Count: {torch.npu.device_count()}")
              print(f"Device Available: {torch.npu.is_available()}")
          EOF

      - name: Checkout transformers
        uses: actions/checkout@v4
        with:
          repository: huggingface/transformers
          path: transformers

      - name: Install transformers
        working-directory: transformers
        run: |
          pip install .

      - name: Checkout deepspeed
        uses: actions/checkout@v4
        with:
          repository: microsoft/DeepSpeed
          path: deepspeed

      # Runs from the workspace root, so the requirements file is the one from
      # this repository's checkout, not from the deepspeed/ sub-checkout.
      - name: Install deepspeed dependencies
        run: |
          pip install -r requirements/requirements_deepspeed.txt

      - name: Install deepspeed
        working-directory: deepspeed
        run: |
          # Quoted so bash cannot treat [1bit,autotuning,inf] as a glob
          # character class and expand it against files in this directory.
          pip install '.[1bit,autotuning,inf]'
          ds_report

      - name: Show environment info
        run: |
          pip list

      # Each pytest invocation runs in sequence; because the shell uses -e,
      # the first failing invocation ends the step and later ones do not run.
      - name: Run unit tests
        working-directory: deepspeed/tests/unit
        run: |
          unset TORCH_CUDA_ARCH_LIST # only jit compile for current arch

          pytest --verbose accelerator/*
          pytest --verbose autotuning/*
          pytest --verbose checkpoint/test_reshape_checkpoint.py
          pytest --verbose checkpoint/test_moe_checkpoint.py
          pytest --verbose checkpoint/test_shared_weights.py
          pytest --verbose launcher/test_ds_arguments.py launcher/test_run.py
          pytest --verbose model_parallelism/*
          pytest --verbose moe/test_moe_tp.py
          pytest --verbose monitor/*
          pytest --verbose utils/*
          pytest --verbose runtime/test_ds_config_model.py
          pytest --verbose runtime/pipe/test_pipe_schedule.py
          pytest --verbose runtime/zero/test_zero_config.py
          pytest --verbose runtime/zero/test_zero_tiled.py
          pytest --verbose runtime/zero/test_zeropp.py
          pytest --verbose runtime/test_autocast.py
          pytest --verbose runtime/test_data.py
          pytest --verbose runtime/test_runtime_utils.py
          pytest --verbose runtime/activation_checkpointing/*
          pytest --verbose runtime/utils/*
          pytest --verbose runtime/zero/test_zero_dynamic_class.py