-
Notifications
You must be signed in to change notification settings - Fork 4
139 lines (122 loc) · 4.47 KB
/
deepspeed.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
name: Unit tests with DeepSpeed on Ascend NPU

on:
  # Allow the workflow to be started manually.
  workflow_dispatch:
  # Run on pull requests only when this workflow file or the pinned
  # requirements change.
  pull_request:
    paths:
      - '.github/workflows/deepspeed.yaml'
      - 'requirements/**'
  # Scheduled run every day at 00:00 (GitHub evaluates cron in UTC).
  schedule:
    - cron: '0 0 * * *'
# Keep a single active run per workflow and ref: starting a new run cancels
# the one that is still in progress for the same group.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true
# By default a step's bash is a non-login, non-interactive shell, which reads
# neither ~/.profile nor ~/.bashrc. Running every step as a login shell (-l)
# loads the profile scripts that activate the ascend-toolkit environment
# variables; -e makes a step stop at the first failing command.
defaults:
  run:
    shell: bash -el {0}
jobs:
  deepspeed-ut:
    # Skip the job unless the repository is owned by the 'Ascend' account.
    if: ${{ github.repository_owner == 'Ascend' }}
    name: Run unit tests with DeepSpeed
    runs-on: [self-hosted, ascend, npu]
    container:
      image: ascendai/cann:latest
      # Host paths mounted at the same location inside the container.
      volumes:
        - /usr/local/dcmi:/usr/local/dcmi
        - /usr/local/bin/npu-smi:/usr/local/bin/npu-smi
        - /usr/local/Ascend/driver/lib64/:/usr/local/Ascend/driver/lib64/
        - /usr/local/Ascend/driver/version.info:/usr/local/Ascend/driver/version.info
        - /etc/ascend_install.info:/etc/ascend_install.info
      # Folded scalar: the lines below are joined into one option string.
      options: >-
        --network host
        --device /dev/davinci0
        --device /dev/davinci_manager
        --device /dev/devmm_svm
        --device /dev/hisi_hdc
    steps:
      - name: Show NPU info
        run: |
          npu-smi info

      - name: Config mirrors
        run: |
          sed -i 's|ports.ubuntu.com|mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list
          pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple

      - name: Install system dependencies
        run: |
          apt-get update
          apt-get install -y \
            git gcc g++ make cmake ninja-build

      - name: Checkout
        uses: actions/checkout@v4

      - name: Install pytorch
        run: |
          pip install \
            torch==2.2.0 \
            torch_npu==2.2.0 \
            torchvision==0.17.0 \
            torchaudio==2.2.0 \
            numpy==1.26.4 \
            cloudpickle \
            tornado \
            ml-dtypes

          # Smoke-test the NPU backend. The heredoc delimiter is quoted so the
          # shell passes the Python source through without any expansion.
          python << 'EOF'
          if __name__ == '__main__':
              import torch
              import torch_npu
              torch_npu.npu.set_device("npu:0")
              print(f"Device Name: {torch.npu.get_device_name(0)}")
              print(f"Device Count: {torch.npu.device_count()}")
              print(f"Device Available: {torch.npu.is_available()}")
          EOF

      - name: Checkout transformers
        uses: actions/checkout@v4
        with:
          repository: huggingface/transformers
          path: transformers

      - name: Install transformers
        working-directory: transformers
        run: |
          pip install .

      - name: Checkout deepspeed
        uses: actions/checkout@v4
        with:
          repository: microsoft/DeepSpeed
          path: deepspeed

      # Runs from the workspace root, so the requirements file comes from this
      # repository rather than from the DeepSpeed checkout.
      - name: Install deepspeed dependencies
        run: |
          pip install -r requirements/requirements_deepspeed.txt

      - name: Install deepspeed
        working-directory: deepspeed
        run: |
          # Quoted so bash cannot treat the extras list as a glob pattern.
          pip install ".[1bit,autotuning,inf]"
          ds_report

      - name: Show environment info
        run: |
          pip list

      # Each pytest call is a separate command; with `bash -e` the step stops
      # at the first invocation that fails.
      - name: Run unit tests
        working-directory: deepspeed/tests/unit
        run: |
          unset TORCH_CUDA_ARCH_LIST # only jit compile for current arch
          pytest --verbose accelerator/*
          pytest --verbose autotuning/*
          pytest --verbose checkpoint/test_reshape_checkpoint.py
          pytest --verbose checkpoint/test_moe_checkpoint.py
          pytest --verbose checkpoint/test_shared_weights.py
          pytest --verbose launcher/test_ds_arguments.py launcher/test_run.py
          pytest --verbose model_parallelism/*
          pytest --verbose moe/test_moe_tp.py
          pytest --verbose monitor/*
          pytest --verbose utils/*
          pytest --verbose runtime/test_ds_config_model.py
          pytest --verbose runtime/pipe/test_pipe_schedule.py
          pytest --verbose runtime/zero/test_zero_config.py
          pytest --verbose runtime/zero/test_zero_tiled.py
          pytest --verbose runtime/zero/test_zeropp.py
          pytest --verbose runtime/test_autocast.py
          pytest --verbose runtime/test_data.py
          pytest --verbose runtime/test_runtime_utils.py
          pytest --verbose runtime/activation_checkpointing/*
          pytest --verbose runtime/utils/*
          pytest --verbose runtime/zero/test_zero_dynamic_class.py