Skip to content

Commit

Permalink
add requirements
Browse files Browse the repository at this point in the history
  • Loading branch information
sjh committed Nov 4, 2024
1 parent 241369b commit d92bea8
Show file tree
Hide file tree
Showing 3 changed files with 98 additions and 31 deletions.
85 changes: 54 additions & 31 deletions .github/workflows/deepspeed.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,15 @@ on:
workflow_dispatch:
pull_request:
paths:
- '.github/workflows/deepspeed.yml'
- '.github/workflows/deepspeed.yaml'
- '.github/workflows/deepspeed.yaml'
- 'requirements/**'
schedule:
- cron: "0 0 * * *"
push:
paths:
- '.github/workflows/deepspeed.yml'
- '.github/workflows/deepspeed.yaml'
- '.github/workflows/deepspeed.yaml'


concurrency:
Expand All @@ -39,31 +41,26 @@ jobs:
- /etc/ascend_install.info:/etc/ascend_install.info
options: --network host
--name deepspeed_unit-tests
--device /dev/davinci5
--device /dev/davinci6
--device /dev/davinci_manager
--device /dev/devmm_svm
--device /dev/hisi_hdc
--shm-size "20g"
--entrypoint /bin/bash

env:
PT_HPU_LAZY_MODE: 0
TORCHINDUCTOR_COMPILE_THREADS: 1
TEST_LIST: |
test_accelerator.py
test_autotuning.py
test_compression.py


steps:
- uses: actions/checkout@v4

- name: Install pytorch
run: |
npu-smi info
apt-get update
apt-get update
apt-get install sudo
pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple
source /root/.bashrc
pip install torch==2.2.0 torchvision==0.17.0 torch_npu==2.2.0 numpy==1.26.4 cloudpickle tornado ml-dtypes -i https://pypi.tuna.tsinghua.edu.cn/simple
pip install torch==2.2.0 torchvision==0.17.0 torch_npu==2.2.0 torchaudio==2.2.0 numpy==1.26.4 cloudpickle tornado ml-dtypes
python << EOF
if __name__ == '__main__':
Expand All @@ -74,24 +71,30 @@ jobs:
print(f"Device Count: {torch.npu.device_count()}")
print(f"Device Available: {torch.npu.is_available()}")
EOF
# - name: Install transformers
# run: |
# source /root/.bashrc
# echo "y" | apt-get install git
# git clone https://github.com/huggingface/transformers
# cd transformers
# git rev-parse --short HEAD
# pip install . -i https://pypi.tuna.tsinghua.edu.cn/simple

- name: Install deepspeed
- name: Install transformers
run: |
source /root/.bashrc
echo "y" | apt-get install git
git clone --depth=1 https://github.com/microsoft/DeepSpeed.git
cd DeepSpeed
pip install .[dev,autotuning] -i https://pypi.tuna.tsinghua.edu.cn/simple
ds_report
git clone https://github.com/huggingface/transformers
cd transformers
git rev-parse --short HEAD
pip install .
- name: Install deepspeed
uses: nick-fields/retry@v3
with:
timeout_minutes: 30
max_attempts: 3
retry_on: error
command: |
source /root/.bashrc
git clone --depth=1 https://github.com/microsoft/DeepSpeed.git
pip install -r requirements/requirements_deepspeed.txt
cd DeepSpeed
pip install .[1bit,autotuning,inf]
ds_report

- name: Python environment
run: |
Expand All @@ -102,9 +105,29 @@ jobs:
run: |
source /root/.bashrc
unset TORCH_CUDA_ARCH_LIST # only jit compile for current arch
cd DeepSpeed/tests/unit/
cd /DeepSpeed
pytest --verbose tests/*
pytest --verbose accelerator/*
pytest --verbose autotuning/*
pytest --verbose checkpoint/test_reshape_checkpoint.py
pytest --verbose checkpoint/test_moe_checkpoint.py
pytest --verbose checkpoint/test_shared_weights.py
pytest --verbose launcher/test_ds_arguments.py launcher/test_run.py
pytest --verbose model_parallelism/*
pytest --verbose moe/test_moe_tp.py
pytest --verbose monitor/*
pytest --verbose utils/*
pytest --verbose runtime/test_ds_config_model.py
pytest --verbose runtime/pipe/test_pipe_schedule.py
pytest --verbose runtime/zero/test_zero_config.py
pytest --verbose runtime/zero/test_zero_tiled.py
pytest --verbose runtime/zero/test_zeropp.py
pytest --verbose runtime/test_autocast.py
pytest --verbose runtime/test_data.py
pytest --verbose runtime/test_runtime_utils.py
pytest --verbose runtime/activation_checkpointing/*
pytest --verbose runtime/utils/*
pytest --verbose runtime/zero/test_zero_dynamic_class.py
Expand Down
15 changes: 15 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,5 +44,20 @@ This [PR](https://github.com/microsoft/onnxruntime/pull/15833) refactored the Ex
Update on 2023.06.08
This [PR](https://github.com/microsoft/onnxruntime/pull/14731) introduced a missing registration of CANN Identity operator for version greater than 14. It has been fixed in this [PR](https://github.com/microsoft/onnxruntime/pull/16210).

## Deepspeed Ascend CI
The DeepSpeed source code is taken from the `main` branch of `microsoft/DeepSpeed` and is built and tested daily on Ascend hardware.

------------------------------------------------------------

| Key | Value |
| :---: | :---: |
| CPU | AArch64 |
| NPU | Ascend910B |
| OS | Ubuntu |
| Period | UTC 0000 daily |
| Branch | main |
| Status | ![Deepspeed](https://github.com/Ascend/Ascend-CI/actions/workflows/deepspeed.yaml/badge.svg) |
| Recheck By Hand | comment 'recheck' in any issue |

## Pytorch Ascend CI
TBD
29 changes: 29 additions & 0 deletions requirements/requirements_deepspeed.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
accelerate
clang-format==18.1.3
comet_ml>=3.41.0
# deepspeed-kernels ; sys_platform == 'linux'
docutils<0.18
future
importlib-metadata>=4
mup
pre-commit>=2.20.0
pytest>=7.2.0
pytest-forked
pytest-randomly
pytest-xdist
qtorch==0.3.0
recommonmark
sphinx
sphinx-rtd-theme
tensorboard
torchvision
transformers>=4.39.0
wandb
# google
# lm-eval==0.3.0
# protobuf
# qtorch
# safetensors
# sentencepiece
# transformers>=4.32.1
# tabulate

0 comments on commit d92bea8

Please sign in to comment.