diff --git a/.github/workflows/deepspeed.yaml b/.github/workflows/deepspeed.yaml index d423ad708..80b20f381 100644 --- a/.github/workflows/deepspeed.yaml +++ b/.github/workflows/deepspeed.yaml @@ -7,13 +7,13 @@ on: workflow_dispatch: pull_request: paths: - - '.github/workflows/deepspeed.yml' + - '.github/workflows/deepspeed.yaml' - 'requirements/**' schedule: - cron: "0 0 * * *" push: paths: - - '.github/workflows/deepspeed.yml' + - '.github/workflows/deepspeed.yaml' concurrency: @@ -39,21 +39,13 @@ jobs: - /etc/ascend_install.info:/etc/ascend_install.info options: --network host --name deepspeed_unit-tests - --device /dev/davinci5 + --device /dev/davinci4 --device /dev/davinci_manager --device /dev/devmm_svm --device /dev/hisi_hdc --shm-size "20g" --entrypoint /bin/bash - - env: - PT_HPU_LAZY_MODE: 0 - TORCHINDUCTOR_COMPILE_THREADS: 1 - TEST_LIST: | - test_accelerator.py - test_autotuning.py - test_compression.py - + steps: - uses: actions/checkout@v4 @@ -61,10 +53,12 @@ jobs: run: | npu-smi info apt-get update + pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple source /root/.bashrc - pip install torch==2.2.0 torchvision==0.17.0 torch_npu==2.2.0 numpy==1.26.4 cloudpickle tornado ml-dtypes -i https://pypi.tuna.tsinghua.edu.cn/simple + pip install torch==2.2.0 torchvision==0.17.0 torch_npu==2.2.0 torchaudio==2.2.0 numpy==1.26.4 cloudpickle tornado ml-dtypes + python --version python << EOF if __name__ == '__main__': import torch @@ -74,23 +68,17 @@ jobs: print(f"Device Count: {torch.npu.device_count()}") print(f"Device Available: {torch.npu.is_available()}") EOF - - # - name: Install transformers - # run: | - # source /root/.bashrc - # echo "y" | apt-get install git - # git clone https://github.com/huggingface/transformers - # cd transformers - # git rev-parse --short HEAD - # pip install . 
-i https://pypi.tuna.tsinghua.edu.cn/simple - + - name: Install deepspeed run: | source /root/.bashrc echo "y" | apt-get install git git clone --depth=1 https://github.com/microsoft/DeepSpeed.git + pip install -r requirements/requirements_deepspeed.txt cd DeepSpeed - pip install .[dev,autotuning] -i https://pypi.tuna.tsinghua.edu.cn/simple + pip install . + + echo "run ds_report..." ds_report - name: Python environment diff --git a/README.md b/README.md index 93ccb298e..780ec34e5 100644 --- a/README.md +++ b/README.md @@ -44,5 +44,20 @@ This [PR](https://github.com/microsoft/onnxruntime/pull/15833) refactored the Ex Update on 2023.06.08 This [PR](https://github.com/microsoft/onnxruntime/pull/14731) introduced a missing registration of CANN Identity operator for version greater than 14. It has been fixed in this [PR](https://github.com/microsoft/onnxruntime/pull/16210). +## Deepspeed Ascend CI +The Deepspeed source code is taken from the `main` branch of `microsoft/deepspeed` and is run and tested daily on Ascend hardware. 
+ +------------------------------------------------------------ + +| Key | Value | +| :---: | :---: | +| CPU | Aarch64 | +| NPU | Ascend910B | +| OS | Ubuntu | +| Period | UTC 0000 daily | +| Branch | main | +| Status | ![Deepspeed](https://github.com/Ascend/Ascend-CI/actions/workflows/deepspeed.yaml/badge.svg) | +| Recheck By Hand | comment 'recheck' in any issue | + ## Pytorch Ascend CI TBD diff --git a/requirements/requirements_deepspeed.txt b/requirements/requirements_deepspeed.txt new file mode 100644 index 000000000..66cc83714 --- /dev/null +++ b/requirements/requirements_deepspeed.txt @@ -0,0 +1,28 @@ +accelerate +clang-format==18.1.3 +comet_ml>=3.41.0 +docutils<0.18 +future +importlib-metadata>=4 +mup +pre-commit>=2.20.0 +pytest>=7.2.0 +pytest-forked +pytest-randomly +pytest-xdist +qtorch==0.3.0 +recommonmark +sphinx +sphinx-rtd-theme +tensorboard +torchvision +transformers>=4.39.0 +wandb +google +lm-eval==0.3.0 +protobuf +qtorch +safetensors +sentencepiece +transformers>=4.32.1 +tabulate \ No newline at end of file