From 23f3a3a31bba0db8cc077674475d24ec264b329e Mon Sep 17 00:00:00 2001
From: Joel Polizzi
Date: Wed, 22 Jan 2025 16:21:30 -0800
Subject: [PATCH] Added automation support for AIMET PyPi fast release (#3737)

- workflow run to manually trigger a build and deploy bi-weekly
- add nightly pipeline, unit tests, acceptance tests, pypi deployment, etc

Signed-off-by: Joel Polizzi
---
 .github/workflows/nightly-pipeline.yml      | 162 ++++++++++++++
 .github/workflows/pypi-release-pipeline.yml | 236 ++++++++++++++++++++
 Jenkins/fast-release/Dockerfile.torch-gpu   |  12 +-
 3 files changed, 401 insertions(+), 9 deletions(-)
 create mode 100644 .github/workflows/nightly-pipeline.yml
 create mode 100644 .github/workflows/pypi-release-pipeline.yml

diff --git a/.github/workflows/nightly-pipeline.yml b/.github/workflows/nightly-pipeline.yml
new file mode 100644
index 00000000000..151fe86f31c
--- /dev/null
+++ b/.github/workflows/nightly-pipeline.yml
@@ -0,0 +1,162 @@
+name: Fast Release Nightly CI/CD Pipeline
+
+on:
+  schedule:
+    - cron: '0 0 * * *'  # once a day at 00:00 (GitHub evaluates cron in UTC)
+
+jobs:
+  fast-release-docker-build:
+    if: github.server_url != 'https://github.com'  # skipped on public github.com
+    name: Docker build for torch-gpu fast release
+    runs-on: ubuntu-latest
+    steps:
+      - name: Install Tools
+        run: |
+          sudo apt update -qq
+          sudo apt install --no-install-recommends -y curl ca-certificates
+          sudo cp /tmp/certs/certificate-package.crt /usr/local/share/ca-certificates/certificate-package.crt
+          sudo update-ca-certificates
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 2  # HEAD^ must be present for the git diff in the next step
+      - name: Check for Dockerfile Update
+        run: |
+          echo "docker-updated="$(if [ -n "$(git diff --name-only HEAD^ HEAD -- Jenkins/fast-release/Dockerfile.torch-gpu)" ]; then echo true; else echo false; fi) >> "$GITHUB_ENV"
+      - uses: ./.github/actions/docker-build-image
+        if: env.docker-updated == 'true'  # rebuild only when the last commit touched the Dockerfile
+        with:
+          dockerfile: Jenkins/fast-release/Dockerfile.torch-gpu
+          docker-login: ${{ secrets.DOCKER_LOGIN }}
+          docker-password: ${{ secrets.DOCKER_CREDENTIALS }}
+          docker-registry: ${{ vars.DOCKER_REGISTRY }}
+          image-name: "${{ vars.DOCKER_IMAGE }}torch"
+          image-tag: "fast-release-latest"
+          build-args: |
+            REGISTRY=${{ vars.DOCKER_REGISTRY_TMP }}/
+            PYTHON_VERSION=3.10
+            BUILDKIT_INLINE_CACHE=1
+
+  build-pypi-whl:
+    if: github.server_url != 'https://github.com'
+    name: Build AIMET pypi wheel
+    runs-on: k8s-gpu
+    needs: [fast-release-docker-build]
+    env:
+      AIMET_VARIANT: torch-gpu
+      FAST_RELEASE: true
+    defaults:
+      run:
+        shell: bash
+    container:
+      image: "${{ vars.DOCKER_REGISTRY }}/${{ vars.DOCKER_IMAGE }}torch:fast-release-latest"
+      credentials:
+        username: ${{ secrets.DOCKER_LOGIN }}
+        password: ${{ secrets.DOCKER_CREDENTIALS }}
+    outputs:
+      version: ${{ steps.version.outputs.nightly_version }}  # consumed by the test jobs to locate the artifact
+    steps:
+      - uses: actions/checkout@v4
+      - name: Install Tools
+        run: |
+          sudo apt-get update
+          sudo apt-get install python3 python3-pip -y
+          python3 -m pip install --upgrade pip
+          python3 -m pip install lastversion
+      - name: Format the Version String
+        id: version
+        run: |
+          set +e  # a failing lastversion lookup is handled by the fallback below
+          CUR_VER=$(lastversion https://artifacts.codelinaro.org/ui/native/aimet/ 2>/dev/null)
+          if [ $? -ne 0 ]; then
+            CUR_VER=${{ vars.VERSION }}
+          fi
+          echo "Current Aimet-Torch Version: " $CUR_VER
+          IFS='.' read -r -a VERSION_SPLIT <<< "$CUR_VER"
+          VERSION_SPLIT[2]=$((VERSION_SPLIT[2] + 1))
+          NIGHTLY_VERSION="${VERSION_SPLIT[0]}.${VERSION_SPLIT[1]}.b${VERSION_SPLIT[2]}"
+          echo "Updates Nightly Version: " $NIGHTLY_VERSION
+          echo "NIGHTLY_VERSION=$NIGHTLY_VERSION" >> "$GITHUB_ENV"
+          echo "nightly_version=$NIGHTLY_VERSION" >> "$GITHUB_OUTPUT"
+          unset IFS
+      - name: Make the Pypi Wheel
+        run: |
+          mkdir -p build/staging/universal
+          cmake -S . -B ./build -DENABLE_CUDA=ON -DENABLE_TORCH=ON -DENABLE_TENSORFLOW=OFF -DENABLE_ONNX=OFF -DPIP_INDEX=${{ vars.PYPI_INDEX }} -DSW_VERSION=${{ env.NIGHTLY_VERSION }} -DCMAKE_INSTALL_PREFIX="build/staging/universal"
+          cmake --build ./build --parallel 4 --target all --target install --target packageaimet
+      - uses: actions/upload-artifact@v3
+        with:
+          name: aimet-torch-${{ env.NIGHTLY_VERSION }}
+          path: build/**/*.whl
+          if-no-files-found: error
+          retention-days: 1  # integer number of days; a "1d" suffix is not a valid value
+
+  pypi-unit-tests:
+    if: github.server_url != 'https://github.com'
+    name: AIMET Pypi Unit Test
+    runs-on: k8s-gpu
+    needs: [build-pypi-whl]
+    env:
+      pytest_github_report: true
+      pytest_use_zeros: true
+      VERSION: ${{needs.build-pypi-whl.outputs.version}}
+    defaults:
+      run:
+        shell: bash
+    container:
+      image: "${{ vars.DOCKER_REGISTRY_TMP }}/nvidia/cuda:12.1.0-cudnn8-devel-ubuntu22.04"
+      credentials:
+        username: ${{ secrets.DOCKER_LOGIN }}
+        password: ${{ secrets.DOCKER_CREDENTIALS }}
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/download-artifact@v3  # same major version as the upload-artifact@v3 producer
+        with:
+          name: aimet-torch-${{ env.VERSION }}
+          path: .
+      - name: Install Tools
+        run: |
+          apt-get update
+          apt-get install python3 python3-pip -y
+          python3 -m pip install --upgrade pip
+          python3 -m pip install packaging/dist/aimet_torch-*.whl  # wheel fetched by the download-artifact step
+          python3 -m pip install bs4 deepspeed onnxruntime peft pytest spconv transformers torch==2.2.2 torchvision==0.17.2
+          python3 -m pip install pytorch-ignite --no-dependencies
+      - name: Run Unit Tests
+        run: pytest TrainingExtensions/torch/test/python/
+
+  pypi-acceptance-tests:
+    if: github.server_url != 'https://github.com'  # skipped on public github.com
+    name: AIMET Pypi Acceptance Tests
+    runs-on: k8s-gpu
+    needs: [build-pypi-whl]
+    env:
+      pytest_github_report: true
+      pytest_use_zeros: true
+      DEPENDENCY_DATA_PATH: /data
+      VERSION: ${{needs.build-pypi-whl.outputs.version}}  # nightly version string; part of the artifact name
+    defaults:
+      run:
+        shell: bash
+    container:
+      image: "${{ vars.DOCKER_REGISTRY_TMP }}/nvidia/cuda:12.1.0-cudnn8-devel-ubuntu22.04"
+      credentials:
+        username: ${{ secrets.DOCKER_LOGIN }}
+        password: ${{ secrets.DOCKER_CREDENTIALS }}
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/download-artifact@v3  # same major version as the upload-artifact@v3 producer
+        with:
+          name: aimet-torch-${{ env.VERSION }}
+          path: .
+      - name: Install Tools
+        run: |
+          apt-get update
+          apt-get install python3 python3-pip -y
+          python3 -m pip install packaging/dist/aimet_torch-*.whl
+          python3 -m pip install bs4 deepspeed==0.15.2 peft pytest safetensors torch==2.2.2 torchvision==0.17.2 transformers==4.27.4 wget
+          python3 -m pip install pytorch-ignite --no-dependencies
+      - name: Verify Dependency Data
+        run: python3 NightlyTests/torch/dependencies.py NightlyTests/torch/resnet18_eval_scores.csv True
+      - name: Run Acceptance Tests
+        run: pytest NightlyTests/torch/
+
diff --git a/.github/workflows/pypi-release-pipeline.yml b/.github/workflows/pypi-release-pipeline.yml
new file mode 100644
index 00000000000..8cad6d74f23
--- /dev/null
+++ b/.github/workflows/pypi-release-pipeline.yml
@@ -0,0 +1,236 @@
+name: Fast Release Biweekly Release CI/CD Pipeline
+
+on:
+  workflow_dispatch:  # manual trigger only
+    inputs:
+      release-branch:
+        description: "Release branch to target on pipeline"
+        required: true
+        type: string
+      release-version:
+        description: "Release version to build"
+        required: true
+        type: string
+      deploy:
+        description: "Deploy to official pypi if true"
+        required: false
+        type: boolean
+        default: false
+
+jobs:
+  build-test-pypi-whl:
+    if: github.server_url != 'https://github.com'  # skipped on public github.com
+    name: Build AIMET test-pypi wheel
+    runs-on: k8s-gpu
+    env:
+      AIMET_VARIANT: torch-gpu
+      FAST_RELEASE: true
+      VERSION: ${{ inputs.release-version }}  # scripts read this as $VERSION rather than inlining the input
+    defaults:
+      run:
+        shell: bash  # GitHub runs this as "bash --noprofile --norc -eo pipefail"
+    container:
+      image: "${{ vars.DOCKER_REGISTRY }}/${{ vars.DOCKER_IMAGE }}torch:fast-release-latest"
+      credentials:
+        username: ${{ secrets.DOCKER_LOGIN }}
+        password: ${{ secrets.DOCKER_CREDENTIALS }}
+    outputs:
+      version: ${{ steps.version.outputs.dev_version }}
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          ref: ${{ inputs.release-branch }}
+
+      - name: Install Tools
+        run: |
+          sudo apt-get update
+          sudo apt-get install python3 python3-pip -y
+          python3 -m pip install --upgrade pip
+          python3 -m pip install twine
+
+      - name: Format dev version
+        id: version
+        run: |
+          AVAILABLE_VERSION=$(pip index versions aimet-torch --index-url https://test.pypi.org/simple/ --pre 2>/dev/null | grep -oP '\d+\.\d+\.\d+\.dev\d+' | head -n 1 || true)  # "no match" must not abort under -eo pipefail
+          LATEST_VERSION=$(pip index versions aimet-torch --index-url https://test.pypi.org/simple/ 2>/dev/null | grep -oP '\d+\.\d+\.\d+' | head -n 1 || true)
+          if [[ $LATEST_VERSION == *"$VERSION"* ]]; then
+            if [ -z "$AVAILABLE_VERSION" ]; then
+              DEV_VER="${VERSION}.dev1"
+            else
+              DEV_VER="${VERSION}.dev$(echo "$AVAILABLE_VERSION" | awk -F. '{print $NF}' | grep -oE '[0-9]+' | awk '{print $1+1}')"
+            fi
+          else
+            DEV_VER="${VERSION}.dev$(echo "${AVAILABLE_VERSION:-0.0.0.dev0}" | awk -F. '{print $NF}' | grep -oE '[0-9]+' | awk '{print $1+1}')"  # no dev build yet -> dev1
+          fi
+          echo "Latest available dev version: $AVAILABLE_VERSION"
+          echo "Formatting Dev Version: $DEV_VER"
+          echo "AVAILABLE_VERSION=$AVAILABLE_VERSION" >> "$GITHUB_ENV"
+          echo "DEV_VER=$DEV_VER" >> "$GITHUB_ENV"
+          echo "dev_version=$DEV_VER" >> "$GITHUB_OUTPUT"
+
+      - name: Make the Pypi Wheel
+        run: |
+          mkdir -p build/staging/universal
+          cmake -S . -B ./build -DENABLE_CUDA=ON -DENABLE_TORCH=ON -DENABLE_TENSORFLOW=OFF -DENABLE_ONNX=OFF -DPIP_INDEX=pypi -DSW_VERSION="$DEV_VER" -DCMAKE_INSTALL_PREFIX="build/staging/universal"
+          cmake --build ./build --parallel 4 --target all --target install --target packageaimet --target upload
+          find . -name '*.whl'  # quoted so the shell cannot expand the glob before find sees it
+          echo "Retag the whl file"
+          WHL_PATH=$(pwd)/$(find . -name '*.whl' -printf '%P\n')
+          echo "WHL_PATH=$WHL_PATH" >> "$GITHUB_ENV"
+          wheel tags --python-tag=py38 --abi-tag=none --platform-tag=any "$WHL_PATH"
+          ls -al build/packaging/dist
+
+      - name: Push to test-pypi
+        run: |
+          twine upload --verbose --repository testpypi --config-file ~/.pypirc build/packaging/dist/*
+
+      - name: Verify the package is available
+        run: |
+          for attempt in $(seq 1 30); do  # bounded poll (30 x 20s) instead of waiting for the job timeout
+            sleep 20
+            HEAD_VERSION=$(pip index versions aimet-torch --index-url https://test.pypi.org/simple/ --pre 2>/dev/null | grep -oP '\d+\.\d+\.\d+\.dev\d+' | head -n 1 || true)
+            if [[ "$HEAD_VERSION" == "$DEV_VER" ]]; then
+              echo "uploaded $DEV_VER"
+              exit 0
+            fi
+            echo "upload pending... (attempt $attempt/30)"
+          done
+          echo "::error::$DEV_VER is not visible on test.pypi.org after 30 attempts"
+          exit 1
+
+      - uses: actions/upload-artifact@v3
+        with:
+          name: aimet-torch-${{ env.DEV_VER }}
+          path: ${{ env.WHL_PATH }}
+          if-no-files-found: error
+          retention-days: 1  # integer number of days; a "1d" suffix is not a valid value
+
+  pypi-unit-tests:
+    if: github.server_url != 'https://github.com'
+    name: AIMET Pypi Unit Test
+    runs-on: k8s-gpu
+    needs: [build-test-pypi-whl]
+    env:
+      pytest_github_report: true
+      pytest_use_zeros: true
+      BUILD_VERSION: ${{ needs.build-test-pypi-whl.outputs.version }}
+    defaults:
+      run:
+        shell: bash
+    container:
+      image: "${{ vars.DOCKER_REGISTRY_TMP }}/nvidia/cuda:12.1.0-cudnn8-devel-ubuntu22.04"
+      credentials:
+        username: ${{ secrets.DOCKER_LOGIN }}
+        password: ${{ secrets.DOCKER_CREDENTIALS }}
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          ref: ${{ inputs.release-branch }}
+
+      - uses: actions/download-artifact@v3  # same major version as the upload-artifact@v3 producer
+        with:
+          name: aimet-torch-${{ env.BUILD_VERSION }}
+          path: .
+
+      - name: Install Tools
+        run: |
+          apt-get update
+          apt-get install python3 python3-pip -y
+          python3 -m pip install --upgrade pip
+          python3 -m pip install aimet_torch-*.whl  # wheel fetched into "." by the download-artifact step
+          python3 -m pip install bs4 deepspeed onnxruntime peft pytest spconv transformers torch==2.2.2 torchvision==0.17.2
+          python3 -m pip install pytorch-ignite --no-dependencies
+      - name: Run Unit Tests
+        run: pytest TrainingExtensions/torch/test/python/
+
+  pypi-acceptance-tests:
+    if: github.server_url != 'https://github.com'  # skipped on public github.com
+    name: AIMET Pypi Acceptance Tests
+    runs-on: k8s-gpu
+    needs: [build-test-pypi-whl]
+    env:
+      pytest_github_report: true
+      pytest_use_zeros: true
+      DEPENDENCY_DATA_PATH: /data
+      BUILD_VERSION: ${{ needs.build-test-pypi-whl.outputs.version }}  # dev version; part of the artifact name
+    defaults:
+      run:
+        shell: bash
+    container:
+      image: "${{ vars.DOCKER_REGISTRY_TMP }}/nvidia/cuda:12.1.0-cudnn8-devel-ubuntu22.04"
+      credentials:
+        username: ${{ secrets.DOCKER_LOGIN }}
+        password: ${{ secrets.DOCKER_CREDENTIALS }}
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          ref: ${{ inputs.release-branch }}
+      - uses: actions/download-artifact@v3  # same major version as the upload-artifact@v3 producer
+        with:
+          name: aimet-torch-${{ env.BUILD_VERSION }}
+          path: .
+      - name: Install Tools
+        run: |
+          apt-get update
+          apt-get install python3 python3-pip -y
+          python3 -m pip install --upgrade pip
+          python3 -m pip install aimet_torch-*.whl
+          python3 -m pip install bs4 deepspeed==0.15.2 onnxruntime peft pytest safetensors spconv transformers==4.27.4 torch==2.2.2 torchvision==0.17.2 wget
+          python3 -m pip install pytorch-ignite --no-dependencies
+      - name: Verify Dependency Data
+        run: python3 NightlyTests/torch/dependencies.py NightlyTests/torch/resnet18_eval_scores.csv True
+      - name: Run Acceptance Tests
+        run: pytest NightlyTests/torch/
+
+  build-pypi-whl:
+    if: github.server_url != 'https://github.com'  # skipped on public github.com
+    name: Promote AIMET pypi wheel
+    runs-on: k8s-gpu
+    needs: [build-test-pypi-whl, pypi-unit-tests, pypi-acceptance-tests]  # promote only after both test jobs pass
+    env:
+      AIMET_VARIANT: torch-gpu
+      FAST_RELEASE: true
+      VERSION: ${{ inputs.release-version }}  # scripts read this as $VERSION rather than inlining the input
+      BUILD_VERSION: ${{ needs.build-test-pypi-whl.outputs.version }}
+      DEPLOY: ${{ inputs.deploy }}
+    defaults:
+      run:
+        shell: bash
+    container:
+      image: "${{ vars.DOCKER_REGISTRY }}/${{ vars.DOCKER_IMAGE }}torch:fast-release-latest"
+      credentials:
+        username: ${{ secrets.DOCKER_LOGIN }}
+        password: ${{ secrets.DOCKER_CREDENTIALS }}
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          ref: ${{ inputs.release-branch }}
+
+      - name: Make the Pypi Wheel
+        run: |
+          mkdir -p build/staging/universal
+          cmake -S . -B ./build -DENABLE_CUDA=ON -DENABLE_TORCH=ON -DENABLE_TENSORFLOW=OFF -DENABLE_ONNX=OFF -DPIP_INDEX=pypi -DSW_VERSION="$VERSION" -DCMAKE_INSTALL_PREFIX="build/staging/universal"
+          cmake --build ./build --parallel 4 --target all --target install --target packageaimet --target upload
+          echo "Retag the whl file"
+          WHL_PATH=$(pwd)/$(find . -name '*.whl' -printf '%P\n')  # quoted so the shell cannot expand the glob first
+          echo "WHL_PATH=$WHL_PATH" >> "$GITHUB_ENV"
+          wheel tags --python-tag=py38 --abi-tag=none --platform-tag=any "$WHL_PATH"
+          find . -name '*.whl'
+
+      - name: Install Tools
+        run: |
+          sudo apt-get update
+          sudo apt-get install python3 python3-pip -y
+          python3 -m pip install --upgrade pip
+          python3 -m pip install twine
+
+      - name: Push to pypi
+        run: |
+          case "$DEPLOY" in
+            (true)
+              echo "Deploy variable set to true, deploying. . . "
+              twine upload --verbose --repository pypi --config-file ~/.pypirc build/packaging/dist/* ;;
+            (false)
+              echo "Deploy variable is false, deployment for twine upload --verbose --repository pypi --config-file ~/.pypirc build/packaging/dist/* will not proceed." ;;
+          esac
+
diff --git a/Jenkins/fast-release/Dockerfile.torch-gpu b/Jenkins/fast-release/Dockerfile.torch-gpu
index 505011806fe..2b17ae5ed27 100644
--- a/Jenkins/fast-release/Dockerfile.torch-gpu
+++ b/Jenkins/fast-release/Dockerfile.torch-gpu
@@ -37,7 +37,7 @@
 # Docker image file to build and test AIMET for Tensorflow in a GPU environment
 
 ARG REGISTRY
-FROM ${REGISTRY}nvidia/cuda:12.0.0-cudnn8-devel-ubuntu22.04
+FROM ${REGISTRY}nvidia/cuda:12.1.0-cudnn8-devel-ubuntu22.04
 
 ARG DEBIAN_FRONTEND=noninteractive
 ARG APT_KEY_DONT_WARN_ON_DANGEROUS_USAGE=DontWarn
@@ -67,16 +67,9 @@ RUN apt-get update > /dev/null && \
         xauth > /dev/null && \
     rm -rf /var/lib/apt/lists/*
 
-# Install certificates
+## Install certificates
 RUN sudo update-ca-certificates
 
-# Modified version of bash.bashrc that adjusts the prompt
-### COPY bash.bashrc /etc/
-### RUN chmod 644 /etc/bash.bashrc
-
-### COPY profile.global /usr/local/etc/
-### RUN chmod 555 /usr/local/etc/profile.global
-
 # Add sudo support
 RUN echo "%users ALL = (ALL) NOPASSWD: ALL" >> /etc/sudoers
 
@@ -212,3 +205,4 @@ RUN python3 -m pip --no-cache-dir install \
         sphinx-rtd-theme \
         sphinx-tabs \
         torch==2.2
+