-
Notifications
You must be signed in to change notification settings - Fork 180
181 lines (166 loc) · 8.09 KB
/
build-rapids.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
# Reusable workflow: builds the RAPIDS libraries listed in the build-rapids
# job matrix. It is only triggered via `workflow_call` from another workflow.
name: Build all RAPIDS repositories

on:
  workflow_call:
    inputs:
      # Opt-in switch read by the notify-failure job; defaults to no alerts.
      enable_slack_alerts:
        description: "If true, a message will be posted to the CCCL GHA CI Alert channel if the workflow fails."
        required: false
        default: false
        type: boolean

jobs:
  # Gate job. Publishes `ok=true` when the triggering event is a push,
  # schedule or workflow_dispatch in NVIDIA/cccl, or a pull_request in any
  # repository other than NVIDIA/cccl; publishes `ok=false` otherwise.
  check-event:
    name: Check GH Event
    runs-on: ubuntu-latest
    outputs:
      ok: ${{ steps.check_gh_event.outputs.ok }}
    steps:
      - id: check_gh_event
        name: Check GH Event
        shell: bash
        run: |
          [[ '${{ github.event_name }}' == 'push' && '${{ github.repository }}' == 'NVIDIA/cccl' ]] || \
          [[ '${{ github.event_name }}' == 'schedule' && '${{ github.repository }}' == 'NVIDIA/cccl' ]] || \
          [[ '${{ github.event_name }}' == 'workflow_dispatch' && '${{ github.repository }}' == 'NVIDIA/cccl' ]] || \
          [[ '${{ github.event_name }}' == 'pull_request' && '${{ github.repository }}' != 'NVIDIA/cccl' ]] \
          && echo "ok=true" | tee -a "$GITHUB_OUTPUT" \
          || echo "ok=false" | tee -a "$GITHUB_OUTPUT";

  # Builds each group of RAPIDS libraries inside the rapids-conda devcontainer.
  # Every matrix entry is an ordered, space-separated list of libraries that
  # are configured and built one after another by the generated /ci.sh script.
  build-rapids:
    name: "${{ matrix.libs }}"
    if: needs.check-event.outputs.ok == 'true'
    needs: check-event
    # ubuntu-latest outside NVIDIA/cccl, the linux-amd64-cpu32 runner inside it.
    runs-on: ${{ fromJSON(github.repository != 'NVIDIA/cccl' && '"ubuntu-latest"' || '"linux-amd64-cpu32"') }}
    strategy:
      # Let the remaining library groups finish even if one group fails.
      fail-fast: false
      matrix:
        include:
          - { cuda: '12.5', libs: 'rmm kvikio cudf cudf_kafka cuspatial' }
          - { cuda: '12.5', libs: 'rmm ucxx raft cuvs cumlprims_mg cuml' }
          - { cuda: '12.5', libs: 'rmm ucxx raft cugraph-ops cugraph cugraph-gnn' }
    permissions:
      # Allows the job to request an OIDC token for the AWS credentials step.
      id-token: write
      contents: read
    steps:
      - name: Checkout repo
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
          persist-credentials: false
      - name: Add NVCC problem matcher
        run: echo "::add-matcher::$(pwd)/.github/problem-matchers/problem-matcher.json"
      - uses: aws-actions/configure-aws-credentials@v4
        with:
          role-to-assume: arn:aws:iam::279114543810:role/gha-oidc-NVIDIA
          aws-region: us-east-2
          role-duration-seconds: 43200  # 12h
      - name: Run command  # Do not change this step's name, it is checked in parse-job-times.py
        env:
          CI: true
          RAPIDS_LIBS: ${{ matrix.libs }}
          # Uncomment any of these to customize the git repo and branch for a RAPIDS lib:
          # RAPIDS_cmake_GIT_REPO: '{"upstream": "rapidsai", "tag": "branch-25.02"}'
          # RAPIDS_cudf_GIT_REPO: '{"upstream": "rapidsai", "tag": "branch-25.02"}'
          # RAPIDS_cudf_kafka_GIT_REPO: '{"upstream": "rapidsai", "tag": "branch-25.02"}'
          # RAPIDS_cugraph_ops_GIT_REPO: '{"upstream": "rapidsai", "tag": "branch-25.02"}'
          # RAPIDS_cugraph_GIT_REPO: '{"upstream": "rapidsai", "tag": "branch-25.02"}'
          # RAPIDS_cugraph_gnn_GIT_REPO: '{"upstream": "rapidsai", "tag": "branch-25.02"}'
          # RAPIDS_cuml_GIT_REPO: '{"upstream": "rapidsai", "tag": "branch-25.02"}'
          # RAPIDS_cumlprims_mg_GIT_REPO: '{"upstream": "rapidsai", "tag": "branch-25.02"}'
          # RAPIDS_cuspatial_GIT_REPO: '{"upstream": "rapidsai", "tag": "branch-25.02"}'
          # RAPIDS_cuvs_GIT_REPO: '{"upstream": "rapidsai", "tag": "branch-25.02"}'
          # RAPIDS_kvikio_GIT_REPO: '{"upstream": "rapidsai", "tag": "branch-25.02"}'
          # RAPIDS_raft_GIT_REPO: '{"upstream": "rapidsai", "tag": "branch-25.02"}'
          # RAPIDS_rmm_GIT_REPO: '{"upstream": "rapidsai", "tag": "branch-25.02"}'
          # RAPIDS_ucxx_GIT_REPO: '{"upstream": "rapidsai", "tag": "branch-0.42"}'
        # NOTE: the heredoc bodies below must stay at the block's base indent so
        # the shebang lines and the EOF / ____EOF terminators land at column 0
        # of the generated scripts.
        run: |
          cat <<"EOF" > "$RUNNER_TEMP/ci-entrypoint.sh"
          #! /usr/bin/env bash
          # Start the ssh-agent and add the repo deploy keys
          if ! pgrep ssh-agent >/dev/null 2>&1; then eval "$(ssh-agent -s)"; fi
          ssh-add - <<< '${{ secrets.RAPIDSAI_CUMLPRIMS_DEPLOY_KEY }}'
          ssh-add - <<< '${{ secrets.RAPIDSAI_CUGRAPH_OPS_DEPLOY_KEY }}'
          devcontainer-utils-init-ssh-deploy-keys || true
          exec "$@"
          EOF
          cat <<"EOF" > "$RUNNER_TEMP/ci.sh"
          #! /usr/bin/env bash
          set -eo pipefail
          declare -a failures
          declare -A failures_map
          _print_err_exit_msg() {
            local code=$?
            if test $code -ne 0; then
              echo "::error:: Failures: ${failures[*]}"
              echo -e "::group::️❗ \e[1;31mInstructions to Reproduce CI Failure Locally\e[0m"
              echo "::error:: To replicate this failure locally, follow the steps below:"
              echo "1. Clone the repository, and navigate to the correct branch and commit:"
              echo " git clone --branch $GITHUB_REF_NAME --single-branch https://github.com/$GITHUB_REPOSITORY.git && cd $(echo $GITHUB_REPOSITORY | cut -d'/' -f2) && git checkout $GITHUB_SHA"
              echo ""
              echo "2. Run the failed command inside the same Docker container used by this CI job:"
              cat <<____EOF
          RAPIDS_LIBS='${RAPIDS_LIBS}'$(for lib in cmake ${RAPIDS_LIBS}; do var=RAPIDS_${lib//-/_}_GIT_REPO; if test -v "$var" && test -n "${!var}"; then echo -n " $var='${!var}'"; fi; done) \\
          .devcontainer/launch.sh -d -c ${{matrix.cuda}} -H rapids-conda -- ./ci/rapids/rapids-entrypoint.sh \\
          /bin/bash -li -c 'uninstall-all -j -qqq && clean-all -j && build-all -j -v || exec /bin/bash -li'
          ____EOF
              echo ""
              echo "For additional information, see:"
              echo " - DevContainer Documentation: https://github.com/NVIDIA/cccl/blob/main/.devcontainer/README.md"
              echo " - Continuous Integration (CI) Overview: https://github.com/NVIDIA/cccl/blob/main/ci-overview.md"
            fi
            exit $code
          }
          # Print failures and exit
          trap '_print_err_exit_msg' EXIT;
          . ~/cccl/ci/rapids/post-create-command.sh;
          # Configure and build each lib with -DBUILD_TESTS=OFF, then again with -DBUILD_TESTS=ON
          for RAPIDS_ENABLE_TESTS in OFF ON; do
            _apply_manifest_modifications;
            for lib in ${RAPIDS_LIBS}; do
              sccache -z
              if ! configure-${lib}-cpp || ! build-${lib}-cpp; then
                if ! test -v failures_map["${lib}"]; then
                  failures+=("${lib}")
                  failures_map["${lib}"]=1
                fi
              fi
              sccache --show-adv-stats
            done
          done
          # Exit with error if any failures occurred
          if test ${#failures[@]} -ne 0; then
            exit 1
          fi
          EOF
          chmod +x "$RUNNER_TEMP"/ci{,-entrypoint}.sh
          .devcontainer/launch.sh \
            --docker \
            --cuda ${{matrix.cuda}} \
            --host rapids-conda \
            --env "AWS_ROLE_ARN=" \
            --env "AWS_REGION=$AWS_REGION" \
            --env "SCCACHE_REGION=$AWS_REGION" \
            --env "AWS_ACCESS_KEY_ID=$AWS_ACCESS_KEY_ID" \
            --env "AWS_SESSION_TOKEN=$AWS_SESSION_TOKEN" \
            --env "AWS_SECRET_ACCESS_KEY=$AWS_SECRET_ACCESS_KEY" \
            --env "GITHUB_SHA=$GITHUB_SHA" \
            --env "GITHUB_REF_NAME=$GITHUB_REF_NAME" \
            --env "GITHUB_REPOSITORY=$GITHUB_REPOSITORY" \
            --volume "$RUNNER_TEMP/ci.sh:/ci.sh" \
            --volume "$RUNNER_TEMP/ci-entrypoint.sh:/ci-entrypoint.sh" \
            -- /ci-entrypoint.sh ./ci/rapids/rapids-entrypoint.sh /ci.sh

  # Posts a Slack message when a job this one depends on has failed and the
  # caller set `enable_slack_alerts`.
  notify-failure:
    name: Notify Slack of RAPIDS failure
    if: ${{ failure() && inputs.enable_slack_alerts }}
    needs: build-rapids
    runs-on: ubuntu-latest
    steps:
      - name: Notify
        # NOTE(review): the action ref was mangled to "[email protected]" in the
        # reviewed copy. v1.26.0 is a v1 release that accepts the `channel-id`
        # and `slack-message` inputs used below; confirm the exact pin upstream.
        uses: slackapi/slack-github-action@v1.26.0
        env:
          SLACK_BOT_TOKEN: ${{ secrets.SLACK_NOTIFIER_BOT_TOKEN }}
          WORKFLOW_TYPE: ${{ github.workflow }}
          SUMMARY_URL: https://github.com/${{github.repository}}/actions/runs/${{github.run_id}}
        with:
          channel-id: ${{ secrets.SLACK_CHANNEL_CI_ALERT }}
          slack-message: |
            RAPIDS build in workflow '${{ env.WORKFLOW_TYPE }}' failed.
            Details: ${{ env.SUMMARY_URL }}