-
-
Notifications
You must be signed in to change notification settings - Fork 44
247 lines (214 loc) · 11.3 KB
/
pr_benchmark_updater.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
name: Update PR comments with benchmark
on:
# This number should correspond to the IGNORE_RUNS_OLDER_THAN value below.
# When setting up for the first time, use "on: push" instead of "on: schedule"
# and set IGNORE_RUNS_OLDER_THAN to a very high number until it runs once.
schedule:
- cron: '*/15 * * * *'
jobs:
pr_benchmark_updater:
runs-on: ubuntu-latest
steps:
- name: main
env:
GITHUB_TOKEN: "${{ secrets.GITHUB_TOKEN }}"
WORKFLOW_NAME: "Run benchmark"
# the name of the artifact whose content comment published by PR. Must have a single markdown file inside.
MSG_ARTIFACT_NAME: "pr_message"
# How far back to look for finished runs, in minutes.
# Set to 10-20 minutes higher than cron's job frequency set above.
IGNORE_RUNS_OLDER_THAN: 30
# How far back to look for updated pull requests, in minutes.
# Should be bigger than IGNORE_RUNS_OLDER_THAN by the maximum time a pull request jobs may take
IGNORE_PRS_OLDER_THAN: 80
run: |
#
# Strategy:
# * get all recently updated pull requests
# * get all recent workflow runs
# * match pull requests and their current SHA with the last workflow run for the same SHA
# * for each found match of <pull-request-number> and <workflow-run-id> :
# * download artifact from the workflow run -- expects a single file with markdown content
# * look through existing PR comments to see if we have posted a comment before
# (uses a hidden magical header to identify our comment)
# * either create or update the comment with the new text (if changed)
#
export GITHUB_API="https://api.github.com/repos/$GITHUB_REPOSITORY"
export COMMENT_MAGIC_HEADER='<!--'" Do not edit. This comment will be auto-updated with artifact '$MSG_ARTIFACT_NAME' created by action '$WORKFLOW_NAME' -->"
# A useful wrapper around CURL
crl() {
curl --silent --show-error --location --retry 1 "${@:2}" \
-H "Accept: application/vnd.github.antiope-preview+json, application/vnd.github.v3+json" \
"$1"
}
auth_crl() {
crl "$1" -H "authorization: Bearer $GITHUB_TOKEN" "${@:2}"
}
#
# Parse current pull requests
#
# Get all pull requests, most recently updated first
# (this way we don't need to page through all of them)
# Filter out PRs that are older than $IGNORE_PRS_OLDER_THAN minutes
# Result is an object, mapping a "key" to the pull request number:
# {
# "nyurik/openmaptiles/nyurik-patch-1/4953dd2370b9988a7832d090b5e47b3cd867f594": 6,
# ...
# }
PULL_REQUESTS_RAW="$( crl "$GITHUB_API/pulls?sort=updated&direction=desc" )"
if ! PULL_REQUESTS="$(jq --arg IGNORE_PRS_OLDER_THAN "$IGNORE_PRS_OLDER_THAN" '
map(
# Only select unlocked pull requests updated within last $IGNORE_PRS_OLDER_THAN minutes
select(.locked==false
and (now - (.updated_at|fromdate)) / 60 < ($IGNORE_PRS_OLDER_THAN | tonumber))
# Prepare for "from_entries" by creating a key/value object
# The key is a combination of repository name, branch name, and latest SHA
| { key: (.head.repo.full_name + "/" + .head.ref + "/" + .head.sha), value: .number }
)
| from_entries
' <( echo "$PULL_REQUESTS_RAW" ) )"; then
echo "Error parsing pull requests"
echo "$PULL_REQUESTS_RAW"
exit 1
fi
# Count how many pull requests we should process, and exit early if there are none
PR_COUNT="$(jq 'length' <( echo "$PULL_REQUESTS" ) )"
if [ "$PR_COUNT" -eq 0 ]; then
echo "There are no pull requests updated in the last $IGNORE_PRS_OLDER_THAN minutes. Exiting."
exit
else
echo "$PR_COUNT pull requests have been updated in the last $IGNORE_PRS_OLDER_THAN minutes"
echo "$PULL_REQUESTS" | jq -r 'keys|.[]|" * " + .'
fi
#
# Resolve workflow name into workflow ID
#
WORKFLOW_ID="$(crl "$GITHUB_API/actions/workflows" \
| jq --arg WORKFLOW_NAME "$WORKFLOW_NAME" '
.workflows[] | select(.name == $WORKFLOW_NAME) | .id
')"
if [ -z "$WORKFLOW_ID" ]; then
echo "Unable to find workflow '$WORKFLOW_NAME' in $GITHUB_REPOSITORY"
exit 1
else
echo "Resolved workflow '$WORKFLOW_NAME' to ID $WORKFLOW_ID"
fi
#
# Match pull requests with the workflow runs
#
# Get all workflow runs that were triggered by pull requests
WORKFLOW_PR_RUNS="$(crl "$GITHUB_API/actions/workflows/${WORKFLOW_ID}/runs?event=pull_request")"
# For each workflow run, match it with the pull request to get the PR number
# A match is based on "source repository + branch + SHA" key
# In rare cases (e.g. force push to an older revision), there could be more than one match
# for a given PR number, so just use the most recent one.
# Result is a table (list of lists) - each row with PR number, JOB ID, and the above key
PR_JOB_MAP="$(jq --arg IGNORE_RUNS_OLDER_THAN "$IGNORE_RUNS_OLDER_THAN" '
# second input is the pull request map - use it to lookup PR numbers
input as $PULL_REQUESTS
| .workflow_runs
| map(
# Create a new object with the relevant values
{
id,
updated_at,
# create lookup key based on source repository + branch + SHA
key: (.head_repository.full_name + "/" + .head_branch + "/" + .head_sha),
# was this a successful run?
# do not include .conclusion=="success" because errors could also post messages
success: (.status=="completed")
}
# lookup PR number from $PULL_REQUESTS using the above key
| . += { pr_number: $PULL_REQUESTS[.key] }
# Remove runs that were not in the list of the PRs
| select(.pr_number)
)
# Keep just the most recent run per pull request
| group_by(.pr_number)
| map(
sort_by(.updated_at)
| last
# If the most recent run did not succeed, or if the run is too old, ignore it
| select(.success and (now - (.updated_at|fromdate)) / 60 < ($IGNORE_RUNS_OLDER_THAN | tonumber))
# Keep just the pull request number mapping to run ID
| { pr_number, id, key }
)
' <( echo "$WORKFLOW_PR_RUNS" ) <( echo "$PULL_REQUESTS" ) )"
# Count how many jobs we should process, and exit early if there are none
JOBS_COUNT="$(jq 'length' <( echo "$PR_JOB_MAP" ) )"
if [ "$JOBS_COUNT" -eq 0 ]; then
echo "There are no recent workflow job runs in the last $IGNORE_RUNS_OLDER_THAN minutes. Exiting."
exit
else
echo "$JOBS_COUNT '$WORKFLOW_NAME' jobs have been updated in the last $IGNORE_RUNS_OLDER_THAN minutes"
echo "$PR_JOB_MAP" | jq -r '.[] | " * PR #\(.pr_number) Job #\(.id) -- \(.key) "'
fi
#
# Iterate over the found pairs of PR number + run ID, and update them all
#
echo "$PR_JOB_MAP" | jq -r '.[] | [ .pr_number, .id, .key ] | @sh' | \
while read -r PR_NUMBER RUN_ID RUN_KEY; do
echo "Processing '$WORKFLOW_NAME' run #$RUN_ID for pull request #$PR_NUMBER $RUN_KEY..."
ARTIFACTS="$(crl "$GITHUB_API/actions/runs/$RUN_ID/artifacts")"
# Find the artifact download URL for the artifact with the expected name
ARTIFACT_URL="$(jq -r --arg MSG_ARTIFACT_NAME "$MSG_ARTIFACT_NAME" '
.artifacts
| map(select(.name == $MSG_ARTIFACT_NAME and .expired == false))
| first
| .archive_download_url
| select(.!=null)
' <( echo "$ARTIFACTS" ) )"
if [ -z "$ARTIFACT_URL" ]; then
echo "Unable to find an artifact named '$MSG_ARTIFACT_NAME' in workflow $RUN_ID (PR #$PR_NUMBER), skipping..."
continue
fi
echo "Downloading artifact $ARTIFACT_URL (assuming single text file per artifact)..."
if ! MESSAGE="$(auth_crl "$ARTIFACT_URL" | gunzip)"; then
echo "Unable to download or parse message from artifact '$MSG_ARTIFACT_NAME' in workflow $RUN_ID (PR #$PR_NUMBER), skipping..."
continue
fi
if [ -z "$MESSAGE" ]; then
echo "Empty message in artifact '$MSG_ARTIFACT_NAME' in workflow $RUN_ID (PR #$PR_NUMBER), skipping..."
continue
fi
# Create a message body by appending a magic header
# and stripping any starting and ending whitespace from the original message
MESSAGE_BODY="$(jq -n \
--arg COMMENT_MAGIC_HEADER "$COMMENT_MAGIC_HEADER" \
--arg MESSAGE "$MESSAGE" \
'{ body: ($COMMENT_MAGIC_HEADER + "\n" + ($MESSAGE | sub( "^[\\s\\p{Cc}]+"; "" ) | sub( "[\\s\\p{Cc}]+$"; "" ))) }' \
)"
EXISTING_PR_COMMENTS="$(crl "$GITHUB_API/issues/$PR_NUMBER/comments")"
# Get the comment URL for the first comment that begins with the magic header, or empty string
OLD_COMMENT="$(jq --arg COMMENT_MAGIC_HEADER "$COMMENT_MAGIC_HEADER" '
map(select(.body | startswith($COMMENT_MAGIC_HEADER)))
| first
| select(.!=null)
' <( echo "$EXISTING_PR_COMMENTS" ) )"
if [ -z "$OLD_COMMENT" ]; then
COMMENT_HTML_URL="$(auth_crl "$GITHUB_API/issues/$PR_NUMBER/comments" \
-X POST \
-H "Content-Type: application/json" \
--data "$MESSAGE_BODY" \
| jq -r '.html_url' )"
COMMENT_INFO="New comment $COMMENT_HTML_URL was created"
else
# Make sure the content of the message has changed
COMMENT_URL="$(jq -r '
(input | .body) as $body
| select(.body | . != $body)
| .url
' <( echo "$OLD_COMMENT" ) <( echo "$MESSAGE_BODY" ) )"
if [ -z "$COMMENT_URL" ]; then
echo "The message has already been posted from artifact '$MSG_ARTIFACT_NAME' in workflow $RUN_ID (PR #$PR_NUMBER), skipping..."
continue
fi
COMMENT_HTML_URL="$(auth_crl "$COMMENT_URL" \
-X PATCH \
-H "Content-Type: application/json" \
--data "$MESSAGE_BODY" \
| jq -r '.html_url' )"
COMMENT_INFO="Existing comment $COMMENT_HTML_URL was updated"
fi
echo "$COMMENT_INFO from workflow $WORKFLOW_NAME #$RUN_ID"
done