-
Notifications
You must be signed in to change notification settings - Fork 34
154 lines (132 loc) · 6.46 KB
/
validate_runner_status.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
---
# NOTE: ipatch, GitHub removes a self-hosted runner from the runner registry if
# it has not connected to GitHub for 14 days.
# 1. Check whether the remote machine hosting the virtual machine (VM) that
#    runs the self-hosted runner is online.
# 2. If the remote box is online, check whether the VM service, e.g. vmmojave,
#    is up and running:
#    a. check the status of the systemd service for the VM
#    b. use `nc` to verify that the VM is reachable at its static IP (ping is
#       NOT an option)
#    c. if the VM for the self-hosted runner is online, verify that the
#       self-hosted runner service/process is running
# 3. If one of the checks fails, send an email, then run an action / step that
#    tries to bring the service back online, and then move on to the next step.
# 4. Set up a dedicated group on the remote box and on the VM that allows an
#    isolated $USER to run only the tasks needed for checking and controlling
#    runner-related services.
# The intended order of operations is:
# 1. First check whether the runner is online by querying the GitHub API
#    endpoint.
#    a1. If the API reports the runner as online, take note of that.
#    b1. If the runner is offline, run through the steps/checks below.
# Workflow name as shown in the Actions tab.
name: validate self-hosted runner status

on:
  schedule:
    # field order: minute hour day-of-month month day-of-week (see crontab.guru)
    # fires at 04:01 on every 12th day-of-month, i.e. on days 1, 13 and 25
    - cron: "1 4 */12 * *"
  # NOTE: allows running this action on request from the github web UI
  workflow_dispatch:
jobs:
  # Single job: asks the GitHub API for the status of each self-hosted runner
  # and, for runners that are not online, probes the host machine and the VM.
  validate-self-hosted-runners:
    # a fast booting github-hosted runner
    runs-on: ubuntu-latest
    env:
      # Static IP of each runner VM. The key is the runner's name; the steps
      # loop over these names and read the IP through indirect expansion.
      vmmojave: '192.168.1.114'
      vmcatalina: '192.168.1.115'
      # was 191.168.1.116, which is outside the 192.168.1.x network the other
      # VMs live in
      vmbigsur: '192.168.1.116'
      # The key spelling (`maintainer_emai`) is what the alert step reads via
      # `env.maintainer_emai`; rename both places together if it is corrected.
      # NOTE(review): the address looks redacted in this copy of the file;
      # restore the real maintainer address. Quoted because a value starting
      # with `[` would otherwise be parsed as a YAML flow sequence.
      maintainer_emai: '[email protected]'
    steps:
- name: Configure SSH for github hosted runner
  id: configure_ssh
  # connection details come from repository secrets and reach the script only
  # as environment variables
  env:
    SSH_HOST: ${{ secrets.ARCHBOX_SERVER_IP }}
    SSH_USER: ${{ secrets.ARCHBOX_SERVER_USER }}
    SSH_KEY: ${{ secrets.ACTIONS_SSH_PRIVATE_KEY }}
  run: |
    ssh_dir="$HOME/.ssh"
    key_file="$ssh_dir/shrunserver.key"
    mkdir -p "$ssh_dir/"
    # write the private key and make it readable by the owner only
    printf '%s\n' "$SSH_KEY" > "$key_file"
    chmod 600 "$key_file"
    # append the `shrunserver` host alias that the later ssh calls rely on;
    # host key checking is switched off for this alias
    cat >> "$ssh_dir/config" <<END
    Host shrunserver
    HostName $SSH_HOST
    User $SSH_USER
    IdentityFile ~/.ssh/shrunserver.key
    StrictHostKeyChecking no
    END
- name: Debug runner IPs
  id: debug_runner_ips
  run: |
    # print one "name: ip" line per VM; the IP is read from the job-level env
    # variable that carries the VM's name
    for vm in vmmojave vmcatalina vmbigsur; do
      echo "$vm: ${!vm}"
    done
- name: test_runner_status
  id: test_runner_status
  env:
    # the token is handed to the script as an environment variable instead of
    # being pasted into the script text
    HOMEBREW_GITHUB_API_TOKEN: ${{ secrets.HOMEBREW_GITHUB_API_TOKEN }}
  run: |
    # Diagnostic step: for every runner print its VM IP and the status that
    # the GitHub API reports for it.
    for runner in "vmmojave" "vmcatalina" "vmbigsur"; do
      # Access the IP address using indirect expansion: the env variable named
      # after the runner holds the IP
      runner_ip="${!runner}"
      echo "$runner_ip"
      # The filter is parenthesised so that the "not found" fallback also
      # applies when no runner with this name is listed (or `.runners` is
      # missing); the name is passed with --arg rather than spliced into the
      # filter text.
      status_selfhosted_runner=$(curl -s -H "Authorization: Bearer $HOMEBREW_GITHUB_API_TOKEN" \
        https://api.github.com/repos/freecad/homebrew-freecad/actions/runners \
        | jq -r --arg name "$runner" \
            '(.runners[]? | select(.name == $name) | .status) // "not found"')
      echo "$runner is $status_selfhosted_runner"
    done
# NOTE: HOMEBREW_GITHUB_API_TOKEN needed in repo secrets, use web UI to add token
- name: check self-hosted runners status for freecad/homebrew-freecad
  id: status_runners
  env:
    # secrets are handed to the script as environment variables instead of
    # being pasted into the script text
    HOMEBREW_GITHUB_API_TOKEN: ${{ secrets.HOMEBREW_GITHUB_API_TOKEN }}
    SSH_HOST: ${{ secrets.ARCHBOX_SERVER_IP }}
  run: |
    # For every runner: ask the GitHub API for its status. When it is not
    # "online", probe the host machine, then the VM (through the host), and try
    # one restart of the VM's service. The outcome is written to $GITHUB_ENV as
    # <runner>_status so that later steps can read it.
    for runner in "vmmojave" "vmcatalina" "vmbigsur"; do
      # Access the IP address using indirect expansion: the env variable named
      # after the runner holds the VM's IP. It has to be set in this step; the
      # assignment made in an earlier step's script is not visible here.
      runner_ip="${!runner}"
      # The filter is parenthesised so that the "not found" fallback also
      # applies when no runner with this name is listed (or `.runners` is
      # missing); the name is passed with --arg rather than spliced into the
      # filter text.
      status_selfhosted_runner=$(curl -s -H "Authorization: Bearer $HOMEBREW_GITHUB_API_TOKEN" \
        https://api.github.com/repos/freecad/homebrew-freecad/actions/runners \
        | jq -r --arg name "$runner" \
            '(.runners[]? | select(.name == $name) | .status) // "not found"')
      echo "$status_selfhosted_runner"
      if [[ "$status_selfhosted_runner" == "online" ]]; then
        echo "$runner is online ✅"
        echo "${runner}_status=online" >> "$GITHUB_ENV"
      else
        echo "the github runner service for $runner is OFFLINE 🚫"
        # Check reachability of host machine
        if nc -v -z -w 5 "$SSH_HOST" 22; then
          echo "Machine hosting the vm services for $runner is online ✅"
          # NOTE: ipatch, run command on remote computer via ssh from github hosted runner
          ssh shrunserver "date"
          # ssh shrunserver "/usr/bin/sudo ls" # DEBUG
          # echo "DEBUG $runner_ip" # DEBUG
          # $runner_ip is expanded here, before the command is sent, because it
          # is only defined in this step's shell
          if ssh shrunserver "nc -v -z -w 5 '$runner_ip' 22 < /dev/null"; then
            echo "virtual machine $runner appears to be online"
          else
            echo "could not reach the virtual machine $runner"
            # NOTE: restart the systemd service named after the runner; a
            # failed restart is reported but does not abort the step
            ssh shrunserver "/usr/bin/sudo systemctl restart '$runner'" || echo "failed to restart service"
            # wait before probing the vm again
            sleep 45
            if ssh shrunserver "nc -v -z -w 5 '$runner_ip' 22 < /dev/null"; then
              echo "$runner appears to have come back online ✅"
            else
              echo "$runner is still NOT online 🚫"
              echo "${runner}_status=down" >> "$GITHUB_ENV"
            fi
          fi
        else
          echo "can not connect to host machine running $runner virtual machine"
          # the vm cannot be checked or restarted without its host, so record
          # the runner as down; the alert email is sent for 'down' runners
          echo "${runner}_status=down" >> "$GITHUB_ENV"
        fi
      fi
    done
- name: Print vm_status variable
  id: print_vm_status
  run: |
    # The <runner>_status values are appended to $GITHUB_ENV by the status
    # step and are read here as environment variables; a runner without a
    # recorded status prints an empty value.
    for vm in vmmojave vmcatalina vmbigsur; do
      status_var="${vm}_status"
      echo "$vm status is ${!status_var}"
    done
- name: Send email on failure to reach runner service
  id: send_alert_email
  # runs only when at least one runner was recorded as 'down'
  if: >-
    env.vmmojave_status == 'down' ||
    env.vmcatalina_status == 'down' ||
    env.vmbigsur_status == 'down'
  # NOTE(review): the action reference below looks mangled in this copy of the
  # file (the part after `dawidd6/` reads like a redacted address); restore the
  # real `owner/repo@version` reference before relying on this step.
  uses: dawidd6/[email protected]
  with:
    # SMTP connection; the credentials come from repository secrets
    server_address: smtp.gmail.com
    server_port: 587
    username: ${{ secrets.SMTP_USERNAME }}
    password: ${{ secrets.SMTP_PASSWORD }}
    # the message is sent from the SMTP account to the address stored in the
    # job-level `maintainer_emai` variable
    from: ${{ secrets.SMTP_USERNAME }}
    to: ${{ env.maintainer_emai }}
    subject: "homebrew-freecad self-hosted runner is offline 🚫"
    body: >
      One of the components related to the homebrew-freecad self-hosted runners is offline,
      and can not be reached. Please follow up before runner expires from github registry.