pipeline {
agent {
docker {
reuseNode false
image 'caufieldjh/kg-idg:4'
}
}
triggers {
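// run monthly: first day of each month, at a hashed hour/minute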
cron('H H 1 1-12 *')
}
environment {
BUILDSTARTDATE = sh(script: "echo `date +%Y%m%d`", returnStdout: true).trim()
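// BUILDSTARTDATE resolves to e.g. '20230101' and names the S3 build dir below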
S3PROJECTDIR = 'kg-idg' // no trailing slash
// Distribution ID for the AWS CloudFront for this bucket
// used solely for invalidations
AWS_CLOUDFRONT_DISTRIBUTION_ID = 'EUVSWXZQBXCFP'
MERGEDKGNAME_BASE = "KG-IDG"
MERGEDKGNAME_GENERIC = "merged-kg"
}
options {
timestamps()
disableConcurrentBuilds()
}
stages {
// Very first: pause briefly to give a chance to
// cancel and clean the workspace before use.
stage('Ready and clean') {
steps {
// Give us 30 seconds to cancel if we want.
sleep time: 30, unit: 'SECONDS'
}
}
stage('Initialize') {
steps {
// print some info
dir('./gitrepo') {
sh 'env > env.txt'
sh 'echo $BRANCH_NAME > branch.txt'
sh 'echo "$BRANCH_NAME"'
sh 'cat env.txt'
sh 'cat branch.txt'
sh "echo $BUILDSTARTDATE > dow.txt"
sh "echo $BUILDSTARTDATE"
sh "echo $MERGEDKGNAME_BASE"
sh "echo $MERGEDKGNAME_GENERIC"
sh "python3.9 --version"
sh "id"
sh "whoami" // this should be jenkinsuser
// if the above fails, then the docker host didn't start the docker
// container as a user that this image knows about. This will
// likely cause lots of problems (like trying to write to a $HOME
// directory that doesn't exist, etc.), so we should fail here and
// have the user fix this.
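// A minimal fail-fast sketch (hypothetical, kept commented out like the
// optional stage further down; assumes the expected account is 'jenkinsuser'):
//script {
//    def who = sh(script: 'whoami', returnStdout: true).trim()
//    if (who != 'jenkinsuser') {
//        error("Unexpected container user '${who}' - check the docker agent's user mapping.")
//    }
//}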
}
}
}
stage('Build kg_idg') {
steps {
dir('./gitrepo') {
git(
url: 'https://github.com/Knowledge-Graph-Hub/kg-idg',
branch: env.BRANCH_NAME
)
sh '/usr/bin/python3.9 -m venv venv'
sh '. venv/bin/activate'
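// NB: each sh step runs in its own shell, so the activation above does not
// persist; later steps re-source venv/bin/activate explicitly.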
// Start up the database platforms
// Starting may fail if resources aren't adequately available
// So just in case, we try to run the command a few times
sh 'for i in {1..5}; do sudo /etc/init.d/postgresql start && break || sleep 60; done'
sh 'for i in {1..5}; do sudo /etc/init.d/mysql start && break || sleep 60; done'
sh 'sudo /etc/init.d/postgresql status'
echo 'PostgreSQL server status:'
sh 'pg_isready -h localhost -p 5432'
sh 'sudo /etc/init.d/mysql status'
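// An equivalent sketch using Jenkins' built-in retry step in place of the
// shell loops above (assumes a 60s backoff between attempts is acceptable):
//retry(5) {
//    sh 'sudo /etc/init.d/postgresql start || { sleep 60; exit 1; }'
//}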
// Now move on to the actual install + reqs
sh './venv/bin/pip install .'
sh './venv/bin/pip install awscli boto3 s3cmd'
// Temporary - install ensmallen on its own
//sh './venv/bin/pip install ensmallen'
}
}
}
stage('Download') {
steps {
dir('./gitrepo') {
script {
// Verify that the project directory is defined, or it will make a mess
// when it uploads everything to the wrong directory
if (S3PROJECTDIR.replaceAll("\\s","") == '') {
error("Project name contains only whitespace. Will not continue.")
}
def run_py_dl = sh(
script: '. venv/bin/activate && python3.9 run.py download', returnStatus: true
)
if (run_py_dl == 0) {
if (env.BRANCH_NAME != 'master') { // only push raw data to S3 when on master
echo "Will not push if not on correct branch."
} else {
withCredentials([file(credentialsId: 's3cmd_kg_hub_push_configuration', variable: 'S3CMD_CFG')]) {
sh '. venv/bin/activate && s3cmd -c $S3CMD_CFG --acl-public --mime-type=text/plain --cf-invalidate put -r data/raw s3://kg-hub-public-data/$S3PROJECTDIR/'
}
}
} else { // 'run.py download' failed - let's try to download last good copy of raw/ from s3 to data/
currentBuild.result = "UNSTABLE"
withCredentials([file(credentialsId: 's3cmd_kg_hub_push_configuration', variable: 'S3CMD_CFG')]) {
sh 'rm -fr data/raw || true;'
sh 'mkdir -p data/raw || true'
sh '. venv/bin/activate && s3cmd -c $S3CMD_CFG --acl-public --mime-type=text/plain get -r s3://kg-hub-public-data/$S3PROJECTDIR/raw/ data/raw/'
}
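// A possible extra guard (hypothetical): fail here if the fallback fetch
// left data/raw empty, since the transform stage needs the raw files.
//sh '[ "$(ls -A data/raw)" ] || exit 1'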
}
}
}
}
}
stage('Transform') {
steps {
dir('./gitrepo') {
sh '. venv/bin/activate && env && python3.9 run.py transform'
}
}
}
stage('Merge') {
steps {
dir('./gitrepo') {
sh '. venv/bin/activate && python3.9 run.py merge -y merge.yaml'
sh 'cp merged_graph_stats.yaml merged_graph_stats_$BUILDSTARTDATE.yaml'
sh 'mv merged_graph_stats_$BUILDSTARTDATE.yaml data/merged/'
sh 'cd data/merged/ && tar -czvf merged-kg.tar.gz merged-kg_nodes.tsv merged-kg_edges.tsv merged_graph_stats_$BUILDSTARTDATE.yaml'
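// Optional sanity check (sketch): list the archive contents before moving on.
//sh 'cd data/merged/ && tar -tzf merged-kg.tar.gz'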
// (no 'cd' back needed: each sh step starts fresh in the workspace dir)
}
}
}
//stage('Make blazegraph journal'){
// steps {
// dir('./gitrepo/blazegraph') {
// git(
// url: 'https://github.com/balhoff/blazegraph-runner.git',
// branch: 'master'
// )
// sh 'HOME=`pwd` && sbt stage' // set HOME here to prevent sbt from trying to make dir .cache in /
// sh 'ls -lhd ../data/merged/${MERGEDKGNAME_BASE}.nt.gz'
// sh 'pigz -f -d ../data/merged/${MERGEDKGNAME_BASE}.nt.gz'
// sh 'export JAVA_OPTS=-Xmx128G && ./target/universal/stage/bin/blazegraph-runner load --informat=ntriples --journal=../data/merged/${MERGEDKGNAME_BASE}.jnl --use-ontology-graph=true ../data/merged/${MERGEDKGNAME_BASE}.nt'
// sh 'pigz -f ../data/merged/${MERGEDKGNAME_BASE}.jnl'
// sh 'pigz -f ../data/merged/${MERGEDKGNAME_BASE}.nt'
// sh 'ls -lhd ../data/merged/${MERGEDKGNAME_BASE}.jnl.gz'
// }
// }
//}
stage('Publish') {
steps {
dir('./gitrepo') {
script {
// make sure we aren't going to clobber existing data
withCredentials([file(credentialsId: 's3cmd_kg_hub_push_configuration', variable: 'S3CMD_CFG')]) {
REMOTE_BUILD_DIR_CONTENTS = sh (
script: '. venv/bin/activate && s3cmd -c $S3CMD_CFG ls s3://kg-hub-public-data/$S3PROJECTDIR/$BUILDSTARTDATE/',
returnStdout: true
).trim()
echo "REMOTE_BUILD_DIR_CONTENTS (THIS SHOULD BE EMPTY): '${REMOTE_BUILD_DIR_CONTENTS}'"
if("${REMOTE_BUILD_DIR_CONTENTS}" != ''){
echo "Will not overwrite existing remote S3 directory: $S3PROJECTDIR/$BUILDSTARTDATE"
error("Remote build directory already exists; will not overwrite.")
} else {
echo "remote directory $S3PROJECTDIR/$BUILDSTARTDATE is empty, proceeding"
}
}
if (env.BRANCH_NAME != 'master') {
echo "Will not push if not on correct branch."
} else {
withCredentials([
file(credentialsId: 's3cmd_kg_hub_push_configuration', variable: 'S3CMD_CFG'),
file(credentialsId: 'aws_kg_hub_push_json', variable: 'AWS_JSON'),
string(credentialsId: 'aws_kg_hub_access_key', variable: 'AWS_ACCESS_KEY_ID'),
string(credentialsId: 'aws_kg_hub_secret_key', variable: 'AWS_SECRET_ACCESS_KEY')]) {
//
// make $BUILDSTARTDATE/ directory and sync to s3 bucket
//
sh 'mkdir $BUILDSTARTDATE/'
//sh 'cp -p data/merged/${MERGEDKGNAME_BASE}.nt.gz $BUILDSTARTDATE/${MERGEDKGNAME_BASE}.nt.gz'
sh 'cp -p data/merged/merged-kg.tar.gz $BUILDSTARTDATE/${MERGEDKGNAME_BASE}.tar.gz'
//sh 'cp -p data/merged/${MERGEDKGNAME_BASE}.jnl.gz $BUILDSTARTDATE/${MERGEDKGNAME_BASE}.jnl.gz'
// transformed data
sh 'rm -fr data/transformed/.gitkeep'
sh 'cp -pr data/transformed $BUILDSTARTDATE/'
sh 'cp -pr data/raw $BUILDSTARTDATE/'
sh 'cp Jenkinsfile $BUILDSTARTDATE/'
// copy that NEAT config, too
// but update its buildname internally first
sh """ sed -i '/ - path:/ s/BUILDNAME/$BUILDSTARTDATE/' neat.yaml """
sh """ sed -i '/ s3_bucket_dir:/ s/kg-idg/$S3PROJECTDIR\\/$BUILDSTARTDATE\\/graph_ml/' neat.yaml """
sh 'cp neat.yaml $BUILDSTARTDATE/'
// stats dir
sh 'mkdir $BUILDSTARTDATE/stats/'
sh 'cp -p *_stats.yaml $BUILDSTARTDATE/stats/'
sh 'cp templates/README.build $BUILDSTARTDATE/README'
// build the index, then upload to remote
sh '. venv/bin/activate && multi_indexer -v --directory $BUILDSTARTDATE --prefix https://kg-hub.berkeleybop.io/$S3PROJECTDIR/$BUILDSTARTDATE -x -u'
sh '. venv/bin/activate && s3cmd -c $S3CMD_CFG put -pr --acl-public --cf-invalidate $BUILDSTARTDATE s3://kg-hub-public-data/$S3PROJECTDIR/'
sh '. venv/bin/activate && s3cmd -c $S3CMD_CFG rm -r s3://kg-hub-public-data/$S3PROJECTDIR/current/'
sh '. venv/bin/activate && s3cmd -c $S3CMD_CFG put -pr --acl-public --cf-invalidate $BUILDSTARTDATE/* s3://kg-hub-public-data/$S3PROJECTDIR/current/'
// make index for project dir
sh '. venv/bin/activate && multi_indexer -v --prefix https://kg-hub.berkeleybop.io/$S3PROJECTDIR/ -b kg-hub-public-data -r $S3PROJECTDIR -x'
sh '. venv/bin/activate && s3cmd -c $S3CMD_CFG put -pr --acl-public --cf-invalidate ./index.html s3://kg-hub-public-data/$S3PROJECTDIR/'
// Invalidate the CDN now that the new files are up.
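// (awscli gates the cloudfront commands behind a preview flag, hence the
// minimal config file written below)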
sh 'echo "[preview]" > ./awscli_config.txt && echo "cloudfront=true" >> ./awscli_config.txt'
sh '. venv/bin/activate && AWS_CONFIG_FILE=./awscli_config.txt python3.9 ./venv/bin/aws cloudfront create-invalidation --distribution-id $AWS_CLOUDFRONT_DISTRIBUTION_ID --paths "/*"'
// Should now appear at:
// https://kg-hub.berkeleybop.io/[artifact name]
}
}
}
}
}
}
}
post {
always {
echo 'In always'
echo 'Cleaning workspace...'
cleanWs()
}
success {
echo 'I succeeded!'
}
unstable {
echo 'I am unstable :/'
}
failure {
echo 'I failed :('
}
changed {
echo 'Things were different before...'
}
}
}