-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathingest_operations.py
444 lines (351 loc) · 15.8 KB
/
ingest_operations.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
import connexion
from flask import Flask
import os
import re
import traceback
import urllib.parse
import auth
import katsu_ingest
import htsget_ingest
from opa_ingest import remove_user_from_dataset, add_user_to_dataset
import config
import tempfile
import uuid
import json
# Flask application object; the ``@app.route`` decorators on the handlers in
# this module are applied through it.
app = Flask(__name__)
# Symbolic names for the numeric result codes; each value is one of the keys
# that generateResponse() knows how to translate into a message and status.
ERROR_CODES = dict(
    SUCCESS=0,
    UNAUTHORIZED=1,
    VALIDATION=2,
    PROGRAMEXISTS=3,
    INTERNAL=4,
    AUTHORIZATIONERR=5,
)
def generateResponse(result, response_code):
    """Wrap *result* in the response envelope that belongs to *response_code*.

    Args:
        result: Payload to place under the ``"result"`` key.
        response_code: Numeric code in the range 0-5.

    Returns:
        A ``(body, http_status)`` tuple, where ``body`` holds the result, the
        numeric code and the matching human-readable message.

    Raises:
        KeyError: If *response_code* is not one of the known codes.
    """
    # code -> (human-readable message, HTTP status)
    messages_and_statuses = {
        0: ("Success", 200),
        1: ("Unauthorized", 403),
        2: ("Validation error", 422),
        3: ("Program exists", 422),
        4: ("Internal CanDIG error", 500),
        5: ("Authorization error", 401),
    }
    message, http_status = messages_and_statuses[response_code]
    body = {
        "result": result,
        "response_code": response_code,
        "response_message": message,
    }
    return body, http_status
def get_headers():
    """Build outgoing request headers from the incoming bearer token.

    Returns:
        On success, a dict with ``Authorization`` and ``Content-Type`` entries.
        If the ``Authorization`` header is missing or does not start with
        ``"Bearer "``, the ``(body, status)`` pair from ``generateResponse``
        is returned instead, so callers must check which shape they received.
    """
    incoming = connexion.request.headers
    if "Authorization" not in incoming:
        return generateResponse("Bearer token required", ERROR_CODES["UNAUTHORIZED"])

    outgoing = {}
    try:
        # New auth model (currently disabled):
        # refresh_token = connexion.request.headers["Authorization"].split("Bearer ")[1]
        # token = auth.get_bearer_from_refresh(refresh_token)
        authorization = incoming["Authorization"]
        if not authorization.startswith("Bearer "):
            return generateResponse("Invalid bearer token", ERROR_CODES["UNAUTHORIZED"])
        bearer = authorization.split("Bearer ")[1]
        outgoing["Authorization"] = f"Bearer {bearer}"
    except Exception as err:
        if "Invalid bearer token" in str(err):
            return generateResponse("Bearer token invalid or unauthorized", ERROR_CODES["UNAUTHORIZED"])
        return generateResponse("Unknown error during authorization", ERROR_CODES["AUTHORIZATIONERR"])

    outgoing["Content-Type"] = "application/json"
    return outgoing
def check_default_site_admin(response):
    """Append a warning to *response* while the default site admin is still set.

    *response* is modified in place: a ``"warnings"`` list is created if it is
    not already present, and one message is appended to it. Nothing happens
    when ``auth.is_default_site_admin_set()`` is false.
    """
    if not auth.is_default_site_admin_set():
        return
    if "warnings" not in response:
        response["warnings"] = []
    response["warnings"].append(f"Default site administrator {os.getenv('DEFAULT_SITE_ADMIN_USER')} is still configured. Use the /ingest/site-role/site_admin endpoint to set a different site admin.")
# API endpoints
def get_service_info():
    """Return the static service-info document, including ``config.VERSION``."""
    organization = {
        "name": "CanDIG",
        "url": "https://www.distributedgenomics.ca",
    }
    service_info = {
        "id": "org.candig.ingest",
        "name": "CanDIG Ingest Passthrough Service",
        "description": "A microservice used as a processing intermediary for ingesting data into Katsu and htsget",
        "organization": organization,
        "version": config.VERSION,
    }
    return service_info
####
# S3 credentials
####
async def add_s3_credential():
    """Store the S3 credential described by the JSON request body.

    The body must carry ``endpoint``, ``bucket``, ``access_key`` and
    ``secret_key``; the bearer token comes from the Authorization header.
    Returns whatever ``auth.store_s3_credential`` returns.
    """
    payload = await connexion.request.json()
    authorization = connexion.request.headers['Authorization']
    bearer = authorization.split("Bearer ")[1]
    return auth.store_s3_credential(
        payload["endpoint"],
        payload["bucket"],
        payload["access_key"],
        payload["secret_key"],
        bearer,
    )
@app.route('/s3-credential/endpoint/<path:endpoint_id>/bucket/<path:bucket_id>')
def get_s3_credential(endpoint_id, bucket_id):
    """Look up the stored S3 credential for an endpoint/bucket pair.

    Returns whatever ``auth.get_s3_credential`` returns.
    """
    bearer = connexion.request.headers['Authorization'].split("Bearer ")[1]
    # Every non-word character of the endpoint is replaced by "_" before lookup.
    sanitized_endpoint = re.sub(r"\W", "_", endpoint_id)
    return auth.get_s3_credential(sanitized_endpoint, bucket_id, bearer)
@app.route('/s3-credential/endpoint/<path:endpoint_id>/bucket/<path:bucket_id>')
def delete_s3_credential(endpoint_id, bucket_id):
    """Remove the stored S3 credential for an endpoint/bucket pair.

    Returns whatever ``auth.remove_s3_credential`` returns.
    """
    bearer = connexion.request.headers['Authorization'].split("Bearer ")[1]
    # Every non-word character of the endpoint is replaced by "_" before lookup.
    sanitized_endpoint = re.sub(r"\W", "_", endpoint_id)
    return auth.remove_s3_credential(sanitized_endpoint, bucket_id, bearer)
####
# Site roles
####
@app.route('/site-role/<path:role_type>')
def list_role(role_type):
    """Return the ``(result, status)`` pair OPA reports for *role_type*.

    Any exception raised along the way is converted into
    ``({"error": <message>}, 500)``.
    """
    try:
        bearer = connexion.request.headers['Authorization'].split("Bearer ")[1]
        members, http_status = auth.get_role_type_in_opa(role_type, bearer)
    except Exception as err:
        return {"error": str(err)}, 500
    return members, http_status
@app.route('/site-role/<path:role_type>')
async def update_role(role_type):
    """Replace the members of *role_type* with the JSON request body.

    Returns the ``(result, status)`` pair from ``auth.set_role_type_in_opa``.
    Exceptions raised after the body has been read are converted into
    ``({"error": <message>}, 500)``.
    """
    members = await connexion.request.json()
    try:
        bearer = connexion.request.headers['Authorization'].split("Bearer ")[1]
        outcome, http_status = auth.set_role_type_in_opa(role_type, members, bearer)
    except Exception as err:
        return {"error": str(err)}, 500
    return outcome, http_status
@app.route('/site-role/<path:role_type>/email/<path:email>')
def is_user_in_role(role_type, email):
    """Report whether *email* is listed under *role_type*.

    Returns ``(bool, 200)`` when the role lookup succeeds, otherwise the
    lookup's own ``(result, status)`` pair. Exceptions are converted into
    ``({"error": <message>}, 500)``.
    """
    try:
        bearer = connexion.request.headers['Authorization'].split("Bearer ")[1]
        payload, http_status = auth.get_role_type_in_opa(role_type, bearer)
        if http_status != 200:
            return payload, http_status
        is_member = email in payload[role_type]
        return is_member, 200
    except Exception as err:
        return {"error": str(err)}, 500
@app.route('/site-role/<path:role_type>/email/<path:email>')
def add_user_to_role(role_type, email):
    """Add *email* to *role_type* unless it is already a member.

    Returns the ``(result, status)`` pair of the write when a write happened,
    otherwise the pair from the initial role lookup. Exceptions are converted
    into ``({"error": <message>}, 500)``.
    """
    try:
        bearer = connexion.request.headers['Authorization'].split("Bearer ")[1]
        payload, http_status = auth.get_role_type_in_opa(role_type, bearer)
        already_member = http_status == 200 and email in payload[role_type]
        if http_status == 200 and not already_member:
            members = payload[role_type]
            members.append(email)
            payload, http_status = auth.set_role_type_in_opa(role_type, members, bearer)
        return payload, http_status
    except Exception as err:
        return {"error": str(err)}, 500
@app.route('/site-role/<path:role_type>/email/<path:email>')
def remove_user_from_role(role_type, email):
    """Remove *email* from *role_type*.

    Returns the ``(result, status)`` pair of the write on success, a 404 error
    when *email* is not a member, or the lookup's own pair when the role
    lookup fails. Exceptions are converted into
    ``({"error": <message>}, 500)``.
    """
    try:
        bearer = connexion.request.headers['Authorization'].split("Bearer ")[1]
        payload, http_status = auth.get_role_type_in_opa(role_type, bearer)
        if http_status != 200:
            return payload, http_status
        members = payload[role_type]
        if email not in members:
            return {"error": f"User {email} not found in role {role_type}"}, 404
        members.remove(email)
        payload, http_status = auth.set_role_type_in_opa(role_type, members, bearer)
        return payload, http_status
    except Exception as err:
        return {"error": str(err)}, 500
####
# Data ingest
####
async def add_genomic_linkages():
    """Validate genomic linkage data and queue it for ingest.

    The JSON request body is checked with ``htsget_ingest.check_genomic_data``.
    When that check returns 200, the checked data is placed on the ingest
    queue and the response becomes ``{"queue_id": <id>}``; otherwise the
    check's own response is passed through.

    Query parameters:
        do_not_index: Optional flag. Absent, empty, ``0``, ``false``, ``no``
            and ``off`` (case-insensitive) mean False; any other value means
            True.

    Returns:
        A ``(response, status_code)`` tuple.
    """
    dataset = await connexion.request.json()

    # Query-string values arrive as text, so bool("false") would be True.
    # Interpret the usual "off" spellings explicitly instead.
    raw_flag = connexion.request.query_params.get("do_not_index", False)
    if isinstance(raw_flag, str):
        do_not_index = raw_flag.strip().lower() not in ("", "0", "false", "no", "off")
    else:
        do_not_index = bool(raw_flag)

    token = connexion.request.headers['Authorization'].split("Bearer ")[1]
    response, status_code = htsget_ingest.check_genomic_data(dataset, token)
    if status_code == 200:
        ingest_uuid = add_to_queue({"htsget": response, "do_not_index": do_not_index})
        response = {"queue_id": ingest_uuid}
    check_default_site_admin(response)
    return response, status_code
async def add_clinical_donors():
    """Validate clinical donor data and queue it for ingest.

    The JSON request body is checked with
    ``katsu_ingest.prep_check_clinical_data``. When that check returns 200,
    the prepared data is placed on the ingest queue and the response becomes
    ``{"queue_id": <id>}``; otherwise the check's own response is passed
    through.

    Query parameters:
        batch_size: Optional integer, default 1000.

    Returns:
        A ``(response, status_code)`` tuple. A ``batch_size`` that cannot be
        parsed as an integer yields a 400 error instead of an unhandled
        exception.
    """
    dataset = await connexion.request.json()

    raw_batch_size = connexion.request.query_params.get("batch_size", 1000)
    try:
        batch_size = int(raw_batch_size)
    except (TypeError, ValueError):
        return {"error": f"batch_size must be an integer, got {raw_batch_size!r}"}, 400

    token = connexion.request.headers['Authorization'].split("Bearer ")[1]
    response, status_code = katsu_ingest.prep_check_clinical_data(dataset, token, batch_size)
    if status_code == 200:
        ingest_uuid = add_to_queue({"katsu": response})
        response = {"queue_id": ingest_uuid}
    check_default_site_admin(response)
    return response, status_code
def add_to_queue(ingest_json):
    """Write *ingest_json* into the daemon's ``to_ingest`` directory.

    The payload is serialised to a temporary file, which is then renamed to
    ``<DAEMON_PATH>/to_ingest/<queue_id>``. A matching results file containing
    ``{"status": "still in queue"}`` is written to
    ``<DAEMON_PATH>/results/<queue_id>``.

    Args:
        ingest_json: JSON-serialisable payload to queue.

    Returns:
        The newly generated queue id (a UUID1 string).
    """
    queue_id = str(uuid.uuid1())
    queue_path = os.path.join(config.DAEMON_PATH, "to_ingest", queue_id)
    with tempfile.NamedTemporaryFile(delete_on_close=False, mode="w") as f:
        json.dump(ingest_json, f, indent=4)
        # Close (and thereby flush) before the rename, so the file is never
        # visible in the queue directory with buffered content still missing.
        # delete_on_close=False keeps the file on disk after this close.
        f.close()
        os.rename(f.name, queue_path)
    results_path = os.path.join(config.DAEMON_PATH, "results", queue_id)
    with open(results_path, "w") as f:
        json.dump({"status": "still in queue"}, f)
    return queue_id
@app.route('/status/<path:queue_id>')
def get_ingest_status(queue_id):
    """Return the stored results JSON for *queue_id*.

    Args:
        queue_id: Name of a file in ``<DAEMON_PATH>/results``.

    Returns:
        ``(json_data, 200)`` when the results file exists and parses as JSON,
        otherwise ``({"error": ...}, 404)``.
    """
    not_found = ({"error": f"no such queue_id {queue_id}"}, 404)

    # queue_id comes straight from the URL. Accept only a bare file name so
    # that path separators or ".." cannot point outside the results directory.
    if queue_id in ("", ".", "..") or os.path.basename(queue_id) != queue_id:
        return not_found

    results_path = os.path.join(config.DAEMON_PATH, "results", queue_id)
    try:
        with open(results_path) as f:
            json_data = json.load(f)
    except (OSError, ValueError):
        # OSError: missing or unreadable file. ValueError: invalid JSON or
        # undecodable content.
        return not_found
    # The results file is left in place (its removal is disabled).
    return json_data, 200
####
# Program authorizations
####
def list_program_authorizations():
    """Return the ``(response, status)`` pair from ``auth.list_programs_in_opa``."""
    bearer = connexion.request.headers['Authorization'].split("Bearer ")[1]
    programs, http_status = auth.list_programs_in_opa(bearer)
    return programs, http_status
async def add_program_authorization():
    """Register the program given in the JSON request body with OPA.

    The response from ``auth.add_program_to_opa`` is passed through
    ``check_default_site_admin`` before being returned with its status code.
    """
    program_definition = await connexion.request.json()
    bearer = connexion.request.headers['Authorization'].split("Bearer ")[1]
    outcome, http_status = auth.add_program_to_opa(program_definition, bearer)
    check_default_site_admin(outcome)
    return outcome, http_status
@app.route('/program/<path:program_id>')
def get_program_authorization(program_id):
    """Return the ``(response, status)`` pair OPA reports for *program_id*."""
    bearer = connexion.request.headers['Authorization'].split("Bearer ")[1]
    program, http_status = auth.get_program_in_opa(program_id, bearer)
    return program, http_status
@app.route('/program/<path:program_id>')
def remove_program(program_id):
    """Delete *program_id* from OPA, katsu and htsget.

    All three deletions are attempted before any status is inspected.

    Returns:
        * ``({"message": ...}, 404)`` when OPA reports 404.
        * ``(response, 200)`` with a success message when no service reported
          an error.
        * ``(response, 500)`` with a per-service ``"errors"`` mapping
          otherwise.
    """
    bearer = connexion.request.headers['Authorization'].split("Bearer ")[1]
    outcome = {"errors": {}}
    check_default_site_admin(outcome)

    opa_response, opa_status = auth.remove_program_from_opa(program_id, bearer)
    katsu_response = katsu_ingest.delete_program(program_id, bearer)
    htsget_response = htsget_ingest.delete_program(program_id, bearer)

    if opa_status == 404:
        # htsget status is not included here because it doesn't have a 404 response
        return {"message": f"Program {program_id} not found"}, 404

    failures = outcome["errors"]
    if opa_status != 200:
        failures["opa"] = {"message": opa_response, "status_code": opa_status}
    # katsu answers 204 on deletion; a 404 from katsu is also not recorded as an error.
    if katsu_response.status_code not in (204, 404):
        failures["katsu"] = {"message": katsu_response.text, "status_code": katsu_response.status_code}
    if htsget_response.status_code != 200:
        failures["htsget"] = {"message": htsget_response.text, "status_code": htsget_response.status_code}

    if failures:
        return outcome, 500
    del outcome["errors"]
    outcome["message"] = f"Program {program_id} successfully deleted"
    return outcome, 200
####
# Pending users: approving a pending user creates a CanDIG-authorized user
####
def add_pending_user():
    """Return the ``(response, status)`` pair from ``auth.add_pending_user_to_opa``."""
    bearer = connexion.request.headers['Authorization'].split("Bearer ")[1]
    outcome, http_status = auth.add_pending_user_to_opa(bearer)
    return outcome, http_status
def list_pending_users():
    """Return OPA's pending-user listing wrapped as ``{"results": ...}``."""
    bearer = connexion.request.headers['Authorization'].split("Bearer ")[1]
    pending, http_status = auth.list_pending_users_in_opa(bearer)
    body = {"results": pending}
    return body, http_status
@app.route('/user/pending/<path:user_id>')
def approve_pending_user(user_id):
    """Approve one pending user.

    *user_id* is URL-decoded (``+`` becomes a space) before it is passed to
    ``auth.approve_pending_user_in_opa``, whose ``(response, status)`` pair is
    returned.
    """
    bearer = connexion.request.headers['Authorization'].split("Bearer ")[1]
    decoded_user = urllib.parse.unquote_plus(user_id)
    outcome, http_status = auth.approve_pending_user_in_opa(decoded_user, bearer)
    return outcome, http_status
@app.route('/user/pending/<path:user_id>')
def reject_pending_user(user_id):
    """Reject one pending user.

    *user_id* is URL-decoded (``+`` becomes a space) before it is passed to
    ``auth.reject_pending_user_in_opa``, whose ``(response, status)`` pair is
    returned.
    """
    bearer = connexion.request.headers['Authorization'].split("Bearer ")[1]
    decoded_user = urllib.parse.unquote_plus(user_id)
    outcome, http_status = auth.reject_pending_user_in_opa(decoded_user, bearer)
    return outcome, http_status
async def approve_pending_users():
    """Approve every user id listed in the JSON request body.

    Each id is passed to ``auth.approve_pending_user_in_opa``; ids whose
    approval does not return 200 are collected.

    Returns:
        * ``({"message": ...}, 401)`` naming the ids that could not be
          approved, if there were any.
        * Otherwise the ``(response, status)`` pair of the last approval.
        * ``({"message": ...}, 200)`` when the body is an empty list, which
          previously raised an UnboundLocalError.
    """
    users = await connexion.request.json()
    token = connexion.request.headers['Authorization'].split("Bearer ")[1]

    # Defaults for an empty request, where the loop body never runs.
    response = {"message": "No user IDs were provided"}
    status_code = 200

    rejected = []
    for user_id in users:
        response, status_code = auth.approve_pending_user_in_opa(user_id, token)
        if status_code != 200:
            rejected.append(user_id)

    if rejected:
        status_code = 401
        response = {"message": f"The following requested user IDs could not be approved: {rejected}"}
    return response, status_code
def clear_pending_users():
    """Return the ``(response, status)`` pair from ``auth.clear_pending_users_in_opa``."""
    bearer = connexion.request.headers['Authorization'].split("Bearer ")[1]
    outcome, http_status = auth.clear_pending_users_in_opa(bearer)
    return outcome, http_status
####
# DAC authorization for users
####
def list_programs_for_self(token):
    """Return the calling user's own program authorizations.

    Returns:
        A ``(result, status_code)`` tuple. ``result`` is the list of values
        from the user's ``"programs"`` mapping, unless OPA answers 404 for the
        user. In that case it is the string ``"Pending"`` or
        ``"Unauthorized"``, depending on ``auth.is_self_pending``, whose
        status code is returned.
    """
    own_record, status_code = auth.get_self_in_opa(token)
    if status_code != 404:
        print(own_record)
        # NB: the result is a list if authorized, and a string otherwise
        return list(own_record["programs"].values()), status_code

    # Unknown user: next check whether the user is pending.
    is_pending, status_code = auth.is_self_pending(token)
    # NB: the result is a string if unauthorized or pending, and a list otherwise
    label = "Pending" if is_pending else "Unauthorized"
    return label, status_code
@app.route('/user/<path:user_id>/authorize')
def list_programs_for_user(user_id):
    """List program authorizations for *user_id*, or for the caller if ``"me"``.

    Returns:
        ``({"results": ...}, status_code)``. For a named user whose lookup
        does not return 200, the lookup's own ``(response, status_code)``
        pair is returned unchanged.
    """
    bearer = connexion.request.headers['Authorization'].split("Bearer ")[1]
    if user_id != "me":
        decoded_user = urllib.parse.unquote_plus(user_id)
        user_record, status_code = auth.get_user_in_opa(decoded_user, bearer)
        if status_code != 200:
            return user_record, status_code
        results = list(user_record["programs"].values())
    else:
        # Grab the caller's own authorization
        results, status_code = list_programs_for_self(bearer)
    print(results)
    return {"results": results}, status_code
@app.route('/user/<path:user_id>/authorize')
async def authorize_program_for_user(user_id):
    """Record a program authorization for *user_id*.

    The JSON request body must contain ``"program_id"``; the whole body is
    stored under that id in the user's ``"programs"`` mapping.

    Returns:
        * The failing lookup's ``(response, status_code)`` pair when either
          the user lookup or the program listing does not return 200.
        * ``({"error": ...}, 404)`` when the program is not known to OPA.
          This error was previously returned without a status code.
        * Otherwise the ``(response, status_code)`` pair from
          ``auth.write_user_in_opa``.
    """
    program_dict = await connexion.request.json()
    token = connexion.request.headers['Authorization'].split("Bearer ")[1]
    user_name = urllib.parse.unquote_plus(user_id)

    response, status_code = auth.get_user_in_opa(user_name, token)
    if status_code != 200:
        return response, status_code

    # we need to check to see if the program even exists in the system
    all_programs, status_code = auth.list_programs_in_opa(token)
    if status_code != 200:
        return all_programs, status_code
    if program_dict["program_id"] not in all_programs:
        return {"error": f"Program {program_dict['program_id']} does not exist in {all_programs}"}, 404

    response["programs"][program_dict["program_id"]] = program_dict
    response, status_code = auth.write_user_in_opa(response, token)
    return response, status_code
@app.route('/user/<path:user_id>/authorize/<path:program_id>')
def get_program_for_user(user_id, program_id):
    """Check whether *user_id* holds an authorization for *program_id*.

    Returns:
        * The user lookup's ``(response, status_code)`` pair when it does not
          return 200.
        * ``(program_id, 200)`` when the program is among the user's programs.
        * ``({"error": ...}, 404)`` otherwise. This error was previously
          returned with status 200.
    """
    token = connexion.request.headers['Authorization'].split("Bearer ")[1]
    user_name = urllib.parse.unquote_plus(user_id)
    response, status_code = auth.get_user_in_opa(user_name, token)
    if status_code != 200:
        return response, status_code
    if program_id in response["programs"]:
        # NOTE(review): this returns the program id itself, not the stored
        # authorization entry - confirm that is what clients expect.
        return program_id, 200
    return {"error": f"No program {program_id} found for user"}, 404
@app.route('/user/<path:user_id>/authorize/<path:program_id>')
def remove_program_for_user(user_id, program_id):
    """Remove *program_id* from the authorizations held by *user_id*.

    Returns:
        * The user lookup's ``(response, status_code)`` pair when it does not
          return 200.
        * The ``(response, status_code)`` pair from ``auth.write_user_in_opa``
          after the program has been removed.
        * ``({"error": ...}, 404)`` when the user has no such program. This
          error was previously returned with status 200.
    """
    token = connexion.request.headers['Authorization'].split("Bearer ")[1]
    user_name = urllib.parse.unquote_plus(user_id)
    response, status_code = auth.get_user_in_opa(user_name, token)
    if status_code != 200:
        return response, status_code
    if program_id not in response["programs"]:
        return {"error": f"No program {program_id} found for user"}, 404
    response["programs"].pop(program_id)
    response, status_code = auth.write_user_in_opa(response, token)
    return response, status_code
@app.route('/get-token')
def get_token():
    """Return the caller's ``session_id`` cookie as a token.

    Returns:
        ``({"token": <session_id>}, 200)`` on success. When the request has no
        cookie support, or the ``session_id`` cookie is missing, an
        ``{"error": ...}`` body is returned, also with status 200. A missing
        cookie previously raised an unhandled KeyError.
    """
    # Attempt to grab the token via session_id
    if not hasattr(connexion.request, 'cookies'):
        return {'error': 'Unable to use the get-token endpoint without cookies'}, 200
    try:
        token = connexion.request.cookies['session_id']
    except KeyError:
        # Reported the same way as the no-cookies case above (error body, 200).
        return {'error': 'Unable to use the get-token endpoint without a session_id cookie'}, 200
    return {"token": token}, 200
    # Uncomment the below to exchange for a new token and return
    # that, instead
    # try:
    #     response = auth.get_refresh_token(token)
    #     if "error" in response:
    #         return {"error": response["error"]}, 500
    #     return {"token": response["refresh_token"]}, 200
    # except Exception as e:
    #     return {"error": str(e)}, 500