Merge pull request #69 from CanDIG/stable-candidate-v3.1.0

v3.1.0: Data model 3, site curator role
CanDIG · Oct 4, 2024 · 2b2a26b · 2b2a26b
2 parents c7b7ccc + 9fbb78b
commit 2b2a26b
Show file tree

Hide file tree

Showing 15 changed files with 430 additions and 265 deletions.
diff --git a/.github/workflows/dispatch-actions.yml b/.github/workflows/dispatch-actions.yml
@@ -1,7 +1,8 @@
 name: Submodule PR
 on:
-    push:
+    pull_request:
         branches: [develop]
+        types: [closed]
 jobs:
   CanDIG-dispatch:
     runs-on: ubuntu-latest
@@ -14,17 +15,18 @@ jobs:
             - name: Check out repository code
               uses: actions/checkout@v4
             - name: get PR data
-              uses: actions/github-script@v7
-              id: get_pr_data
-              with:
-                script: |
-                  return (
-                    await github.rest.repos.listPullRequestsAssociatedWithCommit({
-                      commit_sha: context.sha,
-                      owner: context.repo.owner,
-                      repo: context.repo.repo,
-                    })
-                  ).data[0];
+              shell: python
+              run: |
+                  # Copy the PR title and number from the event payload into GITHUB_ENV for later steps.
+                  import json
+                  import os
+                  with open(os.environ['GITHUB_EVENT_PATH']) as fh:
+                      event = json.load(fh)
+                  escaped = event['pull_request']['title'].replace("'", '"')
+                  print(escaped)
+                  with open(os.environ['GITHUB_ENV'], 'a') as fh:
+                      print(f'PR_TITLE={escaped}', file=fh)
+                      print(f"PR_NUMBER={event['number']}", file=fh)
             - name: Create PR in CanDIGv2
               id: make_pr
               uses: CanDIG/github-action-pr-expanded@v4
@@ -33,7 +35,7 @@ jobs:
                   parent_repository: ${{ env.PARENT_REPOSITORY }}
                   checkout_branch: ${{ env.CHECKOUT_BRANCH}}
                   pr_against_branch: ${{ env.PR_AGAINST_BRANCH }}
-                  pr_title: '${{ github.repository }} merging: ${{ fromJson(steps.get_pr_data.outputs.result).title }}'
-                  pr_description: "PR triggered by update to develop branch on ${{ github.repository }}. Commit hash: `${{ github.sha }}`. PR link: [#${{ fromJson(steps.get_pr_data.outputs.result).number }}](https://github.com/${{ github.repository }}/pull/${{ fromJson(steps.get_pr_data.outputs.result).number }})"
+                  pr_title: "${{ github.repository }} merging: ${{ env.PR_TITLE }}"
+                  pr_description: "PR triggered by update to develop branch on ${{ github.repository }}. Commit hash: `${{ github.sha }}`. PR link: [#${{ env.PR_NUMBER }}](https://github.com/${{ github.repository }}/pull/${{ env.PR_NUMBER }})"
                   owner: ${{ env.OWNER }}
                   submodule_path: lib/opa/opa
diff --git a/README.md b/README.md
@@ -9,13 +9,16 @@ Interactions with Vault are handled by [vault.rego](permissions_engine/vault.reg
 
 Authorization to endpoints in the OPA service itself is defined in [authz.rego](permissions_engine/authz.rego).
 
-* Token-based auth: There are two api tokens defined: the root token allows any path to be accessed, while the service token only allows the `permissions/datasets` and `permissions/allowed` endpoints to be viewed.
-
-* Role-based auth: Roles for the site are defined in the format given in [site_roles.json](defaults/site_roles.json). if the User is defined as a site admin, they are allowed to view any endpoint. Other site-based roles can be similarly defined.
+* Role-based auth: Roles for the site are defined in the format given in [site_roles.json](defaults/site_roles.json).
+  * If the User is defined as a site admin, they are allowed to access any endpoint.
+  * If the User is defined as a site curator, they are allowed to use any of the curate method/path combinations defined in [paths.json](defaults/paths.json) for all programs known to the system.
+  * Other site-based roles can be similarly defined.
 
 * Endpoint-based auth: Any service can use the `/service/verified` endpoint. Other specific endpoints can be similarly allowed.
 
-* Program-based and user-based authorizations are defined at the `permissions` path: For a given User and the method of accessing a service (method, path), the `/permissions/datasets` endpoint returns the list of programs that user is allowed to access for that method/path, while the `/permissions/allowed` endpoint returns True if either the user is a site admin or the user is allowed to access that method/path. The following two types of authorizations are available:
+* An authenticated and authorized user is allowed to find out their own user ID, the key of which is defined system-wide in the `.env` file as `CANDIG_USER_KEY`. By default, this is the user's email address. This is the user ID by which user-based and program-based authorizations are keyed.
+
+* Program-based and user-based authorizations are defined at the `permissions` path: A User can access these OPA endpoints to introspect their own authorizations. For a given method of accessing a service (method, path), the `/permissions/datasets` endpoint returns the list of programs that the User is allowed to access for that method/path, while the `/permissions/allowed` endpoint returns True if either the User is a site admin or the User is allowed to access that method/path. The following two types of authorizations are available:
 
   * Authorizations for roles in particular programs: users defined as team_members for a program are allowed to access the read paths specified in [paths.json](defaults/paths.json), while users defined as program_curators are allowed to access the curate and delete paths. Note: read and curate paths are separately allowed: if a user should be allowed to both read and curate, they should be in both the team_members and program_curators groups. Program authorizations can be created, edited, and deleted through the ingest microservice. Default test examples can be found in [programs.json](defaults/programs.json).
 

diff --git a/defaults/paths.json b/defaults/paths.json
@@ -2,8 +2,8 @@
     "paths": {
         "read": {
             "get": [
-                "/v2/discovery/?.*",
-                "/v2/authorized/?.*",
+                "/v3/discovery/?.*",
+                "/v3/authorized/?.*",
                 "/htsget/v1/variants/?.*",
                 "/htsget/v1/variants/search",
                 "/htsget/v1/reads/?.*",
@@ -21,20 +21,27 @@
         },
         "curate": {
             "get": [
+                "/ingest/?.*",
                 "/htsget/v1/variants/?.*/index",
                 "/htsget/v1/variants/?.*/verify",
                 "/htsget/v1/reads/?.*/index",
                 "/htsget/v1/reads/?.*/verify"
             ],
             "post": [
-                "/ingest/?.*",
+                "/ingest/s3-credential/?.*",
+                "/ingest/program/?.*",
+                "/ingest/user/?.*",
+                "/ingest/genomic",
+                "/ingest/clinical",
                 "/ga4gh/drs/v1/?.*",
-                "/v2/ingest/?.*"
+                "/v3/ingest/?.*"
             ],
             "delete": [
-                "/ingest/?.*",
+                "/ingest/s3-credential/?.*",
+                "/ingest/program/?.*",
+                "/ingest/user/?.*",
                 "/ga4gh/drs/v1/?.*",
-                "/v2/ingest/?.*"
+                "/v3/ingest/?.*"
             ]
         }
     }

diff --git a/defaults/site_roles.json b/defaults/site_roles.json
@@ -4,6 +4,7 @@
             "SITE_ADMIN_USER"
         ],
         "curator": [
+            "USER2"
         ],
         "local_team": [
             "USER1"

diff --git a/entrypoint.sh b/entrypoint.sh
@@ -2,11 +2,6 @@
 
 set -Euo pipefail
 
-OPA_ROOT_TOKEN=$(cat /run/secrets/opa-root-token)
-OPA_SERVICE_TOKEN=$(cat /run/secrets/opa-service-token)
-SITE_ADMIN_USER=$(cat /run/secrets/site_admin_name)
-USER1=$(cat /run/secrets/user1_name)
-USER2=$(cat /run/secrets/user2_name)
 
 if [[ -f "/app/initial_setup" ]]; then
     # set up our default values
@@ -20,23 +15,24 @@ if [[ -f "/app/initial_setup" ]]; then
     sed -i s/USER1/$USER1/ /app/defaults/programs.json
     sed -i s/USER2/$USER2/ /app/defaults/programs.json
 
-    sed -i s/OPA_SERVICE_TOKEN/$OPA_SERVICE_TOKEN/ /app/permissions_engine/authz.rego
-    sed -i s/OPA_ROOT_TOKEN/$OPA_ROOT_TOKEN/ /app/permissions_engine/authz.rego
-
+    token=$(dd if=/dev/urandom bs=1 count=16 2>/dev/null | base64 | tr -d '\n\r+' | sed s/[^A-Za-z0-9]//g)
+    echo { \"opa_secret\": \"$token\" } > /app/permissions_engine/opa_secret.json
     # set up vault URL
     sed -i s@VAULT_URL@$VAULT_URL@ /app/permissions_engine/vault.rego
 
     echo "initializing stores"
     python3 /app/initialize_vault_store.py
     if [[ $? -eq 0 ]]; then
         rm /app/initial_setup
-        rm /app/bearer.txt
         echo "setup complete"
     else
         echo "!!!!!! INITIALIZATION FAILED, TRY AGAIN !!!!!!"
     fi
 fi
 
+# make sure that our idp is still set correctly (maybe keycloak was reinitialized)
+python3 get_vault_store_token.py
+python3 /app/initialize_idp.py
 
 while [ 0 -eq 0 ]
 do

diff --git a/get_vault_store_token.py b/get_vault_store_token.py
@@ -7,11 +7,11 @@
 
 # get the token for the opa store
 try:
-    with open("/run/secrets/opa-root-token") as f:
-        OPA_ROOT_TOKEN = f.read().strip()
+    with open("/app/permissions_engine/opa_secret.json") as f:
+        opa_json = json.load(f)
         opa_token = get_vault_token_for_service("opa")
         headers = {
-            "X-Opa": OPA_ROOT_TOKEN,
+            "X-Opa": opa_json["opa_secret"],
             "Content-Type": "application/json; charset=utf-8"
         }
         payload = f"{{\"token\": \"{opa_token}\"}}"

diff --git a/initialize_idp.py b/initialize_idp.py
@@ -0,0 +1,24 @@
+import json
+import os
+from authx.auth import add_provider_to_opa, get_user_id
+import sys
+
+## Updates Vault's opa service store with the information for our IDP
+
+token = None
+try:
+    if os.path.isfile('/app/bearer.txt'):
+        with open('/app/bearer.txt') as f:
+            token = f.read().strip()
+    if token is not None:
+        print("Updating our IDP with a new bearer token")
+        response = add_provider_to_opa(token, os.getenv("KEYCLOAK_REALM_URL"))
+        os.remove('/app/bearer.txt')
+        if get_user_id(None, token=token) is None:
+            print("IDP is incorrect: verify that Keycloak is set up and clean/build/compose opa again")
+            sys.exit(2)
+except Exception as e:
+    raise Exception(f"failed to save idp keys: {str(e)} {status_code}")
+    sys.exit(1)
+
+sys.exit(0)
diff --git a/initialize_vault_store.py b/initialize_vault_store.py
@@ -1,44 +1,42 @@
 import json
 import os
-from authx.auth import get_service_store_secret, set_service_store_secret, add_provider_to_opa, add_program_to_opa
+from authx.auth import get_service_store_secret, set_service_store_secret, add_program_to_opa, list_programs_in_opa
 import sys
 
-## Initializes Vault's opa service store with the information for our IDP and the data in site_roles.json, paths.json, programs.json
+## Initializes Vault's opa service store with the data in site_roles.json, paths.json, programs.json
 
 results = []
 
 try:
-    with open('/app/bearer.txt') as f:
-        try:
-            token = f.read().strip()
-            response, status_code = set_service_store_secret("opa", key="data", value=json.dumps({"keys":[]}))
-            response = add_provider_to_opa(token, os.getenv("KEYCLOAK_REALM_URL"))
+    response, status_code = get_service_store_secret("opa", key="paths")
+    if status_code != 200:
+        with open('/app/defaults/paths.json') as f:
+            data = f.read()
+            response, status_code = set_service_store_secret("opa", key="paths", value=data)
+            if status_code != 200:
+                raise Exception(f"failed to save paths: {response} {status_code}")
             results.append(response)
-        except Exception as e:
-            print(str(e))
-            sys.exit(1)
-
-    with open('/app/defaults/paths.json') as f:
-        data = f.read()
-        response, status_code = set_service_store_secret("opa", key="paths", value=data)
-        if status_code != 200:
-            sys.exit(3)
-        results.append(response)
 
-    with open('/app/defaults/site_roles.json') as f:
-        data = f.read()
-        response, status_code = set_service_store_secret("opa", key="site_roles", value=data)
-        if status_code != 200:
-            sys.exit(2)
-        results.append(response)
+    response, status_code = get_service_store_secret("opa", key="site_roles")
+    if status_code != 200:
+        with open('/app/defaults/site_roles.json') as f:
+            data = f.read()
+            response, status_code = set_service_store_secret("opa", key="site_roles", value=data)
+            if status_code != 200:
+                raise Exception(f"failed to save site roles: {response} {status_code}")
+            results.append(response)
 
+    current_programs, status_code = list_programs_in_opa()
+    if status_code != 200:
+        current_programs = []
     with open('/app/defaults/programs.json') as f:
         programs = json.load(f)
         for program in programs:
-            response, status_code = add_program_to_opa(programs[program])
-            if status_code != 200:
-                sys.exit(2)
-            results.append(response)
+            if programs[program] not in current_programs:
+                response, status_code = add_program_to_opa(programs[program])
+                if status_code != 200:
+                    raise Exception(f"failed to save program authz: {response} {status_code}")
+                results.append(response)
 except Exception as e:
     print(str(e))
     sys.exit(4)

diff --git a/permissions_engine/authz.rego b/permissions_engine/authz.rego
@@ -3,59 +3,12 @@ package system.authz
 # this defines authentication to have access to opa at all
 # from: https://www.openpolicyagent.org/docs/v0.22.0/security/#token-based-authentication-example
 
-rights = {
-    "admin": {
-        "path": "*"
-    },
-    "datasets": {
-        "path": ["v1", "data", "permissions", "datasets"]
-    },
-    "allowed": {
-        "path": ["v1", "data", "permissions", "allowed"]
-    },
-    "site_admin": {
-        "path": ["v1", "data", "permissions", "site_admin"]
-    },
-    "user_id": {
-        "path": ["v1", "data", "idp", "user_key"]
-    },
-    "tokenControlledAccessREMS": {
-        "path": ["v1", "data", "ga4ghPassport", "tokenControlledAccessREMS"]
-    }
-}
-
-root_token := "OPA_ROOT_TOKEN"
-service_token := "OPA_SERVICE_TOKEN"
-
-tokens = {
-    root_token : {
-        "roles": ["admin"]
-    },
-    service_token : {
-        "roles": ["datasets", "allowed", "site_admin", "user_id", "tokenControlledAccessREMS"]
-    }
-}
-
-default allow = false               # Reject requests by default.
-
-allow {                             # Allow request if...
-    some right
-    identity_rights[right]          # Rights for identity exist, and...
-    right.path == "*"               # Right.path is '*'.
-}
-
-allow {                             # Allow request if...
-    some right
-    identity_rights[right]          # Rights for identity exist, and...
-    right.path == input.path        # Right.path matches input.path.
-}
-
-x_opa := input.headers["X-Opa"][_]
+# Reject requests by default
+default allow = false
 
-identity_rights[right] {             # Right is in the identity_rights set if...
-    token := tokens[x_opa]  # Token exists for identity, and...
-    role := token.roles[_]           # Token has a role, and...
-    right := rights[role]            # Role has rights defined.
+# Site admin should be able to see anything
+allow {
+    data.permissions.site_admin == true
 }
 
 # Any service should be able to verify that a service is who it says it is:
@@ -64,8 +17,30 @@ allow {
     input.method == "POST"
 }
 
+# Opa should be able to store its vault token
+allow {
+    input.path == ["v1", "data", "store_token"]
+    input.method == "PUT"
+    input.headers["X-Opa"][_] == data.opa_secret
+}
+
 # Service-info path for healthcheck
 allow {
     input.path == ["v1", "data", "service", "service-info"]
     input.method == "GET"
 }
+
+# The authx library uses these paths:
+authx_paths = {
+    "permissions": ["v1", "data", "permissions"],
+    "user_id": ["v1", "data", "idp", "user_key"]
+}
+
+# An authorized user has a valid token (and passes in that same token for both bearer and body)
+# Authz users can access the authx paths
+allow {
+    input.path == authx_paths[_]
+    input.method == "POST"
+    data.permissions.valid_token == true
+    input.body.input.token == input.identity
+}
-Original file line number
+Diff line change
@@ Expand Up / @@ -4,6 +4,7 @@ @@
                 "SITE_ADMIN_USER"
             ],
             "curator": [
+                "USER2"
             ],
             "local_team": [
                 "USER1"
@@ Expand Down @@