Merge pull request #3778 from consideRatio/pr/cleanup-misc
Cleanup config connector historically used to provision gcp buckets
consideRatio authored Mar 6, 2024
2 parents c61c6e4 + 26b21ab commit 85b14f4
Showing 14 changed files with 52 additions and 208 deletions.
9 changes: 3 additions & 6 deletions config/clusters/2i2c/dask-staging.values.yaml
@@ -11,12 +11,6 @@ basehub:
hosts:
- dask-staging.2i2c.cloud
custom:
cloudResources:
provider: gcp
gcp:
projectId: two-eye-two-see
scratchBucket:
enabled: true
2i2c:
add_staff_user_ids_to_admin_users: true
add_staff_user_ids_of_type: "google"
@@ -39,6 +33,9 @@ basehub:
image:
name: pangeo/pangeo-notebook
tag: "latest"
extraEnv:
SCRATCH_BUCKET: gs://pilot-hubs-scratch-dask-staging/$(JUPYTERHUB_USER)
PANGEO_SCRATCH: gs://pilot-hubs-scratch-dask-staging/$(JUPYTERHUB_USER)
hub:
config:
JupyterHub:
20 changes: 2 additions & 18 deletions config/clusters/2i2c/ohw.values.yaml
@@ -12,21 +12,9 @@ basehub:
- oceanhackweek.2i2c.cloud
singleuser:
networkPolicy:
# In clusters with NetworkPolicy enabled, do not
# allow outbound internet access that's not DNS, HTTP or HTTPS
# For OHW, we allow 8080 (for DAP) and 22 (for ssh)
# https://github.com/2i2c-org/infrastructure/issues/549#issuecomment-892276020
enabled: true
egress:
- ports:
- port: 53
protocol: UDP
- ports:
- port: 80
protocol: TCP
- ports:
- port: 443
protocol: TCP
- ports:
- port: 8080
protocol: TCP
@@ -54,15 +42,11 @@ basehub:
cpu_limit: 2
cpu_guarantee: 0.5
extraEnv:
SCRATCH_BUCKET: gs://pilot-hubs-scratch-ohw/$(JUPYTERHUB_USER)
PANGEO_SCRATCH: gs://pilot-hubs-scratch-ohw/$(JUPYTERHUB_USER)
GH_SCOPED_CREDS_CLIENT_ID: "Iv1.9c20af442fad0d86"
GH_SCOPED_CREDS_APP_URL: "https://github.com/apps/ohw-gh-scoped-creds-app"
custom:
cloudResources:
provider: gcp
gcp:
projectId: two-eye-two-see
scratchBucket:
enabled: true
2i2c:
add_staff_user_ids_to_admin_users: true
add_staff_user_ids_of_type: "github"
7 changes: 2 additions & 5 deletions config/clusters/pangeo-hubs/coessing.values.yaml
@@ -4,9 +4,6 @@ basehub:
iam.gke.io/gcp-service-account: pangeo-hubs-coessing@pangeo-integration-te-3eea.iam.gserviceaccount.com
jupyterhub:
custom:
cloudResources:
scratchBucket:
enabled: false
2i2c:
add_staff_user_ids_to_admin_users: true
add_staff_user_ids_of_type: "google"
@@ -23,8 +20,8 @@ basehub:
secretName: https-auto-tls
singleuser:
extraEnv:
SCRATCH_BUCKET: gcs://pangeo-hubs-coessing-scratch/$(JUPYTERHUB_USER)
PANGEO_SCRATCH: gcs://pangeo-hubs-coessing-scratch/$(JUPYTERHUB_USER)
SCRATCH_BUCKET: gs://pangeo-hubs-coessing-scratch/$(JUPYTERHUB_USER)
PANGEO_SCRATCH: gs://pangeo-hubs-coessing-scratch/$(JUPYTERHUB_USER)
# No profiles
profileList: null
memory:
6 changes: 0 additions & 6 deletions config/clusters/pangeo-hubs/common.values.yaml
@@ -14,12 +14,6 @@ basehub:
2i2c:
add_staff_user_ids_to_admin_users: true
add_staff_user_ids_of_type: "github"
cloudResources:
provider: gcp
gcp:
projectId: pangeo-integration-te-3eea
scratchBucket:
enabled: true
homepage:
templateVars:
org:
4 changes: 4 additions & 0 deletions config/clusters/pangeo-hubs/prod.values.yaml
@@ -12,3 +12,7 @@ basehub:
config:
GitHubOAuthenticator:
oauth_callback_url: https://us-central1-b.gcp.pangeo.io/hub/oauth_callback
singleuser:
extraEnv:
SCRATCH_BUCKET: gs://pangeo-hubs-scratch/$(JUPYTERHUB_USER)
PANGEO_SCRATCH: gs://pangeo-hubs-scratch/$(JUPYTERHUB_USER)
4 changes: 4 additions & 0 deletions config/clusters/pangeo-hubs/staging.values.yaml
@@ -12,3 +12,7 @@ basehub:
config:
GitHubOAuthenticator:
oauth_callback_url: https://staging.us-central1-b.gcp.pangeo.io/hub/oauth_callback
singleuser:
extraEnv:
SCRATCH_BUCKET: gs://pangeo-hubs-scratch-staging/$(JUPYTERHUB_USER)
PANGEO_SCRATCH: gs://pangeo-hubs-scratch-staging/$(JUPYTERHUB_USER)
48 changes: 22 additions & 26 deletions docs/topic/infrastructure/cluster-design.md
@@ -134,29 +134,25 @@ to isolate them from each other.

## Cloud access credentials for hub users

For hub users to access cloud resources (like storage buckets), they will need
to be authorized via a [GCP ServiceAccount](https://cloud.google.com/iam/docs/service-accounts).
This is different from a [Kubernetes ServiceAccount](https://kubernetes.io/docs/tasks/configure-pod-container/configure-service-account/),
which is used to authenticate and authorize access to kubernetes resources (like spawning pods).

For dask hubs, we want to provide users with write access to at least one storage
bucket they can use for temporary data storage. User pods need to be given access to
a GCP ServiceAccount that has write permissions to this bucket. There are two ways
to do this:

1. Provide appropriate permissions to the GCP ServiceAccount used by the node the user
pods are running on. When used with [Metadata Concealment](https://cloud.google.com/kubernetes-engine/docs/how-to/protecting-cluster-metadata#overview),
user pods can read / write from storage buckets. However, this grants the same permissions
to *all* pods on the cluster, and hence is unsuitable for clusters with multiple
hubs running for different organizations.

2. Use the [GKE Cloud Config Connector](https://cloud.google.com/config-connector/docs/overview) to
create a GCP ServiceAccount + Storage Bucket for each hub via helm. This requires using
[Workload Identity](https://cloud.google.com/kubernetes-engine/docs/how-to/workload-identity) and
is incompatible with (1). This is required for multi-tenant clusters, since users on a hub
have much tighter scoped permissions.

Long-term, (2) is the appropriate way to do this for everyone. However, it affects the size
of the core node pool, since it runs some components in the cluster. For now, we use (1) for
single-tenant clusters, and (2) for multi-tenant clusters. If nobody wants a scratch GCS bucket,
neither option is required.
For hub users to access cloud resources (like storage buckets) from their user
servers, they need credentials for a cloud-specific service account, such as a
[GCP ServiceAccount].

Currently, for practical reasons, we only provision one cloud-specific service
account per hub, so all users' interactions with cloud resources appear to come
from a single identity. Note that providing, for example, two cloud service
accounts (one for hub admin users and one for non-admin users) would be a far
easier improvement than providing one for each hub user.

```{note} Technical notes
When we create a hub with access to a bucket, we create a cloud-provider
specific service account for the hub via `terraform`. We then also create a
[Kubernetes ServiceAccount] via the basehub chart's templates that references
the cloud-specific service account via an annotation. When the hub's user
server pods run with this Kubernetes ServiceAccount, a cloud-specific
controller ensures the pods get credentials that can be exchanged for temporary
credentials for the cloud-specific service account.

[gcp serviceaccount]: https://cloud.google.com/iam/docs/service-accounts
[kubernetes serviceaccount]: https://kubernetes.io/docs/tasks/configure-pod-container/configure-service-account/
```
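
To make the annotation mechanism concrete, here is a minimal sketch of such an
annotated Kubernetes ServiceAccount. The ServiceAccount name, namespace, and
GCP service account email below are hypothetical placeholders; the
`iam.gke.io/gcp-service-account` annotation key is the one GKE Workload
Identity recognizes, and the same key appears in the coessing values file
changed in this commit.

```yaml
# Minimal sketch (hypothetical names): a Kubernetes ServiceAccount annotated
# so that pods running as it can exchange their token for short-lived
# credentials of the referenced GCP service account via GKE Workload Identity.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: user-sa                # hypothetical ServiceAccount name
  namespace: example-hub       # hypothetical hub namespace
  annotations:
    # Annotation key used by GKE Workload Identity; the email is a placeholder.
    iam.gke.io/gcp-service-account: example-hub@example-project.iam.gserviceaccount.com
```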

This file was deleted.

This file was deleted.

This file was deleted.

27 changes: 0 additions & 27 deletions helm-charts/basehub/values.schema.yaml
@@ -284,7 +284,6 @@ properties:
required:
- singleuserAdmin
- singleuser
- cloudResources
- 2i2c
- auth
- jupyterhubConfigurator
@@ -451,32 +450,6 @@ properties:
additionalProperties: true
extraEnv:
type: object
cloudResources:
type: object
additionalProperties: false
required:
- provider
- gcp
- scratchBucket
properties:
provider:
enum: ["", gcp]
gcp:
type: object
additionalProperties: false
required:
- projectId
properties:
projectId:
type: string
scratchBucket:
type: object
additionalProperties: false
required:
- enabled
properties:
enabled:
type: boolean
2i2c:
type: object
additionalProperties: false
34 changes: 2 additions & 32 deletions helm-charts/basehub/values.yaml
@@ -113,12 +113,6 @@ jupyterhub:
- name: home
mountPath: /home/rstudio/shared-readwrite
subPath: _shared
cloudResources:
provider: ""
gcp:
projectId: ""
scratchBucket:
enabled: false
2i2c:
# Should 2i2c engineering staff user IDs be injected to the admin_users
# configuration of the JupyterHub's authenticator by our custom
@@ -779,31 +773,7 @@ jupyterhub:
return pod
c.KubeSpawner.modify_pod_hook = modify_pod_hook
03-cloud-storage-bucket: |
from z2jh import get_config
cloud_resources = get_config('custom.cloudResources')
scratch_bucket = cloud_resources['scratchBucket']
import os
if scratch_bucket['enabled']:
# FIXME: Support other providers too
assert cloud_resources['provider'] == 'gcp'
project_id = cloud_resources['gcp']['projectId']
release = os.environ['HELM_RELEASE_NAME']
bucket_protocol = 'gcs'
bucket_name = f'{project_id}-{release}-scratch-bucket'
env = {
'SCRATCH_BUCKET_PROTOCOL': bucket_protocol,
# Matches "daskhub.scratchBUcket.name" helm template
'SCRATCH_BUCKET_NAME': bucket_name,
# Use k8s syntax of $(ENV_VAR) to substitute env vars dynamically in other env vars
'SCRATCH_BUCKET': f'{bucket_protocol}://{bucket_name}/$(JUPYTERHUB_USER)',
'PANGEO_SCRATCH': f'{bucket_protocol}://{bucket_name}/$(JUPYTERHUB_USER)',
}
c.KubeSpawner.environment.update(env)
04-2i2c-add-staff-user-ids-to-admin-users: |
03-2i2c-add-staff-user-ids-to-admin-users: |
from z2jh import get_config
add_staff_user_ids_to_admin_users = get_config("custom.2i2c.add_staff_user_ids_to_admin_users", False)
@@ -816,7 +786,7 @@ jupyterhub:
staff_user_ids.extend(get_config("hub.config.Authenticator.admin_users", []))
c.Authenticator.admin_users = staff_user_ids
05-per-user-disk: |
04-per-user-disk: |
# Optionally, create a PVC per user - useful for per-user databases
from jupyterhub.utils import exponential_backoff
from z2jh import get_config
1 change: 1 addition & 0 deletions terraform/gcp/cluster.tf
@@ -300,6 +300,7 @@ resource "google_container_node_pool" "notebook" {

workload_metadata_config {
# Config Connector requires workload identity to be enabled (via GKE_METADATA_SERVER).
# Config Connector hasn't been used since March 2024, see https://github.com/2i2c-org/infrastructure/pull/3778.
# If config connector is not necessary, we use simple metadata concealment
# (https://cloud.google.com/kubernetes-engine/docs/how-to/protecting-cluster-metadata)
# to expose the node CA to users safely.
17 changes: 12 additions & 5 deletions terraform/gcp/projects/pilot-hubs.tfvars
@@ -49,19 +49,26 @@ dask_nodes = {
},
}

user_buckets = {}
user_buckets = {
"scratch-dask-staging" : {
"delete_after" : 7,
},
"scratch-ohw" : {
"delete_after" : 7,
},
}


hub_cloud_permissions = {
"dask-staging" : {
allow_access_to_external_requester_pays_buckets : true,
bucket_admin_access : [],
hub_namespace : "dask-staging"
bucket_admin_access : ["scratch-dask-staging"],
hub_namespace : "dask-staging",
},
"ohw" : {
allow_access_to_external_requester_pays_buckets : true,
bucket_admin_access : [],
hub_namespace : "ohw"
bucket_admin_access : ["scratch-ohw"],
hub_namespace : "ohw",
},
}

