From 418a966f197816c31e71b7dacf6695569baf9640 Mon Sep 17 00:00:00 2001 From: Mike VanDenburgh Date: Fri, 11 Oct 2024 12:55:46 -0400 Subject: [PATCH 1/3] Enable karpenter prometheus metrics --- terraform/modules/spack_aws_k8s/karpenter.tf | 2 ++ 1 file changed, 2 insertions(+) diff --git a/terraform/modules/spack_aws_k8s/karpenter.tf b/terraform/modules/spack_aws_k8s/karpenter.tf index 68ed75a06..003a72376 100644 --- a/terraform/modules/spack_aws_k8s/karpenter.tf +++ b/terraform/modules/spack_aws_k8s/karpenter.tf @@ -46,6 +46,8 @@ resource "helm_release" "karpenter" { clusterName: ${module.eks.cluster_name} clusterEndpoint: ${module.eks.cluster_endpoint} interruptionQueueName: ${module.karpenter.queue_name} + serviceMonitor: + enabled: true EOT ] From 0cd65be171523b8448af7787f25fb5216aea46d1 Mon Sep 17 00:00:00 2001 From: Jacob Nesbitt Date: Fri, 11 Oct 2024 13:03:32 -0400 Subject: [PATCH 2/3] Use new karpenter instance price metric --- analytics/analytics/job_processor/prometheus.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/analytics/analytics/job_processor/prometheus.py b/analytics/analytics/job_processor/prometheus.py index 806075efc..a33633ffc 100644 --- a/analytics/analytics/job_processor/prometheus.py +++ b/analytics/analytics/job_processor/prometheus.py @@ -409,7 +409,7 @@ def get_pod_node_data(self, pod: str, start: datetime, end: datetime) -> NodeDat # lifetime, we return all values from this query and average them. zone = node_labels["label_topology_kubernetes_io_zone"] price_query = f""" - karpenter_cloudprovider_instance_type_price_estimate{{ + karpenter_cloudprovider_instance_type_offering_price_estimate{{ capacity_type='{capacity_type}', instance_type='{instance_type}', zone='{zone}' From d7873380348dd94f2b102ccae72c39d26877d8ef Mon Sep 17 00:00:00 2001 From: Jacob Nesbitt Date: Fri, 11 Oct 2024 13:04:18 -0400 Subject: [PATCH 3/3] Bump django image version --- .github/workflows/custom_docker_builds.yml | 2 +- .../custom/webhook-handler/deployments.yaml | 16 +++++++++++++--- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/.github/workflows/custom_docker_builds.yml b/.github/workflows/custom_docker_builds.yml index 06de12039..b1be0eebd 100644 --- a/.github/workflows/custom_docker_builds.yml +++ b/.github/workflows/custom_docker_builds.yml @@ -42,7 +42,7 @@ jobs: - docker-image: ./images/cache-indexer image-tags: ghcr.io/spack/cache-indexer:0.0.3 - docker-image: ./analytics - image-tags: ghcr.io/spack/django:0.3.16 + image-tags: ghcr.io/spack/django:0.3.17 - docker-image: ./images/ci-prune-buildcache image-tags: ghcr.io/spack/ci-prune-buildcache:0.0.4 - docker-image: ./images/protected-publish diff --git a/k8s/production/custom/webhook-handler/deployments.yaml b/k8s/production/custom/webhook-handler/deployments.yaml index 22a7b5d91..29732f83f 100644 --- a/k8s/production/custom/webhook-handler/deployments.yaml +++ b/k8s/production/custom/webhook-handler/deployments.yaml @@ -23,7 +23,7 @@ spec: serviceAccountName: webhook-handler containers: - name: webhook-handler - image: ghcr.io/spack/django:0.3.16 + image: ghcr.io/spack/django:0.3.17 imagePullPolicy: Always resources: requests: @@ -146,8 +146,18 @@ spec: serviceAccountName: webhook-handler containers: - name: webhook-handler-worker - image: ghcr.io/spack/django:0.3.16 - command: ["celery", "-A", "analytics.celery", "worker", "-l", "info", "-Q", "celery"] + image: ghcr.io/spack/django:0.3.17 + command: + [ + "celery", + "-A", + "analytics.celery", + "worker", + "-l", + "info", + "-Q", + "celery", + ] imagePullPolicy: Always resources: requests: