From c2af4d717ba5d366670167477ff53e1f1835e01a Mon Sep 17 00:00:00 2001
From: Jon Huhn
Date: Thu, 3 Oct 2024 13:32:40 -0500
Subject: [PATCH] add HorizontalPodAutoscaler to catalog

---
Review notes (this region is ignored when the patch is applied and is not
part of the commit message):
* base/hpa.yaml adds an autoscaling/v2 HorizontalPodAutoscaler that targets
  the Deployment singlehost-inference-deployment-blueprint and scales between
  1 and 10 replicas on the Pods metric token-latency-ms (AverageValue 50).
* Each model overlay (jetstream gemma-7b-it / llama3-8b, vllm gemma-2b /
  llama3-8b) renames and labels that HPA through a new hpa.patch.yaml,
  registered with allowNameChange: true like the neighbouring patches.
* NOTE(review): token-latency-ms presumably has to be served by a
  custom-metrics adapter; confirm one is installed or documented.
* NOTE(review): line breaks and YAML indentation in this copy were
  reconstructed from a whitespace-collapsed version; verify against commit
  c2af4d7 if the blob hashes on the index lines must match.

 serving-catalog/core/deployment/base/hpa.yaml | 19 +++++++++++++++++++
 .../core/deployment/base/kustomization.yaml   |  1 +
 .../jetstream/gemma-7b-it/base/hpa.patch.yaml |  6 ++++++
 .../gemma-7b-it/base/kustomization.yaml       |  5 +++++
 .../jetstream/llama3-8b/base/hpa.patch.yaml   |  6 ++++++
 .../llama3-8b/base/kustomization.yaml         |  5 +++++
 .../vllm/gemma-2b/base/hpa.patch.yaml         |  6 ++++++
 .../vllm/gemma-2b/base/kustomization.yaml     |  5 +++++
 .../vllm/llama3-8b/base/hpa.patch.yaml        |  6 ++++++
 .../vllm/llama3-8b/base/kustomization.yaml    |  5 +++++
 10 files changed, 64 insertions(+)
 create mode 100644 serving-catalog/core/deployment/base/hpa.yaml
 create mode 100644 serving-catalog/core/deployment/jetstream/gemma-7b-it/base/hpa.patch.yaml
 create mode 100644 serving-catalog/core/deployment/jetstream/llama3-8b/base/hpa.patch.yaml
 create mode 100644 serving-catalog/core/deployment/vllm/gemma-2b/base/hpa.patch.yaml
 create mode 100644 serving-catalog/core/deployment/vllm/llama3-8b/base/hpa.patch.yaml

diff --git a/serving-catalog/core/deployment/base/hpa.yaml b/serving-catalog/core/deployment/base/hpa.yaml
new file mode 100644
index 0000000..65206a0
--- /dev/null
+++ b/serving-catalog/core/deployment/base/hpa.yaml
@@ -0,0 +1,19 @@
+apiVersion: autoscaling/v2
+kind: HorizontalPodAutoscaler
+metadata:
+  name: singlehost-inference-hpa
+spec:
+  scaleTargetRef:
+    apiVersion: apps/v1
+    kind: Deployment
+    name: singlehost-inference-deployment-blueprint
+  minReplicas: 1
+  maxReplicas: 10
+  metrics:
+  - type: Pods
+    pods:
+      metric:
+        name: token-latency-ms
+      target:
+        type: AverageValue
+        averageValue: 50
diff --git a/serving-catalog/core/deployment/base/kustomization.yaml b/serving-catalog/core/deployment/base/kustomization.yaml
index 3097586..a5bf51b 100644
--- a/serving-catalog/core/deployment/base/kustomization.yaml
+++ b/serving-catalog/core/deployment/base/kustomization.yaml
@@ -4,3 +4,4 @@ kind: Kustomization
 
 resources:
 - deployment.yaml
+- hpa.yaml
diff --git a/serving-catalog/core/deployment/jetstream/gemma-7b-it/base/hpa.patch.yaml b/serving-catalog/core/deployment/jetstream/gemma-7b-it/base/hpa.patch.yaml
new file mode 100644
index 0000000..0901bb8
--- /dev/null
+++ b/serving-catalog/core/deployment/jetstream/gemma-7b-it/base/hpa.patch.yaml
@@ -0,0 +1,6 @@
+apiVersion: autoscaling/v2
+kind: HorizontalPodAutoscaler
+metadata:
+  labels:
+    app: gemma-7b-it-jetstream-inference-server
+  name: gemma-7b-it-jetstream-hpa
diff --git a/serving-catalog/core/deployment/jetstream/gemma-7b-it/base/kustomization.yaml b/serving-catalog/core/deployment/jetstream/gemma-7b-it/base/kustomization.yaml
index b235d6f..8d3c6cb 100644
--- a/serving-catalog/core/deployment/jetstream/gemma-7b-it/base/kustomization.yaml
+++ b/serving-catalog/core/deployment/jetstream/gemma-7b-it/base/kustomization.yaml
@@ -11,6 +11,11 @@ patches:
       kind: Deployment
     options:
       allowNameChange: true
+  - path: hpa.patch.yaml
+    target:
+      kind: HorizontalPodAutoscaler
+    options:
+      allowNameChange: true
   - path: job.patch.yaml
     target:
       kind: Job
diff --git a/serving-catalog/core/deployment/jetstream/llama3-8b/base/hpa.patch.yaml b/serving-catalog/core/deployment/jetstream/llama3-8b/base/hpa.patch.yaml
new file mode 100644
index 0000000..d3553d9
--- /dev/null
+++ b/serving-catalog/core/deployment/jetstream/llama3-8b/base/hpa.patch.yaml
@@ -0,0 +1,6 @@
+apiVersion: autoscaling/v2
+kind: HorizontalPodAutoscaler
+metadata:
+  labels:
+    app: llama3-8b-jetstream-inference-server
+  name: llama3-8b-jetstream-hpa
diff --git a/serving-catalog/core/deployment/jetstream/llama3-8b/base/kustomization.yaml b/serving-catalog/core/deployment/jetstream/llama3-8b/base/kustomization.yaml
index b235d6f..8d3c6cb 100644
--- a/serving-catalog/core/deployment/jetstream/llama3-8b/base/kustomization.yaml
+++ b/serving-catalog/core/deployment/jetstream/llama3-8b/base/kustomization.yaml
@@ -11,6 +11,11 @@ patches:
       kind: Deployment
     options:
       allowNameChange: true
+  - path: hpa.patch.yaml
+    target:
+      kind: HorizontalPodAutoscaler
+    options:
+      allowNameChange: true
   - path: job.patch.yaml
     target:
       kind: Job
diff --git a/serving-catalog/core/deployment/vllm/gemma-2b/base/hpa.patch.yaml b/serving-catalog/core/deployment/vllm/gemma-2b/base/hpa.patch.yaml
new file mode 100644
index 0000000..c54d08e
--- /dev/null
+++ b/serving-catalog/core/deployment/vllm/gemma-2b/base/hpa.patch.yaml
@@ -0,0 +1,6 @@
+apiVersion: autoscaling/v2
+kind: HorizontalPodAutoscaler
+metadata:
+  labels:
+    app: gemma-2b-vllm-inference-server
+  name: gemma-2b-vllm-hpa
diff --git a/serving-catalog/core/deployment/vllm/gemma-2b/base/kustomization.yaml b/serving-catalog/core/deployment/vllm/gemma-2b/base/kustomization.yaml
index f5845bd..285c985 100644
--- a/serving-catalog/core/deployment/vllm/gemma-2b/base/kustomization.yaml
+++ b/serving-catalog/core/deployment/vllm/gemma-2b/base/kustomization.yaml
@@ -11,6 +11,11 @@ patches:
       kind: Deployment
     options:
       allowNameChange: true
+  - path: hpa.patch.yaml
+    target:
+      kind: HorizontalPodAutoscaler
+    options:
+      allowNameChange: true
   - path: service.patch.yaml
     target:
       kind: Service
diff --git a/serving-catalog/core/deployment/vllm/llama3-8b/base/hpa.patch.yaml b/serving-catalog/core/deployment/vllm/llama3-8b/base/hpa.patch.yaml
new file mode 100644
index 0000000..dd11ffb
--- /dev/null
+++ b/serving-catalog/core/deployment/vllm/llama3-8b/base/hpa.patch.yaml
@@ -0,0 +1,6 @@
+apiVersion: autoscaling/v2
+kind: HorizontalPodAutoscaler
+metadata:
+  labels:
+    app: llama3-8b-vllm-inference-server
+  name: llama3-8b-vllm-hpa
diff --git a/serving-catalog/core/deployment/vllm/llama3-8b/base/kustomization.yaml b/serving-catalog/core/deployment/vllm/llama3-8b/base/kustomization.yaml
index 2dc82cb..250c4d0 100644
--- a/serving-catalog/core/deployment/vllm/llama3-8b/base/kustomization.yaml
+++ b/serving-catalog/core/deployment/vllm/llama3-8b/base/kustomization.yaml
@@ -11,6 +11,11 @@ patches:
   path: deployment.patch.yaml
   target:
     kind: Deployment
+- options:
+    allowNameChange: true
+  path: hpa.patch.yaml
+  target:
+    kind: HorizontalPodAutoscaler
 - options:
     allowNameChange: true
   path: service.patch.yaml