Skip to content

Commit

Permalink
Merge pull request #15 from nojnhuh/hpa-catalog
Browse files Browse the repository at this point in the history
add HorizontalPodAutoscaler stub to catalog
  • Loading branch information
k8s-ci-robot authored Oct 29, 2024
2 parents 07dd507 + 54ee723 commit 93745fc
Show file tree
Hide file tree
Showing 16 changed files with 149 additions and 0 deletions.
10 changes: 10 additions & 0 deletions serving-catalog/core/deployment/components/hpa/base/hpa.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Base HorizontalPodAutoscaler stub for the serving catalog.
# Only the scale target is pinned here; replica bounds and metrics are not set
# in this file, so patches layered on top are expected to provide them.
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: singlehost-inference-hpa
spec:
  # Workload whose replica count this HPA controls.
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: singlehost-inference-deployment-blueprint
  # TODO: also include stabilizing-windows, tolerance threshold, etc.
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# kustomization.yaml
# Kustomize Component that contributes the HPA manifest in hpa.yaml.
apiVersion: kustomize.config.k8s.io/v1alpha1
kind: Component

resources:
  - hpa.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Partial HorizontalPodAutoscaler used as a patch for singlehost-inference-hpa:
# sets replica bounds and a per-pod JetStream token-latency metric.
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: singlehost-inference-hpa
spec:
  # TODO: add best practices as defined at:
  # - https://cloud.google.com/kubernetes-engine/docs/how-to/machine-learning/inference/autoscaling-tpu
  # - https://cloud.google.com/kubernetes-engine/docs/how-to/machine-learning/inference/autoscaling
  minReplicas: 1
  maxReplicas: 10
  metrics:
    - type: Pods
      pods:
        metric:
          name: jetstream-token-latency-ms
        target:
          # No averageValue is set in this file. The per-model JSON patches in
          # this change add /spec/metrics/0/pods/target/averageValue, which
          # relies on this metric staying at index 0.
          type: AverageValue
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# kustomization.yaml
# Kustomize Component that pulls in the shared HPA base component and then
# applies hpa.patch.yaml to every HorizontalPodAutoscaler it selects.
apiVersion: kustomize.config.k8s.io/v1alpha1
kind: Component

components:
  - ../../base

patches:
  - path: hpa.patch.yaml
    # Selects by kind only, so the patch applies regardless of the HPA's name.
    target:
      kind: HorizontalPodAutoscaler
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Partial HorizontalPodAutoscaler used as a patch for singlehost-inference-hpa:
# sets replica bounds and a per-pod vLLM token-latency metric.
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: singlehost-inference-hpa
spec:
  minReplicas: 1
  maxReplicas: 10
  metrics:
    - type: Pods
      pods:
        metric:
          name: vllm-token-latency-ms
        target:
          # No averageValue is set in this file. The per-model JSON patches in
          # this change add /spec/metrics/0/pods/target/averageValue, which
          # relies on this metric staying at index 0.
          type: AverageValue
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# kustomization.yaml
# Kustomize Component that pulls in the shared HPA base component and then
# applies hpa.patch.yaml to every HorizontalPodAutoscaler it selects.
apiVersion: kustomize.config.k8s.io/v1alpha1
kind: Component

components:
  - ../../base

patches:
  - path: hpa.patch.yaml
    # Selects by kind only, so the patch applies regardless of the HPA's name.
    target:
      kind: HorizontalPodAutoscaler
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# JSON Patch (RFC 6902) operations that specialize the HPA for
# gemma-7b-it on JetStream.
# Note: `add` on an object member that already exists replaces its value.

# Rename the HPA.
- op: add
  path: /metadata/name
  value: gemma-7b-it-jetstream-hpa
# Set the labels map as a whole (any labels already present are replaced).
- op: add
  path: /metadata/labels
  value:
    app: gemma-7b-it-jetstream-inference-server
# Target average for the first metric; requires /spec/metrics/0 to exist.
# NOTE(review): unit presumably follows the metric name (ms) - confirm.
- op: add
  path: /spec/metrics/0/pods/target/averageValue
  value: 50
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ resources:

components:
- ../../../components/gke/resources/tpu/v5e-2x4
# - ../../../components/hpa/jetstream/token-latency # HPA is a work-in-progress

patches:
- path: deployment.patch.yaml
Expand All @@ -15,3 +16,8 @@ patches:
- path: job.patch.yaml
target:
kind: Job
- options:
allowNameChange: true
path: hpa.patch.yaml
target:
kind: HorizontalPodAutoscaler
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# JSON Patch (RFC 6902) operations that specialize the HPA for
# llama3-8b on JetStream.
# Note: `add` on an object member that already exists replaces its value.

# Rename the HPA.
- op: add
  path: /metadata/name
  value: llama3-8b-jetstream-hpa
# Set the labels map as a whole (any labels already present are replaced).
- op: add
  path: /metadata/labels
  value:
    app: llama3-8b-jetstream-inference-server
# Target average for the first metric; requires /spec/metrics/0 to exist.
# NOTE(review): unit presumably follows the metric name (ms) - confirm.
- op: add
  path: /spec/metrics/0/pods/target/averageValue
  value: 50
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ resources:

components:
- ../../../components/gke/resources/tpu/v5e-2x4
# - ../../../components/hpa/jetstream/token-latency # HPA is a work-in-progress

patches:
- path: deployment.patch.yaml
Expand All @@ -15,3 +16,8 @@ patches:
- path: job.patch.yaml
target:
kind: Job
- options:
allowNameChange: true
path: hpa.patch.yaml
target:
kind: HorizontalPodAutoscaler
10 changes: 10 additions & 0 deletions serving-catalog/core/deployment/vllm/gemma-2b/gke/hpa.patch.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# JSON Patch (RFC 6902) operations that specialize the HPA for
# gemma-2b on vLLM.
# Note: `add` on an object member that already exists replaces its value.

# Rename the HPA.
- op: add
  path: /metadata/name
  value: gemma-2b-vllm-hpa
# Set the labels map as a whole (any labels already present are replaced).
- op: add
  path: /metadata/labels
  value:
    app: gemma-2b-vllm-inference-server
# Target average for the first metric; requires /spec/metrics/0 to exist.
# NOTE(review): unit presumably follows the metric name (ms) - confirm.
- op: add
  path: /spec/metrics/0/pods/target/averageValue
  value: 50
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,14 @@ resources:

components:
- ../../../components/gke/resources/gpu/1-L4
# - ../../../components/hpa/vllm/token-latency # HPA is a work-in-progress

patches:
- path: deployment.patch.yaml
target:
kind: Deployment
- options:
allowNameChange: true
path: hpa.patch.yaml
target:
kind: HorizontalPodAutoscaler
10 changes: 10 additions & 0 deletions serving-catalog/core/deployment/vllm/llama3-70b/gke/hpa.patch.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# JSON Patch (RFC 6902) operations that specialize the HPA for
# llama3-70b on vLLM.
# Note: `add` on an object member that already exists replaces its value.

# Rename the HPA.
- op: add
  path: /metadata/name
  value: llama3-70b-vllm-hpa
# Set the labels map as a whole (any labels already present are replaced).
- op: add
  path: /metadata/labels
  value:
    app: llama3-70b-vllm-inference-server
# Target average for the first metric; requires /spec/metrics/0 to exist.
# NOTE(review): unit presumably follows the metric name (ms) - confirm.
- op: add
  path: /spec/metrics/0/pods/target/averageValue
  value: 50
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,14 @@ resources:

components:
- ../../../components/gke/resources/gpu/8-L4
# - ../../../components/hpa/vllm/token-latency # HPA is a work-in-progress

patches:
- path: deployment.patch.yaml
target:
kind: Deployment
- options:
allowNameChange: true
path: hpa.patch.yaml
target:
kind: HorizontalPodAutoscaler
10 changes: 10 additions & 0 deletions serving-catalog/core/deployment/vllm/llama3-8b/gke/hpa.patch.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# JSON Patch (RFC 6902) operations that specialize the HPA for
# llama3-8b on vLLM.
# Note: `add` on an object member that already exists replaces its value.

# Rename the HPA.
- op: add
  path: /metadata/name
  value: llama3-8b-vllm-hpa
# Set the labels map as a whole (any labels already present are replaced).
- op: add
  path: /metadata/labels
  value:
    app: llama3-8b-vllm-inference-server
# Target average for the first metric; requires /spec/metrics/0 to exist.
# NOTE(review): unit presumably follows the metric name (ms) - confirm.
- op: add
  path: /spec/metrics/0/pods/target/averageValue
  value: 50
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,14 @@ resources:

components:
- ../../../components/gke/resources/gpu/1-L4
# - ../../../components/hpa/vllm/token-latency # HPA is a work-in-progress

patches:
- path: deployment.patch.yaml
target:
kind: Deployment
- options:
allowNameChange: true
path: hpa.patch.yaml
target:
kind: HorizontalPodAutoscaler

0 comments on commit 93745fc

Please sign in to comment.