Skip to content

Commit

Permalink
add HorizontalPodAutoscaler stub to catalog
Browse files Browse the repository at this point in the history
  • Loading branch information
nojnhuh committed Oct 9, 2024
1 parent b071351 commit 5efe829
Show file tree
Hide file tree
Showing 14 changed files with 100 additions and 0 deletions.
9 changes: 9 additions & 0 deletions serving-catalog/core/deployment/base/hpa.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Stub HorizontalPodAutoscaler for the single-host inference Deployment.
# Only the scale target is declared in this file; it sets no replica bounds
# and no metrics.
# NOTE(review): autoscaling/v2 requires spec.maxReplicas — presumably supplied
# by the per-server hpa.patch.yaml overlays; confirm before enabling.
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: singlehost-inference-hpa
spec:
  # Workload whose replica count this autoscaler controls.
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: singlehost-inference-deployment-blueprint
1 change: 1 addition & 0 deletions serving-catalog/core/deployment/base/kustomization.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@ kind: Kustomization

resources:
- deployment.yaml
# - hpa.yaml # HPA is still a work-in-progress
15 changes: 15 additions & 0 deletions serving-catalog/core/deployment/jetstream/base/hpa.patch.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# HorizontalPodAutoscaler patch: sets replica bounds (1-10) and a single
# per-pod metric target.
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: jetstream-hpa-blueprint
spec:
  minReplicas: 1
  maxReplicas: 10
  metrics:
    # Scale on the average of a per-pod metric across the targeted pods.
    - type: Pods
      pods:
        metric:
          name: jetstream-token-latency-ms
        target:
          type: AverageValue
          # Presumably milliseconds, going by the metric name — TODO confirm.
          averageValue: 50
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,9 @@ patches:
group: apps
version: v1
kind: Deployment
# HPA is still a work-in-progress
# - path: hpa.patch.yaml
# target:
# group: autoscaling
# version: v2
# kind: HorizontalPodAutoscaler
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# HorizontalPodAutoscaler patch: sets the app label and the resource name.
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  labels:
    app: gemma-7b-it-jetstream-inference-server
  name: gemma-7b-it-jetstream-hpa
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,12 @@ patches:
kind: Deployment
options:
allowNameChange: true
# HPA is still a work-in-progress
# - path: hpa.patch.yaml
# target:
# kind: HorizontalPodAutoscaler
# options:
# allowNameChange: true
- path: job.patch.yaml
target:
kind: Job
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# HorizontalPodAutoscaler patch: sets the app label and the resource name.
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  labels:
    app: llama3-8b-jetstream-inference-server
  name: llama3-8b-jetstream-hpa
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,12 @@ patches:
kind: Deployment
options:
allowNameChange: true
# HPA is still a work-in-progress
# - path: hpa.patch.yaml
# target:
# kind: HorizontalPodAutoscaler
# options:
# allowNameChange: true
- path: job.patch.yaml
target:
kind: Job
Expand Down
15 changes: 15 additions & 0 deletions serving-catalog/core/deployment/vllm/base/hpa.patch.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# HorizontalPodAutoscaler patch: sets replica bounds (1-10) and a single
# per-pod metric target.
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: vllm-hpa-blueprint
spec:
  minReplicas: 1
  maxReplicas: 10
  metrics:
    # Scale on the average of a per-pod metric across the targeted pods.
    - type: Pods
      pods:
        metric:
          name: vllm-token-latency-ms
        target:
          type: AverageValue
          # Presumably milliseconds, going by the metric name — TODO confirm.
          averageValue: 50
6 changes: 6 additions & 0 deletions serving-catalog/core/deployment/vllm/base/kustomization.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,9 @@ patches:
group: apps
version: v1
kind: Deployment
# HPA is still a work-in-progress
# - path: hpa.patch.yaml
# target:
# group: autoscaling
# version: v2
# kind: HorizontalPodAutoscaler
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# HorizontalPodAutoscaler patch: sets the app label and the resource name.
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  labels:
    app: gemma-2b-vllm-inference-server
  name: gemma-2b-vllm-hpa
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,12 @@ patches:
kind: Deployment
options:
allowNameChange: true
# HPA is still a work-in-progress
# - path: hpa.patch.yaml
# target:
# kind: HorizontalPodAutoscaler
# options:
# allowNameChange: true
- path: service.patch.yaml
target:
kind: Service
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# HorizontalPodAutoscaler patch: sets the app label and the resource name.
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  labels:
    app: llama3-8b-vllm-inference-server
  name: llama3-8b-vllm-hpa
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,12 @@ patches:
path: deployment.patch.yaml
target:
kind: Deployment
# HPA is still a work-in-progress
# - options:
# allowNameChange: true
# path: hpa.patch.yaml
# target:
# kind: HorizontalPodAutoscaler
- options:
allowNameChange: true
path: service.patch.yaml
Expand Down

0 comments on commit 5efe829

Please sign in to comment.