From 5efe829292ba8236d7ed589140b6f492b4040ced Mon Sep 17 00:00:00 2001
From: Jon Huhn <huhnjon@gmail.com>
Date: Thu, 3 Oct 2024 13:32:40 -0500
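Subject: [PATCH] add HorizontalPodAutoscaler stub to catalog

Add a base HorizontalPodAutoscaler manifest that targets the
singlehost-inference-deployment-blueprint Deployment, together with
per-runtime (JetStream, vLLM) and per-model HPA patch files.

The HPA is still a work-in-progress, so every kustomization.yaml entry
that would pull these files in is added commented out. The new files
are therefore not yet referenced by any kustomization in this change.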
---
 serving-catalog/core/deployment/base/hpa.yaml     |  9 +++++++++
 .../core/deployment/base/kustomization.yaml       |  1 +
 .../core/deployment/jetstream/base/hpa.patch.yaml | 15 +++++++++++++++
 .../deployment/jetstream/base/kustomization.yaml  |  6 ++++++
 .../jetstream/gemma-7b-it/base/hpa.patch.yaml     |  6 ++++++
 .../jetstream/gemma-7b-it/base/kustomization.yaml |  6 ++++++
 .../jetstream/llama3-8b/base/hpa.patch.yaml       |  6 ++++++
 .../jetstream/llama3-8b/base/kustomization.yaml   |  6 ++++++
 .../core/deployment/vllm/base/hpa.patch.yaml      | 15 +++++++++++++++
 .../core/deployment/vllm/base/kustomization.yaml  |  6 ++++++
 .../deployment/vllm/gemma-2b/base/hpa.patch.yaml  |  6 ++++++
 .../vllm/gemma-2b/base/kustomization.yaml         |  6 ++++++
 .../deployment/vllm/llama3-8b/base/hpa.patch.yaml |  6 ++++++
 .../vllm/llama3-8b/base/kustomization.yaml        |  6 ++++++
 14 files changed, 100 insertions(+)
 create mode 100644 serving-catalog/core/deployment/base/hpa.yaml
 create mode 100644 serving-catalog/core/deployment/jetstream/base/hpa.patch.yaml
 create mode 100644 serving-catalog/core/deployment/jetstream/gemma-7b-it/base/hpa.patch.yaml
 create mode 100644 serving-catalog/core/deployment/jetstream/llama3-8b/base/hpa.patch.yaml
 create mode 100644 serving-catalog/core/deployment/vllm/base/hpa.patch.yaml
 create mode 100644 serving-catalog/core/deployment/vllm/gemma-2b/base/hpa.patch.yaml
 create mode 100644 serving-catalog/core/deployment/vllm/llama3-8b/base/hpa.patch.yaml

diff --git a/serving-catalog/core/deployment/base/hpa.yaml b/serving-catalog/core/deployment/base/hpa.yaml
new file mode 100644
index 0000000..c54850a
--- /dev/null
+++ b/serving-catalog/core/deployment/base/hpa.yaml
@@ -0,0 +1,9 @@
+apiVersion: autoscaling/v2
+kind: HorizontalPodAutoscaler
+metadata:
+  name: singlehost-inference-hpa
+spec:
+  scaleTargetRef:
+    apiVersion: apps/v1
+    kind: Deployment
+    name: singlehost-inference-deployment-blueprint
diff --git a/serving-catalog/core/deployment/base/kustomization.yaml b/serving-catalog/core/deployment/base/kustomization.yaml
index 3097586..72ce5bc 100644
--- a/serving-catalog/core/deployment/base/kustomization.yaml
+++ b/serving-catalog/core/deployment/base/kustomization.yaml
@@ -4,3 +4,4 @@ kind: Kustomization
 
 resources:
 - deployment.yaml
+# - hpa.yaml  # TODO: uncomment once HPA support is ready (still a work-in-progress)
diff --git a/serving-catalog/core/deployment/jetstream/base/hpa.patch.yaml b/serving-catalog/core/deployment/jetstream/base/hpa.patch.yaml
new file mode 100644
index 0000000..aa440c6
--- /dev/null
+++ b/serving-catalog/core/deployment/jetstream/base/hpa.patch.yaml
@@ -0,0 +1,15 @@
+apiVersion: autoscaling/v2
+kind: HorizontalPodAutoscaler
+metadata:
+  name: jetstream-hpa-blueprint
+spec:
+  minReplicas: 1
+  maxReplicas: 10
+  metrics:
+  - type: Pods
+    pods:
+      metric:
+        name: jetstream-token-latency-ms
+      target:
+        type: AverageValue
+        averageValue: 50
diff --git a/serving-catalog/core/deployment/jetstream/base/kustomization.yaml b/serving-catalog/core/deployment/jetstream/base/kustomization.yaml
index 8dffffe..228e759 100644
--- a/serving-catalog/core/deployment/jetstream/base/kustomization.yaml
+++ b/serving-catalog/core/deployment/jetstream/base/kustomization.yaml
@@ -13,3 +13,9 @@ patches:
       group: apps
       version: v1
       kind: Deployment
+  # TODO: uncomment this patch once HPA support is ready (still a work-in-progress)
+  # - path: hpa.patch.yaml
+  #   target:
+  #     group: autoscaling
+  #     version: v2
+  #     kind: HorizontalPodAutoscaler
diff --git a/serving-catalog/core/deployment/jetstream/gemma-7b-it/base/hpa.patch.yaml b/serving-catalog/core/deployment/jetstream/gemma-7b-it/base/hpa.patch.yaml
new file mode 100644
index 0000000..0901bb8
--- /dev/null
+++ b/serving-catalog/core/deployment/jetstream/gemma-7b-it/base/hpa.patch.yaml
@@ -0,0 +1,6 @@
+apiVersion: autoscaling/v2
+kind: HorizontalPodAutoscaler
+metadata:
+  labels:
+    app: gemma-7b-it-jetstream-inference-server
+  name: gemma-7b-it-jetstream-hpa
diff --git a/serving-catalog/core/deployment/jetstream/gemma-7b-it/base/kustomization.yaml b/serving-catalog/core/deployment/jetstream/gemma-7b-it/base/kustomization.yaml
index b235d6f..a14bc77 100644
--- a/serving-catalog/core/deployment/jetstream/gemma-7b-it/base/kustomization.yaml
+++ b/serving-catalog/core/deployment/jetstream/gemma-7b-it/base/kustomization.yaml
@@ -11,6 +11,12 @@ patches:
       kind: Deployment
     options:
       allowNameChange: true
+  # TODO: uncomment this patch once HPA support is ready (still a work-in-progress)
+  # - path: hpa.patch.yaml
+  #   target:
+  #     kind: HorizontalPodAutoscaler
+  #   options:
+  #     allowNameChange: true
   - path: job.patch.yaml
     target:
       kind: Job
diff --git a/serving-catalog/core/deployment/jetstream/llama3-8b/base/hpa.patch.yaml b/serving-catalog/core/deployment/jetstream/llama3-8b/base/hpa.patch.yaml
new file mode 100644
index 0000000..d3553d9
--- /dev/null
+++ b/serving-catalog/core/deployment/jetstream/llama3-8b/base/hpa.patch.yaml
@@ -0,0 +1,6 @@
+apiVersion: autoscaling/v2
+kind: HorizontalPodAutoscaler
+metadata:
+  labels:
+    app: llama3-8b-jetstream-inference-server
+  name: llama3-8b-jetstream-hpa
diff --git a/serving-catalog/core/deployment/jetstream/llama3-8b/base/kustomization.yaml b/serving-catalog/core/deployment/jetstream/llama3-8b/base/kustomization.yaml
index b235d6f..a14bc77 100644
--- a/serving-catalog/core/deployment/jetstream/llama3-8b/base/kustomization.yaml
+++ b/serving-catalog/core/deployment/jetstream/llama3-8b/base/kustomization.yaml
@@ -11,6 +11,12 @@ patches:
       kind: Deployment
     options:
       allowNameChange: true
+  # TODO: uncomment this patch once HPA support is ready (still a work-in-progress)
+  # - path: hpa.patch.yaml
+  #   target:
+  #     kind: HorizontalPodAutoscaler
+  #   options:
+  #     allowNameChange: true
   - path: job.patch.yaml
     target:
       kind: Job
diff --git a/serving-catalog/core/deployment/vllm/base/hpa.patch.yaml b/serving-catalog/core/deployment/vllm/base/hpa.patch.yaml
new file mode 100644
index 0000000..518622c
--- /dev/null
+++ b/serving-catalog/core/deployment/vllm/base/hpa.patch.yaml
@@ -0,0 +1,15 @@
+apiVersion: autoscaling/v2
+kind: HorizontalPodAutoscaler
+metadata:
+  name: vllm-hpa-blueprint
+spec:
+  minReplicas: 1
+  maxReplicas: 10
+  metrics:
+  - type: Pods
+    pods:
+      metric:
+        name: vllm-token-latency-ms
+      target:
+        type: AverageValue
+        averageValue: 50
diff --git a/serving-catalog/core/deployment/vllm/base/kustomization.yaml b/serving-catalog/core/deployment/vllm/base/kustomization.yaml
index 6a4a488..7ad4ed6 100644
--- a/serving-catalog/core/deployment/vllm/base/kustomization.yaml
+++ b/serving-catalog/core/deployment/vllm/base/kustomization.yaml
@@ -12,3 +12,9 @@ patches:
       group: apps
       version: v1
       kind: Deployment
+  # TODO: uncomment this patch once HPA support is ready (still a work-in-progress)
+  # - path: hpa.patch.yaml
+  #   target:
+  #     group: autoscaling
+  #     version: v2
+  #     kind: HorizontalPodAutoscaler
diff --git a/serving-catalog/core/deployment/vllm/gemma-2b/base/hpa.patch.yaml b/serving-catalog/core/deployment/vllm/gemma-2b/base/hpa.patch.yaml
new file mode 100644
index 0000000..c54d08e
--- /dev/null
+++ b/serving-catalog/core/deployment/vllm/gemma-2b/base/hpa.patch.yaml
@@ -0,0 +1,6 @@
+apiVersion: autoscaling/v2
+kind: HorizontalPodAutoscaler
+metadata:
+  labels:
+    app: gemma-2b-vllm-inference-server
+  name: gemma-2b-vllm-hpa
diff --git a/serving-catalog/core/deployment/vllm/gemma-2b/base/kustomization.yaml b/serving-catalog/core/deployment/vllm/gemma-2b/base/kustomization.yaml
index f5845bd..17bf17e 100644
--- a/serving-catalog/core/deployment/vllm/gemma-2b/base/kustomization.yaml
+++ b/serving-catalog/core/deployment/vllm/gemma-2b/base/kustomization.yaml
@@ -11,6 +11,12 @@ patches:
       kind: Deployment
     options:
       allowNameChange: true
+  # TODO: uncomment this patch once HPA support is ready (still a work-in-progress)
+  # - path: hpa.patch.yaml
+  #   target:
+  #     kind: HorizontalPodAutoscaler
+  #   options:
+  #     allowNameChange: true
   - path: service.patch.yaml
     target:
       kind: Service
diff --git a/serving-catalog/core/deployment/vllm/llama3-8b/base/hpa.patch.yaml b/serving-catalog/core/deployment/vllm/llama3-8b/base/hpa.patch.yaml
new file mode 100644
index 0000000..dd11ffb
--- /dev/null
+++ b/serving-catalog/core/deployment/vllm/llama3-8b/base/hpa.patch.yaml
@@ -0,0 +1,6 @@
+apiVersion: autoscaling/v2
+kind: HorizontalPodAutoscaler
+metadata:
+  labels:
+    app: llama3-8b-vllm-inference-server
+  name: llama3-8b-vllm-hpa
diff --git a/serving-catalog/core/deployment/vllm/llama3-8b/base/kustomization.yaml b/serving-catalog/core/deployment/vllm/llama3-8b/base/kustomization.yaml
index 2dc82cb..3145c75 100644
--- a/serving-catalog/core/deployment/vllm/llama3-8b/base/kustomization.yaml
+++ b/serving-catalog/core/deployment/vllm/llama3-8b/base/kustomization.yaml
@@ -11,6 +11,12 @@ patches:
   path: deployment.patch.yaml
   target:
     kind: Deployment
+# TODO: uncomment this patch once HPA support is ready (still a work-in-progress)
+# - options:
+#     allowNameChange: true
+#   path: hpa.patch.yaml
+#   target:
+#     kind: HorizontalPodAutoscaler
 - options:
     allowNameChange: true
   path: service.patch.yaml