Skip to content

Commit

Permalink
improvements to the chart (#716)
Browse files Browse the repository at this point in the history
  • Loading branch information
noyoshi authored Jan 13, 2025
1 parent 095e892 commit 7195d25
Show file tree
Hide file tree
Showing 3 changed files with 55 additions and 65 deletions.
19 changes: 9 additions & 10 deletions charts/lorax/Chart.yaml
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
apiVersion: v2
name: lorax
description: LoRAX is the open-source framework for serving
description: LoRAX is the open-source framework for serving
hundreds of fine-tuned LLMs in production for the price of one.
version: 0.3.0
version: 0.4.0
appVersion: 0.3.0

home: https://github.com/predibase/lorax
Expand All @@ -11,15 +11,14 @@ annotations:
artifacthub.io/category: ai-machine-learning

keywords:
- lorax
- llama
- llm
- predibase
- lorax
- llama
- llm
- predibase

maintainers:
- email: [email protected]
name: Predibase
- email: [email protected]
name: Predibase

sources:
- https://github.com/predibase/lorax

- https://github.com/predibase/lorax
25 changes: 8 additions & 17 deletions charts/lorax/templates/deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -33,25 +33,16 @@ spec:
{{- end }}
containers:
- args:
- --model-id
- {{ .Values.deployment.args.modelId }}
- --max-input-length
- {{ .Values.deployment.args.maxInputLength | quote }}
- --max-total-tokens
- {{ .Values.deployment.args.maxTotalTokens | quote }}
- --max-batch-total-tokens
- {{ .Values.deployment.args.maxBatchTotalTokens | quote }}
- --max-batch-prefill-tokens
- {{ .Values.deployment.args.maxBatchPrefillTokens | quote }}
- --sharded
- {{ .Values.deployment.args.sharded | quote }}
- --eager-prefill
- {{ .Values.deployment.args.eagerPrefill | quote }}
{{- range .Values.deployment.args }}
- {{ .name }}
{{- if .value }}
- {{ .value | quote }}
{{- end }}
{{- end }}
env:
- name: PORT
value: "8000"
- name: HUGGING_FACE_HUB_TOKEN
value: {{ .Values.deployment.env.huggingFaceHubToken | quote }}
{{- toYaml .Values.deployment.env | nindent 8 }}
image: {{ .Values.deployment.image.repository }}:{{ .Values.deployment.image.tag }}
imagePullPolicy: IfNotPresent
livenessProbe: {{ toYaml .Values.deployment.livenessProbe | nindent 10 }}
Expand All @@ -68,7 +59,7 @@ spec:
- mountPath: /dev/shm
name: shm
{{- if .Values.deployment.tolerations }}
tolerations:
tolerations:
{{- toYaml .Values.deployment.tolerations | nindent 6 }}
{{- end }}
nodeSelector: {{ toYaml .Values.deployment.nodeSelector | nindent 8 }}
Expand Down
76 changes: 38 additions & 38 deletions charts/lorax/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,54 +7,55 @@ deployment:
tag: "latest"

args:
modelId: "mistralai/Mistral-7B-Instruct-v0.1"
maxInputLength: 512
maxTotalTokens: 1024
maxBatchTotalTokens: 4096
maxBatchPrefillTokens: 2048
sharded: false
eagerPrefill: false
- name: "--model-id"
value: "mistralai/Mistral-7B-Instruct-v0.1"
- name: "--max-input-length"
value: "512"
- name: "--max-total-tokens"
value: "1024"
- name: "--max-batch-total-tokens"
value: "4096"
- name: "--max-batch-prefill-tokens"
value: "2048"
- name: "--eager-prefill"
value: "false"
- name: "--compile"
value: "" # --compile does not take a second argument

env:
# Your huggingface hub token. Required for some models such as the llama-2 family.
huggingFaceHubToken: ""

# Model types that support dynamic adapter loading
loraxEnabledModelTypes: "llama,mistral"
- name: "HUGGING_FACE_HUB_TOKEN"
value: ""

resources:
limits:
cpu: "8"
ephemeral-storage: 100Gi
memory: 27041Mi
nvidia.com/gpu: "1"
requests:
cpu: "8"
ephemeral-storage: 100Gi
memory: 27041Mi
nvidia.com/gpu: "1"

livenessProbe:
failureThreshold: 240
httpGet:
path: /health
port: http
scheme: HTTP
initialDelaySeconds: 5
periodSeconds: 5
successThreshold: 1
timeoutSeconds: 1
livenessProbe:
{}
# failureThreshold: 240
# httpGet:
# path: /health
# port: http
# scheme: HTTP
# initialDelaySeconds: 5
# periodSeconds: 5
# successThreshold: 1
# timeoutSeconds: 1

readinessProbe:
failureThreshold: 600
httpGet:
path: /health
port: http
scheme: HTTP
initialDelaySeconds: 5
periodSeconds: 5
successThreshold: 1
timeoutSeconds: 1
readinessProbe:
{}
# failureThreshold: 600
# httpGet:
# path: /health
# port: http
# scheme: HTTP
# initialDelaySeconds: 5
# periodSeconds: 5
# successThreshold: 1
# timeoutSeconds: 1

nodeSelector: {}
tolerations: []
Expand All @@ -72,4 +73,3 @@ service:
serviceType: ClusterIP
port: 80
additionalLabels: {}

0 comments on commit 7195d25

Please sign in to comment.