From 5428b3bf6a463cf0537568b8aaad3aea02bd8e24 Mon Sep 17 00:00:00 2001 From: Sujeeth Jinesh Date: Fri, 19 Apr 2024 00:56:26 +0000 Subject: [PATCH] Prevent Pathways SIGTERMs from counting against backoffLimit --- xpk.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/xpk.py b/xpk.py index 7e98e3b8..d095d92d 100644 --- a/xpk.py +++ b/xpk.py @@ -265,10 +265,20 @@ class CapacityType(enum.Enum): xpk.google.com/workload: {args.workload} spec: backoffLimit: 4 + podFailurePolicy: + rules: + - action: Ignore # one of: Ignore, FailJob, Count + onExitCodes: + containerName: "pathways-worker" + operator: In # one of: In, NotIn + # TODO: verify this is the correct exit code (143 = 128 + 15, the conventional exit status for a process killed by SIGTERM) + values: [143] # Don't count SIGTERMed workers against the backoffLimit + onPodConditions: [] completions: {system.vms_per_slice} parallelism: {system.vms_per_slice} template: spec: + restartPolicy: Never terminationGracePeriodSeconds: {args.termination_grace_period_seconds} containers: - args: