From d55d6fad12f46c94d0a398e41d6b66cacb7df8f0 Mon Sep 17 00:00:00 2001 From: Nicolas Pepin-Perreault Date: Fri, 8 Dec 2023 17:53:03 +0100 Subject: [PATCH 1/3] refactor: move all job push experiments together --- .../restart.json => job-push/gateway-restart.json} | 0 .../terminate.json => job-push/gateway-terminate.json} | 0 .../internal/chaos-experiments/camunda-cloud/manifest.yml | 4 ++-- 3 files changed, 2 insertions(+), 2 deletions(-) rename go-chaos/internal/chaos-experiments/camunda-cloud/{job-push-gateway/restart.json => job-push/gateway-restart.json} (100%) rename go-chaos/internal/chaos-experiments/camunda-cloud/{job-push-gateway/terminate.json => job-push/gateway-terminate.json} (100%) diff --git a/go-chaos/internal/chaos-experiments/camunda-cloud/job-push-gateway/restart.json b/go-chaos/internal/chaos-experiments/camunda-cloud/job-push/gateway-restart.json similarity index 100% rename from go-chaos/internal/chaos-experiments/camunda-cloud/job-push-gateway/restart.json rename to go-chaos/internal/chaos-experiments/camunda-cloud/job-push/gateway-restart.json diff --git a/go-chaos/internal/chaos-experiments/camunda-cloud/job-push-gateway/terminate.json b/go-chaos/internal/chaos-experiments/camunda-cloud/job-push/gateway-terminate.json similarity index 100% rename from go-chaos/internal/chaos-experiments/camunda-cloud/job-push-gateway/terminate.json rename to go-chaos/internal/chaos-experiments/camunda-cloud/job-push/gateway-terminate.json diff --git a/go-chaos/internal/chaos-experiments/camunda-cloud/manifest.yml b/go-chaos/internal/chaos-experiments/camunda-cloud/manifest.yml index 8f38344fc..cd7d0ed77 100644 --- a/go-chaos/internal/chaos-experiments/camunda-cloud/manifest.yml +++ b/go-chaos/internal/chaos-experiments/camunda-cloud/manifest.yml @@ -39,11 +39,11 @@ experiments: - path: worker-restart/experiment.json clusterPlans: - production-s - - path: job-push-gateway/restart.json + - path: job-push/gateway-restart.json clusterPlans: - production-s 
minVersion: 8.4 - - path: job-push-gateway/terminate.json + - path: job-push/gateway-terminate.json clusterPlans: - production-s minVersion: 8.4 From e6ab7d87db913ff19f812e7d20e5d929256b5c7c Mon Sep 17 00:00:00 2001 From: Nicolas Pepin-Perreault Date: Fri, 8 Dec 2023 17:57:03 +0100 Subject: [PATCH 2/3] feat: add new job push cluster restart experiment --- .../job-push/cluster-restart.json | 76 +++++++++++++++++++ .../camunda-cloud/manifest.yml | 4 + 2 files changed, 80 insertions(+) create mode 100644 go-chaos/internal/chaos-experiments/camunda-cloud/job-push/cluster-restart.json diff --git a/go-chaos/internal/chaos-experiments/camunda-cloud/job-push/cluster-restart.json b/go-chaos/internal/chaos-experiments/camunda-cloud/job-push/cluster-restart.json new file mode 100644 index 000000000..37ed444c4 --- /dev/null +++ b/go-chaos/internal/chaos-experiments/camunda-cloud/job-push/cluster-restart.json @@ -0,0 +1,76 @@ +{ + "version": "0.1.0", + "title": "Job push cluster restart experiment", + "description": "Job workers with streaming enabled should be fault-tolerant. 
The worker should receive and complete jobs even if all brokers restart.", + "contributions": { + "performance": "high", + "reliability": "high", + "availability": "high" + }, + "steady-state-hypothesis": { + "title": "Zeebe is alive", + "probes": [ + { + "name": "All pods should be ready", + "type": "probe", + "tolerance": 0, + "provider": { + "type": "process", + "path": "zbchaos", + "arguments": ["verify", "readiness"], + "timeout": 900 + } + }, + { + "name": "Can deploy process model", + "type": "probe", + "tolerance": 0, + "provider": { + "type": "process", + "path": "zbchaos", + "arguments": ["deploy", "process"], + "timeout": 900 + } + }, + { + "name": "Can deploy workers", + "type": "probe", + "tolerance": 0, + "provider": { + "type": "process", + "path": "zbchaos", + "arguments": ["deploy", "worker", "--pollingDelay", "86400000"], + "timeout": 900 + }, + "pauses": { + "after": 5 + } + }, + { + "name": "Should be able to create process instances", + "type": "probe", + "tolerance": 0, + "provider": { + "type": "process", + "path": "zbchaos", + "arguments": ["verify", "instance-creation", "--awaitResult"], + "timeout": 900 + } + } + ] + }, + "method": [ + { + "name": "Restart all brokers", + "type": "probe", + "tolerance": 0, + "provider": { + "type": "process", + "path": "zbchaos", + "arguments": ["restart", "broker", "--all"], + "timeout": 900 + } + } + ], + "rollbacks": [] +} diff --git a/go-chaos/internal/chaos-experiments/camunda-cloud/manifest.yml b/go-chaos/internal/chaos-experiments/camunda-cloud/manifest.yml index cd7d0ed77..4048c8c1f 100644 --- a/go-chaos/internal/chaos-experiments/camunda-cloud/manifest.yml +++ b/go-chaos/internal/chaos-experiments/camunda-cloud/manifest.yml @@ -47,6 +47,10 @@ experiments: clusterPlans: - production-s minVersion: 8.4 + - path: job-push/cluster-restart.json + clusterPlans: + - production-s + minVersion: 8.4 - path: worker-resilience/gateway-restart.json clusterPlans: - production-s From 
9891b49dd54d0ae18cba86e75e8ae3da5a0e6e1b Mon Sep 17 00:00:00 2001 From: Nicolas Pepin-Perreault Date: Mon, 11 Dec 2023 14:41:26 +0100 Subject: [PATCH 3/3] feat: add job push cluster terminate experiment --- .../job-push/cluster-terminate.json | 76 +++++++++++++++++++ .../camunda-cloud/manifest.yml | 4 + 2 files changed, 80 insertions(+) create mode 100644 go-chaos/internal/chaos-experiments/camunda-cloud/job-push/cluster-terminate.json diff --git a/go-chaos/internal/chaos-experiments/camunda-cloud/job-push/cluster-terminate.json b/go-chaos/internal/chaos-experiments/camunda-cloud/job-push/cluster-terminate.json new file mode 100644 index 000000000..ed7cf0937 --- /dev/null +++ b/go-chaos/internal/chaos-experiments/camunda-cloud/job-push/cluster-terminate.json @@ -0,0 +1,76 @@ +{ + "version": "0.1.0", + "title": "Job push cluster terminate experiment", + "description": "Job workers with streaming enabled should be fault-tolerant. The worker should receive and complete jobs even if all brokers crash.", + "contributions": { + "performance": "high", + "reliability": "high", + "availability": "high" + }, + "steady-state-hypothesis": { + "title": "Zeebe is alive", + "probes": [ + { + "name": "All pods should be ready", + "type": "probe", + "tolerance": 0, + "provider": { + "type": "process", + "path": "zbchaos", + "arguments": ["verify", "readiness"], + "timeout": 900 + } + }, + { + "name": "Can deploy process model", + "type": "probe", + "tolerance": 0, + "provider": { + "type": "process", + "path": "zbchaos", + "arguments": ["deploy", "process"], + "timeout": 900 + } + }, + { + "name": "Can deploy workers", + "type": "probe", + "tolerance": 0, + "provider": { + "type": "process", + "path": "zbchaos", + "arguments": ["deploy", "worker", "--pollingDelay", "86400000"], + "timeout": 900 + }, + "pauses": { + "after": 5 + } + }, + { + "name": "Should be able to create process instances", + "type": "probe", + "tolerance": 0, + "provider": { + "type": "process", + "path": 
"zbchaos", + "arguments": ["verify", "instance-creation", "--awaitResult"], + "timeout": 900 + } + } + ] + }, + "method": [ + { + "name": "Terminate all brokers", + "type": "probe", + "tolerance": 0, + "provider": { + "type": "process", + "path": "zbchaos", + "arguments": ["terminate", "broker", "--all"], + "timeout": 900 + } + } + ], + "rollbacks": [] +} diff --git a/go-chaos/internal/chaos-experiments/camunda-cloud/manifest.yml b/go-chaos/internal/chaos-experiments/camunda-cloud/manifest.yml index 4048c8c1f..5374a1a99 100644 --- a/go-chaos/internal/chaos-experiments/camunda-cloud/manifest.yml +++ b/go-chaos/internal/chaos-experiments/camunda-cloud/manifest.yml @@ -51,6 +51,10 @@ experiments: clusterPlans: - production-s minVersion: 8.4 + - path: job-push/cluster-terminate.json + clusterPlans: + - production-s + minVersion: 8.4 - path: worker-resilience/gateway-restart.json clusterPlans: - production-s