Skip to content

Commit

Permalink
Experiment: streaming job worker should be resilient to full cluster …
Browse files Browse the repository at this point in the history
…restart (brokers) (#454)

Experiment: streaming job worker should be resilient to full cluster restart (brokers)
  • Loading branch information
npepinpe authored Dec 11, 2023
2 parents 95e3c9e + 9891b49 commit 0f2935f
Show file tree
Hide file tree
Showing 5 changed files with 162 additions and 2 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
{
"version": "0.1.0",
"title": "Job push cluster restart experiment",
"description": "Job workers with streaming enabled should be fault-tolerant. The worker should receive and complete jobs even if all brokers restart.",
"contributions": {
"performance": "high",
"reliability": "high",
"availability": "high"
},
"steady-state-hypothesis": {
"title": "Zeebe is alive",
"probes": [
{
"name": "All pods should be ready",
"type": "probe",
"tolerance": 0,
"provider": {
"type": "process",
"path": "zbchaos",
"arguments": ["verify", "readiness"],
"timeout": 900
}
},
{
"name": "Can deploy process model",
"type": "probe",
"tolerance": 0,
"provider": {
"type": "process",
"path": "zbchaos",
"arguments": ["deploy", "process"],
"timeout": 900
}
},
{
"name": "Can deploy workers",
"type": "probe",
"tolerance": 0,
"provider": {
"type": "process",
"path": "zbchaos",
"arguments": ["deploy", "worker", "--pollingDelay", "86400000"],
"timeout": 900
},
"pauses": {
"after": 5
}
},
{
"name": "Should be able to create process instances",
"type": "probe",
"tolerance": 0,
"provider": {
"type": "process",
"path": "zbchaos",
"arguments": ["verify", "instance-creation", "--awaitResult"],
"timeout": 900
}
}
]
},
"method": [
{
"name": "Restart all brokers",
"type": "probe",
"tolerance": 0,
"provider": {
"type": "process",
"path": "zbchaos",
"arguments": ["restart", "broker", "--all"],
"timeout": 900
}
}
],
"rollbacks": []
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
{
"version": "0.1.0",
"title": "Job push cluster restart experiment",
"description": "Job workers with streaming enabled should be fault-tolerant. The worker should receive and complete jobs even if all brokers crash.",
"contributions": {
"performance": "high",
"reliability": "high",
"availability": "high"
},
"steady-state-hypothesis": {
"title": "Zeebe is alive",
"probes": [
{
"name": "All pods should be ready",
"type": "probe",
"tolerance": 0,
"provider": {
"type": "process",
"path": "zbchaos",
"arguments": ["verify", "readiness"],
"timeout": 900
}
},
{
"name": "Can deploy process model",
"type": "probe",
"tolerance": 0,
"provider": {
"type": "process",
"path": "zbchaos",
"arguments": ["deploy", "process"],
"timeout": 900
}
},
{
"name": "Can deploy workers",
"type": "probe",
"tolerance": 0,
"provider": {
"type": "process",
"path": "zbchaos",
"arguments": ["deploy", "worker", "--pollingDelay", "86400000"],
"timeout": 900
},
"pauses": {
"after": 5
}
},
{
"name": "Should be able to create process instances",
"type": "probe",
"tolerance": 0,
"provider": {
"type": "process",
"path": "zbchaos",
"arguments": ["verify", "instance-creation", "--awaitResult"],
"timeout": 900
}
}
]
},
"method": [
{
"name": "Terminate all brokers",
"type": "probe",
"tolerance": 0,
"provider": {
"type": "process",
"path": "zbchaos",
"arguments": ["terminate", "broker", "--all"],
"timeout": 900
}
}
],
"rollbacks": []
}
12 changes: 10 additions & 2 deletions go-chaos/internal/chaos-experiments/camunda-cloud/manifest.yml
Original file line number Diff line number Diff line change
Expand Up @@ -39,11 +39,19 @@ experiments:
- path: worker-restart/experiment.json
clusterPlans:
- production-s
- path: job-push-gateway/restart.json
- path: job-push/gateway-restart.json
clusterPlans:
- production-s
minVersion: 8.4
- path: job-push-gateway/terminate.json
- path: job-push/gateway-terminate.json
clusterPlans:
- production-s
minVersion: 8.4
- path: job-push/cluster-restart.json
clusterPlans:
- production-s
minVersion: 8.4
- path: job-push/cluster-terminate.json
clusterPlans:
- production-s
minVersion: 8.4
Expand Down

0 comments on commit 0f2935f

Please sign in to comment.