diff --git a/src/charm.py b/src/charm.py
index f430e17761..b83b6f8a1f 100755
--- a/src/charm.py
+++ b/src/charm.py
@@ -354,7 +354,12 @@ def _on_leader_elected(self, event: LeaderElectedEvent) -> None:
             self.set_secret("app", REWIND_PASSWORD_KEY, new_password())
 
         # Create resources and add labels needed for replication.
-        self._create_resources()
+        try:
+            self._create_resources()
+        except ApiError:
+            logger.exception("failed to create k8s resources")
+            self.unit.status = BlockedStatus("failed to create k8s resources")
+            return
 
         # Add this unit to the list of cluster members
         # (the cluster should start with only this member).
@@ -436,25 +441,8 @@ def _on_postgresql_pebble_ready(self, event: WorkloadEvent) -> None:
             event.defer()
             return
 
-        if self.unit.is_leader():
-            # Add the labels needed for replication in this pod.
-            # This also enables the member as part of the cluster.
-            try:
-                self._patch_pod_labels(self._unit)
-            except ApiError as e:
-                logger.error("failed to patch pod")
-                self.unit.status = BlockedStatus(f"failed to patch pod with error {e}")
-                return
-
-            if not self._patroni.primary_endpoint_ready:
-                logger.debug(
-                    "Deferring on_postgresql_pebble_ready: Waiting for primary endpoint to be ready"
-                )
-                self.unit.status = WaitingStatus("awaiting for primary endpoint to be ready")
-                event.defer()
-                return
-
-            self._initialize_cluster()
+        if self.unit.is_leader() and not self._initialize_cluster(event):
+            return
 
         # Update the archive command and replication configurations.
         self.update_config()
@@ -462,7 +450,32 @@ def _on_postgresql_pebble_ready(self, event: WorkloadEvent) -> None:
         # All is well, set an ActiveStatus.
         self.unit.status = ActiveStatus()
 
-    def _initialize_cluster(self) -> None:
+    def _initialize_cluster(self, event: WorkloadEvent) -> bool:
+        # Add the labels needed for replication in this pod.
+        # This also enables the member as part of the cluster.
+        try:
+            self._patch_pod_labels(self._unit)
+        except ApiError as e:
+            logger.error("failed to patch pod")
+            self.unit.status = BlockedStatus(f"failed to patch pod with error {e}")
+            return False
+
+        # Create resources and add labels needed for replication.
+        try:
+            self._create_resources()
+        except ApiError:
+            logger.exception("failed to create k8s resources")
+            self.unit.status = BlockedStatus("failed to create k8s resources")
+            return False
+
+        if not self._patroni.primary_endpoint_ready:
+            logger.debug(
+                "Deferring on_postgresql_pebble_ready: Waiting for primary endpoint to be ready"
+            )
+            self.unit.status = WaitingStatus("awaiting for primary endpoint to be ready")
+            event.defer()
+            return False
+
         # Create the backup user.
         if BACKUP_USER not in self.postgresql.list_users():
             self.postgresql.create_user(BACKUP_USER, new_password(), admin=True)
@@ -470,6 +483,8 @@ def _initialize_cluster(self) -> None:
         # Mark the cluster as initialised.
         self._peers.data[self.app]["cluster_initialised"] = "True"
 
+        return True
+
     @property
     def is_blocked(self) -> bool:
         """Returns whether the unit is in a blocked state."""
@@ -529,14 +544,18 @@ def _create_resources(self) -> None:
                 client.replace(resource)
             else:
                 logger.error("failed to create resource: %s.", str(resource.to_dict()))
-                self.unit.status = BlockedStatus(f"failed to create services {e}")
-                return
+                raise e
 
     @property
     def _has_blocked_status(self) -> bool:
         """Returns whether the unit is in a blocked state."""
         return isinstance(self.unit.status, BlockedStatus)
 
+    @property
+    def _has_waiting_status(self) -> bool:
+        """Returns whether the unit is in a waiting state."""
+        return isinstance(self.unit.status, WaitingStatus)
+
     def _on_get_password(self, event: ActionEvent) -> None:
         """Returns the password for a user as an action response.
 
@@ -658,8 +677,8 @@ def _on_update_status(self, _) -> None:
             logger.debug("on_update_status early exit: Cannot connect to container")
             return
 
-        if self._has_blocked_status:
-            logger.debug("on_update_status early exit: Unit is in Blocked status")
+        if self._has_blocked_status or self._has_waiting_status:
+            logger.debug("on_update_status early exit: Unit is in Blocked/Waiting status")
             return
 
         services = container.pebble.get_services(names=[self._postgresql_service])
diff --git a/tests/unit/test_charm.py b/tests/unit/test_charm.py
index d3ae9c96b8..1a96af563f 100644
--- a/tests/unit/test_charm.py
+++ b/tests/unit/test_charm.py
@@ -99,6 +99,9 @@ def test_on_leader_elected(self, _, __, _render_postgresql_conf_file, ___):
     @patch("charm.Patroni.primary_endpoint_ready", new_callable=PropertyMock)
     @patch("charm.PostgresqlOperatorCharm.update_config")
     @patch("charm.PostgresqlOperatorCharm.postgresql")
+    @patch(
+        "charm.PostgresqlOperatorCharm._create_resources", side_effect=[None, _FakeApiError, None]
+    )
     @patch_network_get(private_address="1.1.1.1")
     @patch("charm.Patroni.member_started")
     @patch("charm.PostgresqlOperatorCharm.push_tls_files_to_workload")
@@ -112,6 +115,7 @@ def test_on_postgresql_pebble_ready(
         __,
         _push_tls_files_to_workload,
         _member_started,
+        _create_resources,
         _postgresql,
         ___,
         _primary_endpoint_ready,
@@ -134,6 +138,10 @@ def test_on_postgresql_pebble_ready(
         _create_pgdata.assert_called_once()
         self.assertTrue(isinstance(self.harness.model.unit.status, WaitingStatus))
 
+        # Check for a Blocked status when a failure happens.
+        self.harness.container_pebble_ready(self._postgresql_container)
+        self.assertTrue(isinstance(self.harness.model.unit.status, BlockedStatus))
+
         # Check for the Active status.
         _push_tls_files_to_workload.reset_mock()
         self.harness.container_pebble_ready(self._postgresql_container)