diff --git a/docs/changelog.md b/docs/changelog.md index 10739029c..d5abeab22 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -1,5 +1,10 @@ # Changelog + +### 2024-11-15 + +- Catch ReconcileError and set appropriate message in unit status. + ### 2024-11-13 - Align the README with the one in https://github.com/canonical/is-charms-template-repo. diff --git a/pyproject.toml b/pyproject.toml index f4a49bd2a..8a430a2e7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,7 +25,7 @@ omit = [ ] [tool.coverage.report] -fail_under = 83 +fail_under = 84 show_missing = true [tool.pytest.ini_options] diff --git a/requirements.txt b/requirements.txt index 8fbcdedc8..e08303adf 100644 --- a/requirements.txt +++ b/requirements.txt @@ -12,4 +12,4 @@ cosl ==0.0.15 # juju 3.1.2.0 depends on pyyaml<=6.0 and >=5.1.2 PyYAML ==6.0.* pyOpenSSL==24.2.1 -github_runner_manager @ git+https://github.com/canonical/github-runner-manager.git@6f276fd0fc95ed3550005a0b394225d1b506f7dd +github_runner_manager @ git+https://github.com/canonical/github-runner-manager.git@cfb8f74b93d6f29acce36cfc87ccab6d7c19a344 diff --git a/src/charm.py b/src/charm.py index f3c6858a3..8a53c824a 100755 --- a/src/charm.py +++ b/src/charm.py @@ -7,7 +7,6 @@ # pylint: disable=too-many-lines """Charm for creating and managing GitHub self-hosted runner instances.""" - from utilities import bytes_with_unit_to_kib, execute_command, remove_residual_venv_dirs, retry # This is a workaround for https://bugs.launchpad.net/juju/+bug/2058335 @@ -30,6 +29,7 @@ import ops from charms.data_platform_libs.v0.data_interfaces import DatabaseRequires from charms.grafana_agent.v0.cos_agent import COSAgentProvider +from github_runner_manager.errors import ReconcileError from github_runner_manager.manager.cloud_runner_manager import ( GitHubRunnerConfig, SupportServiceConfig, @@ -114,6 +114,13 @@ RUNNER_MANAGER_USER = "runner-manager" RUNNER_MANAGER_GROUP = "runner-manager" +ACTIVE_STATUS_RECONCILIATION_FAILED_MSG = "Last reconciliation failed." +FAILED_TO_RECONCILE_RUNNERS_MSG = "Failed to reconcile runners" +FAILED_RECONCILE_ACTION_ERR_MSG = ( + "Failed to reconcile runners. Look at the juju logs for more information." +) + + logger = logging.getLogger(__name__) @@ -527,8 +534,7 @@ def _on_start(self, _: StartEvent) -> None: if not self._get_set_image_ready_status(): return runner_scaler = self._get_runner_scaler(state) - runner_scaler.reconcile(state.runner_config.virtual_machines) - self.unit.status = ActiveStatus() + self._reconcile_openstack_runners(runner_scaler, state.runner_config.virtual_machines) return runner_manager = self._get_runner_manager(state) @@ -540,14 +546,14 @@ def _on_start(self, _: StartEvent) -> None: self.unit.status = MaintenanceStatus("Starting runners") try: runner_manager.flush(LXDFlushMode.FLUSH_IDLE) - self._reconcile_runners( + self._reconcile_lxd_runners( runner_manager, state.runner_config.virtual_machines, state.runner_config.virtual_machine_resources, ) except RunnerError as err: logger.exception("Failed to start runners") - self.unit.status = MaintenanceStatus(f"Failed to start runners: {err}") + self.unit.status = ActiveStatus(f"Failed to start runners: {err}") return self.unit.status = ActiveStatus() @@ -623,7 +629,7 @@ def _on_upgrade_charm(self, _: UpgradeCharmEvent) -> None: runner_manager = self._get_runner_manager(state) logger.info("Flushing the runners...") runner_manager.flush(LXDFlushMode.FLUSH_BUSY_WAIT_REPO_CHECK) - self._reconcile_runners( + self._reconcile_lxd_runners( runner_manager, state.runner_config.virtual_machines, state.runner_config.virtual_machine_resources, @@ -668,9 +674,10 @@ def _on_config_changed(self, _: ConfigChangedEvent) -> None: # noqa: C901 if state.charm_config.token != self._stored.token: runner_scaler = self._get_runner_scaler(state) runner_scaler.flush(flush_mode=FlushMode.FLUSH_IDLE) - runner_scaler.reconcile(state.runner_config.virtual_machines) + self._reconcile_openstack_runners( + runner_scaler, state.runner_config.virtual_machines + ) # TODO: 2024-04-12: Flush on token changes. - self.unit.status = ActiveStatus() return self._refresh_firewall(state) @@ -679,7 +686,7 @@ def _on_config_changed(self, _: ConfigChangedEvent) -> None: # noqa: C901 if state.charm_config.token != self._stored.token: runner_manager.flush(LXDFlushMode.FORCE_FLUSH_WAIT_REPO_CHECK) self._stored.token = state.charm_config.token - self._reconcile_runners( + self._reconcile_lxd_runners( runner_manager, state.runner_config.virtual_machines, state.runner_config.virtual_machine_resources, @@ -768,8 +775,7 @@ def _trigger_reconciliation(self) -> None: if not self._get_set_image_ready_status(): return runner_scaler = self._get_runner_scaler(state) - runner_scaler.reconcile(state.runner_config.virtual_machines) - self.unit.status = ActiveStatus() + self._reconcile_openstack_runners(runner_scaler, state.runner_config.virtual_machines) return runner_manager = self._get_runner_manager(state) @@ -782,7 +788,7 @@ def _trigger_reconciliation(self) -> None: if all(not info.busy for info in runner_info): self._update_kernel(now=True) - self._reconcile_runners( + self._reconcile_lxd_runners( runner_manager, state.runner_config.virtual_machines, state.runner_config.virtual_machine_resources, @@ -859,7 +865,15 @@ def _on_reconcile_runners_action(self, event: ActionEvent) -> None: return runner_scaler = self._get_runner_scaler(state) - delta = runner_scaler.reconcile(state.runner_config.virtual_machines) + self.unit.status = MaintenanceStatus("Reconciling runners") + try: + delta = runner_scaler.reconcile(state.runner_config.virtual_machines) + except ReconcileError: + logger.exception(FAILED_TO_RECONCILE_RUNNERS_MSG) + self.unit.status = ActiveStatus(ACTIVE_STATUS_RECONCILIATION_FAILED_MSG) + event.fail(FAILED_RECONCILE_ACTION_ERR_MSG) + return + self.unit.status = ActiveStatus() event.set_results({"delta": {"virtual-machines": delta}}) return @@ -870,7 +884,7 @@ def _on_reconcile_runners_action(self, event: ActionEvent) -> None: runner_manager, state.charm_config.token, state.proxy_config ) - delta = self._reconcile_runners( + delta = self._reconcile_lxd_runners( runner_manager, state.runner_config.virtual_machines, state.runner_config.virtual_machine_resources, @@ -893,14 +907,22 @@ def _on_flush_runners_action(self, event: ActionEvent) -> None: runner_scaler = self._get_runner_scaler(state) flushed = runner_scaler.flush(flush_mode=FlushMode.FLUSH_IDLE) logger.info("Flushed %s runners", flushed) - delta = runner_scaler.reconcile(state.runner_config.virtual_machines) + self.unit.status = MaintenanceStatus("Reconciling runners") + try: + delta = runner_scaler.reconcile(state.runner_config.virtual_machines) + except ReconcileError: + logger.exception(FAILED_TO_RECONCILE_RUNNERS_MSG) + self.unit.status = ActiveStatus(ACTIVE_STATUS_RECONCILIATION_FAILED_MSG) + event.fail(FAILED_RECONCILE_ACTION_ERR_MSG) + return + self.unit.status = ActiveStatus() event.set_results({"delta": {"virtual-machines": delta}}) return runner_manager = self._get_runner_manager(state) runner_manager.flush(LXDFlushMode.FLUSH_BUSY_WAIT_REPO_CHECK) - delta = self._reconcile_runners( + delta = self._reconcile_lxd_runners( runner_manager, state.runner_config.virtual_machines, state.runner_config.virtual_machine_resources, @@ -946,10 +968,10 @@ def _on_stop(self, _: StopEvent) -> None: runner_manager = self._get_runner_manager(state) runner_manager.flush(LXDFlushMode.FLUSH_BUSY) - def _reconcile_runners( + def _reconcile_lxd_runners( self, runner_manager: LXDRunnerManager, num: int, resources: VirtualMachineResources ) -> Dict[str, Any]: - """Reconcile the current runners state and intended runner state. + """Reconcile the current runners state and intended runner state for LXD mode. Args: runner_manager: For querying and managing the runner state. @@ -975,6 +997,22 @@ def _reconcile_runners( self.unit.status = ActiveStatus() return {"delta": {"virtual-machines": delta_virtual_machines}} + def _reconcile_openstack_runners(self, runner_scaler: RunnerScaler, num: int) -> None: + """Reconcile the current runners state and intended runner state for OpenStack mode. + + Args: + runner_scaler: Scaler used to scale the amount of runners. + num: Target number of runners. + """ + self.unit.status = MaintenanceStatus("Reconciling runners") + try: + runner_scaler.reconcile(num) + except ReconcileError: + logger.exception(FAILED_TO_RECONCILE_RUNNERS_MSG) + self.unit.status = ActiveStatus(ACTIVE_STATUS_RECONCILIATION_FAILED_MSG) + else: + self.unit.status = ActiveStatus() + def _install_repo_policy_compliance(self, proxy_config: ProxyConfig) -> bool: """Install latest version of repo_policy_compliance service. @@ -1193,15 +1231,17 @@ def _on_debug_ssh_relation_changed(self, _: ops.RelationChangedEvent) -> None: if not self._get_set_image_ready_status(): return runner_scaler = self._get_runner_scaler(state) - # TODO: 2024-04-12: Should be flush idle. runner_scaler.flush() - runner_scaler.reconcile(state.runner_config.virtual_machines) + try: + runner_scaler.reconcile(state.runner_config.virtual_machines) + except ReconcileError: + logger.exception(FAILED_TO_RECONCILE_RUNNERS_MSG) return self._refresh_firewall(state) runner_manager = self._get_runner_manager(state) runner_manager.flush(LXDFlushMode.FLUSH_IDLE) - self._reconcile_runners( + self._reconcile_lxd_runners( runner_manager, state.runner_config.virtual_machines, state.runner_config.virtual_machine_resources, @@ -1240,9 +1280,7 @@ def _on_image_relation_changed(self, _: ops.RelationChangedEvent) -> None: runner_scaler = self._get_runner_scaler(state) runner_scaler.flush(flush_mode=FlushMode.FLUSH_IDLE) - runner_scaler.reconcile(state.runner_config.virtual_machines) - self.unit.status = ActiveStatus() - return + self._reconcile_openstack_runners(runner_scaler, state.runner_config.virtual_machines) def _get_set_image_ready_status(self) -> bool: """Check if image is ready for Openstack and charm status accordingly. diff --git a/tests/unit/test_charm.py b/tests/unit/test_charm.py index f2451686f..256576a8f 100644 --- a/tests/unit/test_charm.py +++ b/tests/unit/test_charm.py @@ -12,11 +12,19 @@ import pytest import yaml +from github_runner_manager.errors import ReconcileError +from github_runner_manager.manager.runner_scaler import RunnerScaler from github_runner_manager.types_.github import GitHubOrg, GitHubRepo, GitHubRunnerStatus from ops.model import ActiveStatus, BlockedStatus, MaintenanceStatus, StatusBase, WaitingStatus from ops.testing import Harness -from charm import GithubRunnerCharm, catch_action_errors, catch_charm_errors +from charm import ( + ACTIVE_STATUS_RECONCILIATION_FAILED_MSG, + FAILED_RECONCILE_ACTION_ERR_MSG, + GithubRunnerCharm, + catch_action_errors, + catch_charm_errors, +) from charm_state import ( GROUP_CONFIG_NAME, IMAGE_INTEGRATION_NAME, @@ -296,6 +304,27 @@ def test_on_flush_runners_action_fail(harness: Harness, runner_binary_path: Path ) +def test_on_flush_runners_reconcile_error_fail(harness: Harness): + """ + arrange: Set up charm with Openstack mode and ReconcileError. + act: Run flush runner action. + assert: Action fails with generic message and goes in ActiveStatus. + """ + state_mock = MagicMock() + state_mock.instance_type = InstanceType.OPENSTACK + harness.charm._setup_state = MagicMock(return_value=state_mock) + + runner_scaler_mock = MagicMock(spec=RunnerScaler) + runner_scaler_mock.reconcile.side_effect = ReconcileError("mock error") + harness.charm._get_runner_scaler = MagicMock(return_value=runner_scaler_mock) + + mock_event = MagicMock() + harness.charm._on_flush_runners_action(mock_event) + mock_event.fail.assert_called_with(FAILED_RECONCILE_ACTION_ERR_MSG) + assert harness.charm.unit.status.name == ActiveStatus.name + assert harness.charm.unit.status.message == ACTIVE_STATUS_RECONCILIATION_FAILED_MSG + + def test_on_flush_runners_action_success(harness: Harness, runner_binary_path: Path): """ arrange: Set up charm without runner binary downloaded. @@ -308,6 +337,61 @@ def test_on_flush_runners_action_success(harness: Harness, runner_binary_path: P mock_event.set_results.assert_called() +def test_on_reconcile_runners_action_reconcile_error_fail( + harness: Harness, monkeypatch: pytest.MonkeyPatch +): + """ + arrange: Set up charm with Openstack mode, ready image, and ReconcileError. + act: Run reconcile runners action. + assert: Action fails with generic message and goes in ActiveStatus + """ + state_mock = MagicMock() + state_mock.instance_type = InstanceType.OPENSTACK + harness.charm._setup_state = MagicMock(return_value=state_mock) + + runner_scaler_mock = MagicMock(spec=RunnerScaler) + runner_scaler_mock.reconcile.side_effect = ReconcileError("mock error") + harness.charm._get_runner_scaler = MagicMock(return_value=runner_scaler_mock) + monkeypatch.setattr( + OpenstackImage, + "from_charm", + MagicMock(return_value=OpenstackImage(id="test", tags=["test"])), + ) + + mock_event = MagicMock() + harness.charm._on_reconcile_runners_action(mock_event) + + mock_event.fail.assert_called_with(FAILED_RECONCILE_ACTION_ERR_MSG) + assert harness.charm.unit.status.name == ActiveStatus.name + assert harness.charm.unit.status.message == ACTIVE_STATUS_RECONCILIATION_FAILED_MSG + + +def test_on_reconcile_runners_reconcile_error(harness: Harness, monkeypatch: pytest.MonkeyPatch): + """ + arrange: Set up charm with Openstack mode, ready image, and ReconcileError. + act: Trigger reconcile_runners event. + assert: Unit goes into ActiveStatus with error message. + """ + state_mock = MagicMock() + state_mock.instance_type = InstanceType.OPENSTACK + harness.charm._setup_state = MagicMock(return_value=state_mock) + + runner_scaler_mock = MagicMock(spec=RunnerScaler) + runner_scaler_mock.reconcile.side_effect = ReconcileError("mock error") + harness.charm._get_runner_scaler = MagicMock(return_value=runner_scaler_mock) + monkeypatch.setattr( + OpenstackImage, + "from_charm", + MagicMock(return_value=OpenstackImage(id="test", tags=["test"])), + ) + + mock_event = MagicMock() + harness.charm._on_reconcile_runners(mock_event) + + assert harness.charm.unit.status.name == ActiveStatus.name + assert harness.charm.unit.status.message == ACTIVE_STATUS_RECONCILIATION_FAILED_MSG + + @pytest.mark.parametrize( "hook", [ @@ -653,11 +737,9 @@ def test_on_start_failure(self, run, wt, mkdir, rm): harness.update_config({PATH_CONFIG_NAME: "mockorg/repo", TOKEN_CONFIG_NAME: "mocktoken"}) harness.begin() - harness.charm._reconcile_runners = raise_runner_error + harness.charm._reconcile_lxd_runners = raise_runner_error harness.charm.on.start.emit() - assert harness.charm.unit.status == MaintenanceStatus( - "Failed to start runners: mock error" - ) + assert harness.charm.unit.status == ActiveStatus("Failed to start runners: mock error") @patch("charm.LXDRunnerManager") @patch("charm.RunnerScaler")