Skip to content

Commit

Permalink
* Remove host_health alert rule file
Browse files Browse the repository at this point in the history
* Inject generic alert rules via cos_agent
  • Loading branch information
MichaelThamm committed Jan 13, 2025
1 parent 20af835 commit 0b814d4
Show file tree
Hide file tree
Showing 2 changed files with 35 additions and 26 deletions.
36 changes: 35 additions & 1 deletion lib/charms/grafana_agent/v0/cos_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -252,7 +252,7 @@ class _MetricsEndpointDict(TypedDict):

# Library metadata constants.
# NOTE(review): presumably the charm-library identity/version fields read by
# the library tooling — confirm against that tooling's documentation.
LIBID = "dc15fa84cef84ce58155fb84f6c6213a"
LIBAPI = 0
# NOTE(review): LIBPATCH is assigned twice here (12, then 13). This text is a
# rendered diff, so these look like the removed and added lines of a patch
# bump; executed as written, the later assignment (13) is the one in effect.
LIBPATCH = 12
LIBPATCH = 13

# NOTE(review): presumably the Python packages this library requires — confirm.
PYDEPS = ["cosl", "pydantic"]

Expand All @@ -266,6 +266,39 @@ class _MetricsEndpointDict(TypedDict):
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Two-field record type with the fields `owner` and `name`.
SnapEndpoint = namedtuple("SnapEndpoint", ["owner", "name"])

# Generic alert rules defined in this module: a single rule group named
# "HostHealth" containing two alerts, both labelled severity "critical" and
# both with a "for" duration of "5m":
#   * HostDown           -- expression "up < 1"
#   * HostMetricsMissing -- expression "absent(up)"
# The "{{ ... }}" placeholders are kept verbatim as plain text in the strings.
GENERIC_ALERT_RULES_GROUP = {
    "groups": [
        {
            "name": "HostHealth",
            "rules": [
                {
                    "alert": "HostDown",
                    "expr": "up < 1",
                    "for": "5m",
                    "labels": {"severity": "critical"},
                    "annotations": {
                        "summary": "Host '{{ $labels.instance }}' is down.",
                        # Multi-line description: message, then value, then labels.
                        "description": (
                            "Host '{{ $labels.instance }}' is down, failed to scrape.\n"
                            "VALUE = {{ $value }}\n"
                            "LABELS = {{ $labels }}"
                        ),
                    },
                },
                {
                    "alert": "HostMetricsMissing",
                    "expr": "absent(up)",
                    "for": "5m",
                    "labels": {"severity": "critical"},
                    "annotations": {
                        "summary": (
                            "Metrics not received from host '{{ $labels.instance }}', "
                            "failed to remote write."
                        ),
                        # Multi-line description: message, then value, then labels.
                        "description": (
                            "Metrics not received from host '{{ $labels.instance }}', "
                            "failed to remote write.\n"
                            "VALUE = {{ $value }}\n"
                            "LABELS = {{ $labels }}"
                        ),
                    },
                },
            ],
        }
    ]
}

# Note: MutableMapping is imported from the typing module and not collections.abc
# because subscripting collections.abc.MutableMapping was added in python 3.9, but
Expand Down Expand Up @@ -726,6 +759,7 @@ def _metrics_alert_rules(self) -> Dict:
query_type="promql", topology=JujuTopology.from_charm(self._charm)
)
alert_rules.add_path(self._metrics_rules, recursive=self._recursive)
alert_rules.add(GENERIC_ALERT_RULES_GROUP, group_name_prefix=JujuTopology.from_charm(self._charm).identifier)
return alert_rules.as_dict()

@property
Expand Down
25 changes: 0 additions & 25 deletions src/prometheus_alert_rules/host_health.rules

This file was deleted.

0 comments on commit 0b814d4

Please sign in to comment.