From ad1cdfe80ea4f6a1478a7a4cb65029a00cf2922e Mon Sep 17 00:00:00 2001 From: Leon <82407168+sed-i@users.noreply.github.com> Date: Fri, 20 May 2022 19:14:55 +0000 Subject: [PATCH] Add tests covering multi-unit prometheus deployments (#274) * Delete repeating tests * Add upgrade, rescale tests * Add tests for rescaling while upgrading * Verify timeseries continuity Co-authored-by: Ryan Barry --- .github/workflows/release-edge.yaml | 2 +- metadata.yaml | 2 +- tests/integration/helpers.py | 37 +- .../prometheus-tester/src/charm.py | 22 +- tests/integration/test_alternative_images.py | 33 -- tests/integration/test_charm.py | 12 +- .../test_prometheus_scrape_multiunit.py | 385 ++++++++++++++++++ tests/integration/test_prometheus_tester.py | 30 -- tests/integration/workload.py | 98 ++++- tox.ini | 11 +- 10 files changed, 546 insertions(+), 86 deletions(-) delete mode 100644 tests/integration/test_alternative_images.py create mode 100644 tests/integration/test_prometheus_scrape_multiunit.py delete mode 100644 tests/integration/test_prometheus_tester.py diff --git a/.github/workflows/release-edge.yaml b/.github/workflows/release-edge.yaml index 4e2090ff..3d6a62f9 100644 --- a/.github/workflows/release-edge.yaml +++ b/.github/workflows/release-edge.yaml @@ -13,7 +13,7 @@ jobs: - name: Checkout uses: actions/checkout@v2 with: - fetch-depth: 0 + fetch-depth: 0 - name: Check libs uses: canonical/charming-actions/check-libraries@1.0.1-rc with: diff --git a/metadata.yaml b/metadata.yaml index 09896d60..bbe3d0a5 100644 --- a/metadata.yaml +++ b/metadata.yaml @@ -10,7 +10,7 @@ description: | supports aggregating high dimensional data and exposes a powerful query language PromQL. This charm deploys and operates Prometheus on Kubernetes clusters. Prometheus can raise alerts through a relation - with the Altermanager charm. Alerting rules for Prometheus need to + with the Alertmanager charm. 
Alerting rules for Prometheus need to be provided through a relation with the application that requires alerting. Prometheus provides its own dashboard for data visualization but a richer visualization interface may be obtained diff --git a/tests/integration/helpers.py b/tests/integration/helpers.py index 8a0a420a..dd51ca68 100644 --- a/tests/integration/helpers.py +++ b/tests/integration/helpers.py @@ -4,6 +4,7 @@ import logging from pathlib import Path +from typing import List import yaml from pytest_operator.plugin import OpsTest @@ -47,6 +48,22 @@ async def check_prometheus_is_ready(ops_test: OpsTest, app_name: str, unit_num: assert is_ready +async def get_head_stats(ops_test: OpsTest, app_name: str, unit_num: int) -> dict: + """Get prometheus head stats. + + Args: + ops_test: pytest-operator plugin + app_name: string name of Prometheus application + unit_num: integer number of a Prometheus juju unit + + Returns: + A dict of headStats. + """ + host = await unit_address(ops_test, app_name, unit_num) + prometheus = Prometheus(host=host) + return await prometheus.tsdb_head_stats() + + async def get_prometheus_config(ops_test: OpsTest, app_name: str, unit_num: int) -> str: """Fetch Prometheus configuration. @@ -64,6 +81,25 @@ async def get_prometheus_config(ops_test: OpsTest, app_name: str, unit_num: int) return config +async def get_prometheus_active_targets( + ops_test: OpsTest, app_name: str, unit_num: int +) -> List[dict]: + """Fetch Prometheus active scrape targets. + + Args: + ops_test: pytest-operator plugin + app_name: string name of Prometheus application + unit_num: integer number of a Prometheus juju unit + + Returns: + A list of active scrape target dicts.
+ """ + host = await unit_address(ops_test, app_name, unit_num) + prometheus = Prometheus(host=host) + targets = await prometheus.active_targets() + return targets + + async def run_promql(ops_test: OpsTest, promql_query: str, app_name: str, unit_num: int = 0): """Run a PromQL query in Prometheus. @@ -207,7 +243,6 @@ def remove_tester_alert_rule_file(name): """Remove an alert rule file from Prometheus Tester. Args: - rule: a string containing Prometheus alert rule in YAML format. name: a string name of alert rule file """ rules_path = Path(TESTER_ALERT_RULES_PATH).joinpath(name) diff --git a/tests/integration/prometheus-tester/src/charm.py b/tests/integration/prometheus-tester/src/charm.py index 3acf43e2..8cb86ba8 100755 --- a/tests/integration/prometheus-tester/src/charm.py +++ b/tests/integration/prometheus-tester/src/charm.py @@ -11,7 +11,7 @@ from ops.charm import CharmBase from ops.main import main from ops.model import ActiveStatus, BlockedStatus -from ops.pebble import Layer +from ops.pebble import ChangeError, ExecError, Layer logger = logging.getLogger(__name__) @@ -98,8 +98,24 @@ def _install_prometheus_client(self): return process = container.exec([self._pip_path, "install", "prometheus_client"]) - process.wait() - logger.debug("Installed prometheus client") + try: + _, stderr = process.wait_output() + logger.debug("Installed prometheus client") + if stderr: + logger.warning(stderr) + return + + except ExecError as e: + logger.error( + "Failed to install prometheus client: exited with code %d. 
Stderr:", e.exit_code + ) + for line in e.stderr.splitlines(): + logger.error(" %s", line) + self.unit.status = BlockedStatus("Failed to install prometheus client (see debug-log)") + + except ChangeError as e: + logger.error("Failed to install prometheus client: %s", str(e)) + self.unit.status = BlockedStatus("Failed to install prometheus client (see debug-log)") def _metrics_exporter(self): """Generate the metrics exporter script.""" diff --git a/tests/integration/test_alternative_images.py b/tests/integration/test_alternative_images.py deleted file mode 100644 index e2c6c284..00000000 --- a/tests/integration/test_alternative_images.py +++ /dev/null @@ -1,33 +0,0 @@ -#!/usr/bin/env python3 -# Copyright 2021 Canonical Ltd. -# See LICENSE file for licensing details. - -import pytest -from helpers import check_prometheus_is_ready, oci_image - -tester_resources = { - "prometheus-tester-image": oci_image( - "./tests/integration/prometheus-tester/metadata.yaml", "prometheus-tester-image" - ) -} -prometheus_resources = {"prometheus-image": oci_image("./metadata.yaml", "prometheus-image")} - - -@pytest.mark.abort_on_fail -async def test_build_and_deploy_with_alternative_images(ops_test, prometheus_charm): - """Test that the Prometheus charm can be deployed successfully.""" - app_name = "prometheus-ubuntu" - - await ops_test.model.deploy( - prometheus_charm, resources=prometheus_resources, application_name=app_name - ) - await ops_test.model.wait_for_idle(apps=[app_name], status="active") - await ops_test.model.block_until(lambda: len(ops_test.model.applications[app_name].units) > 0) - - assert ops_test.model.applications[app_name].units[0].workload_status == "active" - - await check_prometheus_is_ready(ops_test, app_name, 0) - - await ops_test.model.applications[app_name].remove() - await ops_test.model.block_until(lambda: app_name not in ops_test.model.applications) - await ops_test.model.reset() diff --git a/tests/integration/test_charm.py 
b/tests/integration/test_charm.py index f564c6dd..40b244b9 100644 --- a/tests/integration/test_charm.py +++ b/tests/integration/test_charm.py @@ -47,17 +47,7 @@ async def test_prometheus_scrape_relation_with_prometheus_tester( ), ) - await ops_test.model.wait_for_idle(apps=app_names, status="active") - - # TODO: Should not be needed. - # Drop once https://github.com/juju/python-libjuju/issues/574 is resolved - # - SA 2021-11-23 - await ops_test.model.block_until( - lambda: ( - len(ops_test.model.applications[prometheus_app_name].units) > 0 - and len(ops_test.model.applications[tester_app_name].units) > 0 - ) - ) + await ops_test.model.wait_for_idle(apps=app_names, status="active", wait_for_units=1) assert initial_workload_is_ready(ops_test, app_names) await check_prometheus_is_ready(ops_test, prometheus_app_name, 0) diff --git a/tests/integration/test_prometheus_scrape_multiunit.py b/tests/integration/test_prometheus_scrape_multiunit.py new file mode 100644 index 00000000..93b13b61 --- /dev/null +++ b/tests/integration/test_prometheus_scrape_multiunit.py @@ -0,0 +1,385 @@ +#!/usr/bin/env python3 +# Copyright 2021 Canonical Ltd. +# See LICENSE file for licensing details. + +"""This test module tests the prometheus_scrape interface with multiple-to-multiple units related. + +It tests scaling up/down both sides of the relation, and upgrading. + +1. Deploy several units of prometheus and several units of a "provider" charm, and relate them. +2. Confirm all units of prometheus have the correct and same targets and rules. +3. Upgrade prometheus. +4. Scale prometheus up and down. +5. Scale the "provider" charm up and down.
+""" + +import asyncio +import logging + +import pytest +import yaml +from deepdiff import DeepDiff +from helpers import ( + check_prometheus_is_ready, + get_prometheus_active_targets, + get_prometheus_config, + get_prometheus_rules, + oci_image, + run_promql, +) +from pytest_operator.plugin import OpsTest + +logger = logging.getLogger(__name__) + +prometheus_app_name = "prometheus" +# prometheus_resources = {"prometheus-image": oci_image("./metadata.yaml", "prometheus-image")} +prometheus_resources = {"prometheus-image": "prom/prometheus:v2.35.0"} +tester_app_name = "tester" +tester_resources = { + "prometheus-tester-image": oci_image( + "./tests/integration/prometheus-tester/metadata.yaml", + "prometheus-tester-image", + ) +} +num_units = 2 # Using the same number of units for both prometheus and the tester + +# The period of time required to be idle before `wait_for_idle` returns is set to 90 sec because +# unit upgrades were observed to take place 40-70 seconds apart. +idle_period = 90 + + +async def test_setup_env(ops_test: OpsTest): + await ops_test.model.set_config( + {"logging-config": "=WARNING; unit=DEBUG", "update-status-hook-interval": "60m"} + ) + + +@pytest.mark.abort_on_fail +async def test_prometheus_scrape_relation_with_prometheus_tester( + ops_test: OpsTest, prometheus_charm, prometheus_tester_charm +): + """Relate several units of prometheus and several units of the tester charm. + + - Deploy several units of prometheus and several units of a "provider" charm, and relate them. + - Confirm all units of prometheus have the correct and same targets and rules. 
+ """ + app_names = [prometheus_app_name, tester_app_name] + + # GIVEN prometheus and the tester charm are deployed with two units each + + await asyncio.gather( + ops_test.model.deploy( + prometheus_charm, + resources=prometheus_resources, + application_name=prometheus_app_name, + num_units=num_units, + ), + ops_test.model.deploy( + prometheus_tester_charm, + resources=tester_resources, + application_name=tester_app_name, + num_units=num_units, + ), + ) + + await ops_test.model.wait_for_idle(apps=app_names, status="active", wait_for_units=num_units) + await asyncio.gather( + *[check_prometheus_is_ready(ops_test, prometheus_app_name, u) for u in range(num_units)] + ) + + # WHEN prometheus is not related to anything + # THEN all prometheus units should have only one scrape target (self-scraping) + for unit_num in range(num_units): + targets = await get_prometheus_active_targets(ops_test, prometheus_app_name, unit_num) + assert len(targets) == 1 + self_scrape = next(iter(targets)) + assert self_scrape["labels"]["job"] == "prometheus" + assert self_scrape["labels"]["instance"] == "localhost:9090" + + # WHEN prometheus is related to the tester + await ops_test.model.add_relation(prometheus_app_name, tester_app_name) + await ops_test.model.wait_for_idle(apps=app_names, status="active") + + # THEN all prometheus units should have all tester units as targets (as well as self-scraping) + # `targets_by_unit` is a List[List[dict]]: every unit has a List[dict] targets. + targets_by_unit = await asyncio.gather( + *[ + get_prometheus_active_targets(ops_test, prometheus_app_name, u) + for u in range(num_units) + ] + ) + assert all(len(targets) == num_units + 1 for targets in targets_by_unit) + + # AND all prometheus units have the exact same targets + # Only comparing the `labels` because comparing the entire `targets` dict would be cumbersome: + # would need to pop 'lastScrape', 'lastScrapeDuration', whose values may differ across units. 
+ labels = [[{"labels": d["labels"]} for d in unit_targets] for unit_targets in targets_by_unit] + for u in range(1, len(targets_by_unit)): + assert DeepDiff(labels[0], labels[u], ignore_order=True) == {} + # Could use `set`, but that would produce unhelpful error messages. + # assert len(set(map(lambda x: json.dumps(x, sort_keys=True), targets_by_unit))) == 1 + + # AND all prometheus units have the exact same config + config_by_unit = await asyncio.gather( + *[get_prometheus_config(ops_test, prometheus_app_name, u) for u in range(num_units)] + ) + # Convert the yaml strings into dicts + config_by_unit = list(map(yaml.safe_load, config_by_unit)) + + # assert all(config_by_unit[0] == config_by_unit[u] for u in range(1, num_units)) + for u in range(1, num_units): + # assert config_by_unit[0] == config_by_unit[u] + # Exclude 'static_configs.targets', which are expected to differ (different IP address per + # unit) + assert ( + DeepDiff( + config_by_unit[0], + config_by_unit[u], + ignore_order=True, + exclude_regex_paths=r"\['static_configs'\]\['targets'\]|\['labels'\]\['juju_unit'\]", + ) + == {} + ) + + # AND all prometheus units have the exact same rules + rules_by_unit = await asyncio.gather( + *[get_prometheus_rules(ops_test, prometheus_app_name, u) for u in range(num_units)] + ) + for u in range(1, len(rules_by_unit)): + # Some fields will most likely differ, such as "evaluationTime" and "lastEvaluation". 
+ assert ( + DeepDiff( + rules_by_unit[0], + rules_by_unit[u], + ignore_order=True, + exclude_regex_paths=r"evaluationTime|lastEvaluation|activeAt", + ) + == {} + ) + + +@pytest.mark.abort_on_fail +async def test_upgrade_prometheus(ops_test: OpsTest, prometheus_charm): + """Upgrade prometheus and confirm all is still green (see also test_upgrade_charm.py).""" + # GIVEN an existing "up" timeseries + query = 'count_over_time(up{instance="localhost:9090",job="prometheus"}[1y])' + up_before = await asyncio.gather( + *[run_promql(ops_test, query, prometheus_app_name, u) for u in range(num_units)] + ) + # Each response looks like this: + # [ + # { + # "metric":{"instance":"localhost:9090","job":"prometheus"}, + # "value":[1652985131.383,"711"] + # } + # ] + # Extract the count value and convert it to int + up_before = [int(next(iter(response))["value"][1]) for response in up_before] + # Sanity check: make sure it's not empty + assert len(up_before) > 0 + assert all(up_before) + + # WHEN prometheus is upgraded + await ops_test.model.applications[prometheus_app_name].refresh( + path=prometheus_charm, resources=prometheus_resources + ) + + # THEN nothing breaks + await ops_test.model.wait_for_idle(status="active", idle_period=idle_period, timeout=300) + await asyncio.gather( + *[check_prometheus_is_ready(ops_test, prometheus_app_name, u) for u in range(num_units)] + ) + + # AND series continuity is maintained + up_after = await asyncio.gather( + *[run_promql(ops_test, query, prometheus_app_name, u) for u in range(num_units)] + ) + up_after = [int(next(iter(response))["value"][1]) for response in up_after] + # The count after an upgrade must be greater than or equal to the count before the upgrade, for + # every prometheus unit (units start at different times so the count across units may differ). 
+ assert all([up_before[i] <= up_after[i] for i in range(num_units)]) + + +@pytest.mark.abort_on_fail +async def test_rescale_prometheus(ops_test: OpsTest): + # WHEN prometheus is scaled up + num_additional_units = 1 + await ops_test.model.applications[prometheus_app_name].scale(scale_change=num_additional_units) + new_num_units = num_units + num_additional_units + + # THEN nothing breaks + await ops_test.model.wait_for_idle( + apps=[prometheus_app_name], + status="active", + timeout=120, + wait_for_exact_units=new_num_units, + ) + await ops_test.model.wait_for_idle(status="active") + await asyncio.gather( + *[ + check_prometheus_is_ready(ops_test, prometheus_app_name, u) + for u in range(new_num_units) + ] + ) + + # WHEN prometheus is scaled back down + await ops_test.model.applications[prometheus_app_name].scale( + scale_change=-num_additional_units + ) + + # THEN nothing breaks + await ops_test.model.wait_for_idle( + apps=[prometheus_app_name], status="active", timeout=120, wait_for_exact_units=num_units + ) + await ops_test.model.wait_for_idle(status="active") + await asyncio.gather( + *[check_prometheus_is_ready(ops_test, prometheus_app_name, u) for u in range(num_units)] + ) + + +@pytest.mark.abort_on_fail +async def test_rescale_tester(ops_test: OpsTest): + # WHEN tester is scaled up + num_additional_units = 1 + await ops_test.model.applications[tester_app_name].scale(scale_change=num_additional_units) + new_num_units = num_units + num_additional_units + + # THEN nothing breaks + await ops_test.model.wait_for_idle( + apps=[tester_app_name], + status="active", + timeout=120, + wait_for_exact_units=new_num_units, + ) + await ops_test.model.wait_for_idle(status="active") + await asyncio.gather( + *[check_prometheus_is_ready(ops_test, prometheus_app_name, u) for u in range(num_units)] + ) + + # WHEN tester is scaled back down + await ops_test.model.applications[tester_app_name].scale(scale_change=-num_additional_units) + + # THEN nothing breaks + await 
ops_test.model.wait_for_idle( + apps=[tester_app_name], status="active", timeout=120, wait_for_exact_units=num_units + ) + await ops_test.model.wait_for_idle(status="active") + await asyncio.gather( + *[check_prometheus_is_ready(ops_test, prometheus_app_name, u) for u in range(num_units)] + ) + + +@pytest.mark.abort_on_fail +async def test_upgrade_prometheus_while_rescaling_tester(ops_test: OpsTest, prometheus_charm): + """Upgrade prometheus and rescale tester at the same time (without waiting for idle).""" + # WHEN prometheus is upgraded at the same time that the tester is scaled up + num_additional_units = 1 + + logger.info("Upgrading prometheus and scaling-up tester at the same time...") + await asyncio.gather( + ops_test.model.applications[prometheus_app_name].refresh( + path=prometheus_charm, resources=prometheus_resources + ), + ops_test.model.applications[tester_app_name].scale(scale_change=num_additional_units), + ) + new_num_units = num_units + num_additional_units + + # AND tester becomes active/idle after scale-up + logger.info("Waiting for tester to become active/idle...") + await ops_test.model.wait_for_idle( + apps=[tester_app_name], + status="active", + timeout=300, + wait_for_exact_units=new_num_units, + ) + + # AND all apps become idle after prometheus upgrade + logger.info("Waiting for all apps to become active/idle...") + await ops_test.model.wait_for_idle(status="active", idle_period=idle_period, timeout=300) + + # THEN nothing breaks + await asyncio.gather( + *[check_prometheus_is_ready(ops_test, prometheus_app_name, u) for u in range(num_units)] + ) + + # WHEN prometheus is upgraded at the same time that the tester is scaled back down + logger.info("Upgrading prometheus and scaling-down tester at the same time...") + await asyncio.gather( + ops_test.model.applications[prometheus_app_name].refresh( + path=prometheus_charm, resources=prometheus_resources + ), + 
ops_test.model.applications[tester_app_name].scale(scale_change=-num_additional_units), + ) + + # AND tester becomes active/idle after scale-down + logger.info("Waiting for tester to become active/idle...") + await ops_test.model.wait_for_idle( + apps=[tester_app_name], status="active", timeout=300, wait_for_exact_units=num_units + ) + + # AND all apps become idle after prometheus upgrade + logger.info("Waiting for all apps to become active/idle...") + await ops_test.model.wait_for_idle(status="active", idle_period=idle_period, timeout=300) + + # THEN nothing breaks + await asyncio.gather( + *[check_prometheus_is_ready(ops_test, prometheus_app_name, u) for u in range(num_units)] + ) + + +@pytest.mark.abort_on_fail +async def test_rescale_prometheus_while_upgrading_tester( + ops_test: OpsTest, prometheus_tester_charm +): + # WHEN prometheus is scaled up at the same time the tester is upgraded + num_additional_units = 1 + await asyncio.gather( + ops_test.model.applications[tester_app_name].refresh( + path=prometheus_tester_charm, resources=tester_resources + ), + ops_test.model.applications[prometheus_app_name].scale(scale_change=num_additional_units), + ) + new_num_units = num_units + num_additional_units + + # AND prometheus becomes active/idle after scale-up + await ops_test.model.wait_for_idle( + apps=[prometheus_app_name], + status="active", + timeout=300, + wait_for_exact_units=new_num_units, + ) + + # AND all apps become idle after tester upgrade + await ops_test.model.wait_for_idle(status="active", idle_period=idle_period, timeout=300) + + # THEN nothing breaks + await asyncio.gather( + *[ + check_prometheus_is_ready(ops_test, prometheus_app_name, u) + for u in range(new_num_units) + ] + ) + + # WHEN prometheus is scaled back down at the same time the tester is upgraded + await asyncio.gather( + ops_test.model.applications[tester_app_name].refresh( + path=prometheus_tester_charm, resources=tester_resources + ), + 
ops_test.model.applications[prometheus_app_name].scale(scale_change=-num_additional_units), + ) + + # AND prometheus becomes active/idle after scale-down + await ops_test.model.wait_for_idle( + apps=[prometheus_app_name], + status="active", + timeout=300, + wait_for_exact_units=num_units, + ) + + # AND all apps become idle after tester upgrade + await ops_test.model.wait_for_idle(status="active", idle_period=idle_period, timeout=300) + + # THEN nothing breaks + await asyncio.gather( + *[check_prometheus_is_ready(ops_test, prometheus_app_name, u) for u in range(num_units)] + ) diff --git a/tests/integration/test_prometheus_tester.py b/tests/integration/test_prometheus_tester.py deleted file mode 100644 index c6ebbf30..00000000 --- a/tests/integration/test_prometheus_tester.py +++ /dev/null @@ -1,30 +0,0 @@ -#!/usr/bin/env python3 -# Copyright 2021 Canonical Ltd. -# See LICENSE file for licensing details. - -import pytest -from helpers import oci_image - -tester_resources = { - "prometheus-tester-image": oci_image( - "./tests/integration/prometheus-tester/metadata.yaml", "prometheus-tester-image" - ) -} - - -@pytest.mark.abort_on_fail -async def test_build_and_deploy_prometheus_tester(ops_test, prometheus_tester_charm): - """Test that Prometheus tester charm can be deployed successfully.""" - app_name = "prometheus-tester" - - await ops_test.model.deploy( - prometheus_tester_charm, resources=tester_resources, application_name=app_name - ) - await ops_test.model.wait_for_idle(apps=[app_name], status="active") - await ops_test.model.block_until(lambda: len(ops_test.model.applications[app_name].units) > 0) - - assert ops_test.model.applications[app_name].units[0].workload_status == "active" - - await ops_test.model.applications[app_name].remove() - await ops_test.model.block_until(lambda: app_name not in ops_test.model.applications) - await ops_test.model.reset() diff --git a/tests/integration/workload.py b/tests/integration/workload.py index 3473df5d..cbb1e968 100644 ---
a/tests/integration/workload.py +++ b/tests/integration/workload.py @@ -23,6 +23,10 @@ def __init__(self, host="localhost", port=9090): """ self.base_url = f"http://{host}:{port}" + # Set a timeout of 5 seconds - should be sufficient for all the checks here. + # The default (5 min) prolongs itests unnecessarily. + self.timeout = aiohttp.ClientTimeout(total=5) + async def is_ready(self) -> bool: """Send a GET request to check readiness. @@ -30,7 +34,8 @@ async def is_ready(self) -> bool: True if Prometheus is ready (returned 200 OK); False otherwise. """ url = f"{self.base_url}/-/ready" - async with aiohttp.ClientSession() as session: + + async with aiohttp.ClientSession(timeout=self.timeout) as session: async with session.get(url) as response: return response.status == 200 @@ -41,6 +46,28 @@ async def config(self) -> str: YAML config in string format or empty string """ url = f"{self.base_url}/api/v1/status/config" + # Response looks like this: + # { + # "status": "success", + # "data": { + # "yaml": "global:\n + # scrape_interval: 1m\n + # scrape_timeout: 10s\n + # evaluation_interval: 1m\n + # rule_files:\n + # - /etc/prometheus/rules/juju_*.rules\n + # scrape_configs:\n + # - job_name: prometheus\n + # honor_timestamps: true\n + # scrape_interval: 5s\n + # scrape_timeout: 5s\n + # metrics_path: /metrics\n + # scheme: http\n + # static_configs:\n + # - targets:\n + # - localhost:9090\n" + # } + # } async with aiohttp.ClientSession() as session: async with session.get(url) as response: result = await response.json() @@ -130,6 +157,75 @@ async def alerts(self) -> List[dict]: # } return result["data"]["alerts"] if result["status"] == "success" else [] + async def active_targets(self) -> List[dict]: + """Send a GET request to get active scrape targets. + + Returns: + A list of targets.
+ """ + url = f"{self.base_url}/api/v1/targets" + async with aiohttp.ClientSession(timeout=self.timeout) as session: + async with session.get(url) as response: + result = await response.json() + # response looks like this: + # + # { + # "status": "success", + # "data": { + # "activeTargets": [ + # { + # "discoveredLabels": { + # "__address__": "localhost:9090", + # "__metrics_path__": "/metrics", + # "__scheme__": "http", + # "job": "prometheus" + # }, + # "labels": { + # "instance": "localhost:9090", + # "job": "prometheus" + # }, + # "scrapePool": "prometheus", + # "scrapeUrl": "http://localhost:9090/metrics", + # "globalUrl": "http://prom-0....local:9090/metrics", + # "lastError": "", + # "lastScrape": "2022-05-12T16:54:19.019386006Z", + # "lastScrapeDuration": 0.003985463, + # "health": "up" + # } + # ], + # "droppedTargets": [] + # } + # } + return result["data"]["activeTargets"] if result["status"] == "success" else [] + + async def tsdb_head_stats(self) -> dict: + """Send a GET request to get the TSDB headStats. + + Returns: + The headStats dict. + """ + url = f"{self.base_url}/api/v1/status/tsdb" + async with aiohttp.ClientSession(timeout=self.timeout) as session: + async with session.get(url) as response: + result = await response.json() + # response looks like this: + # + # { + # "status": "success", + # "data": { + # "headStats": { + # "numSeries": 610, + # "numLabelPairs": 367, + # "chunkCount": 5702, + # "minTime": 1652720232481, + # "maxTime": 1652724527481 + # }, + # "seriesCountByMetricName": [ ... ] + # ...
+ # } + # } + return result["data"]["headStats"] if result["status"] == "success" else {} + async def run_promql(self, query: str, disable_ssl: bool = True) -> list: prometheus = PrometheusConnect(url=self.base_url, disable_ssl=disable_ssl) return prometheus.custom_query(query=query) diff --git a/tox.ini b/tox.ini index 98147faf..94fb8157 100644 --- a/tox.ini +++ b/tox.ini @@ -85,28 +85,29 @@ deps = -r{toxinidir}/requirements.txt deepdiff commands = - sh -c 'stat promql-transform > /dev/null 2>&1 || curl "https://github.com/canonical/promql-transform/releases/download/2.25.2-1rc1/promql-transform_2.25.2-1rc1_linux_amd64.tar.gz" -L -s | tar zxv promql-transform' + /usr/bin/env sh -c 'stat promql-transform > /dev/null 2>&1 || curl "https://github.com/canonical/promql-transform/releases/download/2.25.2-1rc1/promql-transform_2.25.2-1rc1_linux_amd64.tar.gz" -L -s | tar zxv promql-transform' coverage run \ --source={[vars]src_path},{[vars]lib_path} \ -m pytest -v --tb native --log-cli-level=INFO -s {posargs} {[vars]tst_path}/unit coverage report allowlist_externals = - sh + /usr/bin/env [testenv:integration] description = Run integration tests deps = aiohttp + deepdiff juju pytest pytest-operator prometheus-api-client tenacity commands = - sh -c 'stat promql-transform > /dev/null 2>&1 || curl "https://github.com/canonical/promql-transform/releases/download/2.25.2-1rc1/promql-transform_2.25.2-1rc1_linux_amd64.tar.gz" -L -s | tar zxv promql-transform' - pytest -v --tb native --log-cli-level=INFO --color=yes -s {posargs} {toxinidir}/tests/integration + /usr/bin/env sh -c 'stat promql-transform > /dev/null 2>&1 || curl "https://github.com/canonical/promql-transform/releases/download/2.25.2-1rc1/promql-transform_2.25.2-1rc1_linux_amd64.tar.gz" -L -s | tar zxv promql-transform' + pytest -vv --tb native --log-cli-level=INFO --color=yes -s {posargs} {toxinidir}/tests/integration allowlist_externals = - sh + /usr/bin/env [testenv:integration-lma] description = Run lma bundle 
integration tests but with prometheus built from source