-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #83 from scality/feature/COSI-65-instrument-cosi-d…
…rover-with-gprc-metrics COSI-65, COSI-46, COSI-21: Add GRPC Metrics Instrumentation and Documentation Updates
- Loading branch information
Showing
20 changed files
with
985 additions
and
16 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,100 @@ | ||
#!/bin/bash
# End-to-end check of the gRPC server metrics exposed by the COSI driver.
#
# Usage:
#   e2e_tests_metrics.sh [CREATE_BUCKET] [DELETE_BUCKET] [GET_INFO] \
#                        [GRANT_ACCESS] [REVOKE_ACCESS]
#
# Each positional argument is the number of calls expected for that API.
#
# Shell options:
#   -E           the ERR trap is inherited by functions and subshells
#   -e           abort on the first unhandled failing command
#   -u           treat the use of an unset variable as an error
#   -o pipefail  a pipeline fails if any stage fails, so "cmd | tee" no longer
#                hides a failing cmd behind tee's exit status
set -Eeuo pipefail

LOG_FILE=".github/e2e_tests/artifacts/logs/e2e_tests/metrics_service.log"
mkdir -p "$(dirname "$LOG_FILE")"

NAMESPACE="scality-object-storage"
SERVICE="scality-cosi-driver-metrics"
LOCAL_PORT=8080
TARGET_PORT=8080

# Expected value for each metric, taken from the positional arguments.
# Every count defaults to 0 except GET_INFO, which defaults to 1.
EXPECTED_CREATE_BUCKET=${1:-0}
EXPECTED_DELETE_BUCKET=${2:-0}
EXPECTED_GET_INFO=${3:-1}
EXPECTED_GRANT_ACCESS=${4:-0}
EXPECTED_REVOKE_ACCESS=${5:-0}
GRPC_METHOD_TO_TEST="grpc_server_msg_sent_total"

# The expected counts are compared numerically later on, so reject anything
# that is not a non-negative integer before any cluster work starts.
for expected_count in \
  "$EXPECTED_CREATE_BUCKET" \
  "$EXPECTED_DELETE_BUCKET" \
  "$EXPECTED_GET_INFO" \
  "$EXPECTED_GRANT_ACCESS" \
  "$EXPECTED_REVOKE_ACCESS"; do
  if ! [[ "$expected_count" =~ ^[0-9]+$ ]]; then
    echo "Error: expected counts must be non-negative integers, got '$expected_count'." >&2
    exit 1
  fi
done
unset expected_count
|
||
# Failure reporter: prints a generic failure notice followed by the command
# that failed, mirroring both lines into the log file, then aborts the script
# with exit status 1.
# Globals: LOG_FILE (appended to), BASH_COMMAND (read)
error_handler() {
  printf '%s\n' "An error occurred during the metrics test. Check the log file for details." | tee -a "$LOG_FILE"
  printf '%s\n' "Failed command: $BASH_COMMAND" | tee -a "$LOG_FILE"
  exit 1
}

# Register the reporter for the ERR condition
trap error_handler ERR
|
||
# Logging and command execution function.
# Prints a "Running: ..." banner, runs the given command, and mirrors the
# command's stdout and stderr to both stdout and the log file.
# Globals:   LOG_FILE (appended to)
# Arguments: the command to run, followed by its arguments
# Returns:   the exit status of the command (or of tee if only tee failed)
log_and_run() {
  echo "Running: $*" | tee -a "$LOG_FILE"
  # A plain "cmd | tee" returns tee's status and hides a failing command.
  # pipefail is enabled inside a subshell so the command's status is
  # propagated whatever shell options the caller uses; "|| return" hands the
  # failure back to the caller, where the script's ERR trap can report it.
  (
    set -o pipefail
    "$@" 2>&1 | tee -a "$LOG_FILE"
  ) || return
}
|
||
# List services in all namespaces; the output is logged for debugging.
log_and_run kubectl get svc --all-namespaces

# Port-forward the metrics service.
# kubectl is started directly rather than through log_and_run so that $! is
# the PID of kubectl itself. Backgrounding the wrapper function records the
# PID of a subshell, and killing that subshell leaves kubectl running with the
# local port still bound. kubectl's output goes to the log file.
echo "Running: kubectl port-forward -n $NAMESPACE svc/$SERVICE $LOCAL_PORT:$TARGET_PORT" | tee -a "$LOG_FILE"
kubectl port-forward -n "$NAMESPACE" svc/"$SERVICE" "$LOCAL_PORT":"$TARGET_PORT" >> "$LOG_FILE" 2>&1 &
PORT_FORWARD_PID=$!

# Stops the port-forward if it is still running. Safe to call more than once.
stop_port_forward() {
  if [[ -n "${PORT_FORWARD_PID:-}" ]]; then
    kill "$PORT_FORWARD_PID" 2> /dev/null || true
    wait "$PORT_FORWARD_PID" 2> /dev/null || true
    PORT_FORWARD_PID=""
  fi
}

# Make sure the port-forward is stopped on every exit path, including errors.
trap stop_port_forward EXIT

# Poll until the forwarded port accepts connections. The loop gives up after
# 300 attempts (about 30 seconds of sleeping) or as soon as kubectl is gone,
# so a failed port-forward cannot hang the job.
wait_attempts=0
until nc -vz localhost "$LOCAL_PORT" > /dev/null 2>&1; do
  if ! kill -0 "$PORT_FORWARD_PID" 2> /dev/null; then
    echo "Error: kubectl port-forward exited before port $LOCAL_PORT became reachable." | tee -a "$LOG_FILE"
    exit 1
  fi
  wait_attempts=$((wait_attempts + 1))
  if [[ "$wait_attempts" -ge 300 ]]; then
    echo "Error: timed out waiting for the port-forward on port $LOCAL_PORT." | tee -a "$LOG_FILE"
    exit 1
  fi
  sleep 0.1
done

# Fetch metrics.
# curl is invoked directly so that only the response body lands in the output
# file (no "Running:" banner) and a failed request aborts the script; -f makes
# HTTP error responses count as failures.
echo "Running: curl -sf http://localhost:$LOCAL_PORT/metrics" | tee -a "$LOG_FILE"
curl -sf "http://localhost:$LOCAL_PORT/metrics" > /tmp/metrics_output.log
log_and_run cat /tmp/metrics_output.log

echo "Running: kill $PORT_FORWARD_PID" | tee -a "$LOG_FILE"
stop_port_forward

# Keep only the lines that mention the metric under test. grep exits non-zero
# when nothing matches, which aborts the script.
METRICS_OUTPUT=$(grep -F -- "$GRPC_METHOD_TO_TEST" /tmp/metrics_output.log)
echo "gRPC Metrics fetched successfully:" | tee -a "$LOG_FILE"
echo "$METRICS_OUTPUT" | tee -a "$LOG_FILE"
|
||
# Validate metrics
# Every line of METRICS_OUTPUT that carries a known grpc_method label is
# compared with the expected count for that method. Afterwards, any method
# with a non-zero expected count that never appeared in the output is reported
# as an error, so a missing series cannot pass silently.
echo "Validating gRPC Server Metrics..." | tee -a "$LOG_FILE"

# Space-delimited list of the methods found in the output
seen_methods=" "

# The loop reads from a here-string rather than a pipe, so it runs in the
# current shell and "exit 1" ends the script directly.
while IFS= read -r line; do
  # Extract the grpc_method label and the sample value (last field)
  method=$(echo "$line" | sed -n 's/.*grpc_method="\([^"]*\)".*/\1/p')
  value=$(echo "$line" | awk '{print $NF}')

  # Determine the expected value based on the grpc_method
  case "$method" in
    "DriverCreateBucket")
      expected_value=$EXPECTED_CREATE_BUCKET
      ;;
    "DriverDeleteBucket")
      expected_value=$EXPECTED_DELETE_BUCKET
      ;;
    "DriverGetInfo")
      expected_value=$EXPECTED_GET_INFO
      ;;
    "DriverGrantBucketAccess")
      expected_value=$EXPECTED_GRANT_ACCESS
      ;;
    "DriverRevokeBucketAccess")
      expected_value=$EXPECTED_REVOKE_ACCESS
      ;;
    *)
      echo "Unknown method: $method. Skipping validation." | tee -a "$LOG_FILE"
      continue
      ;;
  esac

  seen_methods+="$method "

  # Display method, value, and expected value
  echo "Method: $method, Value: $value, Expected: $expected_value" | tee -a "$LOG_FILE"

  # Perform validation
  if [[ "$value" -ne "$expected_value" ]]; then
    echo "Error: $method has an unexpected value ($value). Expected: $expected_value" | tee -a "$LOG_FILE"
    exit 1
  fi
done <<< "$METRICS_OUTPUT"

# A method that was never reported is acceptable only when no calls were
# expected for it.
for expectation in \
  "DriverCreateBucket:$EXPECTED_CREATE_BUCKET" \
  "DriverDeleteBucket:$EXPECTED_DELETE_BUCKET" \
  "DriverGetInfo:$EXPECTED_GET_INFO" \
  "DriverGrantBucketAccess:$EXPECTED_GRANT_ACCESS" \
  "DriverRevokeBucketAccess:$EXPECTED_REVOKE_ACCESS"; do
  method=${expectation%%:*}
  expected_value=${expectation#*:}
  if [[ "$seen_methods" != *" $method "* && "$expected_value" -ne 0 ]]; then
    echo "Error: $method is missing from the metrics output. Expected: $expected_value" | tee -a "$LOG_FILE"
    exit 1
  fi
done

echo "Metrics validation successful!" | tee -a "$LOG_FILE"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -116,6 +116,18 @@ jobs: | |
run: | | ||
.github/scripts/e2e_tests_brownfield_use_case.sh | ||
# The metrics script takes the expected number of calls for each API, in this order: | ||
# CREATE_BUCKET, DELETE_BUCKET, GET_INFO, GRANT_ACCESS, REVOKE_ACCESS. | ||
# The step below checks for the following API call counts: | ||
# - 2 CREATE_BUCKET | ||
# - 1 DELETE_BUCKET | ||
# - 1 GET_INFO | ||
# - 2 GRANT_ACCESS | ||
# - 2 REVOKE_ACCESS | ||
- name: E2E tests for metrics using API call metrics generated from above tests | ||
run: | | ||
.github/scripts/e2e_tests_metrics.sh 2 1 1 2 2 | ||
- name: "Delay completion" | ||
if: ${{ github.event_name == 'workflow_dispatch' && inputs.debug_enabled }} | ||
uses: scality/actions/[email protected] | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -72,6 +72,18 @@ jobs: | |
run: | | ||
.github/scripts/verify_helm_install.sh | ||
# The metrics script takes the expected number of calls for each API, in this order: | ||
# CREATE_BUCKET, DELETE_BUCKET, GET_INFO, GRANT_ACCESS, REVOKE_ACCESS. | ||
# The step below checks for the following API call counts: | ||
# - 0 CREATE_BUCKET | ||
# - 0 DELETE_BUCKET | ||
# - 1 GET_INFO | ||
# - 0 GRANT_ACCESS | ||
# - 0 REVOKE_ACCESS | ||
- name: Verify metrics for healthcheck route | ||
run: | | ||
.github/scripts/e2e_tests_metrics.sh 0 0 1 0 0 | ||
- name: "Delay completion" | ||
if: ${{ github.event_name == 'workflow_dispatch' && inputs.debug_enabled }} | ||
uses: scality/actions/[email protected] | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
# COSI Driver Metrics Documentation | ||
|
||
This document provides an overview of the Prometheus metrics exposed by the COSI driver. These metrics are designed to help open-source users monitor the performance and operations of the COSI driver. The metrics cover gRPC server calls. | ||
|
||
## Metrics Overview | ||
|
||
Metrics are exposed at the `/metrics` endpoint on the address configured via the `--metrics-address` flag (default: `:8080`). These metrics are Prometheus-compatible and can be used to create dashboards for observability. | ||
|
||
--- | ||
|
||
## gRPC Default Metrics | ||
|
||
The COSI driver exposes default gRPC server metrics to monitor RPC activity. | ||
|
||
| Metric Name                     | Description                                                | Labels                                                  | | ||
|---------------------------------|------------------------------------------------------------|---------------------------------------------------------| | ||
| `grpc_server_started_total`     | Total number of RPCs started on the server.                | `grpc_method`, `grpc_service`, `grpc_type`              | | ||
| `grpc_server_handled_total`     | Total number of RPCs completed on the server.              | `grpc_method`, `grpc_service`, `grpc_type`, `grpc_code` | | ||
| `grpc_server_msg_received_total`| Total number of messages received by the server.           | `grpc_method`, `grpc_service`, `grpc_type`              | | ||
| `grpc_server_msg_sent_total`    | Total number of messages sent by the server.               | `grpc_method`, `grpc_service`, `grpc_type`              | | ||
| `grpc_server_handling_seconds`  | Time taken for RPC calls to be handled by the server.      | `grpc_method`, `grpc_service`, `grpc_type`              | | ||
|
||
### Example gRPC Methods | ||
|
||
- Methods: `DriverCreateBucket`, `DriverDeleteBucket`, `DriverGetInfo`, `DriverGrantBucketAccess`, `DriverRevokeBucketAccess` | ||
- Services: `cosi.v1alpha1.Provisioner`, `cosi.v1alpha1.Identity` | ||
|
||
```sh | ||
grpc_server_started_total{grpc_method="DriverGetInfo",grpc_service="cosi.v1alpha1.Identity",grpc_type="unary"} 2 | ||
``` | ||
|
||
## Additional Resources | ||
|
||
- [gRPC-Go Prometheus Metrics](https://github.com/grpc-ecosystem/go-grpc-middleware) | ||
- [Default Prometheus Metrics](https://pkg.go.dev/github.com/prometheus/client_golang/prometheus#pkg-subdirectories) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.