This repository has been archived by the owner on Dec 17, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 49
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Initial work towards observability (#174)
This is the first step towards improving the observability of the Gevulot node. This change adds some initial basic Prometheus metrics and a simple optional HTTP server that serves the metrics.
- Loading branch information
Showing
9 changed files
with
286 additions
and
8 deletions.
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,115 @@ | ||
use eyre::Result; | ||
use prometheus_hyper::Server; | ||
use std::{net::SocketAddr, sync::Arc}; | ||
|
||
use lazy_static::lazy_static; | ||
use prometheus::{HistogramOpts, HistogramVec, IntCounter, IntGauge, Registry}; | ||
|
||
lazy_static! { | ||
pub static ref REGISTRY: Arc<Registry> = Arc::new(Registry::new()); | ||
|
||
// RPC metrics. | ||
pub static ref RPC_INCOMING_REQUESTS: IntCounter = | ||
IntCounter::new("rpc_incoming_requests", "Incoming RPC Requests") | ||
.expect("metric can be created"); | ||
pub static ref RPC_RESPONSE_TIME_COLLECTOR: HistogramVec = HistogramVec::new( | ||
HistogramOpts::new("rpc_response_time", "RPC Response Times"), | ||
&["method"] | ||
) | ||
.expect("metric can be created"); | ||
|
||
// P2P metrics. | ||
pub static ref P2P_PROTOCOL_VERSION: IntGauge = | ||
IntGauge::new("p2p_protocol_version", "P2P Protocol Version").expect("metric can be created"); | ||
pub static ref P2P_CONNECTED_PEERS: IntGauge = | ||
IntGauge::new("p2p_connected_peers", "Connected P2P Peers").expect("metric can be created"); | ||
pub static ref P2P_INCOMING_MESSAGES: IntCounter = | ||
IntCounter::new("p2p_incoming_messages", "Incoming P2P Messages") | ||
.expect("metric can be created"); | ||
|
||
|
||
// Transaction metrics. | ||
pub static ref TX_EXECUTION_TIME_COLLECTOR: HistogramVec = HistogramVec::new( | ||
HistogramOpts::new("tx_execution_time", "Transaction Execution Times (ms)"), | ||
&["kind","status"] | ||
) | ||
.expect("metric can be created"); | ||
pub static ref TX_SCHEDULING_REQUEUED: IntCounter = | ||
IntCounter::new("tx_scheduling_requeued", "Transaction Requeued in Scheduling") | ||
.expect("metric can be created"); | ||
|
||
// Resources metrics. | ||
pub static ref CPUS_AVAILABLE: IntGauge = | ||
IntGauge::new("gevulot_cpus_available", "Available CPUs in Gevulot") | ||
.expect("metric can be created"); | ||
pub static ref MEM_AVAILABLE: IntGauge = | ||
IntGauge::new("gevulot_mem_available", "Available MEM in Gevulot") | ||
.expect("metric can be created"); | ||
pub static ref GPUS_AVAILABLE: IntGauge = | ||
IntGauge::new("gevulot_gpus_available", "Available GPUs in Gevulot") | ||
.expect("metric can be created"); | ||
pub static ref CPUS_TOTAL: IntGauge = | ||
IntGauge::new("gevulot_cpus_total", "Total number of CPUs in Gevulot") | ||
.expect("metric can be created"); | ||
pub static ref MEM_TOTAL: IntGauge = | ||
IntGauge::new("gevulot_mem_total", "Total amount of MEM in Gevulot") | ||
.expect("metric can be created"); | ||
pub static ref GPUS_TOTAL: IntGauge = | ||
IntGauge::new("gevulot_gpus_total", "Total number of GPUs in Gevulot") | ||
.expect("metric can be created"); | ||
} | ||
|
||
pub(crate) fn register_metrics() { | ||
REGISTRY | ||
.register(Box::new(RPC_INCOMING_REQUESTS.clone())) | ||
.expect("collector can be registered"); | ||
|
||
REGISTRY | ||
.register(Box::new(RPC_RESPONSE_TIME_COLLECTOR.clone())) | ||
.expect("collector can be registered"); | ||
|
||
REGISTRY | ||
.register(Box::new(P2P_PROTOCOL_VERSION.clone())) | ||
.expect("collector can be registered"); | ||
REGISTRY | ||
.register(Box::new(P2P_CONNECTED_PEERS.clone())) | ||
.expect("collector can be registered"); | ||
REGISTRY | ||
.register(Box::new(P2P_INCOMING_MESSAGES.clone())) | ||
.expect("collector can be registered"); | ||
|
||
REGISTRY | ||
.register(Box::new(TX_EXECUTION_TIME_COLLECTOR.clone())) | ||
.expect("collector can be registered"); | ||
REGISTRY | ||
.register(Box::new(TX_SCHEDULING_REQUEUED.clone())) | ||
.expect("collector can be registered"); | ||
|
||
REGISTRY | ||
.register(Box::new(CPUS_AVAILABLE.clone())) | ||
.expect("collector can be registered"); | ||
REGISTRY | ||
.register(Box::new(MEM_AVAILABLE.clone())) | ||
.expect("collector can be registered"); | ||
REGISTRY | ||
.register(Box::new(GPUS_AVAILABLE.clone())) | ||
.expect("collector can be registered"); | ||
REGISTRY | ||
.register(Box::new(CPUS_TOTAL.clone())) | ||
.expect("collector can be registered"); | ||
REGISTRY | ||
.register(Box::new(MEM_TOTAL.clone())) | ||
.expect("collector can be registered"); | ||
REGISTRY | ||
.register(Box::new(GPUS_TOTAL.clone())) | ||
.expect("collector can be registered"); | ||
} | ||
|
||
pub(crate) async fn serve_metrics(bind_addr: SocketAddr) -> Result<()> { | ||
// Start Server endlessly. | ||
tokio::spawn(async move { | ||
Server::run(REGISTRY.clone(), bind_addr, futures_util::future::pending()).await | ||
}); | ||
|
||
Ok(()) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.