Skip to content

Commit

Permalink
add metrics descriptions
Browse files Browse the repository at this point in the history
  • Loading branch information
pascal-fischer committed Jan 23, 2025
1 parent 69f48db commit 0b83866
Show file tree
Hide file tree
Showing 10 changed files with 213 additions and 66 deletions.
8 changes: 3 additions & 5 deletions management/server/peer.go
Original file line number Diff line number Diff line change
Expand Up @@ -1130,11 +1130,6 @@ func (am *DefaultAccountManager) UpdateAccountPeers(ctx context.Context, account
}

start := time.Now()
defer func() {
if am.metrics != nil {
am.metrics.AccountManagerMetrics().CountUpdateAccountPeersDuration(time.Since(start))
}
}()

approvedPeersMap, err := am.integratedPeerValidator.GetValidatedPeers(account.Id, maps.Values(account.Groups), maps.Values(account.Peers), account.Settings.Extra)
if err != nil {
Expand Down Expand Up @@ -1175,6 +1170,9 @@ func (am *DefaultAccountManager) UpdateAccountPeers(ctx context.Context, account
}

wg.Wait()
if am.metrics != nil {
am.metrics.AccountManagerMetrics().CountUpdateAccountPeersDuration(time.Since(start))
}
}

// UpdateAccountPeer updates a single peer that belongs to an account.
Expand Down
13 changes: 9 additions & 4 deletions management/server/telemetry/accountmanager_metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,8 @@ func NewAccountManagerMetrics(ctx context.Context, meter metric.Meter) (*Account
metric.WithUnit("milliseconds"),
metric.WithExplicitBucketBoundaries(
0.5, 1, 2.5, 5, 10, 25, 50, 100, 250, 500, 1000, 2500, 5000, 10000, 30000,
))
),
metric.WithDescription("Duration of triggering the account peers update and preparing the required data for the network map being sent to the clients"))
if err != nil {
return nil, err
}
Expand All @@ -31,7 +32,8 @@ func NewAccountManagerMetrics(ctx context.Context, meter metric.Meter) (*Account
metric.WithUnit("milliseconds"),
metric.WithExplicitBucketBoundaries(
0.1, 0.5, 1, 2.5, 5, 10, 25, 50, 100, 250, 500, 1000,
))
),
metric.WithDescription("Duration of calculating the peer network map that is sent to the clients"))
if err != nil {
return nil, err
}
Expand All @@ -40,12 +42,15 @@ func NewAccountManagerMetrics(ctx context.Context, meter metric.Meter) (*Account
metric.WithUnit("objects"),
metric.WithExplicitBucketBoundaries(
50, 100, 200, 500, 1000, 2500, 5000, 10000,
))
),
metric.WithDescription("Number of objects in the network map like peers, routes, firewall rules, etc. that are sent to the clients"))
if err != nil {
return nil, err
}

peerMetaUpdateCount, err := meter.Int64Counter("management.account.peer.meta.update.counter", metric.WithUnit("1"))
peerMetaUpdateCount, err := meter.Int64Counter("management.account.peer.meta.update.counter",
metric.WithUnit("1"),
metric.WithDescription("Number of updates with new meta data from the peers"))
if err != nil {
return nil, err
}
Expand Down
32 changes: 25 additions & 7 deletions management/server/telemetry/grpc_metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,32 +22,50 @@ type GRPCMetrics struct {

// NewGRPCMetrics creates new GRPCMetrics struct and registers common metrics of the gRPC server
func NewGRPCMetrics(ctx context.Context, meter metric.Meter) (*GRPCMetrics, error) {
syncRequestsCounter, err := meter.Int64Counter("management.grpc.sync.request.counter", metric.WithUnit("1"))
syncRequestsCounter, err := meter.Int64Counter("management.grpc.sync.request.counter",
metric.WithUnit("1"),
metric.WithDescription("Number of sync gRPC requests from the peers to establish a connection and receive network map updates (update channel)"),
)
if err != nil {
return nil, err
}

loginRequestsCounter, err := meter.Int64Counter("management.grpc.login.request.counter", metric.WithUnit("1"))
loginRequestsCounter, err := meter.Int64Counter("management.grpc.login.request.counter",
metric.WithUnit("1"),
metric.WithDescription("Number of login gRPC requests from the peers to authenticate and receive initial configuration and relay credentials"),
)
if err != nil {
return nil, err
}

getKeyRequestsCounter, err := meter.Int64Counter("management.grpc.key.request.counter", metric.WithUnit("1"))
getKeyRequestsCounter, err := meter.Int64Counter("management.grpc.key.request.counter",
metric.WithUnit("1"),
metric.WithDescription("Number of key gRPC requests from the peers to get the server's public WireGuard key"),
)
if err != nil {
return nil, err
}

activeStreamsGauge, err := meter.Int64ObservableGauge("management.grpc.connected.streams", metric.WithUnit("1"))
activeStreamsGauge, err := meter.Int64ObservableGauge("management.grpc.connected.streams",
metric.WithUnit("1"),
metric.WithDescription("Number of active peer streams connected to the gRPC server"),
)
if err != nil {
return nil, err
}

syncRequestDuration, err := meter.Int64Histogram("management.grpc.sync.request.duration.ms", metric.WithUnit("milliseconds"))
syncRequestDuration, err := meter.Int64Histogram("management.grpc.sync.request.duration.ms",
metric.WithUnit("milliseconds"),
metric.WithDescription("Duration of the sync gRPC requests from the peers to establish a connection and receive network map updates (update channel)"),
)
if err != nil {
return nil, err
}

loginRequestDuration, err := meter.Int64Histogram("management.grpc.login.request.duration.ms", metric.WithUnit("milliseconds"))
loginRequestDuration, err := meter.Int64Histogram("management.grpc.login.request.duration.ms",
metric.WithUnit("milliseconds"),
metric.WithDescription("Duration of the login gRPC requests from the peers to authenticate and receive initial configuration and relay credentials"),
)
if err != nil {
return nil, err
}
Expand All @@ -57,7 +75,7 @@ func NewGRPCMetrics(ctx context.Context, meter metric.Meter) (*GRPCMetrics, erro
// TODO(yury): This needs custom bucketing as we are interested in the values from 0 to server.channelBufferSize (100)
channelQueue, err := meter.Int64Histogram(
"management.grpc.updatechannel.queue",
metric.WithDescription("Number of update messages in the channel queue"),
metric.WithDescription("Number of update messages piling up in the update channel queue"),
metric.WithUnit("length"),
)
if err != nil {
Expand Down
35 changes: 28 additions & 7 deletions management/server/telemetry/http_api_metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -74,37 +74,58 @@ type HTTPMiddleware struct {

// NewMetricsMiddleware creates a new HTTPMiddleware
func NewMetricsMiddleware(ctx context.Context, meter metric.Meter) (*HTTPMiddleware, error) {
httpRequestCounter, err := meter.Int64Counter(httpRequestCounterPrefix, metric.WithUnit("1"))
httpRequestCounter, err := meter.Int64Counter(httpRequestCounterPrefix,
metric.WithUnit("1"),
metric.WithDescription("Number of incoming HTTP requests by endpoint and method"),
)
if err != nil {
return nil, err
}

httpResponseCounter, err := meter.Int64Counter(httpResponseCounterPrefix, metric.WithUnit("1"))
httpResponseCounter, err := meter.Int64Counter(httpResponseCounterPrefix,
metric.WithUnit("1"),
metric.WithDescription("Number of outgoing HTTP responses by endpoint, method and returned status code"),
)
if err != nil {
return nil, err
}

totalHTTPRequestsCounter, err := meter.Int64Counter(fmt.Sprintf("%s.total", httpRequestCounterPrefix), metric.WithUnit("1"))
totalHTTPRequestsCounter, err := meter.Int64Counter(fmt.Sprintf("%s.total", httpRequestCounterPrefix),
metric.WithUnit("1"),
metric.WithDescription("Number of incoming HTTP requests"),
)
if err != nil {
return nil, err
}

totalHTTPResponseCounter, err := meter.Int64Counter(fmt.Sprintf("%s.total", httpResponseCounterPrefix), metric.WithUnit("1"))
totalHTTPResponseCounter, err := meter.Int64Counter(fmt.Sprintf("%s.total", httpResponseCounterPrefix),
metric.WithUnit("1"),
metric.WithDescription("Number of outgoing HTTP responses"),
)
if err != nil {
return nil, err
}

totalHTTPResponseCodeCounter, err := meter.Int64Counter(fmt.Sprintf("%s.code.total", httpResponseCounterPrefix), metric.WithUnit("1"))
totalHTTPResponseCodeCounter, err := meter.Int64Counter(fmt.Sprintf("%s.code.total", httpResponseCounterPrefix),
metric.WithUnit("1"),
metric.WithDescription("Number of outgoing HTTP responses by status code"),
)
if err != nil {
return nil, err
}

httpRequestDuration, err := meter.Int64Histogram(httpRequestDurationPrefix, metric.WithUnit("milliseconds"))
httpRequestDuration, err := meter.Int64Histogram(httpRequestDurationPrefix,
metric.WithUnit("milliseconds"),
metric.WithDescription("Duration of incoming HTTP requests by endpoint and method"),
)
if err != nil {
return nil, err
}

totalHTTPRequestDuration, err := meter.Int64Histogram(fmt.Sprintf("%s.total", httpRequestDurationPrefix), metric.WithUnit("milliseconds"))
totalHTTPRequestDuration, err := meter.Int64Histogram(fmt.Sprintf("%s.total", httpRequestDurationPrefix),
metric.WithUnit("milliseconds"),
metric.WithDescription("Duration of incoming HTTP requests"),
)
if err != nil {
return nil, err
}
Expand Down
50 changes: 40 additions & 10 deletions management/server/telemetry/idp_metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,43 +23,73 @@ type IDPMetrics struct {

// NewIDPMetrics creates new IDPMetrics struct and registers common
func NewIDPMetrics(ctx context.Context, meter metric.Meter) (*IDPMetrics, error) {
metaUpdateCounter, err := meter.Int64Counter("management.idp.update.user.meta.counter", metric.WithUnit("1"))
metaUpdateCounter, err := meter.Int64Counter("management.idp.update.user.meta.counter",
metric.WithUnit("1"),
metric.WithDescription("Number of updates of user metadata sent to the configured identity provider"),
)
if err != nil {
return nil, err
}
getUserByEmailCounter, err := meter.Int64Counter("management.idp.get.user.by.email.counter", metric.WithUnit("1"))
getUserByEmailCounter, err := meter.Int64Counter("management.idp.get.user.by.email.counter",
metric.WithUnit("1"),
metric.WithDescription("Number of requests to get a user by email from the configured identity provider"),
)
if err != nil {
return nil, err
}
getAllAccountsCounter, err := meter.Int64Counter("management.idp.get.accounts.counter", metric.WithUnit("1"))
getAllAccountsCounter, err := meter.Int64Counter("management.idp.get.accounts.counter",
metric.WithUnit("1"),
metric.WithDescription("Number of requests to get all accounts from the configured identity provider"),
)
if err != nil {
return nil, err
}
createUserCounter, err := meter.Int64Counter("management.idp.create.user.counter", metric.WithUnit("1"))
createUserCounter, err := meter.Int64Counter("management.idp.create.user.counter",
metric.WithUnit("1"),
metric.WithDescription("Number of requests to create a new user in the configured identity provider"),
)
if err != nil {
return nil, err
}
deleteUserCounter, err := meter.Int64Counter("management.idp.delete.user.counter", metric.WithUnit("1"))
deleteUserCounter, err := meter.Int64Counter("management.idp.delete.user.counter",
metric.WithUnit("1"),
metric.WithDescription("Number of requests to delete a user from the configured identity provider"),
)
if err != nil {
return nil, err
}
getAccountCounter, err := meter.Int64Counter("management.idp.get.account.counter", metric.WithUnit("1"))
getAccountCounter, err := meter.Int64Counter("management.idp.get.account.counter",
metric.WithUnit("1"),
metric.WithDescription("Number of requests to get all users in an account from the configured identity provider"),
)
if err != nil {
return nil, err
}
getUserByIDCounter, err := meter.Int64Counter("management.idp.get.user.by.id.counter", metric.WithUnit("1"))
getUserByIDCounter, err := meter.Int64Counter("management.idp.get.user.by.id.counter",
metric.WithUnit("1"),
metric.WithDescription("Number of requests to get a user by ID from the configured identity provider"),
)
if err != nil {
return nil, err
}
authenticateRequestCounter, err := meter.Int64Counter("management.idp.authenticate.request.counter", metric.WithUnit("1"))
authenticateRequestCounter, err := meter.Int64Counter("management.idp.authenticate.request.counter",
metric.WithUnit("1"),
metric.WithDescription("Number of requests to authenticate the server with the configured identity provider"),
)
if err != nil {
return nil, err
}
requestErrorCounter, err := meter.Int64Counter("management.idp.request.error.counter", metric.WithUnit("1"))
requestErrorCounter, err := meter.Int64Counter("management.idp.request.error.counter",
metric.WithUnit("1"),
metric.WithDescription("Number of errors that happened when sending HTTP requests to the configured identity provider"),
)
if err != nil {
return nil, err
}
requestStatusErrorCounter, err := meter.Int64Counter("management.idp.request.status.error.counter", metric.WithUnit("1"))
requestStatusErrorCounter, err := meter.Int64Counter("management.idp.request.status.error.counter",
metric.WithUnit("1"),
metric.WithDescription("Number of responses that came from the configured identity provider with a non-success status code"),
)
if err != nil {
return nil, err
}
Expand Down
23 changes: 18 additions & 5 deletions management/server/telemetry/store_metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,28 +20,41 @@ type StoreMetrics struct {
// NewStoreMetrics creates an instance of StoreMetrics
func NewStoreMetrics(ctx context.Context, meter metric.Meter) (*StoreMetrics, error) {
globalLockAcquisitionDurationMicro, err := meter.Int64Histogram("management.store.global.lock.acquisition.duration.micro",
metric.WithUnit("microseconds"))
metric.WithUnit("microseconds"),
metric.WithDescription("Duration of how long it takes to acquire the global lock in the store to block all other requests to the store"),
)
if err != nil {
return nil, err
}

globalLockAcquisitionDurationMs, err := meter.Int64Histogram("management.store.global.lock.acquisition.duration.ms")
globalLockAcquisitionDurationMs, err := meter.Int64Histogram("management.store.global.lock.acquisition.duration.ms",
metric.WithUnit("milliseconds"),
metric.WithDescription("Duration of how long a process holds the acquired global lock in the store"),
)
if err != nil {
return nil, err
}

persistenceDurationMicro, err := meter.Int64Histogram("management.store.persistence.duration.micro",
metric.WithUnit("microseconds"))
metric.WithUnit("microseconds"),
metric.WithDescription("Duration of how long it takes to save or delete an account in the store"),
)
if err != nil {
return nil, err
}

persistenceDurationMs, err := meter.Int64Histogram("management.store.persistence.duration.ms")
persistenceDurationMs, err := meter.Int64Histogram("management.store.persistence.duration.ms",
metric.WithUnit("milliseconds"),
metric.WithDescription("Duration of how long it takes to save or delete an account in the store"),
)
if err != nil {
return nil, err
}

transactionDurationMs, err := meter.Int64Histogram("management.store.transaction.duration.ms")
transactionDurationMs, err := meter.Int64Histogram("management.store.transaction.duration.ms",
metric.WithUnit("milliseconds"),
metric.WithDescription("Duration of how long it takes to execute a transaction in the store"),
)
if err != nil {
return nil, err
}
Expand Down
42 changes: 34 additions & 8 deletions management/server/telemetry/updatechannel_metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,42 +23,68 @@ type UpdateChannelMetrics struct {

// NewUpdateChannelMetrics creates an instance of UpdateChannel
func NewUpdateChannelMetrics(ctx context.Context, meter metric.Meter) (*UpdateChannelMetrics, error) {
createChannelDurationMicro, err := meter.Int64Histogram("management.updatechannel.create.duration.micro")
createChannelDurationMicro, err := meter.Int64Histogram("management.updatechannel.create.duration.micro",
metric.WithUnit("microseconds"),
metric.WithDescription("Duration of how long it takes to create a new peer update channel"),
)
if err != nil {
return nil, err
}

closeChannelDurationMicro, err := meter.Int64Histogram("management.updatechannel.close.one.duration.micro")
closeChannelDurationMicro, err := meter.Int64Histogram("management.updatechannel.close.one.duration.micro",
metric.WithUnit("microseconds"),
metric.WithDescription("Duration of how long it takes to close a peer update channel"),
)
if err != nil {
return nil, err
}

closeChannelsDurationMicro, err := meter.Int64Histogram("management.updatechannel.close.multiple.duration.micro")
closeChannelsDurationMicro, err := meter.Int64Histogram("management.updatechannel.close.multiple.duration.micro",
metric.WithUnit("microseconds"),
metric.WithDescription("Duration of how long it takes to close a set of peer update channels"),
)

if err != nil {
return nil, err
}

closeChannels, err := meter.Int64Histogram("management.updatechannel.close.multiple.channels")
closeChannels, err := meter.Int64Histogram("management.updatechannel.close.multiple.channels",
metric.WithUnit("1"),
metric.WithDescription("Number of peer update channels that have been closed"),
)

if err != nil {
return nil, err
}

sendUpdateDurationMicro, err := meter.Int64Histogram("management.updatechannel.send.duration.micro")
sendUpdateDurationMicro, err := meter.Int64Histogram("management.updatechannel.send.duration.micro",
metric.WithUnit("microseconds"),
metric.WithDescription("Duration of how long it takes to send a network map update to a peer"),
)
if err != nil {
return nil, err
}

getAllConnectedPeersDurationMicro, err := meter.Int64Histogram("management.updatechannel.get.all.duration.micro")
getAllConnectedPeersDurationMicro, err := meter.Int64Histogram("management.updatechannel.get.all.duration.micro",
metric.WithUnit("microseconds"),
metric.WithDescription("Duration of how long it takes to get all connected peers"),
)
if err != nil {
return nil, err
}

getAllConnectedPeers, err := meter.Int64Histogram("management.updatechannel.get.all.peers")
getAllConnectedPeers, err := meter.Int64Histogram("management.updatechannel.get.all.peers",
metric.WithUnit("1"),
metric.WithDescription("Number of connected peers"),
)
if err != nil {
return nil, err
}

hasChannelDurationMicro, err := meter.Int64Histogram("management.updatechannel.haschannel.duration.micro")
hasChannelDurationMicro, err := meter.Int64Histogram("management.updatechannel.haschannel.duration.micro",
metric.WithUnit("microseconds"),
metric.WithDescription("Duration of how long it takes to check if a peer has a channel"),
)
if err != nil {
return nil, err
}
Expand Down
Loading

0 comments on commit 0b83866

Please sign in to comment.