diff --git a/cmd/root.go b/cmd/root.go index b4a9e33..693308a 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -13,13 +13,13 @@ import ( var cfgFile string var ( gitVersion string - buildTime string + buildTime string ) // rootCmd represents the base command when called without any subcommands var rootCmd = &cobra.Command{ - Use: "chia-exporter", - Short: "Prometheus metric exporter for Chia Blockchain", + Use: "chia-exporter", + Short: "Prometheus metric exporter for Chia Blockchain", Version: fmt.Sprintf("%s (%s)", gitVersion, buildTime), } diff --git a/go.mod b/go.mod index 2f29cd0..650d20f 100644 --- a/go.mod +++ b/go.mod @@ -3,7 +3,7 @@ module github.com/chia-network/chia-exporter go 1.21 require ( - github.com/chia-network/go-chia-libs v0.6.0 + github.com/chia-network/go-chia-libs v0.7.0 github.com/chia-network/go-modules v0.0.4 github.com/go-sql-driver/mysql v1.8.0 github.com/oschwald/maxminddb-golang v1.12.0 @@ -19,6 +19,7 @@ require ( github.com/cespare/xxhash/v2 v2.2.0 // indirect github.com/fsnotify/fsnotify v1.7.0 // indirect github.com/google/go-querystring v1.1.0 // indirect + github.com/google/uuid v1.6.0 // indirect github.com/gorilla/websocket v1.5.1 // indirect github.com/hashicorp/hcl v1.0.0 // indirect github.com/inconshreveable/mousetrap v1.1.0 // indirect diff --git a/go.sum b/go.sum index 8f3db5e..7b33109 100644 --- a/go.sum +++ b/go.sum @@ -4,8 +4,8 @@ github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44= github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= -github.com/chia-network/go-chia-libs v0.6.0 h1:HF9OHb2oLRvsVkM5umH11m085UrMp5Pg/xMO43EHmBE= -github.com/chia-network/go-chia-libs v0.6.0/go.mod h1:uqlNmpIlkRd4vtE+cXoUIAEdbDgCKK0Gp9XzrPgpkEM= +github.com/chia-network/go-chia-libs v0.7.0 h1:mTTw1A9Vi9rrzoh08uBAzVRt5lwbIN1KNy/m2uWyL5o= +github.com/chia-network/go-chia-libs v0.7.0/go.mod h1:Y/ND/A4O1RTVfAy6jlK3ATS584VyiNqOKbHSMwrl21k= github.com/chia-network/go-modules v0.0.4 h1:XlCcuT4j1krLvsFT1Y49Un5xORwcTc8jjE4SHih7OTI= github.com/chia-network/go-modules v0.0.4/go.mod h1:JP8mG/9ieE76VcGIbzD5G3/4YDmvNhRryiQwp8GQr1U= github.com/cpuguy83/go-md2man/v2 v2.0.3/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= @@ -24,6 +24,8 @@ github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/go-querystring v1.1.0 h1:AnCroh3fv4ZBgVIf1Iwtovgjaw/GiKJo8M8yD/fhyJ8= github.com/google/go-querystring v1.1.0/go.mod h1:Kcdr2DB4koayq7X8pmAG4sNG59So17icRSOU623lUBU= +github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= +github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/gorilla/websocket v1.5.1 h1:gmztn0JnHVt9JZquRuzLw3g4wouNVzKL15iLr/zn/QY= github.com/gorilla/websocket v1.5.1/go.mod h1:x3kM2JMyaluk02fnUJpQuwD2dCS5NDG2ZHL0uE0tcaY= github.com/hashicorp/hcl v1.0.0 h1:0Anlzjpi4vEasTeNFn2mLJgTSwt0+6sfsiTG8qcWGx4= diff --git a/internal/metrics/crawler.go b/internal/metrics/crawler.go index 263587e..92bee48 100644 --- a/internal/metrics/crawler.go +++ b/internal/metrics/crawler.go @@ -28,6 +28,9 @@ type CrawlerServiceMetrics struct { // Holds a reference to the main metrics container this is a part of metrics *Metrics + // Current network + network *string + // Interfaces with Maxmind maxMindCountryDB *maxminddb.Reader maxMindASNDB *maxminddb.Reader @@ -42,13 +45,11 @@ type CrawlerServiceMetrics struct { // Debug Metric debug *prometheus.GaugeVec - - // Current network - network *string } // InitMetrics sets all the metrics properties -func (s *CrawlerServiceMetrics) InitMetrics() { +func (s *CrawlerServiceMetrics) InitMetrics(network *string) { + s.network = network // Crawler Metrics s.totalNodes5Days = s.metrics.newGauge(chiaServiceCrawler, "total_nodes_5_days", "Total number of nodes that have been gossiped around the network with a timestamp in the last 5 days. The crawler did not necessarily connect to all of these peers itself.") s.reliableNodes = s.metrics.newGauge(chiaServiceCrawler, "reliable_nodes", "reliable nodes are nodes that have port 8444 open and have available space for more peer connections") @@ -252,18 +253,10 @@ func (s *CrawlerServiceMetrics) ProcessIPASNMapping(ips *rpc.GetIPsAfterTimestam return } if s.network == nil { - netInfo, _, err := s.metrics.httpClient.CrawlerService.GetNetworkInfo(&rpc.GetNetworkInfoOptions{}) - if err != nil { - log.Errorf("Could not get network information to store with ASN data: %s\n", err.Error()) - return - } - if netInfo.NetworkName.IsAbsent() { - log.Error("network name was absent in get_network_info request. Can't store ASNs without network name") - return - } - netName := netInfo.NetworkName.MustGet() - s.network = &netName + log.Errorln("Network information missing. Can't store ASN data without network") + return } + type countStruct struct { ASN uint32 Organization string diff --git a/internal/metrics/farmer.go b/internal/metrics/farmer.go index e76043a..917fc79 100644 --- a/internal/metrics/farmer.go +++ b/internal/metrics/farmer.go @@ -57,7 +57,7 @@ type FarmerServiceMetrics struct { } // InitMetrics sets all the metrics properties -func (s *FarmerServiceMetrics) InitMetrics() { +func (s *FarmerServiceMetrics) InitMetrics(network *string) { s.totalPlotsValue = map[types.Bytes32]uint64{} s.nodeIDToHostname = map[types.Bytes32]string{} diff --git a/internal/metrics/fullnode.go b/internal/metrics/fullnode.go index 135058d..9af042b 100644 --- a/internal/metrics/fullnode.go +++ b/internal/metrics/fullnode.go @@ -93,7 +93,7 @@ type FullNodeServiceMetrics struct { } // InitMetrics sets all the metrics properties -func (s *FullNodeServiceMetrics) InitMetrics() { +func (s *FullNodeServiceMetrics) InitMetrics(network *string) { // BlockchainState Metrics s.difficulty = s.metrics.newGauge(chiaServiceFullNode, "difficulty", "Current network difficulty") s.mempoolCost = s.metrics.newGauge(chiaServiceFullNode, "mempool_cost", "Current mempool size in cost") diff --git a/internal/metrics/harvester.go b/internal/metrics/harvester.go index 000d9ae..b1d9849 100644 --- a/internal/metrics/harvester.go +++ b/internal/metrics/harvester.go @@ -47,7 +47,7 @@ type HarvesterServiceMetrics struct { } // InitMetrics sets all the metrics properties -func (s *HarvesterServiceMetrics) InitMetrics() { +func (s *HarvesterServiceMetrics) InitMetrics(network *string) { // Connection Metrics s.connectionCount = s.metrics.newGaugeVec(chiaServiceHarvester, "connection_count", "Number of active connections for each type of peer", []string{"node_type"}) diff --git a/internal/metrics/metrics.go b/internal/metrics/metrics.go index 6167d7a..3efdf47 100644 --- a/internal/metrics/metrics.go +++ b/internal/metrics/metrics.go @@ -34,7 +34,7 @@ const ( // serviceMetrics defines methods that must be on all metrics services type serviceMetrics interface { // InitMetrics registers any metrics (gauges, counters, etc) on creation of the metrics object - InitMetrics() + InitMetrics(network *string) // InitialData is called after the websocket connection is opened to allow each service // to load any initial data that should be reported @@ -58,6 +58,7 @@ type serviceMetrics interface { type Metrics struct { metricsPort uint16 client *rpc.Client + network *string // httpClient is another instance of the rpc.Client in HTTP mode // This is used rarely, to request data in response to a websocket event that is too large to fit on a single @@ -65,7 +66,8 @@ type Metrics struct { httpClient *rpc.Client // This holds a custom prometheus registry so that only our metrics are exported, and not the default go metrics - registry *prometheus.Registry + registry *prometheus.Registry + dynamicPromHandler *dynamicPromHandler // Holds a MySQL DB Instance if configured mysqlClient *sql.DB @@ -123,9 +125,13 @@ func NewMetrics(port uint16, logLevel log.Level) (*Metrics, error) { metrics.serviceMetrics[chiaServiceHarvester] = &HarvesterServiceMetrics{metrics: metrics} metrics.serviceMetrics[chiaServiceFarmer] = &FarmerServiceMetrics{metrics: metrics} + // See if we can get the network now + // If not, the reconnect handler will handle it later + _, _ = metrics.checkNetwork() + // Init each service's metrics for _, service := range metrics.serviceMetrics { - service.InitMetrics() + service.InitMetrics(metrics.network) } return metrics, nil @@ -154,6 +160,40 @@ func (m *Metrics) createDBClient() error { return nil } +// Returns boolean indicating if network changed from previously known value +func (m *Metrics) checkNetwork() (bool, error) { + var currentNetwork string + var newNetwork string + if m.network == nil { + currentNetwork = "" + } else { + currentNetwork = *m.network + } + + m.client.SetSyncMode() + defer m.client.SetAsyncMode() + + netInfo, _, err := m.client.DaemonService.GetNetworkInfo(&rpc.GetNetworkInfoOptions{}) + if err != nil { + return false, fmt.Errorf("error checking network info: %w", err) + } + + if netInfo != nil && netInfo.NetworkName.IsPresent() { + network := netInfo.NetworkName.MustGet() + m.network = &network + newNetwork = network + } else { + m.network = nil + newNetwork = "" + } + + if currentNetwork != newNetwork { + return true, nil + } + + return false, nil +} + // newGauge returns a lazy gauge that follows naming conventions func (m *Metrics) newGauge(service chiaService, name string, help string) *wrappedPrometheus.LazyGauge { opts := prometheus.GaugeOpts{ @@ -163,6 +203,12 @@ func (m *Metrics) newGauge(service chiaService, name string, help string) *wrapp Help: help, } + if m.network != nil { + opts.ConstLabels = map[string]string{ + "network": *m.network, + } + } + gm := prometheus.NewGauge(opts) lg := &wrappedPrometheus.LazyGauge{ @@ -183,6 +229,12 @@ func (m *Metrics) newGaugeVec(service chiaService, name string, help string, lab Help: help, } + if m.network != nil { + opts.ConstLabels = map[string]string{ + "network": *m.network, + } + } + gm := prometheus.NewGaugeVec(opts, labels) m.registry.MustRegister(gm) @@ -199,6 +251,12 @@ func (m *Metrics) newCounter(service chiaService, name string, help string) *wra Help: help, } + if m.network != nil { + opts.ConstLabels = map[string]string{ + "network": *m.network, + } + } + cm := prometheus.NewCounter(opts) lc := &wrappedPrometheus.LazyCounter{ @@ -218,6 +276,12 @@ func (m *Metrics) newCounterVec(service chiaService, name string, help string, l Help: help, } + if m.network != nil { + opts.ConstLabels = map[string]string{ + "network": *m.network, + } + } + gm := prometheus.NewCounterVec(opts, labels) m.registry.MustRegister(gm) @@ -237,7 +301,7 @@ func (m *Metrics) OpenWebsocket() error { return err } - err = m.client.AddHandler(m.websocketReceive) + _, err = m.client.AddHandler(m.websocketReceive) if err != nil { return err } @@ -245,6 +309,19 @@ func (m *Metrics) OpenWebsocket() error { m.client.AddDisconnectHandler(m.disconnectHandler) m.client.AddReconnectHandler(m.reconnectHandler) + // First, we check the network and see if it changed + // If changed, we completely replace the prometheus registry with a new registry and re-init all metrics + // otherwise, we call the reconnected handlers + changed, _ := m.checkNetwork() + if changed { + m.registry = prometheus.NewRegistry() + m.dynamicPromHandler.updateHandler(promhttp.HandlerFor(m.registry, promhttp.HandlerOpts{})) + // Init each service's metrics + for _, service := range m.serviceMetrics { + service.InitMetrics(m.network) + } + } + for _, service := range m.serviceMetrics { service.InitialData() service.SetupPollingMetrics() @@ -264,7 +341,9 @@ func (m *Metrics) CloseWebsocket() error { func (m *Metrics) StartServer() error { log.Printf("Starting metrics server on port %d", m.metricsPort) - http.Handle("/metrics", promhttp.HandlerFor(m.registry, promhttp.HandlerOpts{})) + m.dynamicPromHandler = &dynamicPromHandler{} + m.dynamicPromHandler.updateHandler(promhttp.HandlerFor(m.registry, promhttp.HandlerOpts{})) + http.Handle("/metrics", m.dynamicPromHandler) http.HandleFunc("/healthz", healthcheckEndpoint) return http.ListenAndServe(fmt.Sprintf(":%d", m.metricsPort), nil) } @@ -303,8 +382,30 @@ func (m *Metrics) disconnectHandler() { func (m *Metrics) reconnectHandler() { log.Debug("Calling reconnect handlers") - for _, service := range m.serviceMetrics { - service.Reconnected() + + // First, we check the network and see if it changed + // If changed, we completely replace the prometheus registry with a new registry and re-init all metrics + // otherwise, we call the reconnected handlers + changed, err := m.checkNetwork() + if changed || err != nil { + if err != nil { + m.network = nil + log.Errorf("Error checking network. Assuming network changed and resetting metrics: %s\n", err.Error()) + } + + log.Info("Network Changed, resetting all metrics") + m.registry = prometheus.NewRegistry() + m.dynamicPromHandler.updateHandler(promhttp.HandlerFor(m.registry, promhttp.HandlerOpts{})) + // Init each service's metrics + for _, service := range m.serviceMetrics { + service.InitMetrics(m.network) + service.InitialData() + } + } else { + log.Debug("Network did not change") + for _, service := range m.serviceMetrics { + service.Reconnected() + } } } diff --git a/internal/metrics/promhandler.go b/internal/metrics/promhandler.go new file mode 100644 index 0000000..05cdee0 --- /dev/null +++ b/internal/metrics/promhandler.go @@ -0,0 +1,22 @@ +package metrics +// Everything in here is to support making a new prometheus registry and having the http endpoint pick it up seamlessly + +import ( + "net/http" + "sync/atomic" +) + +type dynamicPromHandler struct { + handler atomic.Value // Stores the current http.Handler +} + +func (d *dynamicPromHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { + // Retrieve the current handler and serve the request + handler := d.handler.Load().(http.Handler) + handler.ServeHTTP(w, r) +} + +// Function to update the handler +func (d *dynamicPromHandler) updateHandler(newHandler http.Handler) { + d.handler.Store(newHandler) +} diff --git a/internal/metrics/timelord.go b/internal/metrics/timelord.go index 91bf740..653051c 100644 --- a/internal/metrics/timelord.go +++ b/internal/metrics/timelord.go @@ -31,7 +31,7 @@ type TimelordServiceMetrics struct { } // InitMetrics sets all the metrics properties -func (s *TimelordServiceMetrics) InitMetrics() { +func (s *TimelordServiceMetrics) InitMetrics(network *string) { s.fastestTimelord = s.metrics.newCounter(chiaServiceTimelord, "fastest_timelord", "Counter for how many times this timelord has been fastest since the exporter has been running") s.slowTimelord = s.metrics.newCounter(chiaServiceTimelord, "slow_timelord", "Counter for how many times this timelord has NOT been the fastest since the exporter has been running") s.estimatedIPS = s.metrics.newGauge(chiaServiceTimelord, "estimated_ips", "Current estimated IPS. Updated every time a new PoT Challenge is complete") diff --git a/internal/metrics/wallet.go b/internal/metrics/wallet.go index 688f9db..56cfcca 100644 --- a/internal/metrics/wallet.go +++ b/internal/metrics/wallet.go @@ -39,7 +39,7 @@ type WalletServiceMetrics struct { } // InitMetrics sets all the metrics properties -func (s *WalletServiceMetrics) InitMetrics() { +func (s *WalletServiceMetrics) InitMetrics(network *string) { // Connection Metrics s.connectionCount = s.metrics.newGaugeVec(chiaServiceWallet, "connection_count", "Number of active connections for each type of peer", []string{"node_type"})